parse_qs / urlparse
from urllib.parse import parse_qs
from urllib.parse import urlparse
# Sample URL that contains every component urlparse() can split out.
url = 'https://server.tld/path/to/resource;par?value-one=foo&value-two=bar&value-two=baz#xyz'
parsed = urlparse(url)

# Print each component as "<name>: <value>". Expected output:
#   scheme: https
#   netloc: server.tld
#   path: /path/to/resource
#   params: par
#   query: value-one=foo&value-two=bar&value-two=baz
#   fragment: xyz
for component in ('scheme', 'netloc', 'path', 'params', 'query', 'fragment'):
    print(f'{component}: {getattr(parsed, component)}')
print()

# parse_qs() maps every key to a *list* of values, even when the key
# occurs only once in the query string.
values = parse_qs(parsed.query)
print(values['value-one'])  # ['foo']
print(values['value-two'])  # ['bar', 'baz']

# For a key that is absent, fall back to a one-element default list so
# that indexing with [0] still works.
fallback = values.get('unobtainium', ['n/a'])
print(fallback[0])  # n/a
Note: the values in the dict returned by parse_qs are lists, even for a key that occurs only once!
urljoin
import urllib.parse
# Resolve several references against the same absolute base URL.
base = 'https://tld.xy/path/to/xyz.html'
for reference in (
    '/abc.html',                               # https://tld.xy/abc.html
    'abc.html',                                # https://tld.xy/path/to/abc.html
    '../abc.html',                             # https://tld.xy/path/abc.html
    'https://another.tld.xy/foo/bar/baz.hml',  # https://another.tld.xy/foo/bar/baz.hml
):
    print(urllib.parse.urljoin(base, reference))

# The base may itself be relative; the reference then replaces its last segment.
print(urllib.parse.urljoin('one/two/three.html', 'four/five.html'))  # one/two/four/five.html