diff --git a/SPECS/python3/CVE-2024-11168.patch b/SPECS/python3/CVE-2024-11168.patch
new file mode 100644
index 00000000000..25646db3822
--- /dev/null
+++ b/SPECS/python3/CVE-2024-11168.patch
@@ -0,0 +1,2718 @@
+From 9e35c846cce147f241b6857d5007905fb54e5806 Mon Sep 17 00:00:00 2001
+From: ankita
+Date: Fri, 15 Nov 2024 18:38:46 +0530
+Subject: [PATCH] Adds checks to ensure that bracketed hosts found by urlsplit
+ are of IPv6 or IPvFuture format. Fixes CVE-2024-11168
+
+Signed-off-by: ankita
+---
+ Lib/test/test_urlparse.py                     | 26 +++++++++++++++++++++
+ Lib/urllib/parse.py                           | 16 ++++++++++++++-
+ ...-04-26-09-54-25.gh-issue-103848.aDSnpR.rst |  2 ++
+ 3 files changed, 43 insertions(+), 1 deletion(-)
+ create mode 100644 Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst
+
+diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
+index 574da5b..c84df23 100644
+--- a/Lib/test/test_urlparse.py
++++ b/Lib/test/test_urlparse.py
+@@ -1071,6 +1071,32 @@ class UrlParseTestCase(unittest.TestCase):
+         self.assertEqual(p2.scheme, 'tel')
+         self.assertEqual(p2.path, '+31641044153')
+ 
++    def test_invalid_bracketed_hosts(self):
++        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query')
++        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query')
++        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query')
++        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query')
++        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query')
++        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query')
++        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query')
++        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
++        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
++        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path')
++
++    def test_splitting_bracketed_hosts(self):
++        p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
++        self.assertEqual(p1.hostname, 'v6a.ip')
++        self.assertEqual(p1.username, 'user')
++        self.assertEqual(p1.path, '/path')
++        p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query')
++        self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test')
++        self.assertEqual(p2.username, 'user')
++        self.assertEqual(p2.path, '/path')
++        p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query')
++        self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test')
++        self.assertEqual(p3.username, 'user')
++        self.assertEqual(p3.path, '/path')
++
+     def test_port_casting_failure_message(self):
+         message = "Port could not be cast to integer value as 'oracle'"
+         p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
++RFC2396_BASE = "http://a/b/c/d;p?q" ++RFC3986_BASE = 'http://a/b/c/d;p?q' ++SIMPLE_BASE = 'http://a/b/c/d' ++ ++# Each parse_qsl testcase is a two-tuple that contains ++# a string with the query and a list with the expected result. ++ ++parse_qsl_test_cases = [ ++ ("", []), ++ ("&", []), ++ ("&&", []), ++ ("=", [('', '')]), ++ ("=a", [('', 'a')]), ++ ("a", [('a', '')]), ++ ("a=", [('a', '')]), ++ ("&a=b", [('a', 'b')]), ++ ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), ++ ("a=1&a=2", [('a', '1'), ('a', '2')]), ++ (b"", []), ++ (b"&", []), ++ (b"&&", []), ++ (b"=", [(b'', b'')]), ++ (b"=a", [(b'', b'a')]), ++ (b"a", [(b'a', b'')]), ++ (b"a=", [(b'a', b'')]), ++ (b"&a=b", [(b'a', b'b')]), ++ (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), ++ (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]), ++ (";a=b", [(';a', 'b')]), ++ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]), ++ (b";a=b", [(b';a', b'b')]), ++ (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]), ++] ++ ++# Each parse_qs testcase is a two-tuple that contains ++# a string with the query and a dictionary with the expected result. ++ ++parse_qs_test_cases = [ ++ ("", {}), ++ ("&", {}), ++ ("&&", {}), ++ ("=", {'': ['']}), ++ ("=a", {'': ['a']}), ++ ("a", {'a': ['']}), ++ ("a=", {'a': ['']}), ++ ("&a=b", {'a': ['b']}), ++ ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), ++ ("a=1&a=2", {'a': ['1', '2']}), ++ (b"", {}), ++ (b"&", {}), ++ (b"&&", {}), ++ (b"=", {b'': [b'']}), ++ (b"=a", {b'': [b'a']}), ++ (b"a", {b'a': [b'']}), ++ (b"a=", {b'a': [b'']}), ++ (b"&a=b", {b'a': [b'b']}), ++ (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), ++ (b"a=1&a=2", {b'a': [b'1', b'2']}), ++ (";a=b", {';a': ['b']}), ++ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}), ++ (b";a=b", {b';a': [b'b']}), ++ (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}), ++] ++ ++class UrlParseTestCase(unittest.TestCase): ++ ++ def checkRoundtrips(self, url, parsed, split): ++ result = urllib.parse.urlparse(url) ++ self.assertEqual(result, parsed) ++ t = (result.scheme, result.netloc, result.path, ++ result.params, result.query, result.fragment) ++ self.assertEqual(t, parsed) ++ # put it back together and it should be the same ++ result2 = urllib.parse.urlunparse(result) ++ self.assertEqual(result2, url) ++ self.assertEqual(result2, result.geturl()) ++ ++ # the result of geturl() is a fixpoint; we can always parse it ++ # again to get the same result: ++ result3 = urllib.parse.urlparse(result.geturl()) ++ self.assertEqual(result3.geturl(), result.geturl()) ++ self.assertEqual(result3, result) ++ self.assertEqual(result3.scheme, result.scheme) ++ self.assertEqual(result3.netloc, result.netloc) ++ self.assertEqual(result3.path, result.path) ++ self.assertEqual(result3.params, result.params) ++ self.assertEqual(result3.query, result.query) ++ self.assertEqual(result3.fragment, result.fragment) ++ self.assertEqual(result3.username, result.username) ++ self.assertEqual(result3.password, result.password) ++ self.assertEqual(result3.hostname, result.hostname) ++ self.assertEqual(result3.port, result.port) ++ ++ # check the roundtrip using urlsplit() as well ++ result = urllib.parse.urlsplit(url) ++ self.assertEqual(result, split) ++ t = (result.scheme, result.netloc, result.path, ++ result.query, result.fragment) ++ self.assertEqual(t, split) ++ result2 = urllib.parse.urlunsplit(result) ++ self.assertEqual(result2, url) ++ self.assertEqual(result2, result.geturl()) ++ ++ # check the fixpoint property of re-parsing the result of geturl() ++ result3 = urllib.parse.urlsplit(result.geturl()) ++ 
self.assertEqual(result3.geturl(), result.geturl()) ++ self.assertEqual(result3, result) ++ self.assertEqual(result3.scheme, result.scheme) ++ self.assertEqual(result3.netloc, result.netloc) ++ self.assertEqual(result3.path, result.path) ++ self.assertEqual(result3.query, result.query) ++ self.assertEqual(result3.fragment, result.fragment) ++ self.assertEqual(result3.username, result.username) ++ self.assertEqual(result3.password, result.password) ++ self.assertEqual(result3.hostname, result.hostname) ++ self.assertEqual(result3.port, result.port) ++ ++ def test_qsl(self): ++ for orig, expect in parse_qsl_test_cases: ++ result = urllib.parse.parse_qsl(orig, keep_blank_values=True) ++ self.assertEqual(result, expect, "Error parsing %r" % orig) ++ expect_without_blanks = [v for v in expect if len(v[1])] ++ result = urllib.parse.parse_qsl(orig, keep_blank_values=False) ++ self.assertEqual(result, expect_without_blanks, ++ "Error parsing %r" % orig) ++ ++ def test_qs(self): ++ for orig, expect in parse_qs_test_cases: ++ result = urllib.parse.parse_qs(orig, keep_blank_values=True) ++ self.assertEqual(result, expect, "Error parsing %r" % orig) ++ expect_without_blanks = {v: expect[v] ++ for v in expect if len(expect[v][0])} ++ result = urllib.parse.parse_qs(orig, keep_blank_values=False) ++ self.assertEqual(result, expect_without_blanks, ++ "Error parsing %r" % orig) ++ ++ def test_roundtrips(self): ++ str_cases = [ ++ ('file:///tmp/junk.txt', ++ ('file', '', '/tmp/junk.txt', '', '', ''), ++ ('file', '', '/tmp/junk.txt', '', '')), ++ ('imap://mail.python.org/mbox1', ++ ('imap', 'mail.python.org', '/mbox1', '', '', ''), ++ ('imap', 'mail.python.org', '/mbox1', '', '')), ++ ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf', ++ ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', ++ '', '', ''), ++ ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', ++ '', '')), ++ ('nfs://server/path/to/file.txt', ++ ('nfs', 'server', '/path/to/file.txt', '', '', ''), ++ ('nfs', 'server', '/path/to/file.txt', '', '')), ++ ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/', ++ ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', ++ '', '', ''), ++ ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', ++ '', '')), ++ ('git+ssh://git@github.com/user/project.git', ++ ('git+ssh', 'git@github.com','/user/project.git', ++ '','',''), ++ ('git+ssh', 'git@github.com','/user/project.git', ++ '', '')), ++ ] ++ def _encode(t): ++ return (t[0].encode('ascii'), ++ tuple(x.encode('ascii') for x in t[1]), ++ tuple(x.encode('ascii') for x in t[2])) ++ bytes_cases = [_encode(x) for x in str_cases] ++ for url, parsed, split in str_cases + bytes_cases: ++ self.checkRoundtrips(url, parsed, split) ++ ++ def test_http_roundtrips(self): ++ # urllib.parse.urlsplit treats 'http:' as an optimized special case, ++ # so we test both 'http:' and 'https:' in all the following. ++ # Three cheers for white box knowledge! 
++ str_cases = [ ++ ('://www.python.org', ++ ('www.python.org', '', '', '', ''), ++ ('www.python.org', '', '', '')), ++ ('://www.python.org#abc', ++ ('www.python.org', '', '', '', 'abc'), ++ ('www.python.org', '', '', 'abc')), ++ ('://www.python.org?q=abc', ++ ('www.python.org', '', '', 'q=abc', ''), ++ ('www.python.org', '', 'q=abc', '')), ++ ('://www.python.org/#abc', ++ ('www.python.org', '/', '', '', 'abc'), ++ ('www.python.org', '/', '', 'abc')), ++ ('://a/b/c/d;p?q#f', ++ ('a', '/b/c/d', 'p', 'q', 'f'), ++ ('a', '/b/c/d;p', 'q', 'f')), ++ ] ++ def _encode(t): ++ return (t[0].encode('ascii'), ++ tuple(x.encode('ascii') for x in t[1]), ++ tuple(x.encode('ascii') for x in t[2])) ++ bytes_cases = [_encode(x) for x in str_cases] ++ str_schemes = ('http', 'https') ++ bytes_schemes = (b'http', b'https') ++ str_tests = str_schemes, str_cases ++ bytes_tests = bytes_schemes, bytes_cases ++ for schemes, test_cases in (str_tests, bytes_tests): ++ for scheme in schemes: ++ for url, parsed, split in test_cases: ++ url = scheme + url ++ parsed = (scheme,) + parsed ++ split = (scheme,) + split ++ self.checkRoundtrips(url, parsed, split) ++ ++ def checkJoin(self, base, relurl, expected): ++ str_components = (base, relurl, expected) ++ self.assertEqual(urllib.parse.urljoin(base, relurl), expected) ++ bytes_components = baseb, relurlb, expectedb = [ ++ x.encode('ascii') for x in str_components] ++ self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb) ++ ++ def test_unparse_parse(self): ++ str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',] ++ bytes_cases = [x.encode('ascii') for x in str_cases] ++ for u in str_cases + bytes_cases: ++ self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u) ++ self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u) ++ ++ def test_RFC1808(self): ++ # "normal" cases from RFC 1808: ++ self.checkJoin(RFC1808_BASE, 'g:h', 'g:h') ++ self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g') ++ self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g') ++ self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/') ++ self.checkJoin(RFC1808_BASE, '/g', 'http://a/g') ++ self.checkJoin(RFC1808_BASE, '//g', 'http://g') ++ self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y') ++ self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') ++ self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s') ++ self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s') ++ self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') ++ self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s') ++ self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x') ++ self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s') ++ self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/') ++ self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/') ++ self.checkJoin(RFC1808_BASE, '..', 'http://a/b/') ++ self.checkJoin(RFC1808_BASE, '../', 'http://a/b/') ++ self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g') ++ self.checkJoin(RFC1808_BASE, '../..', 'http://a/') ++ self.checkJoin(RFC1808_BASE, '../../', 'http://a/') ++ self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g') ++ ++ # "abnormal" cases from RFC 1808: ++ self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f') ++ self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.') ++ self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g') ++ self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..') ++ self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g') ++ self.checkJoin(RFC1808_BASE, './../g', 
'http://a/b/g') ++ self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/') ++ self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h') ++ self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h') ++ ++ # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808), ++ # so we'll not actually run these tests (which expect 1808 behavior). ++ #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g') ++ #self.checkJoin(RFC1808_BASE, 'http:', 'http:') ++ ++ # XXX: The following tests are no longer compatible with RFC3986 ++ # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g') ++ # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g') ++ # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g') ++ # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g') ++ ++ ++ def test_RFC2368(self): ++ # Issue 11467: path that starts with a number is not parsed correctly ++ self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'), ++ ('mailto', '', '1337@example.org', '', '', '')) ++ ++ def test_RFC2396(self): ++ # cases from RFC 2396 ++ ++ self.checkJoin(RFC2396_BASE, 'g:h', 'g:h') ++ self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g') ++ self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g') ++ self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/') ++ self.checkJoin(RFC2396_BASE, '/g', 'http://a/g') ++ self.checkJoin(RFC2396_BASE, '//g', 'http://g') ++ self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y') ++ self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s') ++ self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s') ++ self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s') ++ self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x') ++ self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s') ++ self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/') ++ self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/') ++ self.checkJoin(RFC2396_BASE, '..', 'http://a/b/') ++ self.checkJoin(RFC2396_BASE, '../', 'http://a/b/') ++ self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g') ++ self.checkJoin(RFC2396_BASE, '../..', 'http://a/') ++ self.checkJoin(RFC2396_BASE, '../../', 'http://a/') ++ self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g') ++ self.checkJoin(RFC2396_BASE, '', RFC2396_BASE) ++ self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.') ++ self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g') ++ self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..') ++ self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g') ++ self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g') ++ self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/') ++ self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h') ++ self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h') ++ self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y') ++ self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y') ++ self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') ++ self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x') ++ self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') ++ self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x') ++ ++ # XXX: The following tests are no longer compatible with RFC3986 ++ # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g') ++ # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g') ++ # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g') ++ # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g') ++ ++ def test_RFC3986(self): ++ self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y') ++ self.checkJoin(RFC3986_BASE, 
';x', 'http://a/b/c/;x') ++ self.checkJoin(RFC3986_BASE, 'g:h','g:h') ++ self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g') ++ self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g') ++ self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/') ++ self.checkJoin(RFC3986_BASE, '/g','http://a/g') ++ self.checkJoin(RFC3986_BASE, '//g','http://g') ++ self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y') ++ self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y') ++ self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s') ++ self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s') ++ self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s') ++ self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x') ++ self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x') ++ self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s') ++ self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q') ++ self.checkJoin(RFC3986_BASE, '.','http://a/b/c/') ++ self.checkJoin(RFC3986_BASE, './','http://a/b/c/') ++ self.checkJoin(RFC3986_BASE, '..','http://a/b/') ++ self.checkJoin(RFC3986_BASE, '../','http://a/b/') ++ self.checkJoin(RFC3986_BASE, '../g','http://a/b/g') ++ self.checkJoin(RFC3986_BASE, '../..','http://a/') ++ self.checkJoin(RFC3986_BASE, '../../','http://a/') ++ self.checkJoin(RFC3986_BASE, '../../g','http://a/g') ++ self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g') ++ ++ # Abnormal Examples ++ ++ # The 'abnormal scenarios' are incompatible with RFC2986 parsing ++ # Tests are here for reference. ++ ++ self.checkJoin(RFC3986_BASE, '../../../g','http://a/g') ++ self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g') ++ self.checkJoin(RFC3986_BASE, '/./g','http://a/g') ++ self.checkJoin(RFC3986_BASE, '/../g','http://a/g') ++ self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.') ++ self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g') ++ self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..') ++ self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g') ++ self.checkJoin(RFC3986_BASE, './../g','http://a/b/g') ++ self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/') ++ self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h') ++ self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h') ++ self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y') ++ self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y') ++ self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x') ++ self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x') ++ self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x') ++ self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x') ++ #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser ++ self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser ++ ++ # Test for issue9721 ++ self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x') ++ ++ def test_urljoins(self): ++ self.checkJoin(SIMPLE_BASE, 'g:h','g:h') ++ self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g') ++ self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d') ++ self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g') ++ self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g') ++ self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/') ++ self.checkJoin(SIMPLE_BASE, '/g','http://a/g') ++ self.checkJoin(SIMPLE_BASE, '//g','http://g') ++ self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y') ++ self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y') ++ self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x') ++ self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/') ++ self.checkJoin(SIMPLE_BASE, './','http://a/b/c/') 
++ self.checkJoin(SIMPLE_BASE, '..','http://a/b/') ++ self.checkJoin(SIMPLE_BASE, '../','http://a/b/') ++ self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g') ++ self.checkJoin(SIMPLE_BASE, '../..','http://a/') ++ self.checkJoin(SIMPLE_BASE, '../../g','http://a/g') ++ self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g') ++ self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/') ++ self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h') ++ self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h') ++ self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g') ++ self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d') ++ self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y') ++ self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y') ++ self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x') ++ self.checkJoin('http:///', '..','http:///') ++ self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x') ++ self.checkJoin('', 'http://a/./g', 'http://a/./g') ++ self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2') ++ self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2') ++ self.checkJoin('ws://a/b','g','ws://a/g') ++ self.checkJoin('wss://a/b','g','wss://a/g') ++ ++ # XXX: The following tests are no longer compatible with RFC3986 ++ # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g') ++ # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g') ++ ++ # test for issue22118 duplicate slashes ++ self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo') ++ ++ # Non-RFC-defined tests, covering variations of base and trailing ++ # slashes ++ self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/') ++ self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/') ++ self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/') ++ self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/') ++ self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g') ++ self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/') ++ ++ # issue 23703: don't duplicate filename ++ self.checkJoin('a', 'b', 'b') ++ ++ def test_RFC2732(self): ++ str_cases = [ ++ ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), ++ ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432), ++ ('http://[::1]:5432/foo/', '::1', 5432), ++ ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432), ++ ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432), ++ ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/', ++ 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432), ++ ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432), ++ ('http://[::ffff:12.34.56.78]:5432/foo/', ++ '::ffff:12.34.56.78', 5432), ++ ('http://Test.python.org/foo/', 'test.python.org', None), ++ ('http://12.34.56.78/foo/', '12.34.56.78', None), ++ ('http://[::1]/foo/', '::1', None), ++ ('http://[dead:beef::1]/foo/', 'dead:beef::1', None), ++ ('http://[dead:beef::]/foo/', 'dead:beef::', None), ++ ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/', ++ 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None), ++ ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None), ++ ('http://[::ffff:12.34.56.78]/foo/', ++ '::ffff:12.34.56.78', None), ++ ('http://Test.python.org:/foo/', 'test.python.org', None), ++ ('http://12.34.56.78:/foo/', '12.34.56.78', None), ++ ('http://[::1]:/foo/', '::1', None), ++ ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None), ++ ('http://[dead:beef::]:/foo/', 'dead:beef::', None), ++ ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/', ++ 
'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None), ++ ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None), ++ ('http://[::ffff:12.34.56.78]:/foo/', ++ '::ffff:12.34.56.78', None), ++ ] ++ def _encode(t): ++ return t[0].encode('ascii'), t[1].encode('ascii'), t[2] ++ bytes_cases = [_encode(x) for x in str_cases] ++ for url, hostname, port in str_cases + bytes_cases: ++ urlparsed = urllib.parse.urlparse(url) ++ self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port)) ++ ++ str_cases = [ ++ 'http://::12.34.56.78]/', ++ 'http://[::1/foo/', ++ 'ftp://[::1/foo/bad]/bad', ++ 'http://[::1/foo/bad]/bad', ++ 'http://[::ffff:12.34.56.78'] ++ bytes_cases = [x.encode('ascii') for x in str_cases] ++ for invalid_url in str_cases + bytes_cases: ++ self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url) ++ ++ def test_urldefrag(self): ++ str_cases = [ ++ ('http://python.org#frag', 'http://python.org', 'frag'), ++ ('http://python.org', 'http://python.org', ''), ++ ('http://python.org/#frag', 'http://python.org/', 'frag'), ++ ('http://python.org/', 'http://python.org/', ''), ++ ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'), ++ ('http://python.org/?q', 'http://python.org/?q', ''), ++ ('http://python.org/p#frag', 'http://python.org/p', 'frag'), ++ ('http://python.org/p?q', 'http://python.org/p?q', ''), ++ (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'), ++ (RFC2396_BASE, 'http://a/b/c/d;p?q', ''), ++ ] ++ def _encode(t): ++ return type(t)(x.encode('ascii') for x in t) ++ bytes_cases = [_encode(x) for x in str_cases] ++ for url, defrag, frag in str_cases + bytes_cases: ++ result = urllib.parse.urldefrag(url) ++ self.assertEqual(result.geturl(), url) ++ self.assertEqual(result, (defrag, frag)) ++ self.assertEqual(result.url, defrag) ++ self.assertEqual(result.fragment, frag) ++ ++ def test_urlsplit_scoped_IPv6(self): ++ p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234') ++ self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt") ++ self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234') ++ ++ p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234') ++ self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt") ++ self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234') ++ ++ def test_urlsplit_attributes(self): ++ url = "HTTP://WWW.PYTHON.ORG/doc/#frag" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, "http") ++ self.assertEqual(p.netloc, "WWW.PYTHON.ORG") ++ self.assertEqual(p.path, "/doc/") ++ self.assertEqual(p.query, "") ++ self.assertEqual(p.fragment, "frag") ++ self.assertEqual(p.username, None) ++ self.assertEqual(p.password, None) ++ self.assertEqual(p.hostname, "www.python.org") ++ self.assertEqual(p.port, None) ++ # geturl() won't return exactly the original URL in this case ++ # since the scheme is always case-normalized ++ # We handle this by ignoring the first 4 characters of the URL ++ self.assertEqual(p.geturl()[4:], url[4:]) ++ ++ url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, "http") ++ self.assertEqual(p.netloc, "User:Pass@www.python.org:080") ++ self.assertEqual(p.path, "/doc/") ++ self.assertEqual(p.query, "query=yes") ++ self.assertEqual(p.fragment, "frag") ++ self.assertEqual(p.username, "User") ++ self.assertEqual(p.password, "Pass") ++ self.assertEqual(p.hostname, "www.python.org") ++ self.assertEqual(p.port, 80) ++ self.assertEqual(p.geturl(), url) ++ ++ # Addressing issue1698, 
which suggests Username can contain ++ # "@" characters. Though not RFC compliant, many ftp sites allow ++ # and request email addresses as usernames. ++ ++ url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, "http") ++ self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080") ++ self.assertEqual(p.path, "/doc/") ++ self.assertEqual(p.query, "query=yes") ++ self.assertEqual(p.fragment, "frag") ++ self.assertEqual(p.username, "User@example.com") ++ self.assertEqual(p.password, "Pass") ++ self.assertEqual(p.hostname, "www.python.org") ++ self.assertEqual(p.port, 80) ++ self.assertEqual(p.geturl(), url) ++ ++ # And check them all again, only with bytes this time ++ url = b"HTTP://WWW.PYTHON.ORG/doc/#frag" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, b"http") ++ self.assertEqual(p.netloc, b"WWW.PYTHON.ORG") ++ self.assertEqual(p.path, b"/doc/") ++ self.assertEqual(p.query, b"") ++ self.assertEqual(p.fragment, b"frag") ++ self.assertEqual(p.username, None) ++ self.assertEqual(p.password, None) ++ self.assertEqual(p.hostname, b"www.python.org") ++ self.assertEqual(p.port, None) ++ self.assertEqual(p.geturl()[4:], url[4:]) ++ ++ url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, b"http") ++ self.assertEqual(p.netloc, b"User:Pass@www.python.org:080") ++ self.assertEqual(p.path, b"/doc/") ++ self.assertEqual(p.query, b"query=yes") ++ self.assertEqual(p.fragment, b"frag") ++ self.assertEqual(p.username, b"User") ++ self.assertEqual(p.password, b"Pass") ++ self.assertEqual(p.hostname, b"www.python.org") ++ self.assertEqual(p.port, 80) ++ self.assertEqual(p.geturl(), url) ++ ++ url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, b"http") ++ self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080") ++ self.assertEqual(p.path, b"/doc/") ++ self.assertEqual(p.query, b"query=yes") ++ self.assertEqual(p.fragment, b"frag") ++ self.assertEqual(p.username, b"User@example.com") ++ self.assertEqual(p.password, b"Pass") ++ self.assertEqual(p.hostname, b"www.python.org") ++ self.assertEqual(p.port, 80) ++ self.assertEqual(p.geturl(), url) ++ ++ # Verify an illegal port raises ValueError ++ url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag" ++ p = urllib.parse.urlsplit(url) ++ with self.assertRaisesRegex(ValueError, "out of range"): ++ p.port ++ ++ def test_urlsplit_remove_unsafe_bytes(self): ++ # Remove ASCII tabs and newlines from input ++ url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, "http") ++ self.assertEqual(p.netloc, "www.python.org") ++ self.assertEqual(p.path, "/javascript:alert('msg')/") ++ self.assertEqual(p.query, "query=something") ++ self.assertEqual(p.fragment, "fragment") ++ self.assertEqual(p.username, None) ++ self.assertEqual(p.password, None) ++ self.assertEqual(p.hostname, "www.python.org") ++ self.assertEqual(p.port, None) ++ self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment") ++ ++ # Remove ASCII tabs and newlines from input as bytes. 
++ url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, b"http") ++ self.assertEqual(p.netloc, b"www.python.org") ++ self.assertEqual(p.path, b"/javascript:alert('msg')/") ++ self.assertEqual(p.query, b"query=something") ++ self.assertEqual(p.fragment, b"fragment") ++ self.assertEqual(p.username, None) ++ self.assertEqual(p.password, None) ++ self.assertEqual(p.hostname, b"www.python.org") ++ self.assertEqual(p.port, None) ++ self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment") ++ ++ # with scheme as cache-key ++ url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" ++ scheme = "ht\ntp" ++ for _ in range(2): ++ p = urllib.parse.urlsplit(url, scheme=scheme) ++ self.assertEqual(p.scheme, "http") ++ self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment") ++ ++ def test_urlsplit_strip_url(self): ++ noise = bytes(range(0, 0x20 + 1)) ++ base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag" ++ ++ url = noise.decode("utf-8") + base_url ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, "http") ++ self.assertEqual(p.netloc, "User:Pass@www.python.org:080") ++ self.assertEqual(p.path, "/doc/") ++ self.assertEqual(p.query, "query=yes") ++ self.assertEqual(p.fragment, "frag") ++ self.assertEqual(p.username, "User") ++ self.assertEqual(p.password, "Pass") ++ self.assertEqual(p.hostname, "www.python.org") ++ self.assertEqual(p.port, 80) ++ self.assertEqual(p.geturl(), base_url) ++ ++ url = noise + base_url.encode("utf-8") ++ p = urllib.parse.urlsplit(url) ++ self.assertEqual(p.scheme, b"http") ++ self.assertEqual(p.netloc, b"User:Pass@www.python.org:080") ++ self.assertEqual(p.path, b"/doc/") ++ self.assertEqual(p.query, b"query=yes") ++ self.assertEqual(p.fragment, b"frag") ++ self.assertEqual(p.username, b"User") ++ self.assertEqual(p.password, b"Pass") ++ self.assertEqual(p.hostname, b"www.python.org") ++ self.assertEqual(p.port, 80) ++ self.assertEqual(p.geturl(), base_url.encode("utf-8")) ++ ++ # Test that trailing space is preserved as some applications rely on ++ # this within query strings. ++ query_spaces_url = "https://www.python.org:88/doc/?query= " ++ p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url) ++ self.assertEqual(p.scheme, "https") ++ self.assertEqual(p.netloc, "www.python.org:88") ++ self.assertEqual(p.path, "/doc/") ++ self.assertEqual(p.query, "query= ") ++ self.assertEqual(p.port, 88) ++ self.assertEqual(p.geturl(), query_spaces_url) ++ ++ p = urllib.parse.urlsplit("www.pypi.org ") ++ # That "hostname" gets considered a "path" due to the ++ # trailing space and our existing logic... YUCK... ++ # and re-assembles via geturl aka unurlsplit into the original. ++ # django.core.validators.URLValidator (at least through v3.2) relies on ++ # this, for better or worse, to catch it in a ValidationError via its ++ # regular expressions. ++ # Here we test the basic round trip concept of such a trailing space. 
++ self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ") ++ ++ # with scheme as cache-key ++ url = "//www.python.org/" ++ scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8") ++ for _ in range(2): ++ p = urllib.parse.urlsplit(url, scheme=scheme) ++ self.assertEqual(p.scheme, "https") ++ self.assertEqual(p.geturl(), "https://www.python.org/") ++ ++ def test_attributes_bad_port(self): ++ """Check handling of invalid ports.""" ++ for bytes in (False, True): ++ for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): ++ for port in ("foo", "1.5", "-1", "0x10"): ++ with self.subTest(bytes=bytes, parse=parse, port=port): ++ netloc = "www.example.net:" + port ++ url = "http://" + netloc + "/" ++ if bytes: ++ netloc = netloc.encode("ascii") ++ url = url.encode("ascii") ++ p = parse(url) ++ self.assertEqual(p.netloc, netloc) ++ with self.assertRaises(ValueError): ++ p.port ++ ++ def test_attributes_without_netloc(self): ++ # This example is straight from RFC 3261. It looks like it ++ # should allow the username, hostname, and port to be filled ++ # in, but doesn't. Since it's a URI and doesn't use the ++ # scheme://netloc syntax, the netloc and related attributes ++ # should be left empty. ++ uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" ++ p = urllib.parse.urlsplit(uri) ++ self.assertEqual(p.netloc, "") ++ self.assertEqual(p.username, None) ++ self.assertEqual(p.password, None) ++ self.assertEqual(p.hostname, None) ++ self.assertEqual(p.port, None) ++ self.assertEqual(p.geturl(), uri) ++ ++ p = urllib.parse.urlparse(uri) ++ self.assertEqual(p.netloc, "") ++ self.assertEqual(p.username, None) ++ self.assertEqual(p.password, None) ++ self.assertEqual(p.hostname, None) ++ self.assertEqual(p.port, None) ++ self.assertEqual(p.geturl(), uri) ++ ++ # You guessed it, repeating the test with bytes input ++ uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" ++ p = urllib.parse.urlsplit(uri) ++ self.assertEqual(p.netloc, b"") ++ self.assertEqual(p.username, None) ++ self.assertEqual(p.password, None) ++ self.assertEqual(p.hostname, None) ++ self.assertEqual(p.port, None) ++ self.assertEqual(p.geturl(), uri) ++ ++ p = urllib.parse.urlparse(uri) ++ self.assertEqual(p.netloc, b"") ++ self.assertEqual(p.username, None) ++ self.assertEqual(p.password, None) ++ self.assertEqual(p.hostname, None) ++ self.assertEqual(p.port, None) ++ self.assertEqual(p.geturl(), uri) ++ ++ def test_noslash(self): ++ # Issue 1637: http://foo.com?query is legal ++ self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"), ++ ('http', 'example.com', '', '', 'blahblah=/foo', '')) ++ self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"), ++ (b'http', b'example.com', b'', b'', b'blahblah=/foo', b'')) ++ ++ def test_withoutscheme(self): ++ # Test urlparse without scheme ++ # Issue 754016: urlparse goes wrong with IP:port without scheme ++ # RFC 1808 specifies that netloc should start with //, urlparse expects ++ # the same, otherwise it classifies the portion of url as path. 
++ self.assertEqual(urllib.parse.urlparse("path"), ++ ('','','path','','','')) ++ self.assertEqual(urllib.parse.urlparse("//www.python.org:80"), ++ ('','www.python.org:80','','','','')) ++ self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), ++ ('http','www.python.org:80','','','','')) ++ # Repeat for bytes input ++ self.assertEqual(urllib.parse.urlparse(b"path"), ++ (b'',b'',b'path',b'',b'',b'')) ++ self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"), ++ (b'',b'www.python.org:80',b'',b'',b'',b'')) ++ self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), ++ (b'http',b'www.python.org:80',b'',b'',b'',b'')) ++ ++ def test_portseparator(self): ++ # Issue 754016 makes changes for port separator ':' from scheme separator ++ self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','','')) ++ self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','','')) ++ self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','','')) ++ self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','','')) ++ self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','','')) ++ self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), ++ ('http','www.python.org:80','','','','')) ++ # As usual, need to check bytes input as well ++ self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b'')) ++ self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b'')) ++ self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b'')) ++ self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b'')) ++ self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b'')) ++ self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), ++ (b'http',b'www.python.org:80',b'',b'',b'',b'')) ++ ++ def test_usingsys(self): ++ # Issue 3314: sys module is used in the error ++ self.assertRaises(TypeError, urllib.parse.urlencode, "foo") ++ ++ def test_anyscheme(self): ++ # Issue 7904: s3://foo.com/stuff has netloc "foo.com". ++ self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"), ++ ('s3', 'foo.com', '/stuff', '', '', '')) ++ self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"), ++ ('x-newscheme', 'foo.com', '/stuff', '', '', '')) ++ self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"), ++ ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment')) ++ self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"), ++ ('x-newscheme', 'foo.com', '/stuff', '', 'query', '')) ++ ++ # And for bytes... 
++ self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"), ++ (b's3', b'foo.com', b'/stuff', b'', b'', b'')) ++ self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"), ++ (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b'')) ++ self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"), ++ (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment')) ++ self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"), ++ (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'')) ++ ++ def test_default_scheme(self): ++ # Exercise the scheme parameter of urlparse() and urlsplit() ++ for func in (urllib.parse.urlparse, urllib.parse.urlsplit): ++ with self.subTest(function=func): ++ result = func("http://example.net/", "ftp") ++ self.assertEqual(result.scheme, "http") ++ result = func(b"http://example.net/", b"ftp") ++ self.assertEqual(result.scheme, b"http") ++ self.assertEqual(func("path", "ftp").scheme, "ftp") ++ self.assertEqual(func("path", scheme="ftp").scheme, "ftp") ++ self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp") ++ self.assertEqual(func("path").scheme, "") ++ self.assertEqual(func(b"path").scheme, b"") ++ self.assertEqual(func(b"path", "").scheme, b"") ++ ++ def test_parse_fragments(self): ++ # Exercise the allow_fragments parameter of urlparse() and urlsplit() ++ tests = ( ++ ("http:#frag", "path", "frag"), ++ ("//example.net#frag", "path", "frag"), ++ ("index.html#frag", "path", "frag"), ++ (";a=b#frag", "params", "frag"), ++ ("?a=b#frag", "query", "frag"), ++ ("#frag", "path", "frag"), ++ ("abc#@frag", "path", "@frag"), ++ ("//abc#@frag", "path", "@frag"), ++ ("//abc:80#@frag", "path", "@frag"), ++ ("//abc#@frag:80", "path", "@frag:80"), ++ ) ++ for url, attr, expected_frag in tests: ++ for func in (urllib.parse.urlparse, urllib.parse.urlsplit): ++ if attr == "params" and func is urllib.parse.urlsplit: ++ attr = "path" ++ with self.subTest(url=url, function=func): ++ result = func(url, allow_fragments=False) ++ self.assertEqual(result.fragment, "") ++ self.assertTrue( ++ getattr(result, attr).endswith("#" + expected_frag)) ++ self.assertEqual(func(url, "", False).fragment, "") ++ ++ result = func(url, allow_fragments=True) ++ self.assertEqual(result.fragment, expected_frag) ++ self.assertFalse( ++ getattr(result, attr).endswith(expected_frag)) ++ self.assertEqual(func(url, "", True).fragment, ++ expected_frag) ++ self.assertEqual(func(url).fragment, expected_frag) ++ ++ def test_mixed_types_rejected(self): ++ # Several functions that process either strings or ASCII encoded bytes ++ # accept multiple arguments. 
Check they reject mixed type input ++ with self.assertRaisesRegex(TypeError, "Cannot mix str"): ++ urllib.parse.urlparse("www.python.org", b"http") ++ with self.assertRaisesRegex(TypeError, "Cannot mix str"): ++ urllib.parse.urlparse(b"www.python.org", "http") ++ with self.assertRaisesRegex(TypeError, "Cannot mix str"): ++ urllib.parse.urlsplit("www.python.org", b"http") ++ with self.assertRaisesRegex(TypeError, "Cannot mix str"): ++ urllib.parse.urlsplit(b"www.python.org", "http") ++ with self.assertRaisesRegex(TypeError, "Cannot mix str"): ++ urllib.parse.urlunparse(( b"http", "www.python.org","","","","")) ++ with self.assertRaisesRegex(TypeError, "Cannot mix str"): ++ urllib.parse.urlunparse(("http", b"www.python.org","","","","")) ++ with self.assertRaisesRegex(TypeError, "Cannot mix str"): ++ urllib.parse.urlunsplit((b"http", "www.python.org","","","")) ++ with self.assertRaisesRegex(TypeError, "Cannot mix str"): ++ urllib.parse.urlunsplit(("http", b"www.python.org","","","")) ++ with self.assertRaisesRegex(TypeError, "Cannot mix str"): ++ urllib.parse.urljoin("http://python.org", b"http://python.org") ++ with self.assertRaisesRegex(TypeError, "Cannot mix str"): ++ urllib.parse.urljoin(b"http://python.org", "http://python.org") ++ ++ def _check_result_type(self, str_type): ++ num_args = len(str_type._fields) ++ bytes_type = str_type._encoded_counterpart ++ self.assertIs(bytes_type._decoded_counterpart, str_type) ++ str_args = ('',) * num_args ++ bytes_args = (b'',) * num_args ++ str_result = str_type(*str_args) ++ bytes_result = bytes_type(*bytes_args) ++ encoding = 'ascii' ++ errors = 'strict' ++ self.assertEqual(str_result, str_args) ++ self.assertEqual(bytes_result.decode(), str_args) ++ self.assertEqual(bytes_result.decode(), str_result) ++ self.assertEqual(bytes_result.decode(encoding), str_args) ++ self.assertEqual(bytes_result.decode(encoding), str_result) ++ self.assertEqual(bytes_result.decode(encoding, errors), str_args) ++ self.assertEqual(bytes_result.decode(encoding, errors), str_result) ++ self.assertEqual(bytes_result, bytes_args) ++ self.assertEqual(str_result.encode(), bytes_args) ++ self.assertEqual(str_result.encode(), bytes_result) ++ self.assertEqual(str_result.encode(encoding), bytes_args) ++ self.assertEqual(str_result.encode(encoding), bytes_result) ++ self.assertEqual(str_result.encode(encoding, errors), bytes_args) ++ self.assertEqual(str_result.encode(encoding, errors), bytes_result) ++ ++ def test_result_pairs(self): ++ # Check encoding and decoding between result pairs ++ result_types = [ ++ urllib.parse.DefragResult, ++ urllib.parse.SplitResult, ++ urllib.parse.ParseResult, ++ ] ++ for result_type in result_types: ++ self._check_result_type(result_type) ++ ++ def test_parse_qs_encoding(self): ++ result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1") ++ self.assertEqual(result, {'key': ['\u0141\xE9']}) ++ result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8") ++ self.assertEqual(result, {'key': ['\u0141\xE9']}) ++ result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii") ++ self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']}) ++ result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii") ++ self.assertEqual(result, {'key': ['\u0141\ufffd-']}) ++ result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii", ++ errors="ignore") ++ self.assertEqual(result, {'key': ['\u0141-']}) ++ ++ def test_parse_qsl_encoding(self): ++ result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1") ++ 
self.assertEqual(result, [('key', '\u0141\xE9')]) ++ result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8") ++ self.assertEqual(result, [('key', '\u0141\xE9')]) ++ result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii") ++ self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')]) ++ result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii") ++ self.assertEqual(result, [('key', '\u0141\ufffd-')]) ++ result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii", ++ errors="ignore") ++ self.assertEqual(result, [('key', '\u0141-')]) ++ ++ def test_parse_qsl_max_num_fields(self): ++ with self.assertRaises(ValueError): ++ urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10) ++ urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10) ++ ++ def test_parse_qs_separator(self): ++ parse_qs_semicolon_cases = [ ++ (";", {}), ++ (";;", {}), ++ (";a=b", {'a': ['b']}), ++ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), ++ ("a=1;a=2", {'a': ['1', '2']}), ++ (b";", {}), ++ (b";;", {}), ++ (b";a=b", {b'a': [b'b']}), ++ (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), ++ (b"a=1;a=2", {b'a': [b'1', b'2']}), ++ ] ++ for orig, expect in parse_qs_semicolon_cases: ++ with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): ++ result = urllib.parse.parse_qs(orig, separator=';') ++ self.assertEqual(result, expect, "Error parsing %r" % orig) ++ result_bytes = urllib.parse.parse_qs(orig, separator=b';') ++ self.assertEqual(result_bytes, expect, "Error parsing %r" % orig) ++ ++ ++ def test_parse_qsl_separator(self): ++ parse_qsl_semicolon_cases = [ ++ (";", []), ++ (";;", []), ++ (";a=b", [('a', 'b')]), ++ ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]), ++ ("a=1;a=2", [('a', '1'), ('a', '2')]), ++ (b";", []), ++ (b";;", []), ++ (b";a=b", [(b'a', b'b')]), ++ (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), ++ (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), ++ ] ++ for orig, expect in parse_qsl_semicolon_cases: ++ with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"): ++ result = urllib.parse.parse_qsl(orig, separator=';') ++ self.assertEqual(result, expect, "Error parsing %r" % orig) ++ result_bytes = urllib.parse.parse_qsl(orig, separator=b';') ++ self.assertEqual(result_bytes, expect, "Error parsing %r" % orig) ++ ++ ++ def test_urlencode_sequences(self): ++ # Other tests incidentally urlencode things; test non-covered cases: ++ # Sequence and object values. 
++ result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True) ++ # we cannot rely on ordering here ++ assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'} ++ ++ class Trivial: ++ def __str__(self): ++ return 'trivial' ++ ++ result = urllib.parse.urlencode({'a': Trivial()}, True) ++ self.assertEqual(result, 'a=trivial') ++ ++ def test_urlencode_quote_via(self): ++ result = urllib.parse.urlencode({'a': 'some value'}) ++ self.assertEqual(result, "a=some+value") ++ result = urllib.parse.urlencode({'a': 'some value/another'}, ++ quote_via=urllib.parse.quote) ++ self.assertEqual(result, "a=some%20value%2Fanother") ++ result = urllib.parse.urlencode({'a': 'some value/another'}, ++ safe='/', quote_via=urllib.parse.quote) ++ self.assertEqual(result, "a=some%20value/another") ++ ++ def test_quote_from_bytes(self): ++ self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo') ++ result = urllib.parse.quote_from_bytes(b'archaeological arcana') ++ self.assertEqual(result, 'archaeological%20arcana') ++ result = urllib.parse.quote_from_bytes(b'') ++ self.assertEqual(result, '') ++ ++ def test_unquote_to_bytes(self): ++ result = urllib.parse.unquote_to_bytes('abc%20def') ++ self.assertEqual(result, b'abc def') ++ result = urllib.parse.unquote_to_bytes('') ++ self.assertEqual(result, b'') ++ ++ def test_quote_errors(self): ++ self.assertRaises(TypeError, urllib.parse.quote, b'foo', ++ encoding='utf-8') ++ self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict') ++ ++ def test_issue14072(self): ++ p1 = urllib.parse.urlsplit('tel:+31-641044153') ++ self.assertEqual(p1.scheme, 'tel') ++ self.assertEqual(p1.path, '+31-641044153') ++ p2 = urllib.parse.urlsplit('tel:+31641044153') ++ self.assertEqual(p2.scheme, 'tel') ++ self.assertEqual(p2.path, '+31641044153') ++ # assert the behavior for urlparse ++ p1 = urllib.parse.urlparse('tel:+31-641044153') ++ self.assertEqual(p1.scheme, 'tel') ++ self.assertEqual(p1.path, '+31-641044153') ++ p2 = urllib.parse.urlparse('tel:+31641044153') ++ self.assertEqual(p2.scheme, 'tel') ++ self.assertEqual(p2.path, '+31641044153') ++ ++ def test_port_casting_failure_message(self): ++ message = "Port could not be cast to integer value as 'oracle'" ++ p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle') ++ with self.assertRaisesRegex(ValueError, message): ++ p1.port ++ ++ p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle') ++ with self.assertRaisesRegex(ValueError, message): ++ p2.port ++ ++ def test_telurl_params(self): ++ p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516') ++ self.assertEqual(p1.scheme, 'tel') ++ self.assertEqual(p1.path, '123-4') ++ self.assertEqual(p1.params, 'phone-context=+1-650-516') ++ ++ p1 = urllib.parse.urlparse('tel:+1-201-555-0123') ++ self.assertEqual(p1.scheme, 'tel') ++ self.assertEqual(p1.path, '+1-201-555-0123') ++ self.assertEqual(p1.params, '') ++ ++ p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com') ++ self.assertEqual(p1.scheme, 'tel') ++ self.assertEqual(p1.path, '7042') ++ self.assertEqual(p1.params, 'phone-context=example.com') ++ ++ p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555') ++ self.assertEqual(p1.scheme, 'tel') ++ self.assertEqual(p1.path, '863-1234') ++ self.assertEqual(p1.params, 'phone-context=+1-914-555') ++ ++ def test_Quoter_repr(self): ++ quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE) ++ self.assertIn('Quoter', repr(quoter)) ++ ++ def test_all(self): ++ expected = [] ++ undocumented = { ++ 
'splitattr', 'splithost', 'splitnport', 'splitpasswd', ++ 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser', ++ 'splitvalue', ++ 'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap', ++ } ++ for name in dir(urllib.parse): ++ if name.startswith('_') or name in undocumented: ++ continue ++ object = getattr(urllib.parse, name) ++ if getattr(object, '__module__', None) == 'urllib.parse': ++ expected.append(name) ++ self.assertCountEqual(urllib.parse.__all__, expected) ++ ++ def test_urlsplit_normalization(self): ++ # Certain characters should never occur in the netloc, ++ # including under normalization. ++ # Ensure that ALL of them are detected and cause an error ++ illegal_chars = '/:#?@' ++ hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars} ++ denorm_chars = [ ++ c for c in map(chr, range(128, sys.maxunicode)) ++ if (hex_chars & set(unicodedata.decomposition(c).split())) ++ and c not in illegal_chars ++ ] ++ # Sanity check that we found at least one such character ++ self.assertIn('\u2100', denorm_chars) ++ self.assertIn('\uFF03', denorm_chars) ++ ++ # bpo-36742: Verify port separators are ignored when they ++ # existed prior to decomposition ++ urllib.parse.urlsplit('http://\u30d5\u309a:80') ++ with self.assertRaises(ValueError): ++ urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380') ++ ++ for scheme in ["http", "https", "ftp"]: ++ for netloc in ["netloc{}false.netloc", "n{}user@netloc"]: ++ for c in denorm_chars: ++ url = "{}://{}/path".format(scheme, netloc.format(c)) ++ with self.subTest(url=url, char='{:04X}'.format(ord(c))): ++ with self.assertRaises(ValueError): ++ urllib.parse.urlsplit(url) ++ ++class Utility_Tests(unittest.TestCase): ++ """Testcase to test the various utility functions in the urllib.""" ++ # In Python 2 this test class was in test_urllib. ++ ++ def test_splittype(self): ++ splittype = urllib.parse._splittype ++ self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring')) ++ self.assertEqual(splittype('opaquestring'), (None, 'opaquestring')) ++ self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring')) ++ self.assertEqual(splittype('type:'), ('type', '')) ++ self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string')) ++ ++ def test_splithost(self): ++ splithost = urllib.parse._splithost ++ self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'), ++ ('www.example.org:80', '/foo/bar/baz.html')) ++ self.assertEqual(splithost('//www.example.org:80'), ++ ('www.example.org:80', '')) ++ self.assertEqual(splithost('/foo/bar/baz.html'), ++ (None, '/foo/bar/baz.html')) ++ ++ # bpo-30500: # starts a fragment. ++ self.assertEqual(splithost('//127.0.0.1#@host.com'), ++ ('127.0.0.1', '/#@host.com')) ++ self.assertEqual(splithost('//127.0.0.1#@host.com:80'), ++ ('127.0.0.1', '/#@host.com:80')) ++ self.assertEqual(splithost('//127.0.0.1:80#@host.com'), ++ ('127.0.0.1:80', '/#@host.com')) ++ ++ # Empty host is returned as empty string. ++ self.assertEqual(splithost("///file"), ++ ('', '/file')) ++ ++ # Trailing semicolon, question mark and hash symbol are kept. 
++ self.assertEqual(splithost("//example.net/file;"), ++ ('example.net', '/file;')) ++ self.assertEqual(splithost("//example.net/file?"), ++ ('example.net', '/file?')) ++ self.assertEqual(splithost("//example.net/file#"), ++ ('example.net', '/file#')) ++ ++ def test_splituser(self): ++ splituser = urllib.parse._splituser ++ self.assertEqual(splituser('User:Pass@www.python.org:080'), ++ ('User:Pass', 'www.python.org:080')) ++ self.assertEqual(splituser('@www.python.org:080'), ++ ('', 'www.python.org:080')) ++ self.assertEqual(splituser('www.python.org:080'), ++ (None, 'www.python.org:080')) ++ self.assertEqual(splituser('User:Pass@'), ++ ('User:Pass', '')) ++ self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'), ++ ('User@example.com:Pass', 'www.python.org:080')) ++ ++ def test_splitpasswd(self): ++ # Some of the password examples are not sensible, but it is added to ++ # confirming to RFC2617 and addressing issue4675. ++ splitpasswd = urllib.parse._splitpasswd ++ self.assertEqual(splitpasswd('user:ab'), ('user', 'ab')) ++ self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb')) ++ self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb')) ++ self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb')) ++ self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb')) ++ self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb')) ++ self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b')) ++ self.assertEqual(splitpasswd('user:a b'), ('user', 'a b')) ++ self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab')) ++ self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b')) ++ self.assertEqual(splitpasswd('user:'), ('user', '')) ++ self.assertEqual(splitpasswd('user'), ('user', None)) ++ self.assertEqual(splitpasswd(':ab'), ('', 'ab')) ++ ++ def test_splitport(self): ++ splitport = urllib.parse._splitport ++ self.assertEqual(splitport('parrot:88'), ('parrot', '88')) ++ self.assertEqual(splitport('parrot'), ('parrot', None)) ++ self.assertEqual(splitport('parrot:'), ('parrot', None)) ++ self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None)) ++ self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None)) ++ self.assertEqual(splitport('[::1]:88'), ('[::1]', '88')) ++ self.assertEqual(splitport('[::1]'), ('[::1]', None)) ++ self.assertEqual(splitport(':88'), ('', '88')) ++ ++ def test_splitnport(self): ++ splitnport = urllib.parse._splitnport ++ self.assertEqual(splitnport('parrot:88'), ('parrot', 88)) ++ self.assertEqual(splitnport('parrot'), ('parrot', -1)) ++ self.assertEqual(splitnport('parrot', 55), ('parrot', 55)) ++ self.assertEqual(splitnport('parrot:'), ('parrot', -1)) ++ self.assertEqual(splitnport('parrot:', 55), ('parrot', 55)) ++ self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1)) ++ self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55)) ++ self.assertEqual(splitnport('parrot:cheese'), ('parrot', None)) ++ self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None)) ++ ++ def test_splitquery(self): ++ # Normal cases are exercised by other tests; ensure that we also ++ # catch cases with no port specified (testcase ensuring coverage) ++ splitquery = urllib.parse._splitquery ++ self.assertEqual(splitquery('http://python.org/fake?foo=bar'), ++ ('http://python.org/fake', 'foo=bar')) ++ self.assertEqual(splitquery('http://python.org/fake?foo=bar?'), ++ ('http://python.org/fake?foo=bar', '')) ++ self.assertEqual(splitquery('http://python.org/fake'), ++ ('http://python.org/fake', None)) ++ 
self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar')) ++ ++ def test_splittag(self): ++ splittag = urllib.parse._splittag ++ self.assertEqual(splittag('http://example.com?foo=bar#baz'), ++ ('http://example.com?foo=bar', 'baz')) ++ self.assertEqual(splittag('http://example.com?foo=bar#'), ++ ('http://example.com?foo=bar', '')) ++ self.assertEqual(splittag('#baz'), ('', 'baz')) ++ self.assertEqual(splittag('http://example.com?foo=bar'), ++ ('http://example.com?foo=bar', None)) ++ self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'), ++ ('http://example.com?foo=bar#baz', 'boo')) ++ ++ def test_splitattr(self): ++ splitattr = urllib.parse._splitattr ++ self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'), ++ ('/path', ['attr1=value1', 'attr2=value2'])) ++ self.assertEqual(splitattr('/path;'), ('/path', [''])) ++ self.assertEqual(splitattr(';attr1=value1;attr2=value2'), ++ ('', ['attr1=value1', 'attr2=value2'])) ++ self.assertEqual(splitattr('/path'), ('/path', [])) ++ ++ def test_splitvalue(self): ++ # Normal cases are exercised by other tests; test pathological cases ++ # with no key/value pairs. (testcase ensuring coverage) ++ splitvalue = urllib.parse._splitvalue ++ self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar')) ++ self.assertEqual(splitvalue('foo='), ('foo', '')) ++ self.assertEqual(splitvalue('=bar'), ('', 'bar')) ++ self.assertEqual(splitvalue('foobar'), ('foobar', None)) ++ self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz')) ++ ++ def test_to_bytes(self): ++ result = urllib.parse._to_bytes('http://www.python.org') ++ self.assertEqual(result, 'http://www.python.org') ++ self.assertRaises(UnicodeError, urllib.parse._to_bytes, ++ 'http://www.python.org/medi\u00e6val') ++ ++ def test_unwrap(self): ++ for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>', ++ 'URL:scheme://host/path', 'scheme://host/path'): ++ url = urllib.parse.unwrap(wrapped_url) ++ self.assertEqual(url, 'scheme://host/path') ++ ++ ++class DeprecationTest(unittest.TestCase): ++ ++ def test_splittype_deprecation(self): ++ with self.assertWarns(DeprecationWarning) as cm: ++ urllib.parse.splittype('') ++ self.assertEqual(str(cm.warning), ++ 'urllib.parse.splittype() is deprecated as of 3.8, ' ++ 'use urllib.parse.urlparse() instead') ++ ++ def test_splithost_deprecation(self): ++ with self.assertWarns(DeprecationWarning) as cm: ++ urllib.parse.splithost('') ++ self.assertEqual(str(cm.warning), ++ 'urllib.parse.splithost() is deprecated as of 3.8, ' ++ 'use urllib.parse.urlparse() instead') ++ ++ def test_splituser_deprecation(self): ++ with self.assertWarns(DeprecationWarning) as cm: ++ urllib.parse.splituser('') ++ self.assertEqual(str(cm.warning), ++ 'urllib.parse.splituser() is deprecated as of 3.8, ' ++ 'use urllib.parse.urlparse() instead') ++ ++ def test_splitpasswd_deprecation(self): ++ with self.assertWarns(DeprecationWarning) as cm: ++ urllib.parse.splitpasswd('') ++ self.assertEqual(str(cm.warning), ++ 'urllib.parse.splitpasswd() is deprecated as of 3.8, ' ++ 'use urllib.parse.urlparse() instead') ++ ++ def test_splitport_deprecation(self): ++ with self.assertWarns(DeprecationWarning) as cm: ++ urllib.parse.splitport('') ++ self.assertEqual(str(cm.warning), ++ 'urllib.parse.splitport() is deprecated as of 3.8, ' ++ 'use urllib.parse.urlparse() instead') ++ ++ def test_splitnport_deprecation(self): ++ with self.assertWarns(DeprecationWarning) as cm: ++ urllib.parse.splitnport('') ++ self.assertEqual(str(cm.warning), ++ 'urllib.parse.splitnport() is deprecated as of 3.8, ' ++ 'use
urllib.parse.urlparse() instead') ++ ++ def test_splitquery_deprecation(self): ++ with self.assertWarns(DeprecationWarning) as cm: ++ urllib.parse.splitquery('') ++ self.assertEqual(str(cm.warning), ++ 'urllib.parse.splitquery() is deprecated as of 3.8, ' ++ 'use urllib.parse.urlparse() instead') ++ ++ def test_splittag_deprecation(self): ++ with self.assertWarns(DeprecationWarning) as cm: ++ urllib.parse.splittag('') ++ self.assertEqual(str(cm.warning), ++ 'urllib.parse.splittag() is deprecated as of 3.8, ' ++ 'use urllib.parse.urlparse() instead') ++ ++ def test_splitattr_deprecation(self): ++ with self.assertWarns(DeprecationWarning) as cm: ++ urllib.parse.splitattr('') ++ self.assertEqual(str(cm.warning), ++ 'urllib.parse.splitattr() is deprecated as of 3.8, ' ++ 'use urllib.parse.urlparse() instead') ++ ++ def test_splitvalue_deprecation(self): ++ with self.assertWarns(DeprecationWarning) as cm: ++ urllib.parse.splitvalue('') ++ self.assertEqual(str(cm.warning), ++ 'urllib.parse.splitvalue() is deprecated as of 3.8, ' ++ 'use urllib.parse.parse_qsl() instead') ++ ++ def test_to_bytes_deprecation(self): ++ with self.assertWarns(DeprecationWarning) as cm: ++ urllib.parse.to_bytes('') ++ self.assertEqual(str(cm.warning), ++ 'urllib.parse.to_bytes() is deprecated as of 3.8') ++ ++ ++if __name__ == "__main__": ++ unittest.main() +diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py +index 5b7193f..2eb3448 100644 +--- a/Lib/urllib/parse.py ++++ b/Lib/urllib/parse.py +@@ -36,6 +36,7 @@ import sys + import types + import collections + import warnings ++import ipaddress + + __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", + "urlsplit", "urlunsplit", "urlencode", "parse_qs", +@@ -442,6 +443,17 @@ def _checknetloc(netloc): + raise ValueError("netloc '" + netloc + "' contains invalid " + + "characters under NFKC normalization") + ++# Valid bracketed hosts are defined in ++# https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/ ++def _check_bracketed_host(hostname): ++ if hostname.startswith('v'): ++ if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", hostname): ++ raise ValueError(f"IPvFuture address is invalid") ++ else: ++ ip = ipaddress.ip_address(hostname) # Throws Value Error if not IPv6 or IPv4 ++ if isinstance(ip, ipaddress.IPv4Address): ++ raise ValueError(f"An IPv4 address cannot be in brackets") ++ + def urlsplit(url, scheme='', allow_fragments=True): + """Parse a URL into 5 components: + <scheme>://<netloc>/<path>?<query>#<fragment> +@@ -488,12 +500,14 @@ def urlsplit(url, scheme='', allow_fragments=True): + break + else: + scheme, url = url[:i].lower(), url[i+1:] +- + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") ++ if '[' in netloc and ']' in netloc: ++ bracketed_host = netloc.partition('[')[2].partition(']')[0] ++ _check_bracketed_host(bracketed_host) + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: +diff --git a/Lib/urllib/parse.py.orig b/Lib/urllib/parse.py.orig +new file mode 100644 +index 0000000..5b7193f +--- /dev/null ++++ b/Lib/urllib/parse.py.orig +@@ -0,0 +1,1209 @@ ++"""Parse (absolute and relative) URLs. ++ ++urlparse module is based upon the following RFC specifications. ++ ++RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding ++and L. Masinter, January 2005. ++ ++RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter ++and L.Masinter, December 1999.
++ ++RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. ++Berners-Lee, R. Fielding, and L. Masinter, August 1998. ++ ++RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. ++ ++RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June ++1995. ++ ++RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. ++McCahill, December 1994 ++ ++RFC 3986 is considered the current standard and any future changes to ++urlparse module should conform with it. The urlparse module is ++currently not entirely compliant with this RFC due to defacto ++scenarios for parsing, and for backward compatibility purposes, some ++parsing quirks from older RFCs are retained. The testcases in ++test_urlparse.py provides a good indicator of parsing behavior. ++ ++The WHATWG URL Parser spec should also be considered. We are not compliant with ++it either due to existing user code API behavior expectations (Hyrum's Law). ++It serves as a useful guide when making changes. ++""" ++ ++import re ++import sys ++import types ++import collections ++import warnings ++ ++__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", ++ "urlsplit", "urlunsplit", "urlencode", "parse_qs", ++ "parse_qsl", "quote", "quote_plus", "quote_from_bytes", ++ "unquote", "unquote_plus", "unquote_to_bytes", ++ "DefragResult", "ParseResult", "SplitResult", ++ "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"] ++ ++# A classification of schemes. ++# The empty string classifies URLs with no scheme specified, ++# being the default value returned by “urlsplit” and “urlparse”. ++ ++uses_relative = ['', 'ftp', 'http', 'gopher', 'nntp', 'imap', ++ 'wais', 'file', 'https', 'shttp', 'mms', ++ 'prospero', 'rtsp', 'rtspu', 'sftp', ++ 'svn', 'svn+ssh', 'ws', 'wss'] ++ ++uses_netloc = ['', 'ftp', 'http', 'gopher', 'nntp', 'telnet', ++ 'imap', 'wais', 'file', 'mms', 'https', 'shttp', ++ 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', ++ 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh', ++ 'ws', 'wss'] ++ ++uses_params = ['', 'ftp', 'hdl', 'prospero', 'http', 'imap', ++ 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', ++ 'mms', 'sftp', 'tel'] ++ ++# These are not actually used anymore, but should stay for backwards ++# compatibility. (They are undocumented, but have a public-looking name.) ++ ++non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', ++ 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] ++ ++uses_query = ['', 'http', 'wais', 'imap', 'https', 'shttp', 'mms', ++ 'gopher', 'rtsp', 'rtspu', 'sip', 'sips'] ++ ++uses_fragment = ['', 'ftp', 'hdl', 'http', 'gopher', 'news', ++ 'nntp', 'wais', 'https', 'shttp', 'snews', ++ 'file', 'prospero'] ++ ++# Characters valid in scheme names ++scheme_chars = ('abcdefghijklmnopqrstuvwxyz' ++ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' ++ '0123456789' ++ '+-.') ++ ++# Leading and trailing C0 control and space to be stripped per WHATWG spec. 
++# == "".join([chr(i) for i in range(0, 0x20 + 1)]) ++_WHATWG_C0_CONTROL_OR_SPACE = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f ' ++ ++# Unsafe bytes to be removed per WHATWG spec ++_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] ++ ++# XXX: Consider replacing with functools.lru_cache ++MAX_CACHE_SIZE = 20 ++_parse_cache = {} ++ ++def clear_cache(): ++ """Clear the parse cache and the quoters cache.""" ++ _parse_cache.clear() ++ _safe_quoters.clear() ++ ++ ++# Helpers for bytes handling ++# For 3.2, we deliberately require applications that ++# handle improperly quoted URLs to do their own ++# decoding and encoding. If valid use cases are ++# presented, we may relax this by using latin-1 ++# decoding internally for 3.3 ++_implicit_encoding = 'ascii' ++_implicit_errors = 'strict' ++ ++def _noop(obj): ++ return obj ++ ++def _encode_result(obj, encoding=_implicit_encoding, ++ errors=_implicit_errors): ++ return obj.encode(encoding, errors) ++ ++def _decode_args(args, encoding=_implicit_encoding, ++ errors=_implicit_errors): ++ return tuple(x.decode(encoding, errors) if x else '' for x in args) ++ ++def _coerce_args(*args): ++ # Invokes decode if necessary to create str args ++ # and returns the coerced inputs along with ++ # an appropriate result coercion function ++ # - noop for str inputs ++ # - encoding function otherwise ++ str_input = isinstance(args[0], str) ++ for arg in args[1:]: ++ # We special-case the empty string to support the ++ # "scheme=''" default argument to some functions ++ if arg and isinstance(arg, str) != str_input: ++ raise TypeError("Cannot mix str and non-str arguments") ++ if str_input: ++ return args + (_noop,) ++ return _decode_args(args) + (_encode_result,) ++ ++# Result objects are more helpful than simple tuples ++class _ResultMixinStr(object): ++ """Standard approach to encoding parsed results from str to bytes""" ++ __slots__ = () ++ ++ def encode(self, encoding='ascii', errors='strict'): ++ return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) ++ ++ ++class _ResultMixinBytes(object): ++ """Standard approach to decoding parsed results from bytes to str""" ++ __slots__ = () ++ ++ def decode(self, encoding='ascii', errors='strict'): ++ return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) ++ ++ ++class _NetlocResultMixinBase(object): ++ """Shared methods for the parsed result objects containing a netloc element""" ++ __slots__ = () ++ ++ @property ++ def username(self): ++ return self._userinfo[0] ++ ++ @property ++ def password(self): ++ return self._userinfo[1] ++ ++ @property ++ def hostname(self): ++ hostname = self._hostinfo[0] ++ if not hostname: ++ return None ++ # Scoped IPv6 address may have zone info, which must not be lowercased ++ # like http://[fe80::822a:a8ff:fe49:470c%tESt]:1234/keys ++ separator = '%' if isinstance(hostname, str) else b'%' ++ hostname, percent, zone = hostname.partition(separator) ++ return hostname.lower() + percent + zone ++ ++ @property ++ def port(self): ++ port = self._hostinfo[1] ++ if port is not None: ++ try: ++ port = int(port, 10) ++ except ValueError: ++ message = f'Port could not be cast to integer value as {port!r}' ++ raise ValueError(message) from None ++ if not ( 0 <= port <= 65535): ++ raise ValueError("Port out of range 0-65535") ++ return port ++ ++ __class_getitem__ = classmethod(types.GenericAlias) ++ ++ ++class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): ++ 
__slots__ = () ++ ++ @property ++ def _userinfo(self): ++ netloc = self.netloc ++ userinfo, have_info, hostinfo = netloc.rpartition('@') ++ if have_info: ++ username, have_password, password = userinfo.partition(':') ++ if not have_password: ++ password = None ++ else: ++ username = password = None ++ return username, password ++ ++ @property ++ def _hostinfo(self): ++ netloc = self.netloc ++ _, _, hostinfo = netloc.rpartition('@') ++ _, have_open_br, bracketed = hostinfo.partition('[') ++ if have_open_br: ++ hostname, _, port = bracketed.partition(']') ++ _, _, port = port.partition(':') ++ else: ++ hostname, _, port = hostinfo.partition(':') ++ if not port: ++ port = None ++ return hostname, port ++ ++ ++class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): ++ __slots__ = () ++ ++ @property ++ def _userinfo(self): ++ netloc = self.netloc ++ userinfo, have_info, hostinfo = netloc.rpartition(b'@') ++ if have_info: ++ username, have_password, password = userinfo.partition(b':') ++ if not have_password: ++ password = None ++ else: ++ username = password = None ++ return username, password ++ ++ @property ++ def _hostinfo(self): ++ netloc = self.netloc ++ _, _, hostinfo = netloc.rpartition(b'@') ++ _, have_open_br, bracketed = hostinfo.partition(b'[') ++ if have_open_br: ++ hostname, _, port = bracketed.partition(b']') ++ _, _, port = port.partition(b':') ++ else: ++ hostname, _, port = hostinfo.partition(b':') ++ if not port: ++ port = None ++ return hostname, port ++ ++ ++from collections import namedtuple ++ ++_DefragResultBase = namedtuple('DefragResult', 'url fragment') ++_SplitResultBase = namedtuple( ++ 'SplitResult', 'scheme netloc path query fragment') ++_ParseResultBase = namedtuple( ++ 'ParseResult', 'scheme netloc path params query fragment') ++ ++_DefragResultBase.__doc__ = """ ++DefragResult(url, fragment) ++ ++A 2-tuple that contains the url without fragment identifier and the fragment ++identifier as a separate argument. ++""" ++ ++_DefragResultBase.url.__doc__ = """The URL with no fragment identifier.""" ++ ++_DefragResultBase.fragment.__doc__ = """ ++Fragment identifier separated from URL, that allows indirect identification of a ++secondary resource by reference to a primary resource and additional identifying ++information. ++""" ++ ++_SplitResultBase.__doc__ = """ ++SplitResult(scheme, netloc, path, query, fragment) ++ ++A 5-tuple that contains the different components of a URL. Similar to ++ParseResult, but does not split params. ++""" ++ ++_SplitResultBase.scheme.__doc__ = """Specifies URL scheme for the request.""" ++ ++_SplitResultBase.netloc.__doc__ = """ ++Network location where the request is made to. ++""" ++ ++_SplitResultBase.path.__doc__ = """ ++The hierarchical path, such as the path to a file to download. ++""" ++ ++_SplitResultBase.query.__doc__ = """ ++The query component, that contains non-hierarchical data, that along with data ++in path component, identifies a resource in the scope of URI's scheme and ++network location. ++""" ++ ++_SplitResultBase.fragment.__doc__ = """ ++Fragment identifier, that allows indirect identification of a secondary resource ++by reference to a primary resource and additional identifying information. ++""" ++ ++_ParseResultBase.__doc__ = """ ++ParseResult(scheme, netloc, path, params, query, fragment) ++ ++A 6-tuple that contains components of a parsed URL. 
++""" ++ ++_ParseResultBase.scheme.__doc__ = _SplitResultBase.scheme.__doc__ ++_ParseResultBase.netloc.__doc__ = _SplitResultBase.netloc.__doc__ ++_ParseResultBase.path.__doc__ = _SplitResultBase.path.__doc__ ++_ParseResultBase.params.__doc__ = """ ++Parameters for last path element used to dereference the URI in order to provide ++access to perform some operation on the resource. ++""" ++ ++_ParseResultBase.query.__doc__ = _SplitResultBase.query.__doc__ ++_ParseResultBase.fragment.__doc__ = _SplitResultBase.fragment.__doc__ ++ ++ ++# For backwards compatibility, alias _NetlocResultMixinStr ++# ResultBase is no longer part of the documented API, but it is ++# retained since deprecating it isn't worth the hassle ++ResultBase = _NetlocResultMixinStr ++ ++# Structured result objects for string data ++class DefragResult(_DefragResultBase, _ResultMixinStr): ++ __slots__ = () ++ def geturl(self): ++ if self.fragment: ++ return self.url + '#' + self.fragment ++ else: ++ return self.url ++ ++class SplitResult(_SplitResultBase, _NetlocResultMixinStr): ++ __slots__ = () ++ def geturl(self): ++ return urlunsplit(self) ++ ++class ParseResult(_ParseResultBase, _NetlocResultMixinStr): ++ __slots__ = () ++ def geturl(self): ++ return urlunparse(self) ++ ++# Structured result objects for bytes data ++class DefragResultBytes(_DefragResultBase, _ResultMixinBytes): ++ __slots__ = () ++ def geturl(self): ++ if self.fragment: ++ return self.url + b'#' + self.fragment ++ else: ++ return self.url ++ ++class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes): ++ __slots__ = () ++ def geturl(self): ++ return urlunsplit(self) ++ ++class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes): ++ __slots__ = () ++ def geturl(self): ++ return urlunparse(self) ++ ++# Set up the encode/decode result pairs ++def _fix_result_transcoding(): ++ _result_pairs = ( ++ (DefragResult, DefragResultBytes), ++ (SplitResult, SplitResultBytes), ++ (ParseResult, ParseResultBytes), ++ ) ++ for _decoded, _encoded in _result_pairs: ++ _decoded._encoded_counterpart = _encoded ++ _encoded._decoded_counterpart = _decoded ++ ++_fix_result_transcoding() ++del _fix_result_transcoding ++ ++def urlparse(url, scheme='', allow_fragments=True): ++ """Parse a URL into 6 components: ++ <scheme>://<netloc>/<path>;<params>?<query>#<fragment> ++ ++ The result is a named 6-tuple with fields corresponding to the ++ above. It is either a ParseResult or ParseResultBytes object, ++ depending on the type of the url parameter. ++ ++ The username, password, hostname, and port sub-components of netloc ++ can also be accessed as attributes of the returned object. ++ ++ The scheme argument provides the default value of the scheme ++ component when no scheme is found in url. ++ ++ If allow_fragments is False, no attempt is made to separate the ++ fragment component from the previous component, which can be either ++ path or query. ++ ++ Note that % escapes are not expanded.
++ """ ++ url, scheme, _coerce_result = _coerce_args(url, scheme) ++ splitresult = urlsplit(url, scheme, allow_fragments) ++ scheme, netloc, url, query, fragment = splitresult ++ if scheme in uses_params and ';' in url: ++ url, params = _splitparams(url) ++ else: ++ params = '' ++ result = ParseResult(scheme, netloc, url, params, query, fragment) ++ return _coerce_result(result) ++ ++def _splitparams(url): ++ if '/' in url: ++ i = url.find(';', url.rfind('/')) ++ if i < 0: ++ return url, '' ++ else: ++ i = url.find(';') ++ return url[:i], url[i+1:] ++ ++def _splitnetloc(url, start=0): ++ delim = len(url) # position of end of domain part of url, default is end ++ for c in '/?#': # look for delimiters; the order is NOT important ++ wdelim = url.find(c, start) # find first of this delim ++ if wdelim >= 0: # if found ++ delim = min(delim, wdelim) # use earliest delim position ++ return url[start:delim], url[delim:] # return (domain, rest) ++ ++def _checknetloc(netloc): ++ if not netloc or netloc.isascii(): ++ return ++ # looking for characters like \u2100 that expand to 'a/c' ++ # IDNA uses NFKC equivalence, so normalize for this check ++ import unicodedata ++ n = netloc.replace('@', '') # ignore characters already included ++ n = n.replace(':', '') # but not the surrounding text ++ n = n.replace('#', '') ++ n = n.replace('?', '') ++ netloc2 = unicodedata.normalize('NFKC', n) ++ if n == netloc2: ++ return ++ for c in '/?#@:': ++ if c in netloc2: ++ raise ValueError("netloc '" + netloc + "' contains invalid " + ++ "characters under NFKC normalization") ++ ++def urlsplit(url, scheme='', allow_fragments=True): ++ """Parse a URL into 5 components: ++ <scheme>://<netloc>/<path>?<query>#<fragment> ++ ++ The result is a named 5-tuple with fields corresponding to the ++ above. It is either a SplitResult or SplitResultBytes object, ++ depending on the type of the url parameter. ++ ++ The username, password, hostname, and port sub-components of netloc ++ can also be accessed as attributes of the returned object. ++ ++ The scheme argument provides the default value of the scheme ++ component when no scheme is found in url. ++ ++ If allow_fragments is False, no attempt is made to separate the ++ fragment component from the previous component, which can be either ++ path or query. ++ ++ Note that % escapes are not expanded. ++ """ ++ ++ url, scheme, _coerce_result = _coerce_args(url, scheme) ++ # Only lstrip url as some applications rely on preserving trailing space. ++ # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both) ++ url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE) ++ scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE) ++ ++ for b in _UNSAFE_URL_BYTES_TO_REMOVE: ++ url = url.replace(b, "") ++ scheme = scheme.replace(b, "") ++ ++ allow_fragments = bool(allow_fragments) ++ key = url, scheme, allow_fragments, type(url), type(scheme) ++ cached = _parse_cache.get(key, None) ++ if cached: ++ return _coerce_result(cached) ++ if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth ++ clear_cache() ++ netloc = query = fragment = '' ++ i = url.find(':') ++ if i > 0: ++ for c in url[:i]: ++ if c not in scheme_chars: ++ break ++ else: ++ scheme, url = url[:i].lower(), url[i+1:] ++ ++ if url[:2] == '//': ++ netloc, url = _splitnetloc(url, 2) ++ if (('[' in netloc and ']' not in netloc) or ++ (']' in netloc and '[' not in netloc)): ++ raise ValueError("Invalid IPv6 URL") ++ if allow_fragments and '#' in url: ++ url, fragment = url.split('#', 1) ++ if '?'
in url: ++ url, query = url.split('?', 1) ++ _checknetloc(netloc) ++ v = SplitResult(scheme, netloc, url, query, fragment) ++ _parse_cache[key] = v ++ return _coerce_result(v) ++ ++def urlunparse(components): ++ """Put a parsed URL back together again. This may result in a ++ slightly different, but equivalent URL, if the URL that was parsed ++ originally had redundant delimiters, e.g. a ? with an empty query ++ (the draft states that these are equivalent).""" ++ scheme, netloc, url, params, query, fragment, _coerce_result = ( ++ _coerce_args(*components)) ++ if params: ++ url = "%s;%s" % (url, params) ++ return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) ++ ++def urlunsplit(components): ++ """Combine the elements of a tuple as returned by urlsplit() into a ++ complete URL as a string. The data argument can be any five-item iterable. ++ This may result in a slightly different, but equivalent URL, if the URL that ++ was parsed originally had unnecessary delimiters (for example, a ? with an ++ empty query; the RFC states that these are equivalent).""" ++ scheme, netloc, url, query, fragment, _coerce_result = ( ++ _coerce_args(*components)) ++ if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): ++ if url and url[:1] != '/': url = '/' + url ++ url = '//' + (netloc or '') + url ++ if scheme: ++ url = scheme + ':' + url ++ if query: ++ url = url + '?' + query ++ if fragment: ++ url = url + '#' + fragment ++ return _coerce_result(url) ++ ++def urljoin(base, url, allow_fragments=True): ++ """Join a base URL and a possibly relative URL to form an absolute ++ interpretation of the latter.""" ++ if not base: ++ return url ++ if not url: ++ return base ++ ++ base, url, _coerce_result = _coerce_args(base, url) ++ bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ ++ urlparse(base, '', allow_fragments) ++ scheme, netloc, path, params, query, fragment = \ ++ urlparse(url, bscheme, allow_fragments) ++ ++ if scheme != bscheme or scheme not in uses_relative: ++ return _coerce_result(url) ++ if scheme in uses_netloc: ++ if netloc: ++ return _coerce_result(urlunparse((scheme, netloc, path, ++ params, query, fragment))) ++ netloc = bnetloc ++ ++ if not path and not params: ++ path = bpath ++ params = bparams ++ if not query: ++ query = bquery ++ return _coerce_result(urlunparse((scheme, netloc, path, ++ params, query, fragment))) ++ ++ base_parts = bpath.split('/') ++ if base_parts[-1] != '': ++ # the last item is not a directory, so will not be taken into account ++ # in resolving the relative path ++ del base_parts[-1] ++ ++ # for rfc3986, ignore all base path should the first character be root. ++ if path[:1] == '/': ++ segments = path.split('/') ++ else: ++ segments = base_parts + path.split('/') ++ # filter out elements that would cause redundant slashes on re-joining ++ # the resolved_path ++ segments[1:-1] = filter(None, segments[1:-1]) ++ ++ resolved_path = [] ++ ++ for seg in segments: ++ if seg == '..': ++ try: ++ resolved_path.pop() ++ except IndexError: ++ # ignore any .. segments that would otherwise cause an IndexError ++ # when popped from resolved_path if resolving for rfc3986 ++ pass ++ elif seg == '.': ++ continue ++ else: ++ resolved_path.append(seg) ++ ++ if segments[-1] in ('.', '..'): ++ # do some post-processing here. 
if the last segment was a relative dir, ++ # then we need to append the trailing '/' ++ resolved_path.append('') ++ ++ return _coerce_result(urlunparse((scheme, netloc, '/'.join( ++ resolved_path) or '/', params, query, fragment))) ++ ++ ++def urldefrag(url): ++ """Removes any existing fragment from URL. ++ ++ Returns a tuple of the defragmented URL and the fragment. If ++ the URL contained no fragments, the second element is the ++ empty string. ++ """ ++ url, _coerce_result = _coerce_args(url) ++ if '#' in url: ++ s, n, p, a, q, frag = urlparse(url) ++ defrag = urlunparse((s, n, p, a, q, '')) ++ else: ++ frag = '' ++ defrag = url ++ return _coerce_result(DefragResult(defrag, frag)) ++ ++_hexdig = '0123456789ABCDEFabcdef' ++_hextobyte = None ++ ++def unquote_to_bytes(string): ++ """unquote_to_bytes('abc%20def') -> b'abc def'.""" ++ # Note: strings are encoded as UTF-8. This is only an issue if it contains ++ # unescaped non-ASCII characters, which URIs should not. ++ if not string: ++ # Is it a string-like object? ++ string.split ++ return b'' ++ if isinstance(string, str): ++ string = string.encode('utf-8') ++ bits = string.split(b'%') ++ if len(bits) == 1: ++ return string ++ res = [bits[0]] ++ append = res.append ++ # Delay the initialization of the table to not waste memory ++ # if the function is never called ++ global _hextobyte ++ if _hextobyte is None: ++ _hextobyte = {(a + b).encode(): bytes.fromhex(a + b) ++ for a in _hexdig for b in _hexdig} ++ for item in bits[1:]: ++ try: ++ append(_hextobyte[item[:2]]) ++ append(item[2:]) ++ except KeyError: ++ append(b'%') ++ append(item) ++ return b''.join(res) ++ ++_asciire = re.compile('([\x00-\x7f]+)') ++ ++def unquote(string, encoding='utf-8', errors='replace'): ++ """Replace %xx escapes by their single-character equivalent. The optional ++ encoding and errors parameters specify how to decode percent-encoded ++ sequences into Unicode characters, as accepted by the bytes.decode() ++ method. ++ By default, percent-encoded sequences are decoded with UTF-8, and invalid ++ sequences are replaced by a placeholder character. ++ ++ unquote('abc%20def') -> 'abc def'. ++ """ ++ if isinstance(string, bytes): ++ return unquote_to_bytes(string).decode(encoding, errors) ++ if '%' not in string: ++ string.split ++ return string ++ if encoding is None: ++ encoding = 'utf-8' ++ if errors is None: ++ errors = 'replace' ++ bits = _asciire.split(string) ++ res = [bits[0]] ++ append = res.append ++ for i in range(1, len(bits), 2): ++ append(unquote_to_bytes(bits[i]).decode(encoding, errors)) ++ append(bits[i + 1]) ++ return ''.join(res) ++ ++ ++def parse_qs(qs, keep_blank_values=False, strict_parsing=False, ++ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): ++ """Parse a query given as a string argument. ++ ++ Arguments: ++ ++ qs: percent-encoded query string to be parsed ++ ++ keep_blank_values: flag indicating whether blank values in ++ percent-encoded queries should be treated as blank strings. ++ A true value indicates that blanks should be retained as ++ blank strings. The default false value indicates that ++ blank values are to be ignored and treated as if they were ++ not included. ++ ++ strict_parsing: flag indicating what to do with parsing errors. ++ If false (the default), errors are silently ignored. ++ If true, errors raise a ValueError exception. ++ ++ encoding and errors: specify how to decode percent-encoded sequences ++ into Unicode characters, as accepted by the bytes.decode() method. ++ ++ max_num_fields: int. 
If set, then throws a ValueError if there ++ are more than n fields read by parse_qsl(). ++ ++ separator: str. The symbol to use for separating the query arguments. ++ Defaults to &. ++ ++ Returns a dictionary. ++ """ ++ parsed_result = {} ++ pairs = parse_qsl(qs, keep_blank_values, strict_parsing, ++ encoding=encoding, errors=errors, ++ max_num_fields=max_num_fields, separator=separator) ++ for name, value in pairs: ++ if name in parsed_result: ++ parsed_result[name].append(value) ++ else: ++ parsed_result[name] = [value] ++ return parsed_result ++ ++ ++def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, ++ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): ++ """Parse a query given as a string argument. ++ ++ Arguments: ++ ++ qs: percent-encoded query string to be parsed ++ ++ keep_blank_values: flag indicating whether blank values in ++ percent-encoded queries should be treated as blank strings. ++ A true value indicates that blanks should be retained as blank ++ strings. The default false value indicates that blank values ++ are to be ignored and treated as if they were not included. ++ ++ strict_parsing: flag indicating what to do with parsing errors. If ++ false (the default), errors are silently ignored. If true, ++ errors raise a ValueError exception. ++ ++ encoding and errors: specify how to decode percent-encoded sequences ++ into Unicode characters, as accepted by the bytes.decode() method. ++ ++ max_num_fields: int. If set, then throws a ValueError ++ if there are more than n fields read by parse_qsl(). ++ ++ separator: str. The symbol to use for separating the query arguments. ++ Defaults to &. ++ ++ Returns a list, as G-d intended. ++ """ ++ qs, _coerce_result = _coerce_args(qs) ++ separator, _ = _coerce_args(separator) ++ ++ if not separator or (not isinstance(separator, (str, bytes))): ++ raise ValueError("Separator must be of type string or bytes.") ++ ++ # If max_num_fields is defined then check that the number of fields ++ # is less than max_num_fields. This prevents a memory exhaustion DOS ++ # attack via post bodies with many fields. ++ if max_num_fields is not None: ++ num_fields = 1 + qs.count(separator) ++ if max_num_fields < num_fields: ++ raise ValueError('Max number of fields exceeded') ++ ++ pairs = [s1 for s1 in qs.split(separator)] ++ r = [] ++ for name_value in pairs: ++ if not name_value and not strict_parsing: ++ continue ++ nv = name_value.split('=', 1) ++ if len(nv) != 2: ++ if strict_parsing: ++ raise ValueError("bad query field: %r" % (name_value,)) ++ # Handle case of a control-name with no equal sign ++ if keep_blank_values: ++ nv.append('') ++ else: ++ continue ++ if len(nv[1]) or keep_blank_values: ++ name = nv[0].replace('+', ' ') ++ name = unquote(name, encoding=encoding, errors=errors) ++ name = _coerce_result(name) ++ value = nv[1].replace('+', ' ') ++ value = unquote(value, encoding=encoding, errors=errors) ++ value = _coerce_result(value) ++ r.append((name, value)) ++ return r ++ ++def unquote_plus(string, encoding='utf-8', errors='replace'): ++ """Like unquote(), but also replace plus signs by spaces, as required for ++ unquoting HTML form values. 
++ ++ unquote_plus('%7e/abc+def') -> '~/abc def' ++ """ ++ string = string.replace('+', ' ') ++ return unquote(string, encoding, errors) ++ ++_ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' ++ b'abcdefghijklmnopqrstuvwxyz' ++ b'0123456789' ++ b'_.-~') ++_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) ++_safe_quoters = {} ++ ++class Quoter(collections.defaultdict): ++ """A mapping from bytes (in range(0,256)) to strings. ++ ++ String values are percent-encoded byte values, unless the key < 128, and ++ in the "safe" set (either the specified safe set, or default set). ++ """ ++ # Keeps a cache internally, using defaultdict, for efficiency (lookups ++ # of cached keys don't call Python code at all). ++ def __init__(self, safe): ++ """safe: bytes object.""" ++ self.safe = _ALWAYS_SAFE.union(safe) ++ ++ def __repr__(self): ++ # Without this, will just display as a defaultdict ++ return "<%s %r>" % (self.__class__.__name__, dict(self)) ++ ++ def __missing__(self, b): ++ # Handle a cache miss. Store quoted string in cache and return. ++ res = chr(b) if b in self.safe else '%{:02X}'.format(b) ++ self[b] = res ++ return res ++ ++def quote(string, safe='/', encoding=None, errors=None): ++ """quote('abc def') -> 'abc%20def' ++ ++ Each part of a URL, e.g. the path info, the query, etc., has a ++ different set of reserved characters that must be quoted. The ++ quote function offers a cautious (not minimal) way to quote a ++ string for most of these parts. ++ ++ RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists ++ the following (un)reserved characters. ++ ++ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" ++ reserved = gen-delims / sub-delims ++ gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" ++ sub-delims = "!" / "$" / "&" / "'" / "(" / ")" ++ / "*" / "+" / "," / ";" / "=" ++ ++ Each of the reserved characters is reserved in some component of a URL, ++ but not necessarily in all of them. ++ ++ The quote function %-escapes all characters that are neither in the ++ unreserved chars ("always safe") nor the additional chars set via the ++ safe arg. ++ ++ The default for the safe arg is '/'. The character is reserved, but in ++ typical usage the quote function is being called on a path where the ++ existing slash characters are to be preserved. ++ ++ Python 3.7 updates from using RFC 2396 to RFC 3986 to quote URL strings. ++ Now, "~" is included in the set of unreserved characters. ++ ++ string and safe may be either str or bytes objects. encoding and errors ++ must not be specified if string is a bytes object. ++ ++ The optional encoding and errors parameters specify how to deal with ++ non-ASCII characters, as accepted by the str.encode method. ++ By default, encoding='utf-8' (characters are encoded with UTF-8), and ++ errors='strict' (unsupported characters raise a UnicodeEncodeError). ++ """ ++ if isinstance(string, str): ++ if not string: ++ return string ++ if encoding is None: ++ encoding = 'utf-8' ++ if errors is None: ++ errors = 'strict' ++ string = string.encode(encoding, errors) ++ else: ++ if encoding is not None: ++ raise TypeError("quote() doesn't support 'encoding' for bytes") ++ if errors is not None: ++ raise TypeError("quote() doesn't support 'errors' for bytes") ++ return quote_from_bytes(string, safe) ++ ++def quote_plus(string, safe='', encoding=None, errors=None): ++ """Like quote(), but also replace ' ' with '+', as required for quoting ++ HTML form values. Plus signs in the original string are escaped unless ++ they are included in safe. 
It also does not have safe default to '/'. ++ """ ++ # Check if ' ' in string, where string may either be a str or bytes. If ++ # there are no spaces, the regular quote will produce the right answer. ++ if ((isinstance(string, str) and ' ' not in string) or ++ (isinstance(string, bytes) and b' ' not in string)): ++ return quote(string, safe, encoding, errors) ++ if isinstance(safe, str): ++ space = ' ' ++ else: ++ space = b' ' ++ string = quote(string, safe + space, encoding, errors) ++ return string.replace(' ', '+') ++ ++def quote_from_bytes(bs, safe='/'): ++ """Like quote(), but accepts a bytes object rather than a str, and does ++ not perform string-to-bytes encoding. It always returns an ASCII string. ++ quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f' ++ """ ++ if not isinstance(bs, (bytes, bytearray)): ++ raise TypeError("quote_from_bytes() expected bytes") ++ if not bs: ++ return '' ++ if isinstance(safe, str): ++ # Normalize 'safe' by converting to bytes and removing non-ASCII chars ++ safe = safe.encode('ascii', 'ignore') ++ else: ++ safe = bytes([c for c in safe if c < 128]) ++ if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): ++ return bs.decode() ++ try: ++ quoter = _safe_quoters[safe] ++ except KeyError: ++ _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ ++ return ''.join([quoter(char) for char in bs]) ++ ++def urlencode(query, doseq=False, safe='', encoding=None, errors=None, ++ quote_via=quote_plus): ++ """Encode a dict or sequence of two-element tuples into a URL query string. ++ ++ If any values in the query arg are sequences and doseq is true, each ++ sequence element is converted to a separate parameter. ++ ++ If the query arg is a sequence of two-element tuples, the order of the ++ parameters in the output will match the order of parameters in the ++ input. ++ ++ The components of a query arg may each be either a string or a bytes type. ++ ++ The safe, encoding, and errors parameters are passed down to the function ++ specified by quote_via (encoding and errors only if a component is a str). ++ """ ++ ++ if hasattr(query, "items"): ++ query = query.items() ++ else: ++ # It's a bother at times that strings and string-like objects are ++ # sequences. ++ try: ++ # non-sequence items should not work with len() ++ # non-empty strings will fail this ++ if len(query) and not isinstance(query[0], tuple): ++ raise TypeError ++ # Zero-length sequences of all types will get here and succeed, ++ # but that's a minor nit. Since the original implementation ++ # allowed empty dicts that type of behavior probably should be ++ # preserved for consistency ++ except TypeError: ++ ty, va, tb = sys.exc_info() ++ raise TypeError("not a valid non-string sequence " ++ "or mapping object").with_traceback(tb) ++ ++ l = [] ++ if not doseq: ++ for k, v in query: ++ if isinstance(k, bytes): ++ k = quote_via(k, safe) ++ else: ++ k = quote_via(str(k), safe, encoding, errors) ++ ++ if isinstance(v, bytes): ++ v = quote_via(v, safe) ++ else: ++ v = quote_via(str(v), safe, encoding, errors) ++ l.append(k + '=' + v) ++ else: ++ for k, v in query: ++ if isinstance(k, bytes): ++ k = quote_via(k, safe) ++ else: ++ k = quote_via(str(k), safe, encoding, errors) ++ ++ if isinstance(v, bytes): ++ v = quote_via(v, safe) ++ l.append(k + '=' + v) ++ elif isinstance(v, str): ++ v = quote_via(v, safe, encoding, errors) ++ l.append(k + '=' + v) ++ else: ++ try: ++ # Is this a sufficient test for sequence-ness? 
++ x = len(v) ++ except TypeError: ++ # not a sequence ++ v = quote_via(str(v), safe, encoding, errors) ++ l.append(k + '=' + v) ++ else: ++ # loop over the sequence ++ for elt in v: ++ if isinstance(elt, bytes): ++ elt = quote_via(elt, safe) ++ else: ++ elt = quote_via(str(elt), safe, encoding, errors) ++ l.append(k + '=' + elt) ++ return '&'.join(l) ++ ++ ++def to_bytes(url): ++ warnings.warn("urllib.parse.to_bytes() is deprecated as of 3.8", ++ DeprecationWarning, stacklevel=2) ++ return _to_bytes(url) ++ ++ ++def _to_bytes(url): ++ """to_bytes(u"URL") --> 'URL'.""" ++ # Most URL schemes require ASCII. If that changes, the conversion ++ # can be relaxed. ++ # XXX get rid of to_bytes() ++ if isinstance(url, str): ++ try: ++ url = url.encode("ASCII").decode() ++ except UnicodeError: ++ raise UnicodeError("URL " + repr(url) + ++ " contains non-ASCII characters") ++ return url ++ ++ ++def unwrap(url): ++ """Transform a string like '<URL:scheme://host/path>' into 'scheme://host/path'. ++ ++ The string is returned unchanged if it's not a wrapped URL. ++ """ ++ url = str(url).strip() ++ if url[:1] == '<' and url[-1:] == '>': ++ url = url[1:-1].strip() ++ if url[:4] == 'URL:': ++ url = url[4:].strip() ++ return url ++ ++ ++def splittype(url): ++ warnings.warn("urllib.parse.splittype() is deprecated as of 3.8, " ++ "use urllib.parse.urlparse() instead", ++ DeprecationWarning, stacklevel=2) ++ return _splittype(url) ++ ++ ++_typeprog = None ++def _splittype(url): ++ """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" ++ global _typeprog ++ if _typeprog is None: ++ _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL) ++ ++ match = _typeprog.match(url) ++ if match: ++ scheme, data = match.groups() ++ return scheme.lower(), data ++ return None, url ++ ++ ++def splithost(url): ++ warnings.warn("urllib.parse.splithost() is deprecated as of 3.8, " ++ "use urllib.parse.urlparse() instead", ++ DeprecationWarning, stacklevel=2) ++ return _splithost(url) ++ ++ ++_hostprog = None ++def _splithost(url): ++ """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" ++ global _hostprog ++ if _hostprog is None: ++ _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL) ++ ++ match = _hostprog.match(url) ++ if match: ++ host_port, path = match.groups() ++ if path and path[0] != '/': ++ path = '/' + path ++ return host_port, path ++ return None, url ++ ++ ++def splituser(host): ++ warnings.warn("urllib.parse.splituser() is deprecated as of 3.8, " ++ "use urllib.parse.urlparse() instead", ++ DeprecationWarning, stacklevel=2) ++ return _splituser(host) ++ ++ ++def _splituser(host): ++ """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" ++ user, delim, host = host.rpartition('@') ++ return (user if delim else None), host ++ ++ ++def splitpasswd(user): ++ warnings.warn("urllib.parse.splitpasswd() is deprecated as of 3.8, " ++ "use urllib.parse.urlparse() instead", ++ DeprecationWarning, stacklevel=2) ++ return _splitpasswd(user) ++ ++ ++def _splitpasswd(user): ++ """splitpasswd('user:passwd') -> 'user', 'passwd'.""" ++ user, delim, passwd = user.partition(':') ++ return user, (passwd if delim else None) ++ ++ ++def splitport(host): ++ warnings.warn("urllib.parse.splitport() is deprecated as of 3.8, " ++ "use urllib.parse.urlparse() instead", ++ DeprecationWarning, stacklevel=2) ++ return _splitport(host) ++ ++ ++# splittag('/path#tag') --> '/path', 'tag' ++_portprog = None ++def _splitport(host): ++ """splitport('host:port') --> 'host', 'port'.""" ++ global _portprog ++ if _portprog is None: ++
_portprog = re.compile('(.*):([0-9]*)', re.DOTALL) ++ ++ match = _portprog.fullmatch(host) ++ if match: ++ host, port = match.groups() ++ if port: ++ return host, port ++ return host, None ++ ++ ++def splitnport(host, defport=-1): ++ warnings.warn("urllib.parse.splitnport() is deprecated as of 3.8, " ++ "use urllib.parse.urlparse() instead", ++ DeprecationWarning, stacklevel=2) ++ return _splitnport(host, defport) ++ ++ ++def _splitnport(host, defport=-1): ++ """Split host and port, returning numeric port. ++ Return given default port if no ':' found; defaults to -1. ++ Return numerical port if a valid number are found after ':'. ++ Return None if ':' but not a valid number.""" ++ host, delim, port = host.rpartition(':') ++ if not delim: ++ host = port ++ elif port: ++ try: ++ nport = int(port) ++ except ValueError: ++ nport = None ++ return host, nport ++ return host, defport ++ ++ ++def splitquery(url): ++ warnings.warn("urllib.parse.splitquery() is deprecated as of 3.8, " ++ "use urllib.parse.urlparse() instead", ++ DeprecationWarning, stacklevel=2) ++ return _splitquery(url) ++ ++ ++def _splitquery(url): ++ """splitquery('/path?query') --> '/path', 'query'.""" ++ path, delim, query = url.rpartition('?') ++ if delim: ++ return path, query ++ return url, None ++ ++ ++def splittag(url): ++ warnings.warn("urllib.parse.splittag() is deprecated as of 3.8, " ++ "use urllib.parse.urlparse() instead", ++ DeprecationWarning, stacklevel=2) ++ return _splittag(url) ++ ++ ++def _splittag(url): ++ """splittag('/path#tag') --> '/path', 'tag'.""" ++ path, delim, tag = url.rpartition('#') ++ if delim: ++ return path, tag ++ return url, None ++ ++ ++def splitattr(url): ++ warnings.warn("urllib.parse.splitattr() is deprecated as of 3.8, " ++ "use urllib.parse.urlparse() instead", ++ DeprecationWarning, stacklevel=2) ++ return _splitattr(url) ++ ++ ++def _splitattr(url): ++ """splitattr('/path;attr1=value1;attr2=value2;...') -> ++ '/path', ['attr1=value1', 'attr2=value2', ...].""" ++ words = url.split(';') ++ return words[0], words[1:] ++ ++ ++def splitvalue(attr): ++ warnings.warn("urllib.parse.splitvalue() is deprecated as of 3.8, " ++ "use urllib.parse.parse_qsl() instead", ++ DeprecationWarning, stacklevel=2) ++ return _splitvalue(attr) ++ ++ ++def _splitvalue(attr): ++ """splitvalue('attr=value') --> 'attr', 'value'.""" ++ attr, delim, value = attr.partition('=') ++ return attr, (value if delim else None) +diff --git a/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst b/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst +new file mode 100644 +index 0000000..81e5904 +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst +@@ -0,0 +1,2 @@ ++Add checks to ensure that ``[`` bracketed ``]`` hosts found by ++:func:`urllib.parse.urlsplit` are of IPv6 or IPvFuture format. 
+-- +2.34.1 + diff --git a/SPECS/python3/python3.spec b/SPECS/python3/python3.spec index f11f6656684..bb6a2d2c589 100644 --- a/SPECS/python3/python3.spec +++ b/SPECS/python3/python3.spec @@ -12,7 +12,7 @@ Summary: A high-level scripting language Name: python3 Version: 3.9.19 -Release: 6%{?dist} +Release: 7%{?dist} License: PSF Vendor: Microsoft Corporation Distribution: Mariner @@ -27,6 +27,7 @@ Patch3: CVE-2024-7592.patch Patch4: CVE-2024-6232.patch Patch5: CVE-2024-8088.patch Patch6: CVE-2024-4032.patch +Patch7: CVE-2024-11168.patch # Patch for setuptools, resolved in 65.5.1 Patch1000: CVE-2022-40897.patch Patch1001: CVE-2024-6345.patch @@ -171,6 +172,7 @@ The test package contains all regression tests for Python as well as the modules %patch4 -p1 %patch5 -p1 %patch6 -p1 +%patch7 -p1 %build # Remove GCC specs and build environment linker scripts @@ -326,6 +328,9 @@ rm -rf %{buildroot}%{_bindir}/__pycache__ %{_libdir}/python%{majmin}/test/* %changelog +* Fri Nov 15 2024 Ankita Pareek - 3.9.19-7 +- Address CVE-2024-11168 + * Tue Oct 01 2024 Ankita Pareek - 3.9.19-6 - Patch for CVE-2024-4032 diff --git a/toolkit/resources/manifests/package/pkggen_core_aarch64.txt b/toolkit/resources/manifests/package/pkggen_core_aarch64.txt index b2dd9729a96..711f8febee1 100644 --- a/toolkit/resources/manifests/package/pkggen_core_aarch64.txt +++ b/toolkit/resources/manifests/package/pkggen_core_aarch64.txt @@ -237,10 +237,10 @@ ca-certificates-base-2.0.0-18.cm2.noarch.rpm ca-certificates-2.0.0-18.cm2.noarch.rpm dwz-0.14-2.cm2.aarch64.rpm unzip-6.0-20.cm2.aarch64.rpm -python3-3.9.19-6.cm2.aarch64.rpm -python3-devel-3.9.19-6.cm2.aarch64.rpm -python3-libs-3.9.19-6.cm2.aarch64.rpm -python3-setuptools-3.9.19-6.cm2.noarch.rpm +python3-3.9.19-7.cm2.aarch64.rpm +python3-devel-3.9.19-7.cm2.aarch64.rpm +python3-libs-3.9.19-7.cm2.aarch64.rpm +python3-setuptools-3.9.19-7.cm2.noarch.rpm python3-pygments-2.4.2-7.cm2.noarch.rpm which-2.21-8.cm2.aarch64.rpm libselinux-3.2-1.cm2.aarch64.rpm diff --git a/toolkit/resources/manifests/package/pkggen_core_x86_64.txt b/toolkit/resources/manifests/package/pkggen_core_x86_64.txt index f6f00514a00..73d62bbb7a3 100644 --- a/toolkit/resources/manifests/package/pkggen_core_x86_64.txt +++ b/toolkit/resources/manifests/package/pkggen_core_x86_64.txt @@ -237,10 +237,10 @@ ca-certificates-base-2.0.0-18.cm2.noarch.rpm ca-certificates-2.0.0-18.cm2.noarch.rpm dwz-0.14-2.cm2.x86_64.rpm unzip-6.0-20.cm2.x86_64.rpm -python3-3.9.19-6.cm2.x86_64.rpm -python3-devel-3.9.19-6.cm2.x86_64.rpm -python3-libs-3.9.19-6.cm2.x86_64.rpm -python3-setuptools-3.9.19-6.cm2.noarch.rpm +python3-3.9.19-7.cm2.x86_64.rpm +python3-devel-3.9.19-7.cm2.x86_64.rpm +python3-libs-3.9.19-7.cm2.x86_64.rpm +python3-setuptools-3.9.19-7.cm2.noarch.rpm python3-pygments-2.4.2-7.cm2.noarch.rpm which-2.21-8.cm2.x86_64.rpm libselinux-3.2-1.cm2.x86_64.rpm diff --git a/toolkit/resources/manifests/package/toolchain_aarch64.txt b/toolkit/resources/manifests/package/toolchain_aarch64.txt index d3af0a93a6f..9b64e496b96 100644 --- a/toolkit/resources/manifests/package/toolchain_aarch64.txt +++ b/toolkit/resources/manifests/package/toolchain_aarch64.txt @@ -510,28 +510,28 @@ procps-ng-devel-3.3.17-2.cm2.aarch64.rpm procps-ng-lang-3.3.17-2.cm2.aarch64.rpm pyproject-rpm-macros-1.0.0~rc1-4.cm2.noarch.rpm python-markupsafe-debuginfo-2.1.0-1.cm2.aarch64.rpm -python3-3.9.19-6.cm2.aarch64.rpm +python3-3.9.19-7.cm2.aarch64.rpm python3-audit-3.0.6-8.cm2.aarch64.rpm python3-cracklib-2.9.7-5.cm2.aarch64.rpm -python3-curses-3.9.19-6.cm2.aarch64.rpm 
+python3-curses-3.9.19-7.cm2.aarch64.rpm python3-Cython-0.29.33-2.cm2.aarch64.rpm -python3-debuginfo-3.9.19-6.cm2.aarch64.rpm -python3-devel-3.9.19-6.cm2.aarch64.rpm +python3-debuginfo-3.9.19-7.cm2.aarch64.rpm +python3-devel-3.9.19-7.cm2.aarch64.rpm python3-gpg-1.16.0-2.cm2.aarch64.rpm python3-jinja2-3.0.3-4.cm2.noarch.rpm python3-libcap-ng-0.8.2-2.cm2.aarch64.rpm -python3-libs-3.9.19-6.cm2.aarch64.rpm +python3-libs-3.9.19-7.cm2.aarch64.rpm python3-libxml2-2.10.4-4.cm2.aarch64.rpm python3-lxml-4.9.1-1.cm2.aarch64.rpm python3-magic-5.40-2.cm2.noarch.rpm python3-markupsafe-2.1.0-1.cm2.aarch64.rpm python3-newt-0.52.21-5.cm2.aarch64.rpm -python3-pip-3.9.19-6.cm2.noarch.rpm +python3-pip-3.9.19-7.cm2.noarch.rpm python3-pygments-2.4.2-7.cm2.noarch.rpm python3-rpm-4.18.0-4.cm2.aarch64.rpm -python3-setuptools-3.9.19-6.cm2.noarch.rpm -python3-test-3.9.19-6.cm2.aarch64.rpm -python3-tools-3.9.19-6.cm2.aarch64.rpm +python3-setuptools-3.9.19-7.cm2.noarch.rpm +python3-test-3.9.19-7.cm2.aarch64.rpm +python3-tools-3.9.19-7.cm2.aarch64.rpm readline-8.1-1.cm2.aarch64.rpm readline-debuginfo-8.1-1.cm2.aarch64.rpm readline-devel-8.1-1.cm2.aarch64.rpm diff --git a/toolkit/resources/manifests/package/toolchain_x86_64.txt b/toolkit/resources/manifests/package/toolchain_x86_64.txt index 853fc9b4307..810ce79881b 100644 --- a/toolkit/resources/manifests/package/toolchain_x86_64.txt +++ b/toolkit/resources/manifests/package/toolchain_x86_64.txt @@ -516,28 +516,28 @@ procps-ng-devel-3.3.17-2.cm2.x86_64.rpm procps-ng-lang-3.3.17-2.cm2.x86_64.rpm pyproject-rpm-macros-1.0.0~rc1-4.cm2.noarch.rpm python-markupsafe-debuginfo-2.1.0-1.cm2.x86_64.rpm -python3-3.9.19-6.cm2.x86_64.rpm +python3-3.9.19-7.cm2.x86_64.rpm python3-audit-3.0.6-8.cm2.x86_64.rpm python3-cracklib-2.9.7-5.cm2.x86_64.rpm -python3-curses-3.9.19-6.cm2.x86_64.rpm +python3-curses-3.9.19-7.cm2.x86_64.rpm python3-Cython-0.29.33-2.cm2.x86_64.rpm -python3-debuginfo-3.9.19-6.cm2.x86_64.rpm -python3-devel-3.9.19-6.cm2.x86_64.rpm +python3-debuginfo-3.9.19-7.cm2.x86_64.rpm +python3-devel-3.9.19-7.cm2.x86_64.rpm python3-gpg-1.16.0-2.cm2.x86_64.rpm python3-jinja2-3.0.3-4.cm2.noarch.rpm python3-libcap-ng-0.8.2-2.cm2.x86_64.rpm -python3-libs-3.9.19-6.cm2.x86_64.rpm +python3-libs-3.9.19-7.cm2.x86_64.rpm python3-libxml2-2.10.4-4.cm2.x86_64.rpm python3-lxml-4.9.1-1.cm2.x86_64.rpm python3-magic-5.40-2.cm2.noarch.rpm python3-markupsafe-2.1.0-1.cm2.x86_64.rpm python3-newt-0.52.21-5.cm2.x86_64.rpm -python3-pip-3.9.19-6.cm2.noarch.rpm +python3-pip-3.9.19-7.cm2.noarch.rpm python3-pygments-2.4.2-7.cm2.noarch.rpm python3-rpm-4.18.0-4.cm2.x86_64.rpm -python3-setuptools-3.9.19-6.cm2.noarch.rpm -python3-test-3.9.19-6.cm2.x86_64.rpm -python3-tools-3.9.19-6.cm2.x86_64.rpm +python3-setuptools-3.9.19-7.cm2.noarch.rpm +python3-test-3.9.19-7.cm2.x86_64.rpm +python3-tools-3.9.19-7.cm2.x86_64.rpm readline-8.1-1.cm2.x86_64.rpm readline-debuginfo-8.1-1.cm2.x86_64.rpm readline-devel-8.1-1.cm2.x86_64.rpm
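
For illustration only (not part of the applied patch): a minimal sketch of the behavior the _check_bracketed_host hunk above enforces, assuming an interpreter built with this patch. The bracketed host literals below are hypothetical examples chosen to exercise each branch of the check, not values taken from the patch.

    from urllib.parse import urlsplit

    # Bracketed IPv6 literals still parse as before.
    print(urlsplit('http://[::1]:8080/path').hostname)    # '::1'

    # RFC 3986 IPvFuture literals ("v" 1*HEXDIG "." suffix) are accepted,
    # since they match the r"\Av[a-fA-F0-9]+\..+\Z" pattern added above.
    print(urlsplit('http://[v1a.example]/path').hostname)  # 'v1a.example'

    # Anything else in brackets, such as an IPv4 literal, now raises ValueError.
    try:
        urlsplit('http://[127.0.0.1]/path')
    except ValueError as exc:
        print(exc)  # 'An IPv4 address cannot be in brackets'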