@@ -654,14 +654,73 @@ def test_urlsplit_remove_unsafe_bytes(self):
654
654
self .assertEqual (p .scheme , "http" )
655
655
self .assertEqual (p .geturl (), "http://www.python.org/javascript:alert('msg')/?query=something#fragment" )
656
656
657
+ def test_urlsplit_strip_url (self ):
658
+ noise = bytes (range (0 , 0x20 + 1 ))
659
+ base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
660
+
661
+ url = noise .decode ("utf-8" ) + base_url
662
+ p = urllib .parse .urlsplit (url )
663
+ self .assertEqual (p .scheme , "http" )
664
+ self .assertEqual (p .netloc , "User:Pass@www.python.org:080" )
665
+ self .assertEqual (p .path , "/doc/" )
666
+ self .assertEqual (p .query , "query=yes" )
667
+ self .assertEqual (p .fragment , "frag" )
668
+ self .assertEqual (p .username , "User" )
669
+ self .assertEqual (p .password , "Pass" )
670
+ self .assertEqual (p .hostname , "www.python.org" )
671
+ self .assertEqual (p .port , 80 )
672
+ self .assertEqual (p .geturl (), base_url )
673
+
674
+ url = noise + base_url .encode ("utf-8" )
675
+ p = urllib .parse .urlsplit (url )
676
+ self .assertEqual (p .scheme , b"http" )
677
+ self .assertEqual (p .netloc , b"User:Pass@www.python.org:080" )
678
+ self .assertEqual (p .path , b"/doc/" )
679
+ self .assertEqual (p .query , b"query=yes" )
680
+ self .assertEqual (p .fragment , b"frag" )
681
+ self .assertEqual (p .username , b"User" )
682
+ self .assertEqual (p .password , b"Pass" )
683
+ self .assertEqual (p .hostname , b"www.python.org" )
684
+ self .assertEqual (p .port , 80 )
685
+ self .assertEqual (p .geturl (), base_url .encode ("utf-8" ))
686
+
687
+ # Test that trailing space is preserved as some applications rely on
688
+ # this within query strings.
689
+ query_spaces_url = "https://www.python.org:88/doc/?query= "
690
+ p = urllib .parse .urlsplit (noise .decode ("utf-8" ) + query_spaces_url )
691
+ self .assertEqual (p .scheme , "https" )
692
+ self .assertEqual (p .netloc , "www.python.org:88" )
693
+ self .assertEqual (p .path , "/doc/" )
694
+ self .assertEqual (p .query , "query= " )
695
+ self .assertEqual (p .port , 88 )
696
+ self .assertEqual (p .geturl (), query_spaces_url )
697
+
698
+ p = urllib .parse .urlsplit ("www.pypi.org " )
699
+ # That "hostname" gets considered a "path" due to the
700
+ # trailing space and our existing logic... YUCK...
701
+ # and re-assembles via geturl aka unurlsplit into the original.
702
+ # django.core.validators.URLValidator (at least through v3.2) relies on
703
+ # this, for better or worse, to catch it in a ValidationError via its
704
+ # regular expressions.
705
+ # Here we test the basic round trip concept of such a trailing space.
706
+ self .assertEqual (urllib .parse .urlunsplit (p ), "www.pypi.org " )
707
+
708
+ # with scheme as cache-key
709
+ url = "//www.python.org/"
710
+ scheme = noise .decode ("utf-8" ) + "https" + noise .decode ("utf-8" )
711
+ for _ in range (2 ):
712
+ p = urllib .parse .urlsplit (url , scheme = scheme )
713
+ self .assertEqual (p .scheme , "https" )
714
+ self .assertEqual (p .geturl (), "https://www.python.org/" )
715
+
657
716
def test_attributes_bad_port (self ):
658
717
"""Check handling of invalid ports."""
659
718
for bytes in (False , True ):
660
719
for parse in (urllib .parse .urlsplit , urllib .parse .urlparse ):
661
720
for port in ("foo" , "1.5" , "-1" , "0x10" , "-0" , "1_1" , " 1" , "1 " , "६" ):
662
721
with self .subTest (bytes = bytes , parse = parse , port = port ):
663
722
netloc = "www.example.net:" + port
664
- url = "http://" + netloc
723
+ url = "http://" + netloc + "/"
665
724
if bytes :
666
725
if netloc .isascii () and port .isascii ():
667
726
netloc = netloc .encode ("ascii" )
0 commit comments