
    i>                        d dl Z d dlmZ g dZeg dz   Zg ddddd	d
dddddddddddddddddddddd d!d" e j
                  d#e j                  j                         $       e j
                  d%e j                  j                         $      d&d'd(d)d*d+d,d-d.d/d0d1d2Zg d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdT e j
                  dUe j                  j                         $      dVdWZ	g dXZ
g dYZe j                  j                  dZe      d[        Ze j                  j                  dZe	      d\        Ze j                  j                  dZe      d]        Ze j                  j                  dZe      d^        Ze j                  j"                  e j                  j                  d_e
      e j                  j                  dZe      d`                      Ze j                  j"                  e j                  j                  dae      e j                  j                  dZe      db                      Ze j                  j"                  e j                  j                  d_e
      e j                  j                  dae      e j                  j                  dZe      dc                             Ze j                  j"                  e j                  j                  dde
      e j                  j                  dee
      e j                  j                  dZe      df                             Ze j                  j"                  e j                  j                  dge      e j                  j                  dhe      e j                  j                  dZe      di                             Zy)j    N)BASE_EXCEPTIONS)zhttp://www.nytimes.com/2016/04/20/us/politics/new-york-primary-preview.html?hp&action=click&pgtype=Homepage&clickSource=story-heading&module=a-lede-package-region&region=top-news&WT.nav=top-news&_r=0zwww.red-stars.comzmailto:foo.bar@baz.com)zmailto:foo-bar@baz-co.comz$mailto:foo-bar@baz-co.com?subject=hizwww.google.com?q=google&http://foo.com/blah_(wikipedia)#cite-1zhttp://foo.com/blah_blahzhttp://BlahBlah.com/Blah_Blahzhttp://foo.com/blah_blah/z%http://www.example.com/wpstyle/?p=364z1https://www.example.com/foo/?bar=baz&inga=42&quuxz'http://userid:password@example.com:8080z(http://userid:password@example.com:8080/zhttp://userid@example.comzhttp://userid@example.com/zhttp://userid@example.com:8080zhttp://userid@example.com:8080/z"http://userid:password@example.comz#http://userid:password@example.com/zhttp://142.42.1.1/zhttp://142.42.1.1:8080/r   z+http://foo.com/blah_(wikipedia)_blah#cite-1u&   http://foo.com/unicode_(✪)_in_parensz'http://foo.com/(something)?after=parensz/http://code.google.com/events/#&product=browserzhttp://j.mpzftp://foo.bar/bazz,http://foo.bar/?q=Test%20URL-encoded%20stuffz2http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.comzhttp://1337.netzhttp://a.b-c.dezhttp://223.255.255.254zhttp://a.b--c.de/z+ssh://login@server.com:12345/repository.gitz&svn+ssh://user@ssh.yourdomain.com/pathz8chrome://extensions/?id=mhjfbmdgcfjbbpaeojofohoefgiehjai)marksz3chrome-extension://mhjfbmdgcfjbbpaeojofohoefgiehjaiz$http://foo.com/blah_blah_(wikipedia)z,http://foo.com/blah_blah_(wikipedia)_(again)zhttp://www.foo.co.ukzhttp://www.foo.co.uk/zhttp://www.foo.co.uk/blah/blahu   http://⌘.wsu   http://⌘.ws/u   http://☺.damowmow.com/u   http://✪df.ws/123u   http://➡.ws/䨹u   http://مثال.إختبارu   http://例子.测试u/   http://उदाहरण.परीक्षाzhttp://zhttp://.z	http://..z
http://../zhttp://?z	http://??z
http://??/zhttp://#z	http://##z
http://##/z)http://foo.bar?q=Spaces should be encodedz//z//az///az///z	http:///azrdar://1234zh://testzhttp:// shouldfail.comz:// should failzhttp://foo.bar/foo(bar)baz quuxzhttp://-error-.invalid/zhttp://a.b-.cozhttp://0.0.0.0zhttp://10.1.1.0zhttp://10.1.1.255zhttp://224.1.1.1zhttp://123.123.123zhttp://3628126748zhttp://.www.foo.bar/zhttp://.www.foo.bar./zhttp://10.1.1.1zNASDAQ:GOOGzhttp://-a.b.cozfoo.comzhttp://1.1.1.1.1zhttp://www.foo.bar./)(">)r   :r   urlc                 *    | j                  |      J y N	url_matchen_tokenizerr
   s     p/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/tokenizer/test_urls.pytest_should_matchr   y   s    !!#&222    c                 *    | j                  |      J y r   r   r   s     r   test_should_not_matchr   ~   s    !!#&...r   c                 \     | |      }t        |      dk(  sJ |d   j                  |k(  sJ y )N   r   lentext	tokenizerr
   tokenss      r   !test_tokenizer_handles_simple_urlr      s3    s^Fv;!!9>>S   r   c                      | d|z   dz         }t        |      dk(  sJ |d   j                  dk(  sJ |d   j                  |k(  sJ |d   j                  dk(  sJ y )Nr   )   r   r      r   r   s      r   *test_tokenizer_handles_simple_surround_urlr#      sh    sSy3'Fv;!!9>>S   !9>>S   !9>>S   r   prefixc                      | ||z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y Nr"   r   r   r   )r   r$   r
   r   s       r   #test_tokenizer_handles_prefixed_urlr'      sP     v|$Fv;!!9>>V###!9>>S   r   suffixc                      | ||z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y r&   r   )r   r
   r(   r   s       r   #test_tokenizer_handles_suffixed_urlr*      sP     sV|$Fv;!!9>>S   !9>>V###r   c                      | ||z   |z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y Nr!   r   r   r"   r   )r   r$   r(   r
   r   s        r   #test_tokenizer_handles_surround_urlr-      sk    
 v|f,-Fv;!!9>>V###!9>>S   !9>>V###r   prefix1prefix2c                      | ||z   |z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y r,   r   )r   r.   r/   r
   r   s        r   %test_tokenizer_handles_two_prefix_urlr1      sl    
 w(3./Fv;!!9>>W$$$!9>>W$$$!9>>S   r   suffix1suffix2c                 F    | ||z   |z         }||z   t         v r<t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  ||z   k(  sJ y t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y )Nr"   r   r   r!   )r   r   r   )r   r2   r3   r
   r   s        r   %test_tokenizer_handles_two_suffix_urlr5      s    
 sW}w./FO+6{aay~~$$$ay~~7!22226{aay~~$$$ay~~(((ay~~(((r   )pytestspacy.lang.tokenizer_exceptionsr   
URLS_BASIC	URLS_FULLparammarkxfailURLS_SHOULD_MATCHURLS_SHOULD_NOT_MATCHPREFIXESSUFFIXESparametrizer   r   r   r#   slowr'   r*   r-   r1   r5    r   r   <module>rD      s]    ;
   	33#3  3 ,	3
 83 .3 /3  3 !3 %3 &3 )3 *3 3 3  -!3" 2#3$ -%3& .'3( 6)3* +3, -3. 3/30 9132 334 536 738 93: 2;3< -=3> FLLBkk!?3F FLL=V[[EVEVEXG3L +M3N 3O3P Q3R S3T %U3V W3X Y3Z [3\ ]3^ _3` #a3b c3d 6e3 j&&& & 	&
 & & & & & & 0& 	& 
& & 
&  !&" #&$ %&& '&( )&* &+&, -&. /&0 1&2 3&4 5&6 7&8 9&: ;&< =&> ?&@ A&B C&D E&F FLL&++"3"3"56G&H I&J K& T    123 33  56/ 7/ 
+! ,! 
+! ,! 8,	*! + - ! 8,	*$ + - $ 8,8,	*$ + - - $ H-H-	*! + . . ! H-H-	*
) + . . 
)r   