
    iF                     <   d dl Z d dlmZ d dlmZ g dZg dZg dZe j                  j                  dg d      d	        Z
e j                  j                  d
e      e j                  j                  ddg      d               Ze j                  j                  d
e      e j                  j                  ddg      d               Ze j                  j                  d
e      e j                  j                  ddg      e j                  j                  ddg      d                      Ze j                  j                  d
e      e j                  j                  ddg      e j                  j                  ddg      d                      Ze j                  j                  d
e      e j                  j                  ddg      d               Ze j                  j                  d
e      e j                  j                  ddg      d               Ze j                  j                  ddg      d        Ze j                  j                  ddg      d        Ze j                  j                  de      e j                  j                  ddg      d               Ze j                  j                  de      e j                  j                  ddg      e j                  j                  ddg      d                      Ze j                  j                  ddg      d         Zd! Zy)"    N)TOKENIZER_PREFIXES)compile_prefix_regex)([{*))]}r   ))r   r	   )r   r
   )r   r   )r   r   text)r   z((<c                 F     | |      }t        |      t        |      k(  sJ y Nlenen_tokenizerr   tokenss      o/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/lang/en/test_punct.py$test_en_tokenizer_handles_only_punctr      s"    $Fv;#d)###    punctHelloc                      | ||z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y N   r      r   r   r   r   r   r   s       r   #test_en_tokenizer_splits_open_punctr       sP     %$,'Fv;!!9>>U"""!9>>T!!!r   c                      | ||z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y r   r   r   s       r   $test_en_tokenizer_splits_close_punctr"      sP     $,'Fv;!!9>>T!!!!9>>U"""r   	punct_add`c                      | ||z   |z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y N   r   r   r   r   r   r   r#   r   r   s        r   ,test_en_tokenizer_splits_two_diff_open_punctr)   #   sl     %)+d23Fv;!!9>>U"""!9>>Y&&&!9>>T!!!r   'c                      | ||z   |z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y r&   r   r(   s        r   -test_en_tokenizer_splits_two_diff_close_punctr,   .   sk     $,23Fv;!!9>>T!!!!9>>U"""!9>>Y&&&r   c                      | ||z   |z   |z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y )N   r   r'   r   r   s       r   (test_en_tokenizer_splits_same_open_punctr/   9   sZ     %%-%/$67Fv;!!9>>U"""!9>>T!!!r   c                      | ||z   |z   |z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y )Nr.   r   r   r   r   s       r   )test_en_tokenizer_splits_same_close_punctr1   B   sZ     $,.67Fv;!!9>>T!!!!9>>U"""r   z'Thec                 \     | |      }t        |      dk(  sJ |d   j                  dk(  sJ y )Nr   r   r*   r   r   s      r   )test_en_tokenizer_splits_open_appostropher3   K   s4    $Fv;!!9>>S   r   zHello''c                 d     | |      }t        |      dk(  sJ  | d      }t        |      dk(  sJ y )Nr   z''r   r   )r   r   r   tokens_puncts       r   )test_en_tokenizer_splits_double_end_quoter6   R   s;    $Fv;!%L|!!!r   zpunct_open,punct_closec                      | ||z   |z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y r&   r   )r   
punct_openpunct_closer   r   s        r   )test_en_tokenizer_splits_open_close_punctr:   Z   sl    
 *t+k9:Fv;!!9>>Z'''!9>>T!!!!9>>[(((r   zpunct_open2,punct_close2)r$   r*   c                     | ||z   |z   |z   |z         }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ |d   j                  |k(  sJ y )N   r   r   r   r'   r.   r   )r   r8   r9   punct_open2punct_close2r   r   s          r    test_en_tokenizer_two_diff_punctr?   f   s     +
2T9KG,VWFv;!!9>>[(((!9>>Z'''!9>>T!!!!9>>[(((!9>>\)))r   z
text,punct)z(can'tr   c                 p    t        t              j                  } ||       }|j                         |k(  sJ y r   )r   r   searchgroup)r   r   en_search_prefixesmatchs       r   (test_en_tokenizer_splits_pre_punct_regexrE   u   s2    -.@AHHt$E;;=E!!!r   c                 X    d} | |      }|t        |      dz
     j                  dk(  sJ y )Nz*(And a 6a.m. run through Washington Park).r   .r   r   s      r   'test_en_tokenizer_splits_bracket_periodrH   |   s2    7D$F#f+/"''3...r   )pytestspacy.lang.punctuationr   
spacy.utilr   
PUNCT_OPENPUNCT_CLOSEPUNCT_PAIREDmarkparametrizer   r    r"   r)   r,   r/   r1   r3   r6   r:   r?   rE   rH    r   r   <module>rR      s    5 +!
"? !12$ 3$
 *-'+" , ." +.'+# , /# *-se,'+" , - ." +.se,'+' , - /' *-'+" , ." +.'+# , /# &*! +! )-" ." 1<@'+) , A) 1<@3j\B'+	* , C A	* '89" :"/r   