
    iA              	          d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 g  ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                               d	 ej                  d
ej                  j                                ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                               dd ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                               d ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                               d ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                                ej                  dej                  j                                ej                  d ej                  j                                ej                  d!ej                  j                               d" ej                  d#ej                  j                                ej                  d$ej                  j                                ej                  d%ej                  j                                ej                  d&ej                  j                                ej                  d'ej                  j                                ej                  d(ej                  j                                ej                  d)ej                  j                                ej                  d*ej                  j                                ej                  d+ej                  j                                ej                  d,ej                  j                                ej                  d-ej                  j                                ej                  d.ej                  j                                ej                  d/ej                  j                                ej                  d0ej                  j                                ej                  d1ej                  j                               Zej                  j                  d2e      d3        Zd4 Zd5 Zej$                  j&                  d=d6ej$                  j(                  d7ed8efd9       Zej                  j0                  ej                  j                  d2e       ej2                   e       :      d2ed;ed8dfd<                     Zy)>    N)	Tokenizer)get_lang_classfr)marksafarbgbnbocacsdadeelenesetfafigahehihrhuiditknlbltlvnbnlplptrosiskslsqsrsvtatetltrtturkmrlangc                 H    t        |              j                  }t        j                  d|  d      }|j                  D ]V  } ||      D cg c]  }|j
                  r|j                   }}|j                  |      D cg c]  }|d   	 }}||k(  rVJ  y c c}w c c}w )Nzspacy.lang.z	.examples   )r   	tokenizerpytestimportorskip	sentencesis_spacetextexplain)r3   r6   examplessentencettokensdebug_tokenss          s/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/tokenizer/test_explain.pytest_tokenizer_explainrC   A   s    $t$&00I""[i#@AH&& &"+H"5HQQZZ!&&HH&/&7&7&AB!BB%%%&HBs   BBBc                 N   t        j                  d      }t        j                  d      }dddigi}t        | ||j                  |j                        } |d      D cg c]  }|j
                   }}|j                  d      D cg c]  }|d   	 }}||k(  sJ y c c}w c c}w )Nz[\.]$z[/]za.ORTH)rulessuffix_searchinfix_finditerza/a.r5   )recompiler   searchfinditerr;   r<   )en_vocab	suffix_reinfix_rerF   r6   r?   r@   explain_tokenss           rB   &test_tokenizer_explain_special_matcherrQ   K   s    

8$Izz&!HVTN#$E&&((	I (/0aff0F0$-$5$5f$=>qad>N>^### 1>s   BB"c                     dddigi}t        | |      }d} ||      D cg c]  }|j                   }}|j                  |      D cg c]  }|d   	 }}||k(  sJ y c c}w c c}w )Nz:]rE   )rF   z: ]r5   )r   r;   r<   )rM   rF   r6   r;   r?   r@   rP   s          rB   1test_tokenizer_explain_special_matcher_whitespacerS   Z   s}    VTN#$EI D'o.aff.F.$-$5$5d$;<qad<N<^### /<s   A	A$drawmax_n_wordsreturnc           	         dj                  g t        j                  D cg c]  }t        j                  |       c}d      }t         | t        j                  j                  d|                  D cg c]M  } | t        j                  j                  d             | t        j                  j                  |            gO }}dj                  |D cg c]  }|D ]  }|  c}}      S c c}w c c}w c c}}w )as  
    Composite strategy for fuzzily generating sentence with varying interpunctation.

    draw (hypothesis.strategies.DrawFn): Protocol for drawing function allowing to fuzzily pick from hypothesis'
                                         strategies.
    max_n_words (int): Max. number of words in generated sentence.
    RETURNS (str): Fuzzily generated sentence.
    |z\s   )	min_value	max_valuer5   )min_size )joinstringpunctuationrI   escaperange
hypothesis
strategiesintegersr;   
from_regex)rT   rU   ppunctuation_and_space_regex_r>   
token_pairtokens           rB   sentence_strategyrl   f   s     #&((<!3!3
4A299Q<
4<e<# &&//!{/ST

  &&++Q+78&&112MNO	
H  88XNz:N%UNUNOO 5 Os   C3
7AC8C=
)r>   r>   c                 H   t        j                  |       j                  }t        j                  dd|      j                         } ||      D cg c]  }|j                   }}|j                  |      D cg c]  }|d   	 }}||k(  sJ | d| d|        yc c}w c c}w )z
    Tests whether output of tokenizer.explain() matches tokenizer output. Input generated by hypothesis.
    lang (str): Language to test.
    text (str): Fuzzily generated sentence to tokenize.
    z\s+r]   r5   z, N)spacyblankr6   rI   substripr;   r<   )r3   r>   r6   r?   r@   rA   s         rB   test_tokenizer_explain_fuzzyrr      s     !;;t,66I vvfc8,224H'12aff2F2"+"3"3H"=>QAaD>L>\!JfXR~Rz#JJ! 3>s   B7B)   )rI   r_   rc   hypothesis.strategiesr7   rn   spacy.tokenizerr   
spacy.utilr   parammarkslow	LANGUAGESparametrizerC   rQ   rS   rd   	compositeDrawFnintstrrl   xfailgivenrr        rB   <module>r      s   	      % %
/FLLV[[--/0/FLLV[[--/0/ FLLV[[--/0/ FLLV[[--/0	/
 	/ FLLV[[--/0/ FLLV[[--/0/ FLLV[[--/0/ FLLV[[--/0/ FLLV[[--/0/ 	/ 	/ FLLV[[--/0/ FLLV[[--/0/ FLLV[[--/0/  FLLV[[--/0!/" 	#/$ FLLV[[--/0%/& FLLV[[--/0'/( FLLV[[--/0)/* FLLV[[--/0+/, 	-/. FLLV[[--/0//0 FLLV[[--/01/2 FLLV[[--/03/4 FLLV[[--/05/6 FLLV[[--/07/8 FLLV[[--/09/: FLLV[[--/0;/< FLLV[[--/0=/> 	?/@ FLLV[[--/0A/B FLLV[[--/0C/D FLLV[[--/0E/F FLLV[[--/0G/H FLLV[[--/0I/J FLLV[[--/0K/L FLLV[[--/0M/N FLLV[[--/0O/P FLLV[[--/0Q/R FLLV[[--/0S/T FLLV[[--/0U/V FLLV[[--/0W/X FLLV[[--/0Y/Z FLLV[[--/0[/\ FLLfkk..01]/	d +& ,&$	$   PJ1188 Ps PSV P !P4 +,./Ks Kc Kd K 0 , Kr   