
    ii                     L   d dl Z d dlZd dlZd dlmZ d dlZd dlmZmZm	Z	 d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZmZmZmZ d dlmZ ddlm Z m!Z! 	 d dl"Z" e"jF                  d        e"jH                  d       d Z&d Z'd Z(d Z)ejT                  d        Z+d Z,d Z-d Z.d Z/d Z0d Z1d Z2d Z3d Z4ejT                  d        Z5ejT                  d        Z6ejT                  d        Z7ejp                  js                  ddd g      d!        Z:ejp                  js                  ddd g      d"        Z;ejp                  js                  ddd g      d#        Z<ejp                  js                  ddd g      d$        Z=ejp                  js                  ddd g      d%        Z>ejp                  js                  ddd g      d&        Z?ejp                  js                  ddd g      d'        Z@ejp                  j                  ejp                  js                  ddd g      d(               ZBd) ZCd* ZDd+ ZEd, ZFd- ZGd. ZHejp                  js                  d/g d0      d1        ZIejp                  js                  d/g d2      d3        ZJejp                  js                  d4d5dd6d7geeg      d8        ZKd9 ZLejp                  js                  ddd g      d:        ZMd; ZNejp                  j                   eP e	       e       d<=      d>        ZQd? ZRd@ ZSy# e%$ r Y w xY w)A    N)mock)CupyOpsNumpyOpsget_current_ops)GermanEnglish)Language)Scorer)DocSpan)Example)find_matching_languageignore_errorraise_errorregistry)Vocab   )add_vecs_to_vocabassert_docs_equalc                 8    d| j                   v rt        d      | S )N2zno dice)text
ValueErrordocs    j/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/test_language.pyevil_componentr      s    
chh##J    c                 P    | j                   j                  d      s
d| d   _        | S )N4T)r   
startswithis_sent_startr   s    r   perhaps_set_sentencesr%   %   s%    88s# $BJr   c                 >    | j                  d      st        d      | S )N
SENT_STARTzno sents)has_annotationr   r   s    r   assert_sents_errorr)   +   s     l+$$Jr   c                 R    t        j                  d      }|j                  d|        y )NspacyzTrouble with component %s.)logging	getLoggerwarning)	proc_nameprocdocseloggers        r   
warn_errorr4   1   s!    w'F
NN/;r   c                      t        t                     } | j                  d      }dD ]  }|j                  |        | j	                          | S )NtextcatPOSITIVENEGATIVE)r
   r   add_pipe	add_label
initialize)nlpr6   labels      r   r=   r=   6   sH    
57
Cll9%G) !% !NNJr   c                 L   d}ddddi}ddi}t        | j                  |j                  d      	      }t        j                  ||      }| j                  |g       t        j                  t              5  | j                  |       d d d        t        j                  t              5  | j                  ||f       d d d        t        j                  t              5  | j                  ||f       d d d        t        j                  t              5  t        j                  |d       }d d d        t        j                  t              5  t        j                  ||      }d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   fxY w# 1 sw Y   y xY w)
Nhello worldcats      ?        r7   LABELT words)r   vocabsplitr   	from_dictupdatepytestraises	TypeErrorr   KeyError)r=   r   annotswrongkeyannotsr   examples         r   test_language_updaterS   @   sY   D3C89Ft_N
ciitzz#
/CV,GJJy 
y	! 

7 
y	! #

D&>"# 
y	! "

C=!" 
z	" /##C./	x	  9##C89 9 # #" "/ /9 9s<   5E*(E6FF
F*E36E?FFF#c                    d}dddddii}t        | j                  |j                  d            }t        j                  ||      }| j                  |g      }|d	   d
kD  sJ | j                  d |fD              }|d	   d
kD  sJ t        j                  t              5  | j                  |       d d d        t        j                  t              5  | j                  ||fg       d d d        t        j                  t              5  | j                  ||fg       d d d        t        j                  t              5  | j                  ||g       d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   WxY w# 1 sw Y   y xY w)Nr@   doc_annotationrA   rB   rC   r7   rE   rF   speedr   c              3       K   | ]  }|  y wN ).0egs     r   	<genexpr>z)test_language_evaluate.<locals>.<genexpr>c   s     1"1s   )	r   rH   rI   r   rJ   evaluaterL   rM   rN   )r=   r   rP   r   rR   scoress         r   test_language_evaluater_   Z   sa   Dcs)K LMF
ciitzz#
/CV,G\\7)$F'?Q \\1y11F'?Q 
y	! W 
y	! 'tVn%&' 
y	! &sFm_%&	y	! %dF^$% % ' '& &% %s0    EE(	E4?F E%(E14E= F	c                     t        j                  d      d        }d}ddddi}t        t                     }  | |      }| j                  d       | j	                  t        j                  ||      g       y)	zoTest that docs are processed correctly within Language.pipe if the
    component doesn't expose a .pipe method.test_evaluate_no_pipec                     | S rX   rY   r   s    r   pipez#test_evaluate_no_pipe.<locals>.pipex       
r   r@   rA   rB   rC   r7   N)r
   	componentr   r:   r]   r   rJ   )r=   rc   r   rP   r   s        r   ra   ra   t   sv     /0 1 D3C89F
57
C
d)CLL()LL'##C012r   c                    t        |       }|j                  d      }dD ]  }|j                  |        |j                          ddddi}|j	                  d      }t        j                  ||      }|j                  |g      }|j                  d      j                  }|D ]  }|d   j                  |      J  |j                  j                  j                         D ]  }	|	|vs|d   j                  |	      J  y)	z8Test that evaluate works with a multilabel textcat pipe.textcat_multilabelFEATUREREQUESTBUGQUESTIONrA   rB   )ri   rl   r@   cats_f_per_typeN)r
   r:   r;   r<   make_docr   rJ   r]   get_pipelabelsget	referencerA   keys)
en_vocabr=   rg   r>   rP   r   rR   r^   rp   keys
             r    test_evaluate_textcat_multilabelrv      s   
8
C&:;: ,$$U+,NN#378F
,,}
%CV,G\\7)$F\\./66F @'(,,U3???@  %%**, >f+,005===>r   c                 x   t        |       }|j                  d      }dD ]  }|j                  |        |j                  d      }dD ]  }|j                  |        |j                          ddddddddi}|j	                  d	      }t        j                  ||      }|j                  |g      }|j                  |j                  d
         j                  }	|	D ]  }|d   j                  |      J  |j                  j                  j                         D ]  }
|
|	vs|d   j                  |
      J  y)z|Test that evaluate evaluates the final textcat component in a pipeline
    with more than one textcat or textcat_multilabel.r6   r7   rg   rh   rA   rB   rC   r8   r9   ri   rl   r8   r9   r@   r"   rm   N)r
   r:   r;   r<   rn   r   rJ   r]   ro   
pipe_namesrp   rq   rr   rA   rs   )rt   r=   r6   r>   rg   rP   r   rR   r^   rp   ru   s              r   $test_evaluate_multiple_textcat_finalrz      sY    8
Cll9%G) !% !&:;: ,$$U+,NN 	
	F ,,}
%CV,G\\7)$F\\#..,-44F @'(,,U3???@  %%**, >f+,005===>r   c                    d t         j                  j                  d      fd       }t        |       }|j	                  ddddii      }dD ]  }|j                  |        |j	                  d	      }d
D ]  }|j                  |        |j                          ddddddddi}|j                  d      }t        j                  ||      }|j                  |g      }	d|	v sJ |j                  d      j                  }
t        |	d   j                               t        |
      k(  sJ d|	v sJ |j                  d	      j                  }
t        |	d   j                               t        |
      k(  sJ y)z_Test that evaluate can evaluate multiple textcat components separately
    with custom scorers.c                     t        j                  | dfddi|}|j                         D ci c]  \  }}d| | c}}S c c}}w )NrA   multi_labelFcustom_)r   
score_catsitems)exampleskwargsr^   kvs        r   custom_textcat_scorezEtest_evaluate_multiple_textcat_separate.<locals>.custom_textcat_score   sW    ""
 
 	
 .4\\^<TQ'!q <<<s   Atest_custom_textcat_scorerc                       S rX   rY   )r   s   r   make_custom_textcat_scorerzKtest_evaluate_multiple_textcat_separate.<locals>.make_custom_textcat_scorer   s	    ##r   r6   scorerz@scorersconfigr7   rg   rh   rA   rB   rC   rx   r@   custom_cats_f_per_typerm   N)r+   r   scorersr
   r:   r;   r<   rn   r   rJ   r]   ro   rp   setrs   )rt   r   r=   r6   r>   rg   rP   r   rR   r^   rp   r   s              @r   'test_evaluate_multiple_textcat_separater      s   = ^^89$ :$ 8
Cll:'CDE  G * !% !&:;: ,$$U+,NN 	
	F ,,}
%CV,G\\7)$F#v---\\)$++Fv./44673v;FFF&&&\\./66Fv'(--/0CK???r   c                 0    | xj                   dz  c_         | S )Nr   )vectorr   s    r   vector_modification_piper      s    JJ!OJJr   c                 $    d| j                   d<   | S )Nbarfoo)	user_datar   s    r   userdata_piper      s     CMM%Jr   c                 P    t        | ddd      }| xj                  |fz  c_        | S )Nr   r   FIRST)r>   )r   ents)r   spans     r   ner_piper      s'    Q)DHHHJr   c                  $    dg dfdg dfdg dfgS )Nr+   )g皙ɿ333333ӿworld)r   r   gٿrc   )gffffff?g?g?rY   rY   r   r   sample_vectorsr      s(     
$%	$%	! r   c                 :   t        j                  dt               t        j                  dt               t        j                  dt               t        | j                  |       | j                  d       | j                  d       | j                  d       | S )N&test_language_vector_modification_pipefunctest_language_userdata_pipetest_language_ner_pipe)r
   re   r   r   r   r   rH   r:   )r=   r   s     r   nlp2r     st    07O 4=I/h?cii0LL9:LL)*LL./Jr   c                      g d} | S )N)zHello world.zThis is spacy.z-You can use multiprocessing with pipe method.zPlease try!rY   )datas    r   textsr     s    D Kr   	n_process   c                     t               }t        |t              s|dk  rO|dz  }|D cg c]
  } | |       }}| j                  ||d      }t	        ||      D ]  \  }}t        ||        y y c c}w )Nr   
   r   
batch_size)r   
isinstancer   rc   zipr   )	r   r   r   opsr   	expectedsr1   r   expected_docs	            r   test_language_piper      s{    

C#x IM
,12DT$Z2	2yy)yB!$T9!5 	1Cc<0	1 %22s   A0c                 J    t               }t        |t              s|dk  rt        j                  |      }t        j
                  |      \  }} fd|D        } j                  ||d      }d}	t        j                  t        ||      |	      D ]  \  }
}t        |
|        y y )Nr   c              3   .   K   | ]  } |        y wrX   rY   )rZ   r   r   s     r   r\   z,test_language_pipe_stream.<locals>.<genexpr>3  s     3DT$Z3s   r      )
r   r   r   	itertoolscycleteerc   islicer   r   )r   r   r   r   stream_textstexts0texts1r   r1   n_fetchr   r   s   `           r   test_language_pipe_streamr   ,  s    

C#x IM u-"|43F3	yy9yC!*!1!1#dI2F!P 	1Cc<0	1 %2r   c                     t               }t        |t              s| dk  r:t               }|j	                  d       |j                          ddg}t        j                  t              5   ||d          ddd       t        j                  t              5  t        |j                  ||              ddd       |j                  t               t        j                  t              5  t        |j                  ||              ddd       |j                  t               t        |j                  ||             }t        |      dk(  sJ  ||d          yy# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   oxY w)z3Test that the error handling of nlp.pipe works wellr   merge_subtokensz-Curious to see what will happen to this text.zAnd this one.r   Nr   )r   r   r   r	   r:   r<   rL   rM   r   listrc   set_error_handlerr   r   lenr   r   r=   r   r1   s        r    test_language_pipe_error_handlerr   ;  s1    
C#x IMi&'@/R]]:& 	aM	]]:& 	7%956	7k*]]:& 	7%956	7 	l+CHHUiH894yA~~E!H# %2	 		7 	7	7 	7s$   )EE()E4E%(E14E=c                 &   t        j                  dt               t               }t	        |t
              s|dk  r9t               }|j                  d       g d}t        j                  t              5  t        |j                  |             ddd       |j                  t               t        j                   d      }t"        j$                  j'                  |d      5 }t        |j                  ||            }|d	k(  rG|j)                          |j*                  dk(  sJ t-        |      |j*                  z   t-        |      k(  sJ |D cg c]  }|j.                   c}g d
k(  sJ 	 ddd       yy# 1 sw Y   xY wc c}w # 1 sw Y   yxY w)zETest the error handling of a custom component that has no pipe methodmy_evil_componentr   r   )TEXT 111TEXT 222TEXT 333TEXT 342TEXT 666Nr+   r.   r   r   )r   r   r   r
   re   r   r   r   r   r	   r:   rL   rM   r   r   rc   r   r4   r,   r-   r   patchobjectassert_called
call_countr   r   )	rt   r   r   r=   r   r3   mock_warningr1   r   s	            r   'test_language_pipe_error_handler_customr   S  sV    *@

C#x IMi()L]]:& 	"%!	" 	j)""7+ZZvy1 
	V\ )<=D A~**,#..!3334y<#:#::c%jHHH(,-CHH-1UUUU
	V 
	V %2	" 	"  .
	V 
	Vs+   4E6!A-FF!
F6E?FFc                    t        j                  dt               t               }t	        |t
              s|dk  rxt               }|j                  d       g d}t        j                  t              5  t        |j                  |d             ddd       |j                  t               t        j                   d      }t"        j$                  j'                  |d	      5 }t        |j                  |d|
            }|dk(  rG|j)                          |j*                  dk(  sJ t-        |      |j*                  z   t-        |      k(  sJ |d   d   j.                  |d   d   fdk(  sJ |d   d   j.                  |d   d   fdk(  sJ |d   d   j.                  |d   d   fdk(  sJ 	 ddd       yy# 1 sw Y   &xY w# 1 sw Y   yxY w)z8Test the error handling of nlp.pipe with input as tuplesr   r   r   )r   o   )r      r   iM  )r   iV  r   i  T)	as_tuplesNr+   r.   )r   r   r   r   r   r   r   r   )rt   r   r   r=   r   r3   r   tupless           r   0test_language_pipe_error_handler_input_as_tuplesr   o  s    *@

C#x IMi()
 ]]:& 	2%401	2j)""7+ZZvy1 
	J\#((5DI(NOF A~**,#..!3336{\%<%<<E
JJJ1IaL%%vay|48IIII1IaL%%vay|48IIII1IaL%%vay|48IIII
	J 
	J %2	2 	2
	J 
	Js   4F5#CG5F?Gc                    t        j                  dt               t        j                  dt               t	               }t        |t              s|dk  rt        d      D cg c]  }t        |       d }}t               }|j                  d       |j                  d       |j                          t        j                  t              5  t        |j!                  ||d            }d	d	d	       |j#                  t$               t        |j!                  ||d            }t'        |      d
k(  sJ y	y	c c}w # 1 sw Y   RxY w)z4Test the error handling of a component's pipe methodmy_perhaps_sentencesr   r)   r   d   z is enough. Doner   r   NY   )r
   re   r%   r)   r   r   r   rangestrr	   r:   r<   rL   rM   r   r   rc   r   r   r   )rt   r   r   ir   r=   r1   s          r   %test_language_pipe_error_handler_piper     s    -4IJ+2DE

C#x IM6;CjACF8+,AAi+,)*]]:& 	M)KLD	M 	l+CHHUiBHGH4yB %2A
	M 	Ms   #D9D>>Ec                    t               }t        |t              s| dk  rt               }d|_        ddgdz  }t        j                  t              5  t        |j                  ||              ddd       t        |_        | dk(  r?t        j                  t              5  t        |j                  ||              ddd       yt        |j                  ||             }t        |      dk(  sJ yy# 1 sw Y   xY w# 1 sw Y   yxY w)	$Test the error handling for make_docr   r   1234567890123456789012345r   Nr   r   r   r   r   r	   
max_lengthrL   rM   r   r   rc   r   default_error_handlerr   r   s        r   0test_language_pipe_error_handler_make_doc_actualr     s    
 
C#x IMi'1B6]]:& 	7%956	7$0!>z* ;SXXeyX9:; ; )<=Dt9>!> %2	7 	7; ;s   C3C?3C<?Dc                 v   t               }t        |t              s| dk  rt               }d|_        ddgdz  }t        j                  t              5  t        |j                  ||              ddd       t        |_        t        |j                  ||             }t        |      dk(  sJ yy# 1 sw Y   BxY w)r   r   r   r   r   r   Nr   r   r   s        r   3test_language_pipe_error_handler_make_doc_preferredr     s    
 
C#x IMi'1B6]]:& 	7%956	7$0!CHHUiH894yA~~ %2	7 	7s   B//B8c                  z  	
 d} ddd
dd	t        j                  |  d      fd       }t        j                  |  d      fd       }t        j                  |  d      
fd       }t        j                  |  d	      fd
       }t        j                  |  d      	fd       }dgd|  did|  did|  diddddiid|  d	id|  didd}t        j                  |      }|j                  j
                  dk(  sJ |j                  d   dk(  sJ |j                  d   dk(  sJ d|j                  vsJ d|j                  vsJ |j                  dgk(  sJ  |d      sJ |j                          |j                  d   dk(  sJ |j                  d   dk(  sJ t        
	g      sJ y )N+test_language_from_config_before_after_initF_beforec                      fd} | S )Nc                 B    d| t         u sJ d| j                  _        | S )NTr   )r	   Defaultsr   )lang_cls
ran_befores    r   before_creationzbtest_language_from_config_before_after_init.<locals>.make_before_creation.<locals>.before_creation  s)    Jw&&&$)H!Or   rY   )r   r   s    r   make_before_creationzItest_language_from_config_before_after_init.<locals>.make_before_creation  s    	 r   _afterc                      fd} | S )Nc                     dt        | t              sJ | j                  g k(  sJ | j                  j                  dk(  sJ d| j
                  d<   | S )NTr   r   r   r	   ry   r   r   meta)r=   	ran_afters    r   after_creationz`test_language_from_config_before_after_init.<locals>.make_after_creation.<locals>.after_creation  sT    Ic7+++>>R'''<<##u,,,#CHHUOJr   rY   )r  r  s    r   make_after_creationzHtest_language_from_config_before_after_init.<locals>.make_after_creation  s    	 r   _after_pipelinec                      fd} | S )Nc                     dt        | t              sJ | j                  dgk(  sJ | j                  j                  dk(  sJ | j
                  d   dk(  sJ d| j
                  d<   | S )NTsentencizerr   r   bazr  )r=   ran_after_pipelines    r   after_pipeline_creationzrtest_language_from_config_before_after_init.<locals>.make_after_pipeline_creation.<locals>.after_pipeline_creation  sm    !%c7+++>>m_444<<##u,,,88E?e+++#CHHUOJr   rY   )r  r  s    r   make_after_pipeline_creationzQtest_language_from_config_before_after_init.<locals>.make_after_pipeline_creation  s    	 '&r   _before_initc                      fd} | S )Nc                 *    dd| j                   d<   | S )NTbeforebefore_initr  )r=   ran_before_inits    r   r  zZtest_language_from_config_before_after_init.<locals>.make_before_init.<locals>.before_init  s    "O&.CHH]#Jr   rY   )r  r  s    r   make_before_initzEtest_language_from_config_before_after_init.<locals>.make_before_init  s    	 r   _after_initc                      fd} | S )Nc                 *    dd| j                   d<   | S )NTafter
after_initr  )r=   ran_after_inits    r   r  zXtest_language_from_config_before_after_init.<locals>.make_after_init.<locals>.after_init  s    !N%,CHH\"Jr   rY   )r  r  s    r   make_after_initzDtest_language_from_config_before_after_init.<locals>.make_after_init  s    	 r   r
  
@callbacks)pipeliner   r  r  factory)r  r  )r=   
componentsr<   r   r   r  r  r  r   r  r  )
r   	callbacksr	   from_configr   r   r  ry   r<   all)namer   r  r  r  r  r   r=   r  r  r  r   r  s           @@@@@r   r   r     sA   8DJION4&() * 4&(
 )
 4&01' 2' 4&-. / 4&,- . ' ,g.>?+vV_=(4o6N'O	
 %y-&@A(TF,*?@'D6)=>
F 

f
%C<<u$$$88E?e###88E?e###(((sxx'''>>m_,,,v;;NN88M"h...88L!W,,,	Y 2O^T  r   c                     d} t        j                  |  dd        t        j                  |  dd        t        j                  |  dd        t        j                  |  dd	        |  d|  dfD ]A  }d
dd|iii}t        j                  t              5  t        j                  |       ddd       C |  d|  dfD ]A  }d
dd|iii}t        j                  t              5  t        j                  |       ddd       C |  d|  dfD ]A  }d
dd|iii}t        j                  t              5  t        j                  |       ddd       C y# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   fxY w)z=Check that an error is raised if function doesn't return nlp.3test_language_from_config_before_after_init_invalid_before1c                      d S )Nc                      y rX   rY   r=   s    r   <lambda>zWtest_language_from_config_before_after_init_invalid.<locals>.<lambda>.<locals>.<lambda>,      r   rY   rY   r   r   r,  zEtest_language_from_config_before_after_init_invalid.<locals>.<lambda>,  s    7G r   r   _before2c                      d S )Nc                      |        S rX   rY   r+  s    r   r,  zWtest_language_from_config_before_after_init_invalid.<locals>.<lambda>.<locals>.<lambda>-  s    35 r   rY   rY   r   r   r,  zEtest_language_from_config_before_after_init_invalid.<locals>.<lambda>-  s    7H r   _after1c                      d S )Nc                      y rX   rY   r+  s    r   r,  zWtest_language_from_config_before_after_init_invalid.<locals>.<lambda>.<locals>.<lambda>.  r-  r   rY   rY   r   r   r,  zEtest_language_from_config_before_after_init_invalid.<locals>.<lambda>.  s    6F r   c                      d S )Nc                     t         S rX   r   r+  s    r   r,  zWtest_language_from_config_before_after_init_invalid.<locals>.<lambda>.<locals>.<lambda>/  s    ' r   rY   rY   r   r   r,  zEtest_language_from_config_before_after_init_invalid.<locals>.<lambda>/  s    6I r   r=   r   r  N_after2r  r  )r   r"  rL   rM   r   r	   r#  )r%  callback_namer   s      r   r'  r'  )  s   @D$x(/GH$x(/HI$w'.FG$w'.IJ!F(+vX->? (+lM-JKL]]:& 	('	( 	(( "F'*tfG,<= (*\=,IJK]]:& 	('	( 	(( "F'*tfG,<= (3lM5RST]]:& 	('	( 	((	( 	(	( 	(	( 	(s$   E+E&9E2E#	&E/	2E;	c                       G d d      } t        j                  d      } | |j                        |_        d} ||      }|j                  |k(  sJ y)z3Test the custom whitespace tokenizer from the docs.c                       e Zd Zd Zd Zy)?test_language_whitespace_tokenizer.<locals>.WhitespaceTokenizerc                     || _         y rX   )rH   )selfrH   s     r   __init__zHtest_language_whitespace_tokenizer.<locals>.WhitespaceTokenizer.__init__C  s	    DJr   c                     |j                  d      }dgt        |      z  }t        |      D ]  \  }}|dk(  sd||<   d||<    |d   dk(  r|dd }|dd }nd|d<   t        | j                  ||      S )NrE   T Fr"   r   )rG   spaces)rI   r   	enumerater   rH   )r<  r   rG   r@  r   words         r   __call__zHtest_language_whitespace_tokenizer.<locals>.WhitespaceTokenizer.__call__F  s    JJsOEVc%j(F$U+ &42:"E!H %F1I&
 RyCa""r
tzzv>>r   N)__name__
__module____qualname__r=  rC  rY   r   r   WhitespaceTokenizerr:  B  s    		?r   rG  enz?   What's happened to    me? he thought. It wasn't a dream.    N)r+   blankrH   	tokenizerr   )rG  r=   r   r   s       r   "test_language_whitespace_tokenizerrK  ?  sL    ? ?* ++d
C'		2CMLD
d)C88tr   c                     d}  G d d      t        j                  |       ddt        ffd       }ddd| iii}t        j                  |      } |d	      }|D cg c]  }|j
                   c}d
dgk(  sJ t        |j                  d	g            d   }|D cg c]  }|j
                   c}d
dgk(  sJ yc c}w c c}w )zFTest that a fully custom tokenizer can be plugged in via the registry.test_language_custom_tokenizerc                       e Zd ZdZd Zd Zy)7test_language_custom_tokenizer.<locals>.CustomTokenizerzEDummy "tokenizer" that splits on spaces and adds prefix to each word.c                 4    |j                   | _         || _        y rX   )rH   prefix)r<  r=   rQ  s      r   r=  z@test_language_custom_tokenizer.<locals>.CustomTokenizer.__init__e  s    DJ DKr   c                     |j                  d      D cg c]  }| j                   |  }}t        | j                  |      S c c}w )NrE   rF   )rI   rQ  r   rH   )r<  r   rB  rG   s       r   rC  z@test_language_custom_tokenizer.<locals>.CustomTokenizer.__call__i  sA    8<

3H}TF+HEHtzz// Is   AN)rD  rE  rF  __doc__r=  rC  rY   r   r   CustomTokenizerrO  b  s    S	!	0r   rT  rQ  c                       fd}|S )Nc                      |       S )N)rQ  rY   )r=   rT  rQ  s    r   create_tokenizerzYtest_language_custom_tokenizer.<locals>.custom_create_tokenizer.<locals>.create_tokenizero  s    "3v66r   rY   )rQ  rW  rT  s   ` r   custom_create_tokenizerz?test_language_custom_tokenizer.<locals>.custom_create_tokenizerm  s    	7  r   r=   rJ  z@tokenizersr@   _hello_worldr   N)_)r   
tokenizersr   r	   r#  r   r   rc   )r%  rX  r   r=   r   trT  s         @r   rM  rM  ^  s    +D	0 	0       kM4#89:F


f
%C
m
C qAFF Xx$8888
sxx(
)!
,C qAFF Xx$8888 ! s   B9B>c                     dddii} t        j                  t              5  t        j                  |        ddd       t        j                  t              5  t        j                  |        ddd       y# 1 sw Y   AxY w# 1 sw Y   yxY w)zTest that calling Language.from_config raises an error and lang defined
    in config needs to match language-specific subclasses.r=   langrH  N)rL   rM   r   r
   r#  r   r   s    r   &test_language_from_config_invalid_langr`  |  su     fd^$F	z	" %V$%	z	" #6"# #% %# #s   A6B6A?Bc                      t        j                  d      } | j                  d   d   dk(  sJ dddii}ddi}t        j                  d||      } | j                  d   d   dk(  sJ | j                  d   dk(  sJ y )	NrH  trainingdropoutg?g?r%  my_custom_model)r   r  )r+   rI  r   r  )r=   r   r  s      r   test_spacy_blankre    s    
++d
C::j!),3339c*+F%&D
++d6
5C::j!),33388F0000r   zlang,target)frafrfrerh  iwhemorosccsrmulxx)zxxNc                 $    t        |       |k(  sJ y)zc
    Test that we can look up languages by equivalent or nearly-equivalent
    language codes.
    N)r   )r_  targets     r   test_language_matchingrz    s    " "$'6111r   ))rH  rH  rf  ri  rk  rn  rq  rt  )rv  rv  c                 P    t        j                  |       }|j                  |k(  sJ y)z
    Test that we can get spacy.blank in various languages, including codes
    that are defined to be equivalent or that match by CLDR language matching.
    N)r+   rI  r_  )r_  ry  r=   s      r   test_blank_languagesr|    s$    $ ++d
C88vr   valueFxyc                     d}t        j                  t              5 }t        |        d d d        |t	        j
                        v sJ y # 1 sw Y   #xY w)Nzinvalid value)rL   rM   r   r
   r   r}  )r}  err_fragmentr2   s      r    test_language_init_invalid_vocabr    sG    "L	z	" a3qww<''' s   A

Ac                 :   t        t                     }|j                  d      }dD ]  }|j                  |        |j	                          d}||j
                  j                  vsJ || j
                  j                  vsJ |j
                  j                  j                  |       |j
                  j                  j                         | j
                  j                  j                         k7  sJ |j
                  j                  j                         }t        j                  t              5  | j                  dd|       d d d        || j
                  j                  v sJ |j
                  j                  j                         |k(  sJ y # 1 sw Y   MxY w)Nr6   r7   thisisalongstringtextcat2)r%  source)r
   r   r:   r;   r<   rH   stringsaddvectorsto_bytesrL   warnsUserWarning)r   r=   r6   r>   long_stringvectors_bytess         r    test_language_source_and_vectorsr    sJ   
57
Cll9%G) !% !NN%Kcii/////djj00000II+&99%%'4::+=+=+F+F+HHHHII%%..0M	k	" >ij=> $**,,,,,99%%'=888> >s   0FFc                 <   g d}|D cg c]  }| j                  |       }}t        d |D              rJ  | |d         }|j                  |d   k(  sJ t        |j                        dkD  sJ t        t               t              s|dk  rzt        j                         5  t        j                  d       | j                  ||      }|D cg c]  }|j                   c}|k(  sJ t        d |D              sJ 	 d d d        y y c c}w c c}w # 1 sw Y   y xY w)N)rA   dogszguinea pigsc              3   F   K   | ]  }t        |j                          y wrX   r   rA   rZ   r   s     r   r\   z,test_pass_doc_to_pipeline.<locals>.<genexpr>  s     1S3sxx=1   !r   r   errorr   c              3   F   K   | ]  }t        |j                          y wrX   r  r  s     r   r\   z,test_pass_doc_to_pipeline.<locals>.<genexpr>  s     5s388}5r  )rn   anyr   r   rA   r   r   r   warningscatch_warningssimplefilterrc   r$  )r=   r   r   r   r1   r   s         r   test_pass_doc_to_pipeliner    s   +E+014CLL1D11D1111
d1g,C88uQxsxx=1/#X.)a- $$& 	6!!'*88DI86D(,-CHH-66655555		6 	6 3@ 2 .	6 	6s#   D!-DD!DDDc                    ddg}t        j                  t              5   | |       d d d        t        t	        | j                  |                  dk(  sJ g d}t        j                  t              5  t	        | j                  |             d d d        t        j                  t              5   | |       d d d        y # 1 sw Y   xY w# 1 sw Y   @xY w# 1 sw Y   y xY w)NzThis is a text.zThis is another.r   )r   r      )rL   rM   r   r   r   rc   )r=   str_listint_lists      r   test_invalid_arg_to_pipeliner    s    !#56H	z	" HtCHHX&'(A---H	z	" !SXXh !	z	" H  ! ! s#   	C4C0	CCCC#ztest requires GPU)reasonc                    |dz  }| j                  |dd      }t        j                  t        d      5  t        j                  t
              5  |D ]  } 	 d d d        d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)Nr   r   r   zmultiprocessing with GPU modelsmatch)rc   rL   r  r  rM   r   )r   r   r1   r[  s       r    test_multiprocessing_gpu_warningr    s}     BJE99UaA96D	k)J	K ]]:& 	 	 	 	 s#   A5A)A5)A2	.A55A>c                    t        j                  dt               | j                  d       t	        j
                  t        d      5  t        j                  dt               d d d        t	        j
                  t        d      5  t        j                  dt               d d d        y # 1 sw Y   IxY w# 1 sw Y   y xY w)Nr   r   znot permittedr  zmy.evil.component.v1)r
   re   r   r:   rL   rM   r   r   r+  s    r   test_dot_in_factory_namesr    s    *@LL$%	z	9 H1GH 
z	9 F/nEF FH HF Fs   B,B8,B58Cc                  j   t               } t        j                  d      d        }| j                  d        | d       | j	                  d       t        j                  d      d        }| j                  d       t        j                  t        d      5   | d       ddd       y# 1 sw Y   yxY w)	zNTest that an error is raised if components return a type other than a
    doc.test_component_good_pipec                     | S rX   rY   r   s    r   	good_pipez(test_component_return.<locals>.good_pipe  rd   r   r   test_component_bad_pipec                     | j                   S rX   )r   r   s    r   bad_pipez'test_component_return.<locals>.bad_pipe  s    xxr   zinstead of a Docr  N)r	   r
   re   r:   remove_piperL   rM   r   )r=   r  r  s      r   test_component_returnr    s     )C23 4 LL+,KOO./12 3 LL*+	z);	< F  s   	B))B2)Tr   r,   r  unittestr   rL   	thinc.apir   r   r   r+   spacy.lang.der   spacy.lang.enr	   spacy.languager
   spacy.scorerr   spacy.tokensr   r   spacy.trainingr   
spacy.utilr   r   r   r   spacy.vocabr   utilr   r   torchset_num_threadsset_num_interop_threadsImportErrorr   r%   r)   r4   fixturer=   rS   r_   ra   rv   rz   r   r   r   r   r   r   r   markparametrizer   r   r   r   r   r   r   xfailr   r   r'  rK  rM  r`  re  rz  r|  r  r  r  r  skipifr   r  r  r  rY   r   r   <module>r     s        8 8    ! #  " " R R  6	 E!!E!!!$
<
  94%43 >(>D1@h

   
 
   q!f-1 .1 q!f-1 .1 q!f- .. q!f-V .V6 q!f-J .J> q!f- .( q!f-" ."( q!f- . \~(,>9<#1 22 	 5$c
He"LM( N(9( q!f-6 .6"	 ?$g..7J  Fe  		s   &(L L#"L#