
    iP              #          d dl Z d dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlmZ d dlZd dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z& d dl'm(Z(m)Z) d dl*m+Z+ d dl,m-Z- ddlm.Z. dddddifdddddifgZ/ddddddifddddddifgZ0dZ1 e       je                  e1      d   Z3d Z4d  Z5ejl                  jo                  d!      d"        Z8ejl                  jo                  d#      d$        Z9ejl                  ju                  d%eeeeeeg      ejl                  jo                  d&      d'               Z;d(Z<ejl                  ju                  d)d*d+g      ejl                  jo                  d,      d-               Z=ejl                  jo                  d.      d/        Z>ejl                  jo                  d0      d1        Z?ejl                  j                  d23      d4        ZAejl                  j                  d23      d5        ZBejl                  ju                  d6d*d+g      d7        ZCejl                  ju                  d8d*e4fd+e5fg      d9        ZDejl                  ju                  d6d*d+g      d:        ZEejl                  ju                  d8d*e4fd+e5fg      d;        ZFejl                  j                  ejl                  ju                  d<d*d=d>d?d@dAfd*d=d>d>d@dAfd+d=d?d?d@dAfd+d=d?d>d@dAfd*dBd?ddCdDddEdEddF	fd+dBd?ddCdDddEdEddF	fd*dGe!d=d>d?d@dAdHfd*dGe!d=d>d>d@dAdHfd+dGe!d=d?d?d@dAdHfd+dGe!d=d?d>d@dAdHfd*dIe!d>dJfd+dIe!d?dJfg      dK               ZHejl                  ju                  d<d*dLd>d?d@dAfd*dLd>d>d@dAfd+dLd?d?d@dAfd+dLd?d>d@dAfd*dMe!d>d>d>d>d>dNfd+dMe!d?d>d>d>d>dNfg      dO        ZIejl                  ju                  d<d*dLd>d?d@dAfd*dLd>d>d@dAfd+dLd?d?d@dAfd+dLd?d>d@dAfd*dMe!d>d>d>d>d>dNfd+dMe!d?d>d>d>d>dNfg      dP        ZJdQ ZKejl                  ju                  d<d+dGe3dLd?dEd?dRdHfd*dGe3dLd>dSd?dRdHfd*dTe3d>dJfd+dTe3d?dJfd*dMe3d>d>d>d>d>dNfd+dMe3d?d>d>d>d>dNfg      dU        ZLejl                  ju                  dVd*e4e/fd+e5e0fg      dW        ZMdX ZNdY ZOejl                  j                  ejl                  ju                  dZd+e0d=d?dEd?dRfd*e/d=d>d[d?dRfd+e0dBd?ddCdDddEdEddF	fd*e/dBd?ddCdDddEdEddF	fd*e/dIe!d>dJfd+e0dIe!d?dJfd+e0d\d?dEd?dRfd*e/d\d>d[d?dRfd+e0d\d?d@d>dRfd*e/d\d>dd>dRfd+e0dLd?dEd?dRfd*e/dLd>d[d?dRfd+e0dLd?d@d>dRfd*e/dLd>dd>dRfd+e0dGe!dLd?dEd?dRdHfd*e/dGe!dLd>dSd?dRdHfd*e/d]e!d>dJfd+e0d]e!d?dJfd*e/dTe!d>dJfd+e0dTe!d?dJfd*e/dMe!d>d>d>d>d>dNfd+e0dMe!d?d>d>d>d>dNfg      d^               ZPd_ ZQd` ZRda ZSdb ZTejl                  ju                  dcdddeg      dfeUdgeVfdh       ZWejl                  ju                  didjdkg      dfeUdleVfdm       ZXdn ZYdo ZZejl                  ju                  dpdqdrg      ds        Z[y)t    N)assert_almost_equal)Configcompoundingfix_random_seedget_current_ops)msg)util)print_prf_per_typeprint_textcats_auc_per_cat)English)Language)TextCategorizer)single_label_bow_configsingle_label_cnn_configsingle_label_default_config)multi_label_bow_configmulti_label_cnn_configmulti_label_default_config)DEFAULT_TOK2VEC_MODEL)Scorer)build_lazy_init_tok2vec)DocDocBin)Example)init_nlp   )make_tempdirzI'm so happy.cats      ?        )POSITIVENEGATIVEzI'm so angryzI'm angry and confused)ANGRYCONFUSEDHAPPYzI'm confused but happyz?
[model]
@architectures = "test.LazyInitTok2Vec.v1"
width = 96
modelc           	          g t         D ]<  }j                  t        j                  | j	                  |d         |d                > fd}|S )Nr      c                       S N train_exampless   r/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/pipeline/test_textcat.pyget_examplesz4make_get_examples_single_label.<locals>.get_examples:   	        )TRAIN_DATA_SINGLE_LABELappendr   	from_dictmake_docnlptr/   r-   s      @r.   make_get_examples_single_labelr9   5   sR    N$ Kg//QqT0BAaDIJK r1   c           	          g t         D ]<  }j                  t        j                  | j	                  |d         |d                > fd}|S )Nr   r(   c                       S r*   r+   r,   s   r.   r/   z3make_get_examples_multi_label.<locals>.get_examplesE   r0   r1   )TRAIN_DATA_MULTI_LABELr3   r   r4   r5   r6   s      @r.   make_get_examples_multi_labelr=   @   sR    N# Kg//QqT0BAaDIJK r1   i  c            
         ddg} g d}g d}t        j                  d      }g }t        ||      D ]N  \  }}| D ci c]  }|||k(  
 }}|j                  t	        j
                  |j                  |      d|i             P ddd	d
d}	|j                  dd|	id      }
| D ]  }|
j                  |        |j                  d      5  |j                         }t        d      D ]B  }i }t        j                  |t        ddd            }|D ]  }|j                  ||d|        D 	 ddd       yc c}w # 1 sw Y   yxY w)zXTest whether adding n-grams in the textcat works even when n > token length of some docs	offensiveinoffensivezThis is an offensive textz!This is the second offensive textinoffr?   r?   r@   enr   spacy.TextCatBOW.v1Tr   F@architecturesexclusive_classes
ngram_sizeno_output_layertextcatr&   configlastenable         @      @@jt?size皙?examplessgddroplossesN)spacyblankzipr3   r   r4   r5   add_pipe	add_labelselect_pipes
initializeranger	   	minibatchr   update)unique_classesx_trainy_trainr7   
train_datatexttrain_instancelabelcat_dictr&   rK   	optimizerir\   batchesbatchs                   r.   test_issue3611rs   K   s{    "=1NG
 8G
++d
CJ #GW 5 Un@NOuE5N22OO'++CLL,>@RSTU
 0! 	E ll9gu-=DlIG !% ! 
				+ SNN$	q 	SAFnnZk#tU6STG  S

Eys6
RS		SS S PS Ss   D3A!D88Ei  c            
         ddg} g d}g d}t        j                  d      }g }t        ||      D ]N  \  }}| D ci c]  }|||k(  
 }}|j                  t	        j
                  |j                  |      d|i             P ddd	d
d}	|j                  dd|	id      }
| D ]  }|
j                  |        |j                  d      5  |j                         }t        d      D ]B  }i }t        j                  |t        ddd            }|D ]  }|j                  ||d|        D 	 ddd        |d      }|j                   d   dk(  sJ |j                   d   dk(  sJ yc c}w # 1 sw Y   ?xY w)z.Test whether textcat works fine with empty docr?   r@   rA   rC   rD   r   rE   Tr   FrF   rK   r&   rL   rO   rQ   rR   rS   rT   rU   rW   rX   N r    )r]   r^   r_   r3   r   r4   r5   r`   ra   rb   rc   rd   r	   re   r   rf   r   )rg   rh   ri   r7   rj   rk   rl   rm   rn   r&   rK   ro   rp   r\   rq   rr   docs                    r.   test_issue4030rw   p   s    "=1NG
 8G
++d
CJ #GW 5 Un@NOuE5N22OO'++CLL,>@RSTU
 0! 	E ll9gu-=DlIG !% ! 
				+ SNN$	q 	SAFnnZk#tU6STG  S

Eys6
RS		SS b'C88K C'''88M"c)))1 PS Ss   E#A!E((E1textcat_configi  c                    d}t               j                  |       }g }t        d      D ]  }t        d       t	               }d}dddddi}|j                  ||d	
      }t        |d         D ]  }	|j                  |	        |j                          |j                  |      }
|j                  t        j                  |
|      g       |j                  j                  |
g      }|j                  |d           t!        |      dk(  sJ t#               }t%        |j'                  |d         |j'                  |d         d       t%        |j'                  |d         |j'                  |d         d       y)zWTest that after fixing the random seed, the results of the pipeline are truly identicalrK   rQ   r   zUOnce hot, form ping-pong-ball-sized balls of the mixture, each weighing roughly 25 g.r   r   r    )Labe1Label2Label3TrL   r(      decimalr   N)r   from_strrd   r   r   r`   setra   rc   r5   rf   r   r4   r&   predictr3   lenr   r   to_numpy)rx   	componentpipe_cfgresultsrp   r7   rk   annotspiperm   rv   resultopss                r.   test_issue5551r      sV    Ix  0HG1X "ifC3#FG||IhT|B( 	"ENN5!	" 	ll4 

G%%c6234##SE*vay!"  w<1

CWQZ0#,,wqz2JTUVWQZ0#,,wqz2JTUVr1   a  
[paths]
train = "TRAIN_PLACEHOLDER"
raw = null
init_tok2vec = null
vectors = null

[system]
seed = 0
gpu_allocator = null

[nlp]
lang = "en"
pipeline = ["textcat"]
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
batch_size = 1000

[components]

[components.textcat]
factory = "TEXTCAT_PLACEHOLDER"

[corpora]

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths:train}

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths:train}


[training]
train_corpus = "corpora.train"
dev_corpus = "corpora.dev"
seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator}
frozen_components = []
before_to_disk = null

[pretraining]

[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null

[initialize.components]

[initialize.components.textcat]
labels = ['label1', 'label2']

[initialize.tokenizer]
component_namerK   textcat_multilabeli  c                    d }t               5 }|dz  } ||       t        j                  d|       }|j                  d|j                               }t	        j
                  |      }t        |       ddd       y# 1 sw Y   yxY w)z/Test initializing textcat with labels in a listc                    t        j                  d      }|j                  d      }ddd|_        t	        |g      j                         }| j                  d      5 }|j                  |       d d d        y # 1 sw Y   y xY w)NrD   z	Some textr   r(   )label1label2)docswb)r]   r^   r5   r   r   to_bytesopenwrite)out_filer7   rv   out_datafile_s        r.   create_dataz#test_issue6908.<locals>.create_data	  sn    kk$ll;'1-u%..0]]4  	"EKK!	" 	" 	"s   A88Bztrain.spacyTEXTCAT_PLACEHOLDERTRAIN_PLACEHOLDERN)r   CONFIG_ISSUE_6908replaceas_posixr	   load_config_from_strr   )r   r   tmp_path
train_path
config_strrM   s         r.   test_issue6908r     s~    " 
 8-
J&../DnU
''(;Z=P=P=RS
**:6  s   A$A;;Bik  c                  p    ddd d} t        t        |        ddddd d d dd} t        t        | d	d
       y )Ng0L}?g7?)LABEL_ALABEL_BLABEL_CgAϢ?g"nN%?g3M'?)prf)r   r   foobar)nametype)r   r   r
   )scoress    r.   test_issue7019r     sC    #4PFsF+"GDt4F sFU;r1   i&  c                     t               } | j                  d      }t        |       }| j                  |        |       }|j	                  |D cg c]  }|j
                   c}      }|j                  ||      d   }|j                  |dz  |j                  dd            d   }|t        j                  |      k(  sJ y c c}w )NrK   r   r   )axis)
r   r`   r9   rc   r   	predictedget_lossrepeatpytestapprox)r7   rK   r/   rY   egr   lossloss_double_bss           r.   test_issue9904r   %  s    
*Cll9%G1#6LNN< ~H__X>rbll>?FHf-a0D%%hlFMM!!M4LMaPN6==0000	 ?s   Cz#Test is flakey when run with others)reasonc                  6   t               } | j                  d      }|j                  d       | j                          t	        d      D ]#  }dD ]  \  }}| j                  |dd|iif        %  | d      }d|j                  v sJ |j                  d   dk\  sJ y )NrK   answerr}   ))aaaar   )bbbbr   )aar   )	bbbbbbbbbr    )aaaaaar(   r   aaa      ?)r   r`   ra   rc   rd   rf   r   )r7   rK   rp   rk   r   rv   s         r.   test_simple_trainr   4  s    
*Cll9%GhNN1X =
 	=LD& JJv&'9:;<	== e*Csxx88H$$$r1   c            
         t        j                  d       t        j                   j                  d       g } t               }g d}|D ]]  }|D ]V  }|D ci c]  }|t	        ||k(         }}| j                  t        |j                  dgdz  ||gz   dgdz  z         |f       X _ t        j                  |        t        |j                  d      }|D ]  }|j                  |        |j                  d       }t        d	      D ]W  }	i }
| D cg c]  \  }}t        j                  |d
i       }}}|j                  |||
       t        j                  |        Y |D ]  }|D ]z  }t        |j                  dgdz  ||gz   dgdz  z         }|D ci c]  }|||k(  
 }} ||       |j                   j#                         D ]  \  }}||   s|dk  rJ |dkD  rJ  |  y c c}w c c}}w c c}w )Nr}   )abcdrQ   )words   )widthc                      g S r*   r+   r+   r1   r.   <lambda>z0test_textcat_learns_multilabel.<locals>.<lambda>W  s    2 r1      r   rZ   r\   r   )randomseednumpyr   floatr3   r   vocabshuffler   ra   rc   rd   r   r4   rf   r   items)r   r7   lettersw1w2letterr   rK   ro   rp   r\   rv   catrY   truthscores                   r.   test_textcat_learns_multilabelr   H  s   
KKN	LLaD
*CG X 	XB>EFFFE",//FDFKKSYYseai2r(.BcUQY.NOQUVW	XX NN4ciiq1G "&!""":.I2Y KOPxsCG%%cFD>:PPxYv>t	
  	' 	'BciiuqyB8';seai'GHC8?@fVR6\)@E@CL!hhnn. '
USz 3;&; 3;&;	'		'	' G Q As   G)"G.G4r   c                    t               }|j                  |       }|j                  d       t        j                  t
              5  |j                  d       d d d        | dk(  r3t        j                  t
              5  |j                          d d d        y |j                          y # 1 sw Y   RxY w# 1 sw Y   y xY w)Nr   	   rK   )r   r`   ra   r   raises
ValueErrorrc   )r   r7   rK   s      r.   test_label_typesr   i  s    
*Cll4 Gh	z	" ! y]]:& 	NN	 	 	 	 	s   B)>B5)B25B>zname,get_examplesc                     t               }|j                  |       } ||      fd}t        j                  t              5  |j                  |       d d d        y # 1 sw Y   y xY w)Nc                              } | d   j                   }t        |j                  j                               d   }d|j                  |<   | S )Nr   g       @)	referencelistr   keys)rY   refkeyexample_getters      r.   invalid_examplesz2test_invalid_label_value.<locals>.invalid_examples  sE    !#qk##388==?#A&r1   r/   r   r`   r   r   r   rc   )r   r/   r7   rK   r   r   s        @r.   test_invalid_label_valuer   x  s[     *Cll4 G!#&N 
z	" 6$456 6 6s   AA(c                     t               }|j                  |        t        j                  t              5  |j                          d d d        y # 1 sw Y   y xY wr*   r   )r   r7   s     r.   test_no_labelr     s@    
*CLL	z	"   s   AAc                 j    t               }|j                  |        |j                   ||             y )Nr   )r   r`   rc   )r   r/   r7   s      r.   test_implicit_labelr     s*     *CLLNNS 1N2r1   zname,textcat_configrE   TFrQ   )rG   rH   rJ   rI   zspacy.TextCatEnsemble.v1@   i  r(   )	rG   rH   pretrained_vectorsr   
embed_size
conv_depthwindow_sizerI   dropoutzspacy.TextCatEnsemble.v2)rG   tok2veclinear_modelzspacy.TextCatCNN.v1)rG   r   rH   c                 f   t               }d|i}|j                  | |      }|j                  d       |j                  d       |j                          |j                  j                  d      dv sJ t        j                  t              5  |j                  d       ddd       y# 1 sw Y   yxY w)	z/The old textcat architectures weren't resizabler&   rM   r!   r"   nOr   NNEUTRALN)	r   r`   ra   rc   r&   maybe_get_dimr   r   r   r   rx   r7   pipe_configrK   s        r.   test_no_resizer	    s    2 *CN+Kll4l4Gj!j!NN==&&t,	999	z	" %)$% % %s   B''B0zspacy.TextCatBOW.v3zspacy.TextCatReduce.v1)rG   r   rH   use_reduce_firstuse_reduce_lastuse_reduce_maxuse_reduce_meanc                    t               }d|i}|j                  | |      }|j                  d       |j                  d       |j                  j	                  d      dv sJ |j                          |j                  j	                  d      dv sJ |j                  d       |j                  j	                  d      dv sJ y	)
z+The new textcat architectures are resizabler&   r  r!   r"   r  r  r  )rQ   NN)r   r`   ra   r&   r  rc   r  s        r.   test_resizer    s      *CN+Kll4l4Gj!j!==&&t,	999NN==&&t,	999i ==&&t,	999r1   c                 &   t        d       t               }d|i}|j                  | |      }g t        D ]9  \  }}j	                  t        j                  |j                  |      |             ; |j                  fd      }|j                  j                  d      dv sJ t        d      D ]  }i }	|j                  ||		        d
}
 ||
      }t        |j                        dk(  sJ |j                  d   }|j                  d   }|j                  d        ||
      }t        |j                        dk(  sJ |j                  d   |k(  sJ |j                  d   |k(  sJ |j                  d   dk  sJ t        d      D ]  }i }	|j                  ||		         ||
      }t        |j                        dk(  sJ |j                  d   |k7  sJ |j                  d   |k7  sJ |j                  D ]  }|j                  |   dk  rJ  y )Nr   r&   r  c                       S r*   r+   r,   s   r.   r   z*test_resize_same_results.<locals>.<lambda>      N r1   r   r  r  r}   r   I am happy.r   r!   r"   r  rQ   r(   )r   r   r`   r2   r3   r   r4   r5   rc   r&   r  rd   rf   r   r   ra   )r   rx   r7   r  rK   rk   annotationsro   rp   r\   	test_textrv   pos_predneg_predr   r-   s                  @r.   test_resize_same_resultsr    s#     A
)CN+Kll4l4GN4 Rkg//T0BKPQR,BCI==&&t,	9991X A

>y
@A
 I
i.Csxx=Axx
#Hxx
#H i 
i.Csxx=A88J8+++88J8+++88I!###1X A

>y
@A
 i.Csxx=A88J8+++88J8+++xx "xx}!!!"r1   c                  J   t               } | j                  d       g t        D ]9  \  }}j                  t	        j
                  | j                  |      |             ; t        j                  t              5  | j                  fd       d d d        y # 1 sw Y   y xY w)NrK   c                       S r*   r+   r,   s   r.   r   z.test_error_with_multi_labels.<locals>.<lambda>+  s    N r1   r   )r   r`   r<   r3   r   r4   r5   r   r   r   rc   )r7   rk   r  r-   s      @r.   test_error_with_multi_labelsr  $  s    
*CLLN3 Rkg//T0BKPQR	z	" <$:;< < <s   :BB"rF   r}   z#spacy.TextCatParametricAttention.v1c                     t               }d|i}|j                  | |      }|j                  d       |j                  d       |j                          |j	                  dg       y )Nr&   r  r!   r"   zThis is a test.)r   r`   ra   rc   r   r  s        r.   test_tok2vec_lazy_initr  /  s]    $ )CN+Kll4l4Gj!j!NNHH !r1   zname,get_examples, train_datac                    t               }|j                  |       }|D ]=  \  }}|j                  d      j                         D ]  \  }}|j	                  |        ? |j                          |j                   ||             t        j                  t              5  |j                  d        d d d        t        j                  t              5  |j                   |              d d d        y # 1 sw Y   CxY w# 1 sw Y   y xY w)Nr   r   c                       y r*   r+   r+   r1   r.   r   z*test_initialize_examples.<locals>.<lambda>[  s    r1   )	r   r`   getr   ra   rc   r   r   	TypeError)	r   r/   rj   r7   rK   rk   r  rm   values	            r.   test_initialize_examplesr#  J  s     *Cll4 G' %k'OOF399; 	%LE5e$	%% NNNNS 1N2	y	! 2L12	y	! 4LN34 42 24 4s   C5D5C>D
c                     t        d       t               } | j                  d      }g t        D ]9  \  }}j	                  t        j                  | j                  |      |             ; | j                  fd      }|j                  j                  d      dk(  sJ t        d      D ]  }i }| j                  ||        d   d	k  sJ d
} | |      }|j                  }	|	d   dkD  sJ |	d   |	d   z   t        j                  dd      k(  sJ t!               5 }
| j#                  |
       t%        j&                  |
      } ||      }|j                  }|d   dkD  sJ |d   |d   z   t        j                  dd      k(  sJ 	 d d d        | j)                        }|d   dk(  sJ |d   dk(  sJ |d   dk(  sJ |d   dk(  sJ d|v sJ g d}| j+                  |      D cg c]  }|j                   }}| j+                  |      D cg c]  }|j                   }}|D cg c]
  } | |       c}D cg c]  }|j                   }}t-        ||      D ]   \  }}|D ]  }t/        ||   ||   d        " t-        ||      D ]   \  }}|D ]  }t/        ||   ||   d        " y # 1 sw Y   )xY wc c}w c c}w c c}w c c}w )Nr   rK   c                       S r*   r+   r,   s   r.   r   z%test_overfitting_IO.<locals>.<lambda>i  r  r1   r   r  r   2   r   {Gz?r  r!   ?r"   r   gMbP?cats_micro_fcats_macro_fcats_macro_auc
cats_scorecats_score_desczJust a sentence.zI like green eggs.r  z
I eat ham.r}   r~   )r   r   r`   r2   r3   r   r4   r5   rc   r&   get_dimrd   rf   r   r   r   r   to_diskr	   load_model_from_pathevaluater   r_   r   )r7   rK   rk   r  ro   rp   r\   r  rv   r   tmp_dirnlp2doc2cats2r   textsbatch_cats_1batch_cats_2no_batch_catscats_1cats_2r   r-   s                         @r.   test_overfitting_IOr=  `  s   A
)Cll9%GN4 Rkg//T0BKPQR,BCI==  &!+++2Y A

>y
@A )t### I
i.C88D
c!!!
d:..&--U2KKKK 
 R7G((1I		Z 3&&&Z 5#44c58QQQQR \\.)F.!S(((.!S((("#s***,3&&&&&& TE(+8CHH8L8(+8CHH8L8@E)F#d))FG#SXXGMGlL9 E 	ECsVC[!D	EE lM: E 	ECsVC[!D	EE1R R$ 98)FGs%   A)J0'J=K(K<K0J:c                     t        d       t               } | j                  d      }g t        D ]9  \  }}j	                  t        j                  | j                  |      |             ; | j                  fd      }|j                  j                  d      dk(  sJ t        d      D ]  }i }| j                  ||        d   d	k  sJ d
} | |      }|j                  }	|	d   dkD  sJ |	d   dkD  sJ t               5 }
| j                  |
       t!        j"                  |
      } ||      }|j                  }|d   dkD  sJ |d   dkD  sJ 	 d d d        | j%                        }|d   dk(  sJ |d   dk(  sJ d|v sJ g d}| j'                  |      D cg c]  }|j                   }}| j'                  |      D cg c]  }|j                   }}|D cg c]
  } | |       c}D cg c]  }|j                   }}t)        ||      D ]   \  }}|D ]  }t+        ||   ||   d        " t)        ||      D ]   \  }}|D ]  }t+        ||   ||   d        " y # 1 sw Y   xY wc c}w c c}w c c}w c c}w )Nr   r   c                       S r*   r+   r,   s   r.   r   z+test_overfitting_IO_multi.<locals>.<lambda>  r  r1   r   r  rQ   d   r   r'  zI am confused but happy.r%   r(  r$   r)  r   r*  r-  r.  r}   r~   )r   r   r`   r<   r3   r   r4   r5   rc   r&   r/  rd   rf   r   r   r0  r	   r1  r2  r   r_   r   )r7   rK   rk   r  ro   rp   r\   r  rv   r   r3  r4  r5  r6  r   r7  batch_deps_1batch_deps_2no_batch_depsr;  r<  r   r-   s                         @r.   test_overfitting_IO_multirD    s   A
)Cll/0GN3 Rkg//T0BKPQR,BCI==  &!+++3Z A

>y
@A &'$... +I
i.C88D=3
c!!! 
 '7G((1I		W~###Z 3&&&' \\.)F.!S(((.!S(((&&& TE(+8CHH8L8(+8CHH8L8@E)F#d))FG#SXXGMGlL9 E 	ECsVC[!D	EE lM: E 	ECsVC[!D	EE-' '  98)FGs%   AI(I5I: I?4J(I2zname,train_data,textcat_config   zspacy.TextCatBOW.v2zspacy.TextCatCNN.v2c                    d|i}t               }|j                  | |      }g }|D ]q  \  }}|j                  t        j                  |j                  |      |             |j                  d      j                         D ]  \  }	}
|j                  |	        s |j                         }t        d      D ]  }i }|j                  |||        y )Nr&   r  r   r}   r   )r   r`   r3   r   r4   r5   r   r   ra   rc   rd   rf   )r   rj   rx   r  r7   rK   r-   rk   r  rm   r"  ro   rp   r\   s                 r.   test_textcat_configsrG    s    N N+K
)Cll4l4GN' %kg//T0BKPQ'OOF399; 	%LE5e$	%%  I1X A

>y
@Ar1   c                     t               } | j                  d      }t        |       }|j                  |ddgd       |j                  dk(  sJ |j
                  d   dk(  sJ | j                  d      }t        |       }t        j                  t              5  |j                  |ddgd       d d d        |j                  |dd	g
       |j                  dk(  sJ d|j
                  vsJ y # 1 sw Y   @xY w)NrK   POSNEGlabelspositive_label)rI  rJ  rM  r   FICTIONDRAMArL  )rN  rO  )
r   r`   r9   rc   rL  cfgr=   r   r   r!  )r7   rK   r/   r   s       r.   test_positive_classrR     s   
)Cll9%G1#6L|UEN5Q>>^+++;;'(E111&:;05L	y	! 
%%% 	& 	

 !!,	77K!L$$(<<<<#5#9#9999
 
s   C--C6c                      t               } | j                  d      }t        |       }t        j                  t
              5  |j                  |ddgd       d d d        y # 1 sw Y   y xY w)NrK   SOMETHINGrI  rK  )r   r`   r9   r   r   r   rc   r7   rK   r/   s      r.   test_positive_class_not_presentrW    sa    
)Cll9%G1#6L	z	" Y<0ARWXY Y Y    A  A)c                      t               } | j                  d      }t        |       }t        j                  t
              5  |j                  |g dd       d d d        y # 1 sw Y   y xY w)NrK   )rT  rU  rI  rI  rK  )r   r`   r=   r   r   r   rc   rV  s      r.   test_positive_class_not_binaryrZ    s]    
)Cll9%G05L	z	" 
!9% 	 	

 
 
rX  c                     g } t               } |d      }ddddd|_         |d      }ddddd|_        | j                  t        ||              |d      }ddddd|_         |d      }ddddd|_        | j                  t        ||             t	               j                  | dg d      }|d   d	   d
   dk(  sJ |d   d	   d   dk(  sJ |d   d   d
   dk(  sJ |d   d   d   dk(  sJ |d   d   d
   dk(  sJ |d   d   d   dk(  sJ |d   d   d
   dk(  sJ |d   d   d   dk(  sJ |d   dk(  sJ |d   dk(  sJ y )Noner   wintersummerspringautumnr    twor   rP  cats_f_per_typer^  r   r   r   r_  r   r`  ra  cats_micro_pg?cats_micro_rgUUUUUU?r   r   r3   r   r   
score_cats)r-   r7   ref1pred1ref2pred2r   s          r.   test_textcat_evaluationrl  %  s   N
)Cu:D#LDIJE3#MEJ'%./u:D#LDIJE3#MEJ'%./X  'O ! F #$X.s3u<<<#$X.s3u<<<#$X.s3q888#$X.s3u<<<#$X.s3u<<<#$X.s3u<<<#$X.s3u<<<#$X.s3u<<<.!U***.!U***r1   zmulti_label,spring_p)Tr   )Fr   multi_labelspring_pc                    g }t               } |d      }ddddd|_         |d      }ddddd|_        |j                  t        ||              |d      }dddd|_         |d      }ddddd|_        |j                  t        ||             t	               j                  |dg d| 	      }|d
   d   d   |k(  sJ |d
   d   d   dk(  sJ y)z
    multi-label: the missing 'spring' in gold_doc_2 doesn't incur a penalty
    exclusive labels: the missing 'spring' in gold_doc_2 is interpreted as 0.0r\  r    r   r^  r_  ra  r`  rb  r^  r_  ra  r   r]  )rL  rm  rc  r`  r   r   Nrf  )	rm  rn  r-   r7   rh  ri  rj  rk  r   s	            r.   test_textcat_eval_missingrr  D  s     N
)Cu:D#LDIJE3#MEJ'$./u:D#=DIJE3#MEJ'%./X  7	 ! F #$X.s3x???#$X.s3u<<<r1   zmulti_label,expected_loss)Tr   )Fg      ?expected_lossc                    g t               } |d      }ddddd}j                  t        j                  |d|i              |d      }dddd}j                  t        j                  |d|i             | r|j	                  d      }n|j	                  d	      }t        |t              sJ |j                  fd
       |j                  j                  j                  g dg dgd      }|j                  |      \  }	}
|	|k(  sJ y)z
    multi-label: the missing 'spring' in gold_doc_2 doesn't incur an increase in loss
    exclusive labels: the missing 'spring' in gold_doc_2 is interpreted as 0.0 and adds to the loss
    r\  r    r   rp  r   rb  rq  r   rK   c                       S r*   r+   r,   s   r.   r   z#test_textcat_loss.<locals>.<lambda>  s    ~ r1   )r    r    r    r   )r    r    r   r   r   )dtypeN)r   r3   r   r4   r`   
isinstancer   rc   r&   r   asarrayr   )rm  rs  r7   doc1cats1r5  r6  rK   r   r   d_scoresr-   s              @r.   test_textcat_lossr|  f  s
    N
)Cu:DcSCHE'++D65/BCu:DcS9E'++D65/BC,,34,,y)g///-.]]&&	34C ' F %%nf=ND(=   r1   c                  8   t               } | j                  d       g t        D ]9  \  }}j                  t	        j
                  | j                  |      |             ; | j                  fd       | j                        }d|d   cxk  rdk  sJ  J | j                  ddi	      }|d
   d   d   dk(  sJ | j                  ddi	      }|d   }|d
   d   d   dk(  sJ | j                  ddd	      }|d   }|d
   d   d   dk(  sJ ||k\  sJ y )Nr   c                       S r*   r+   r,   s   r.   r   z3test_textcat_multilabel_threshold.<locals>.<lambda>       r1   r   r   r,  r(   	thresholdr   
scorer_cfgrc  r!   r   )r  rM  	r   r`   r2   r3   r   r4   r5   rc   r2  )r7   rk   r  r   macro_fpos_fr-   s         @r.   !test_textcat_multilabel_thresholdr    s\   
)CLL%&N4 Rkg//T0BKPQRNN 6N7 \\.)F|$))))))\\.k35G\HF#$Z05:::\\.k15E\FF\"G#$Z05<<<\\j#Q  F < E#$Z05<<<Gr1   c                     t               } | j                  d       g t        D ]9  \  }}j                  t	        j
                  | j                  |      |             ; | j                  fd       | j                        }d|d   cxk  rdk  sJ  J | j                  ddi	      }|d
   d   d   dk(  sJ | j                  ddi	      }|d
   d   d   dk(  sJ y )Nr   c                       S r*   r+   r,   s   r.   r   z.test_textcat_multi_threshold.<locals>.<lambda>  r  r1   r   r   r,  r(   r  r   r  rc  r!   r   r  )r7   rk   r  r   r-   s       @r.   test_textcat_multi_thresholdr    s    
)CLL%&N4 Rkg//T0BKPQRNN 6N7 \\.)F|$))))))\\.k35G\HF#$Z05:::\\.k15E\FF#$Z05<<<r1   zcomponent_name,scorer)rK   zspacy.textcat_scorer.v1)r   z"spacy.textcat_multilabel_scorer.v1c                 B   t               }|j                  | dd|ii       g t        D ]9  \  }}j                  t	        j
                  |j                  |      |             ; |j                  fd       |j                        }d|d   cxk  rdk  sJ  J y	)
zQCheck that legacy scorers are registered and produce the expected score
    keys.scorerz@scorersr  c                       S r*   r+   r,   s   r.   r   z-test_textcat_legacy_scorers.<locals>.<lambda>  r  r1   r   r   r,  r(   Nr  )r   r  r7   rk   r  r   r-   s         @r.   test_textcat_legacy_scorersr    s     )CLLJ3G(HLIN4 Rkg//T0BKPQRNN 6N7 \\.)F|$))))))r1   )\r   numpy.randomr   r   numpy.testingr   	thinc.apir   r   r   r   wasabir   r]   r	   spacy.cli.evaluater
   r   spacy.lang.enr   spacy.languager   spacy.pipeliner   spacy.pipeline.textcatr   r   r   !spacy.pipeline.textcat_multilabelr   r   r   spacy.pipeline.tok2vecr   spacy.scorerr   spacy.tests.tok2vecr   _spacy.tokensr   r   spacy.trainingr   spacy.training.initializer   r   r2   r<   lazy_init_model_configr   LAZY_INIT_TOK2VEC_MODELr9   r=   markissuers   rw   parametrizer   r   r   r   r   skipr   r   r   r   r   r   slowr	  r  r  r  r  r#  r=  rD  rG  rR  rW  rZ  rl  boolr   rr  r|  r  r  r  r+   r1   r.   <module>r     s3      - K K    M ! # * 
 
 9  < $ " .   vCSABCf3C@AB  #3QT(UVW#3QT(UVW 
 
 !(++,BCGL  4!S !SH 4%* %*P #"
 4W 
W8= @ $% 4 	
( 4< < 41 1 >?% @%& >?' @'@ )-A!BC D 	23	<=66" )-A!BC D 	23	<=33  
'<SWlq  BC  D  	E	'<SWlp  AB  C  	D	2G^cx}  NO   P  	Q	2G^cx|  MN   O  	P	'AX]uy  EG  W[  kl  }~  NO  \`  a  	b	2Lch  AE  PR  bf  vw  HI  YZ  gk   l  	m	'ANc  I^  uy  NS  cd  ve  f  	g	'ANc  I^  uy  NR  bc  vd  e  	f	2LYn  Ti  @E  Z_  op  Aq   r  	s	2LYn  Ti  @E  Z^  no  Ap   q  	r	'<I^uyz{	2GTi  AF   G  	H!,%- .%  
'<SWlq  BC  D  	E	'<SWlp  AB  C  	D	2G^cx}  NO   P  	Q	2G^cx|  MN   O  	P	'?Lax|  SW  lp  DH  ]a  b  	c	2JWl  DI  _c  x|  PT  im   n  	o	::  
'<SWlq  BC  D  	E	'<SWlp  AB  C  	D	2G^cx}  NO   P  	Q	2G^cx|  MN   O  	P	'?Lax|  SW  lp  DH  ]a  b  	c	2JWl  DI  _c  x|  PT  im   n  	o	*"*"Z<  
2LYp  Vk  BG  WX  mr  Cs   t  	u	'ANe  K`  w{  KL  af  xg  h  	i	'LYp  HL  M  	N	2Wd{  SX   Y  	Z	'?Lcz~  UY  nr  FJ  _c  d  	e	2JWn  FK  ae  z~  RV  ko   p  	q
	"	" #	24KL	<>TU443El1Ej $ 
5J_v{  LM  bg  8h  	i	+@Ulp  AB  W\  .]  	^	5Jd  |A  Y]  hj  z~  NO  `a  qr  C  8D  	E	+@Zqv  OS  ^`  pt  DE  VW  gh  uy  .z  	{	+@Ubw  OS  .T  	U	5J_  mB  Y^  8_  	`	5J_v{  LM  bg  8h  	i	+@Ulp  AB  W\  .]  	^	5J_v{  LM  bf  8g  	h	+@Ulp  AB  W[  .\  	]	5J_v{  LM  bg  8h  	i	+@Ulp  AB  W\  .]  	^	5J_v{  LM  bf  8g  	h	+@Ulp  AB  W[  .\  	]	5Jd  rG  lA  X]  mn  CH  YI  8J  	K	+@Zg|  bw  NR  bc  x}  O~  .  	@	+@Ubw  OS  .T  	U	5J_  mB  Y^  8_  	`	+@e  sH  _c  .d  	e	5Jo  }R  in  8o  	p	+@Xez  RV  lp  EI  ]a  vz  .{  	|	5Jb  pE  \a  w{  PT  hl  AE  8F  	G? #JAK# LA:&Y
+> N#=4 =5 =	=< !4 ! !	!::=* .D**r1   