
    i                        d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
 d dlZd dlZd dlmZ d dlmZ d dlmZ d dlZd dlmZ d d	lmZmZ d d
lmZmZmZ d dlmZ d dlmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0m1Z1 d dl2m3Z3 d dl4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@ d dlAmBZBmCZC d dlDmEZE d dlFmGZGmHZHmIZI d dlJmKZKmLZLmMZM d dlNmOZOmPZPmQZQmRZR ddlSmTZT ej                  j                  d      d         ZWej                  j                  d!      d"        ZXej                  j                  d#      d$        ZYej                  j                  d%      ej                  j                  d&g d'      d(e[d)e[fd*              Z\d+ Z]d, Z^ej                  j                  d-d.d/g      d0        Z_d1 Z`d2 Zad3 Zbej                  j                  d4d5d6gd7d8ifd9gd7d8ifd5d:gd7d:ifd;gd7d:ifg d<d=d>d?fd5d@gd=d>d?fg dAdBd=dCdDfg dEdBd=dCdDfg      dF        Zcej                  j                  dGdHgg dIg      dJ        Zdej                  j                  dGg dKd7gg      dL        ZedM Zfej                  j                  dNdOdPg      ej                  j                  dQg dRg g dSg dTdUdVgg      ej                  j                  dWdXdYg      ej                  j                  dZd=dCg      d[                             Zgd\ Zhej                  j                  d]g d^      d_        Ziej                  j                  d]g d`      da        Zjdb Zkdc Zlej                  j                  ddg de      df        Zmdg Znej                  j                  ej                  j                  dhg di      dj               Zpdk Zqdl Zrej                  j                  dddmdUg      dn        Zsdo Ztdp Zudq Zvdr Zwej                  j                  dsg dt      du        Zxdv Zydw Zzdx Z{dy Z|dz Z}d{ Z~d| Zd} Zd~ Zd Zd Zd Zd Zd Zd Zy)    N)Counter)Path)AnyDictListTuple)NoSuchOption)SpecifierSet)Config)about)download_moduleinfo)parse_config_overridesstring_to_listwalk_directory)apply)_compile_gold_get_distribution_get_kl_divergence_get_labels_from_model_get_labels_from_spancat_get_span_characteristics_get_spans_length_freq_dist_print_span_characteristics)get_compatibilityget_version)render_parses)find_threshold)RECOMMENDATIONSfill_configinit_config)_init_labels)_is_permitted_package_nameget_third_party_dependencies)get_model_pkgs)English)Dutch)Language)RecommendationSchema)DocDocBin)Span)Exampledocs_to_jsonoffsets_to_biluo_tags)conll_ner_to_docsconllu_to_docsiob_to_docs)ENV_VARSget_minor_versionload_configload_model_from_config   )make_tempdiri9  c                      d} t        t        |             }t        |d   D cg c]  }|j                  j                   c}      rJ |d   j                  d      rJ yc c}w )zd
    conllu_to_docs should not raise an exception if the HEAD column contains an
    underscore
    a%  
1	[	_	PUNCT	-LRB-	_	_	punct	_	_
2	This	_	DET	DT	_	_	det	_	_
3	killing	_	NOUN	NN	_	_	nsubj	_	_
4	of	_	ADP	IN	_	_	case	_	_
5	a	_	DET	DT	_	_	det	_	_
6	respected	_	ADJ	JJ	_	_	amod	_	_
7	cleric	_	NOUN	NN	_	_	nmod	_	_
8	will	_	AUX	MD	_	_	aux	_	_
9	be	_	AUX	VB	_	_	aux	_	_
10	causing	_	VERB	VBG	_	_	root	_	_
11	us	_	PRON	PRP	_	_	iobj	_	_
12	trouble	_	NOUN	NN	_	_	dobj	_	_
13	for	_	ADP	IN	_	_	case	_	_
14	years	_	NOUN	NNS	_	_	nmod	_	_
15	to	_	PART	TO	_	_	mark	_	_
16	come	_	VERB	VB	_	_	acl	_	_
17	.	_	PUNCT	.	_	_	punct	_	_
18	]	_	PUNCT	-RRB-	_	_	punct	_	_
r   ENT_IOBN)listr1   allheadihas_annotation)
input_datadocsts      e/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/test_cli.py*test_cli_converters_conllu_empty_heads_nerrD   0   s_    J( z*+Dd1g.AFFHH.///Aw%%i0000 /s   A i<  c                      t               } t        j                  | j                  d      i       }| j	                  |g       y )N )r(   r-   	from_dictmake_docevaluate)nlpexamples     rC   test_issue4924rL   Q   s2    
*CR 0"5GLL'    i  c                      dddgdddiddidd} t        j                  |       }t               5 }|dz  }|j                  |       dg d	dd
t	        |      id
t	        |      iddid	d}t        |      }|dz  }|j                  |       |dz  }t        ||d       t        |      }ddd       d   d   d
   t	              k(  sJ |d   d   d
   t	        |      k(  sJ |d   d   d   dk(  sJ d|d   d   v sJ y# 1 sw Y   XxY w)zETest that fill-config doesn't turn sourced components into factories.entok2vectagger)langpipelinefactory)rP   rQ   )rJ   
components
test_model)rP   rQ   nersourcerW   zbase.cfgz
config.cfgT)silentNrU   model)r&   from_configr8   to_diskstrr   r    r5   )
source_cfg
source_nlpdir_pathsource_pathbase_cfg	base_pathoutput_path
filled_cfgs           rC   test_issue7055rf   X   si    9h*?@!9- (+
J $$Z0J	 .8-;' .JK$c+&67#S%56!5)
 (#z)	#-K48 -
#.$ l#I.x8C<LLLLl#H-h73{;KKKKl#E*95>>>j.u5555+. .s   A<DDi1  zfactory,output_file))depszparses.html)entszentities.html)spansz
spans.htmlrT   output_filec                    t               5 }dddddddddd	d
dgdddddddddddddd	d
dgiddddddddddd	dddddddddd	ddd d!d"d#d$d%dd	d&d'd(d)d)d*d+d,d-d	d-d.d/d0d"d1d2d3dd	d4d5d6d)d)d7d8d9d-d	d:ddd)d)d;d<d,d=d	d=d>d?d0d"d@dAd%d4d	ddBdCdDdEdFdGdHdd	dddId0dJd@dKdLd-d	dMdNdd0dJd@dOdPdd	dQdRdSdDdEdTdUdHdVd	dVd
dWd0dJdXdYdZd-d	d[d\dd0d"dXd]dPdVd	d^d_d`dDdEdadbdHdd	gdc}t        j                  dd      }t        |j                        j                  |      }t        dh|g|ddde| dfi ||z  j                         sJ 	 dgdgdg       yg# 1 sw Y   ygxY w)izL
    Test if all displaCy types (ents, dep, spans) produce an HTML file
    6   nam_adj_country,   )endlabelstartS   nam_liv_personE   d   nam_pro_title_bookV   scrF   )ro   kb_idrp   rq   uk   Niedawno czytał em nową książkę znakomitego szkockiego medioznawcy , Briana McNaira - Cultural Chaos .r      ADVz
Degree=Posniedawnoadvmodr7   )	idrq   ro   tagposmorphlemmadepr=   	      PRAETVERBzYAnimacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Actu   czytaćROOT         AGLTNOUNz-Animacy=Inan|Case=Ins|Gender=Masc|Number=Singemiobj         ADJz*Case=Acc|Degree=Pos|Gender=Fem|Number=Singnowyamod         SUBSTzCase=Acc|Gender=Fem|Number=Singu	   książkaobj       +   z8Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Singznakomitacl   z7Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Singszkockiy   7   B   z,Animacy=Hum|Case=Gen|Gender=Masc|Number=SingmedioznawcaC   D   INTERPPUNCTzPunctType=Comm,punctK   PROPNBriannmod
   L   McNairflat   T   U   zPunctType=Dash-   ^   z-Animacy=Inan|Case=Nom|Gender=Masc|Number=SingCulturalconj   _   Chaos   e   f   zPunctType=Peri.)rh   ri   texttokenspl)rA   rd   
model_namelimitTN )r8   spacyblankr*   vocab	from_jsonr   is_file)rT   rj   tmp_dirdoc_jsonrJ   docs         rC   test_issue12566r   {   s    
 J17 %6D%5C&:RH 6GRTU6FQST"!#!5!#		0
   )'#
 "!x&!
 !!L!!
   I#!
 "!>( 
   W' 
   V'!
 "!K*!
 #"- "
 ""K$!
 ""K%!
 #"- "
 ""L'!
 "!L$!
  #"- "
yh)}
@ kk$#))n&&x0 	 	
G!	
HOQU	
 +%..000UJ1 J1 J1s   D1EEc                      t               } | j                  d       t               5 }| j                  |       t	        |dg      }|d   dk(  sJ |d   dgk(  sJ 	 d d d        y # 1 sw Y   y xY w)NtextcatrF   )excluderR   nlrU   )r'   add_piper8   r\   r   )rJ   r   raw_datas      rC   test_cli_infor   Q  ss    
'CLL	 57G".4'''%)444	5 5 5s   5A%%A.c                     g d} dj                  |       }t        t        |d            }t        |      dk(  sJ t	        |      g}|d   d   dk(  sJ t        |d   d         dk(  sJ t        |d   d   d   d         dk(  sJ |d   d   d   d   d   }t        |d	         d
k(  sJ |d	   }|D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |d   d   d   d   D cg c]  }|d   |d   |d   f }}t        |d   |d      }	|	g dk(  sJ y c c}w c c}w c c}w c c}w c c}w )N)zG1	Dommer	dommer	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	2	appos	_	Oz/2	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	B-PERz.3	Eilertsen	Eilertsen	PROPN	_	_	2	name	_	I-PERuC   4	avstår	avstå	VERB	_	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	O
r7   n_sentsr   r~   
paragraphs	sentencesr   r   orth)DommerFinn	Eilertsen   avstårr   )r   r   r   r   r=   )r7   r   r   r   )apposnsubjnamer   entitiesr   Omissing)r   zB-PERzL-PERr   joinr;   r1   lenr.   r/   
linesr@   converted_docs	convertedsentr   rB   eent_offsets
biluo_tagss
             rC   "test_cli_converters_conllu_to_docsr   [  s   E 5!J.Q?@N~!###n-.IQ<"""y|L)*a///y|L)!,[9:a???Q<%a(5a8DtH~!###(^F%&!AfI&*TTTT$%AeH%)KKKK%&!AfI&-777$%AeH%)KKKK$-aL$>q$A*$M 1qtQqTK  '~a'8+sSJ5555 '%&%s   4EE(E$E)(E.r   )L1	Dommer	dommer	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	2	appos	_	name=OzB2	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	SpaceAfter=No|name=B-PERz33	Eilertsen	Eilertsen	PROPN	_	_	2	name	_	name=I-PERV   4	avstår	avstå	VERB	_	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	SpaceAfter=No|name=Oz%5	.	$.	PUNCT	_	_	4	punct	_	name=B-BAD)zG1	Dommer	dommer	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	2	appos	_	_z@2	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	SpaceAfter=No|NE=B-PERz13	Eilertsen	Eilertsen	PROPN	_	_	2	name	_	NE=L-PERuO   4	avstår	avstå	VERB	_	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	SpaceAfter=Noz#5	.	$.	PUNCT	_	_	4	punct	_	NE=B-BADc           	         dj                  |       }t        t        |dddd            }t        |      dk(  sJ t	        |      g}|d   d   dk(  sJ t        |d   d	         dk(  sJ |d   d	   d   d
   dk(  sJ t        |d   d	   d   d         dk(  sJ |d   d	   d   d   d   }t        |d         dk(  sJ |d   }|D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |d   d	   d   d   D cg c]  }|d   |d   |d   f }}t        |d   |d      }	|	g dk(  sJ y c c}w c c}w c c}w c c}w c c}w )Nr   r7   PERSONrF   )PERBAD)r   ner_mapr   r~   r   rawu   Dommer FinnEilertsen avstår. r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r=   )r7   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   )r   zB-PERSONzL-PERSONr   r   r   r   s
             rC   /test_cli_converters_conllu_to_docs_name_ner_mapr   x  s   ( 5!Jz1hr6RSN ~!###n-.IQ<"""y|L)*a///Q<%a(/3SSSSy|L)!,[9:a???Q<%a(5a8DtH~!###(^F%&!AfI&*YYYY$%AeH%)TTTT%&!AfI&*;;;;$%AeH%)TTTT$-aL$>q$A*$M 1qtQqTK  '~a'8+sSJ@@@@ '%&%s   E-!E2;E7E<;Fc                     g d} dj                  |       }t        t        |ddd            }t        |      dk(  sJ t	        |      g}|d   d   dk(  sJ t        |d   d         dk(  sJ |d   d   d   d	   d
k(  sJ t        |d   d   d   d         dk(  sJ |d   d   d   d   d   }t        |d         dk(  sJ |d   }|D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |D cg c]  }|d   	 c}g dk(  sJ |d   d   d   d   D cg c]  }|d   |d   |d   f }}t        |d   |d      }	|	g d k(  sJ y c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w )!N)r   z2-3	FE	_	_	_	_	_	_	_	_z42	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	name=B-PERzC3	Eilertsen	Eilertsen	X	_	Gender=Fem|Tense=past	2	name	_	name=I-PERr   z!5	.	$.	PUNCT	_	_	4	punct	_	name=Or   r7   T)r   merge_subtokensappend_morphologyr   r~   r   r   u   Dommer FE avstår. r   r   r   r   )r   FEr   r   r   )z*NOUN__Definite=Ind|Gender=Masc|Number=Singz#PROPN_X__Gender=Fem,Masc|Tense=pastz&VERB__Mood=Ind|Tense=Pres|VerbForm=Finr   r   )r   r   r   r   r   )z$Definite=Ind|Gender=Masc|Number=SingzGender=Fem,Masc|Tense=pastz Mood=Ind|Tense=Pres|VerbForm=FinrF   r   )dommerzFinn Eilertsenu   avståz$.r=   )r7   r7   r   r   r   )r   r   r   r   r   r   r   r   )r   zU-PERr   r   r   r   s
             rC   ,test_cli_converters_conllu_to_docs_subtokensr    sw   E 5!J44	
N
 ~!###n-.IQ<"""y|L)*a///Q<%a(/3HHHHy|L)!,[9:a???Q<%a(5a8DtH~!###(^F%&!AfI&*JJJJ$%AeH% *    %%AeH%)KKKK &'1AgJ' ,    !''1AgJ'+WWWW%&!AfI&-777$%AeH%)LLLL$-aL$>q$A*$M 1qtQqTK  '~a'8+sSJ1111+ '% &' (&%s0   	F=#G=GG1GG%GG c                  .   g d} dj                  |       }t        t        |d            }t        |      dk(  sJ t	        |      }|d   dk(  sJ t        |d         dk(  sJ t        |d   d   d	         d
k(  sJ t        dd
      D ]D  }|d   d   d	   |   }t        |d         dk(  sJ |d   }g d}|D cg c]  }|d   	 c}|k(  rDJ  t        |d   j                        dk(  sJ |d   j                  D ]  }	|	j                  dv rJ  y c c}w )N)zAI|O like|O London|I-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|OzAI|O like|O London|B-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|Oz^I|PRP|O like|VBP|O London|NNP|I-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|Oz^I|PRP|O like|VBP|O London|NNP|B-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|Or   r   r   r7   r~   r   r   r   r   r   rz   IlikeLondonandNewYorkCityr   r   New York Cityr  )r   r;   r2   r   r.   rangerh   r   )
r   r@   r   r   r>   r   r   expectedrB   ents
             rC   test_cli_converters_iob_to_docsr    sU   E 5!J+j"=>N~!###^,IT?ay&'1,,,y&q)+671<<<1a[ 7&q)+6q94>"a'''hM#)*a&	*h6667 ~a %%&!+++a %% 7xx66667 +s   :Dc                  *   g d} dj                  |       }t        t        |d            }t        |      dk(  sJ t	        |      }|d   dk(  sJ t        |d         dk(  sJ t        |d   d   d	         d
k(  sJ t        dd
      D ]B  }|d   d   d	   |   }t        |d         dk(  sJ |d   }|D cg c]  }|d   	 c}g dk(  rBJ  t        |d   j                        dk(  sJ |d   j                  D ]  }|j                  dv rJ  y c c}w )N).z-DOCSTART- -X- O OrF   zI	Ozlike	OzLondon	B-GPEzand	Oz	New	B-GPEz
York	I-GPEz
City	I-GPEz.	OrF   zI Ozlike OzLondon B-GPEzand Oz	New B-GPEz
York I-GPEz
City I-GPEz. OrF   zI PRP Oz
like VBP OzLondon NNP B-GPEzand CC OzNew NNP B-GPEzYork NNP I-GPEzCity NNP I-GPEz. . OrF   z	I PRP _ Ozlike VBP _ OzLondon NNP _ B-GPEz
and CC _ OzNew NNP _ B-GPEzYork NNP _ I-GPEzCity NNP _ I-GPEz. . _ OrF   z	I	PRP	_	Ozlike	VBP	_	OzLondon	NNP	_	B-GPEz
and	CC	_	OzNew	NNP	_	B-GPEzYork	NNP	_	I-GPEzCity	NNP	_	I-GPEz.	.	_	Or   r   r   r7   r~   r   r   r   r   r   rz   r   r  r  )r   r;   r0   r   r.   r  rh   r   )	r   r@   r   r   r>   r   r   rB   r  s	            rC   %test_cli_converters_conll_ner_to_docsr    sU   /E` 5!J+JCDN~!###^,IT?ay&'1,,,y&q)+671<<<1a[ i&q)+6q94>"a'''h#)*a&	*.hhhhi ~a %%&",,,a %% 7xx66667 +s   6Dzargs,expected--x.foo10x.foor   z
--x.foo=10barz--x.foo=bar)r  --x.barbazTr  )r  x.barz--x.bar=baz)r  10.1r  z--x.bazfalseg333333$@F)r  r  x.baz)r  r  r  z--x.baz=falsec                 $    t        |       |k(  sJ y N)r   )argsr  s     rC   test_parse_config_overridesr%  3  s    , "$'8333rM   r$  z--foo)r  r  z--bazc                 v    t        j                  t              5  t        |        d d d        y # 1 sw Y   y xY wr#  )pytestraisesr	   r   r$  s    rC   #test_parse_config_overrides_invalidr*  L  s+    	|	$ %t$% % %   /8)r  r  r  c                 v    t        j                  t              5  t        |        d d d        y # 1 sw Y   y xY wr#  )r'  r(  
SystemExitr   r)  s    rC   %test_parse_config_overrides_invalid_2r.  R  s+    	z	" %t$% % %r+  c                     d} | t         j                  t        j                  <   t	        g       }t        |      dk(  sJ |d   dk(  sJ |d   dk(  sJ |d   du sJ |d	   d
k(  sJ dt         j                  t        j                  <   t	        g d       i k(  sJ t        j                  t              5  t	        g        d d d        dt         j                  t        j                  <   t        j                  t              5  t	        g        d d d        t         j                  t        j                  = y # 1 sw Y   wxY w# 1 sw Y   5xY w)Nz2--x.foo bar --x.bar=12 --x.baz false --y.foo=hellor   r  r  r  r   r!  Fzy.foohelloz--x)env_varzhello world)	osenvironr3   CONFIG_OVERRIDESr   r   r'  r(  r-  )	overridesresults     rC   test_parse_cli_overridesr7  X  s    DI,5BJJx(()#B'Fv;!'?e###'?b   '?e###'?g%%%,1BJJx(()!"d3r999	z	" #r"#,9BJJx(()	z	" #r"#


8,,-# ## #s   2D4 E 4D= E	rR   rO   r   rS   )rQ   parserrW   )rW   r   sentencizer)morphologizerspancatentity_linkerspancat_singlelabeltextcat_multilabeloptimize
efficiencyaccuracypretrainingc                 v    t        | |||d      }t        |t              sJ |rd|d   d<   t        |d       y )NF)rR   rS   r?  rB  gpuzmy_data.jsonlpathsraw_textT	auto_fill)r!   
isinstancer   r6   )rR   rS   r?  rB  configs        rC   test_init_configrK  k  sJ     F ff%%%&5w
#6T2rM   c                  V    t        j                         D ]  \  } }t        di |rJ  y )Nr   )r   itemsr)   )rR   datas     rC   test_model_recommendationsrO    s/    %++- ,
d#+d+++,rM   value)parser,textcat,tagger parser, textcat ,tagger rQ  rR  z  "parser"," textcat " ,"tagger "z  'parser',' textcat ' ,'tagger '[parser,textcat,tagger]z["parser","textcat","tagger"]z$[" parser" ,"textcat ", " tagger " ]rS  z[ parser, textcat , tagger]z['parser','textcat','tagger']z$[' parser' , 'textcat', ' tagger ' ]c                 ,    t        | d      g dk(  sJ y )NFintify)r8  r   rQ   r   rP  s    rC   test_string_to_listrY    s    * %.2QQQQrM   )z1,2,3z[1,2,3]z["1","2","3"]z[" 1" ,"2 ", " 3 " ]z[' 1' , '2', ' 3 ' ]c                 T    t        | d      g dk(  sJ t        | d      g dk(  sJ y )NFrU  )123T)r7   r   r   rW  rX  s    rC   test_string_to_list_intifyr^    s.     %./AAA%-:::rM   c                      t        dt        j                  z         } d| _        t        j                  | v r@d}t	               }t        ||      }t        t        j                        t        |      k(  sJ y y )N==Fen_core_web_sm)r
   r   __version__prereleasesr   r   r4   )specr   compatibilityversions       rC   test_download_compatibilityrg    sj    u0001DDD %
)+j-8 !2!237H7QQQQ	 !rM   c                     t        dt        j                  z         } d| _        t        j                  | v rOt	               \  }}t        t        j                        }|j                  |i       }t        |      dkD  sJ d|v sJ y y )Nr`  Fr   ra  )r
   r   rb  rc  r%   r4   getr   )rd  
model_pkgscompatspacy_versioncurrent_compats        rC   !test_validate_compatibility_tablern    s    u0001DDD +-
F)%*;*;<M26>"Q&&&>111 !rM   component_name)rW   r   r;  rQ   c                 >   t               }|j                  |       }dD ]  }|j                  |        t        |j	                  |       j
                        dk(  sJ t               5 }t        ||       t        d| gdd      }dd| d	|  d
di|d   d   | <   t        |d      }t        |j	                  |       j
                        dk(  sJ |j                          t        |j	                  |       j
                        dk(  sJ 	 d d d        y # 1 sw Y   y xY w)N)T1T2T3T4r   r   r@  F)rR   rS   r?  rD  labelszspacy.read_labels.v1/z.json)z@readerspath
initializerU   TrG  r   )r'   r   	add_labelr   get_piperu  r8   r"   r!   r6   rx  )ro  rJ   	componentrp   r   rJ  nlp2s          rC   test_init_labelsr}    s%   
'C^,I) #E"#s||N+223q888	 >7S'"$%!	
 2")1^$4E:>
|\*>: &f=4==0778A===4==0778A==='> > >s   'B"DDc            	      l   t               } | j                  d       t        | j                        g k(  sJ t               } | j                  dddddddi	       t        | j                        g k(  sJ t        j                  d
      d        }| j                  d
       t        | j                         y )NrQ   r   rZ   zspacy.TextCatBOW.v1Tr7   F)z@architecturesexclusive_classes
ngram_sizeno_output_layerrJ  third_party_testc                     d S )Nc                     | S r#  r   )xs    rC   <lambda>zItest_get_third_party_dependencies.<locals>.test_factory.<locals>.<lambda>  s     rM   r   )rJ   r   s     rC   test_factoryz7test_get_third_party_dependencies.<locals>.test_factory  s    rM   )r'   r   r$   rJ  rT   )rJ   r  s     rC   !test_get_third_party_dependenciesr    s     'CLL'

3r999 'CLL"7%)#(
   (

3r999 ]]%& ' LL#$ ,rM   zfactory_name,pipe_name))rW   rW   )rW   my_ner)r;  r;  )r;  
my_spancatc                 ^   d}t               }|j                  | |      }|D ]  }|j                  |        |j                          |j	                  |      j
                  |k(  sJ | dk(  r't        |      |j                     t        |      k(  sJ y t        ||       t        |      k(  sJ y )N)AB)r   r;  )
r&   r   ry  rx  rz  ru  r   keysetr   )factory_name	pipe_nameru  rJ   piperp   s         rC   test_get_labels_from_modelr  
  s     F
)C<<9<5D uNN<<	"))V333y ',TXX6#f+EEE%c<8CKGGGrM   c                      t        d      dk(  sJ t        d      dk(  sJ t        d      dk(  sJ t        d      dk(  sJ t        d      dk(  sJ t        d      dk(  sJ t        d      dk(  sJ y )	Nu   Meine_BäumeF_packagepackage_z.packagezpackage.z-packagezpackage-)r#   r   rM   rC   test_permitted_package_namesr  #  s    %n5>>>%j1U:::%j1U:::%j1U:::%j1U:::%j1U:::%j1U:::rM   c                     t               } t        | j                  g d      }t        | j                  g dg dg d      }t        ||      }t	        |gdg| d      }|d	   d
k(  sJ t        | j                  g d      }t        | j                  g dg dg d      }t        ||      }t	        |gdg| d      }|d	   dk(  sJ y )N)Tokenr   r  r  r  words)r  r   r  )TFT)r   r   B-ENT)r  sent_startsrh   rW   Tboundary_cross_entsr   )r   r  zI-ENTr7   )r&   r*   r   r-   r   )rJ   predrefegrN  s        rC   test_debug_data_compile_goldr  .  s    
)Csyy EFD
		-' 	C 
s	B"wT2D%&!+++syy EFD
		-'$	C 
s	B"wT2D%&!+++rM   r;  c                 R   t               }d}t        |j                  g d      }t        |ddd      t        |ddd      g|j                  |<   t        |j                  g d      }t        |ddd      t        |ddd      g|j                  |<   t        ||      }t        |g| g|d	      }|d
   |   t        ddd      k(  sJ |d   |   dgdgdk(  sJ |d   |   t        |ddd      gt        |ddd      gdk(  sJ |d   |   |dd g|dd gd|dd g|dd gddk(  sJ y )Nrx   WelcometotheBankofChinar   r  r   r   ORGr   GPETr;  r7   )r  r  spans_lengthspans_per_typesb_per_typer   r   )rq   ro   r   )r&   r*   r   r,   ri   r-   r   r   )ro  rJ   	spans_keyr  r  r  rN  s          rC   &test_debug_data_compile_gold_for_spansr  G  s|   
)CIsyy TUD!$1e4d4Au6MNDJJy
ciiS
TC aE2DaE4JKCIIi	s	B"/d;D	?9%11E)FFFF	*qc1#.FFFF !),S!Q&'S!Q&'1    y)a(S1XJ7a(S1XJ7.   rM   c                      t               } t        | j                  g d      t        | j                  dg      g}t        dddd      }t	        |d      }||k(  sJ y )	N)r  r  r  r  r        ?      ?)chinabankr  T)	normalize)r&   r*   r   r   r   )rJ   rA   r  freq_distributions       rC   &test_frequency_distribution_is_correctr  `  s]    
)CCII45CIIgY'D
 d$?@H)$$?(((rM   c                      t        ddd      } t        ddddd      }t        | |      }d}t        j                  ||d	      sJ y )
Nr  r  )abg333333?g?)r  r  cdgX2ı.?gMbP?)rel_tol)r   r   mathisclose)pqr6  r  s       rC   )test_kl_divergence_computation_is_correctr  l  sK    c%&AdDt<=A1%FH<<$777rM   c                     t               } d}t        | j                  g d      }t        |ddd      t        |ddd      g|j                  |<   t        | j                  g d      }t        |ddd      t        |ddd      g|j                  |<   t        ||      }|g}t        |d	g| d
      }t        |||      }h dj                  |j                               sJ |d   dk(  sJ |d   dk(  sJ y )Nrx   r  r  r   r   r  r   r  r;  Texamplescompiled_goldr  >   bdsdlengths
min_lengthr7   
max_length)
r&   r*   r   r,   ri   r-   r   r   issubsetkeysrJ   r  r  r  r  r  rN  span_characteristicss           rC   *test_get_span_characteristics_return_valuer  t  s    
)CIsyy TUD!$1e4d4Au6MNDJJy
ciiS
TC aE2DaE4JKCIIi	s	BtHI;T:D4 #++,@,E,E,GHHH-222-222rM   c                     t               } d}t        | j                  g d      }t        |ddd      t        |ddd      g|j                  |<   t        | j                  g d      }t        |ddd      t        |ddd      g|j                  |<   t        ||      }|g}t        |d	g| d
      }t        |||      }t        |       y)zDTest if interface between two methods aren't destroyed if refactoredrx   r  r  r   r   r  r   r  r;  Tr  N)	r&   r*   r   r,   ri   r-   r   r   r   r  s           rC   0test_ensure_print_span_characteristics_wont_failr    s    
)CIsyy TUD!$1e4d4Au6MNDJJy
ciiS
TC aE2DaE4JKCIIi	s	BtHI;T:D4   45rM   	threshold)F   P   r   Z   r   c                 p    g dg dg dd}t        ||       }t        |j                               | k\  sJ y )Nr7   r   r   r   r   r   r   r   r   r7   r   r   span_type_1span_type_2span_type_3)r   sumvalues)r  sample_span_lengths
span_freqss      rC   4test_span_length_freq_dist_threshold_must_be_correctr    s@     $##
 --@)LJz  "#y000rM   c                      g dg dg dd} d}t        | |      }t        |j                               |k\  sJ t        |j	                               g dk(  sJ y )Nr  r  r  r  r  )r   r7   r   r   r   )r   r  r  r;   r  )r  r  r  s      rC   1test_span_length_freq_dist_output_must_be_correctr    s]    ###
 I,-@)LJz  "#y000
!"o555rM   c            	      l    t               5 } | dz  }t        | |dddd       d d d        y # 1 sw Y   y xY w)Nz
test.spacyblank:enr   r7   )r8   r   )	data_pathoutputs     rC   test_applycli_empty_dirr    s:    	 ;9\)iVQ:; ; ;s   *3c            	      L   t               5 } | dz  }t        j                  d      } |d      }t               }|j	                  | dz         t        | |dddd       |j                  |       |j	                  | dz         t        | |dddd       d d d        y # 1 sw Y   y xY w)Ntestout.spacyrO   testing apply cli.testin.spacyr  r   r7   )r8   r   r   r+   r\   r   add)r  r  rJ   r   docbins        rC   test_applycli_docbinr    s    	 
;9_,kk$&'y>12iVQ:

3y>12iVQ:
; 
; 
;s   BBB#c            	         t               5 } | dz  }dddg}ddig}t        j                  | dz  |       t        | |ddd	d	       t        j                  | d
z  |       t        | |ddd	d	       d d d        y # 1 sw Y   y xY w)Nr  Testing apply cli.   )fieldr  r  234
test.jsonlr  r7   ztest2.jsonl)r8   srslywrite_jsonlr   )r  r  rN  data2s       rC   test_applycli_jsonlr    s    	 <9_,.s;<5!")l2D9iWa;)m3U;iWa;< < <s   A#A77B c            	          t               5 } | dz  }t        | dz  d      5 }|j                  d       d d d        t        | |dddd       d d d        y # 1 sw Y   "xY w# 1 sw Y   y xY w)Nr  ztest.foowr  r  r   r7   )r8   openwriter   )r  r  ftests      rC   test_applycli_txtr    sr    	 ;9_,)j(#. 	.%KK,-	.iVQ:	; ;	. 	.; ;s!   AAAA	AA(c            	      v   t               5 } | dz  }d}t        j                  d      } ||      }d|ig}t        j                  | dz  |       t               }|j                  |       |j                  | dz         t        | dz  d      5 }|j                  |       d d d        t        | |d	dd
d
       t        t               j                  |      j                  |j                              }t        |      dk(  sJ |D ]  }|j                   |k(  rJ  	 d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)Nr  zTesting apply clirO   r   r  r  ztest.txtr   r  r7   r   )r8   r   r   r  r  r+   r  r\   r  r  r   r;   	from_diskget_docsr   r   r   )	r  r  r   rJ   r   
jsonl_datar  r  r6  s	            rC   test_applycli_mixedr	    s*   	 $9_,"kk$$itn%
)l2J?

3y>12)j(#. 	%KK	iVQ:fh((099#))DE6{a 	$C88t###	$!$ $	 	$ $s+   BD/D#A7D/D/#D,	(D//D8c            	         t        j                  dd       d} t               5 }|dz  }t        j                  d      } |d      }| |j
                  _        t        d	      }|j                  |       |j                  |d
z         t        ||dddd       t        t               j                  |      j                  |j                              }|d   j
                  j                  | k(  sJ 	 d d d        y # 1 sw Y   y xY w)Nextr   )default)r  r   r  rO   r  T)store_user_datar  r  rF   r7   )r*   set_extensionr8   r   r   _r  r+   r  r\   r   r;   r  r  r   )valr  r  rJ   r   r  r6  s          rC   test_applycli_user_datar    s    eQ'
C	 
&9_,kk$&'	-

3y>12iRA6fh((099#))DEay{{#%%%
& 
& 
&s   CC<<Dc                    dt         dt        t           fd	 ddt        t        t        t
        t        t        f   f   df   dt        t         t        t           f   ffd}t               5 } |       \  }}t        |D cg c]  }|j                   c}      j                  |dz         t               5 }|j                  |       t        ||dz  d	d
dd      \  }}}	|t        |	j                               k(  sJ |	d   dk(  sJ 	 d d d         |di ff      \  }}
t               5 }|j                  |       t        ||dz  dd
dd      \  }}}	|t        |	j                               k(  sJ |	d   dk(  sJ 	 d d d         |di ff      \  }}
t               5 }|j                  |       t        ||dz  d	d
dd      sJ 	 d d d         |       \  }}
t               5 }|j                  |       t        j                  t               5  t        ||dz  dd
dd       d d d        d d d        d d d        y c c}w # 1 sw Y   8xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   DxY w# 1 sw Y   HxY w# 1 sw Y   y xY w)NrJ   returnc                     g }dddddddgidfddddddd	gidffD ]>  }| j                  |d
         }|j                  t        j                  ||d                @ |S )Nz/I am angry and confused in the Bank of America.      ?        )ANGRYCONFUSEDHAPPYrx   )r   .   r  )catsri   z$I am confused but happy in New York.)   #   r  r   r7   )rH   appendr-   rG   )rJ   rA   rB   r   s       rC   make_examplesz.test_cli_find_threshold.<locals>.make_examples   s      B&)sSI"_$56 7&)sSI"_$56
 	6A  ,,qt$CKK))#qt45#	6& rM   rU   .c                    t               }|j                  ddddi       | D ]  \  }}|j                  ||         |      |j                  fd       t        d	      D ]  }|j	                          |fS )
Nr>  tc_multir  g?)r  r   rJ  r  c                       S r#  r   )new_exampless   rC   r  z;test_cli_find_threshold.<locals>.init_nlp.<locals>.<lambda>'  s     rM   )get_examplesr   )r&   r   rx  r  update)rU   new_nlpcfncomp_configr>   r#  r  s        @rC   init_nlpz)test_cli_find_threshold.<locals>.init_nlp  s     )-% 	 	
 !+ 	6CS5	6 %W-(<=q 	)ANN<(	) $$rM   )rA   z
docs.spacyr!  r  cats_macro_fT)rZ   r  r  threshold_key
scores_keyrY   r  r  r;  
spans_sc_fr>  r  )r   )r(   r   r-   r   r]   r   r   r8   r+   	referencer\   r   maxr  r'  r(  AttributeError)capsysr)  docs_dirrJ   r  rK   nlp_dirbest_threshold
best_scoreresr  r  s              @rC   test_cli_find_thresholdr7    s   8 W 2 >@%%T#s(^ 34c9:%	xg&	'%* 
 =8 !
Xh?7W&&?@HH|#	
 ^ 	#wKK .<"\1$))/+NJ SZZ\!2222s8s?"?	# Ir?,-Q^ 	#wKK .<"\1#)'/+NJ SZZ\!2222s8s?"?	# 0"578Q^ 		wKK !"\1$))  		 Q^ 
	wKK ~. !&5!"--
	g= =
 @	# 	#	# 	#		 		 
	 
	g= =s   .I?H=
$I?9AI I?,AI? I?(II?$+I3I'$I3,I?=I?I	I?I	I?I$	 I?'I0,I33I<	8I??Jc                     t               5 } g d}|D ]  }t        | |z        j                            t        t	        |             dk(  sJ t        t	        | d             dk(  sJ t        t	        | d            dk(  sJ t        t	        | d            dk(  sJ t        t	        | d            d	k(  sJ t        t	        | d
            dk(  sJ 	 d d d        y # 1 sw Y   y xY w)N)z	data1.iobz	data2.iobz
data3.jsonzdata4.conllzdata5.conllzdata6.conllz	data7.txtr   )suffixjsonr7   iobr   conllr   pdfr   )r8   r   touchr   r   )r  filesfs      rC   test_walk_directoryrA  m  s    	 ;1
  	 AQK	  N1%&1,,,N1T23999N1V45!;;;N1U34:::N1W561<<<N1U34:::'; ; ;s   CCC&c            	         ddg difddg difg} t               }g }| D ]<  }|j                  t        j                  |j	                  |d         |d                > t        |dg|d	      }t        |d
         dk(  sJ y )NShe likes green eggslemmas)sher
  greeneggEat blue ham)eatbluehamr   r7   trainable_lemmatizerTlemmatizer_treesr   )r(   r  r-   rG   rH   r   r   )r  rJ   train_examplesrB   rN  s        rC   *test_debug_data_trainable_lemmatizer_basicrO    s    	(,K!LM	($:;<H *CN Kg//QqT0BAaDIJK *@)A3MD t&'(A---rM   c            	          ddg difdg dg ddfg} t               }g }| D ]<  }|j                  t        j                  |j	                  |d         |d	                > t        |d
g|d      }|d   dk(  sJ y )NrC  rD  )rF   r
  rF  rF   zHe hates green eggs)HehatesrF  eggs)rF   rR  r   rF  rF   )r  rD  r   r7   rL  Tpartial_lemma_annotationsr   r(   r  r-   rG   rH   r   )partial_examplesrJ   rN  rB   rN  s        rC   ,test_debug_data_trainable_lemmatizer_partialrX    s     
 (,E!FG "=7	
	 *CN Kg//QqT0BAaDIJK *@)A3MD+,111rM   c            	          ddg difddg difg} t               }g }| D ]<  }|j                  t        j                  |j	                  |d         |d                > t        |dg|d	      }|d
   dk(  sJ y )NrC  rD  )norZ  rZ  rZ  rH  )rZ  rZ  rZ  r   r7   rL  Tn_low_cardinality_lemmasr   rV  )low_cardinality_examplesrJ   rN  rB   rN  s        rC   4test_debug_data_trainable_lemmatizer_low_cardinalityr]    s    	(,D!EF	($678  *CN% Kg//QqT0BAaDIJK *@)A3MD*+q000rM   c            	          di fdi fg} t               }g }| D ]<  }|j                  t        j                  |j	                  |d         |d                > t        |dg|d      }|d   dk(  sJ y )	NrC  rH  r   r7   rL  Tno_lemma_annotationsr   rV  )unannotated_examplesrJ   rN  rB   rN  s        rC   2test_debug_data_trainable_lemmatizer_not_annotatedra    s    	$	 *CN! Kg//QqT0BAaDIJK *@)A3MD&'1,,,rM   c                      ddl m}  ddlm}  y )Nr   project_run)	spacy.clird  spacy.cli.project.runrc  s    rC   test_project_api_importsrg    s    %1rM   c                    | j                  t        dd        | j                  t        dd        t        j                  dd       t        j                  t
              5  t        j                  dd       d	d	d	       y	# 1 sw Y   y	xY w)
zmTest that we can't tell spacy download to get an arbitrary model by using a
    relative path in the filenamerun_commandc                      y r#  r   )cmds    rC   r  z5test_download_rejects_relative_urls.<locals>.<lambda>  s    rM   _get_pip_install_cmdc                  
    ddgS )Npipinstallr   r   rM   rC   r  z5test_download_rejects_relative_urls.<locals>.<lambda>  s    %9K rM   zen_core_web_sm-3.7.1T)directz../en_core_web_sm-3.7.1N)setattrr   downloadr'  r(  r-  )monkeypatchs    rC   #test_download_rejects_relative_urlsrt    sw     8HI/1K
 3DA	z	" I  !:4HI I Is   !BB)r  r2  collectionsr   pathlibr   typingr   r   r   r   r'  r  clickr	   packaging.specifiersr
   	thinc.apir   r   r   re  r   r   spacy.cli._utilr   r   r   spacy.cli.applyr   spacy.cli.debug_datar   r   r   r   r   r   r   r   spacy.cli.downloadr   r   spacy.cli.evaluater   spacy.cli.find_thresholdr   spacy.cli.init_configr   r    r!   spacy.cli.init_pipeliner"   spacy.cli.packager#   r$   spacy.cli.validater%   spacy.lang.enr&   spacy.lang.nlr'   spacy.languager(   spacy.schemasr)   spacy.tokensr*   r+   spacy.tokens.spanr,   spacy.trainingr-   r.   r/   spacy.training.convertersr0   r1   r2   
spacy.utilr3   r4   r5   r6   utilr8   markissuerD   rL   rf   parametrizer]   r   r   r   r   r  r  r  r%  r*  r.  r7  rK  rO  rY  r^  rg  rn  r}  r  slowr  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  r7  rA  rO  rX  r]  ra  rg  rt  r   rM   rC   <module>r     s    	   ) )    -    + R R !	 	 	 > , 3 K K 0 V - !  # . $ " G G T T W W  41 1@ 4  46 6D 5QN1S N1s N1	 
N1b56: 	
	
&A'&A2/2d72A7H  T	WbM*
'2'
U	gu-.
7E*+	&$(GH
]	#te%DE>TE:	

 <TE:	
*4+*4 7)-H!IJ% K%
 ";gY!GH% I%
.& $.#
)5	 45	 lJ%?@u63 7 A	 /3,
 (R)(R ;;
R2 )+RS> T>:-D H H;,2 )I7L+MN O0	)83*6& &:;1 <1	6;;<;$*& k\;.. 2,1-2
IrM   