
    i              	          d dl mZmZmZmZ d dlZd dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, ejZ                  d        Z.d Z/ej`                  jc                  d      d        Z2ej`                  jc                  d      d        Z3ej`                  jc                  d      d        Z4ej`                  jc                  d      ej`                  jk                  dddg      de6fd              Z7d  Z8d! Z9d" Z:d# Z;d$ Z<d% Z=d& Z>d' Z?d( Z@d) ZAd* ZBej`                  jc                  d+      d,        ZCd- ZDd. ZEd/ ZFd0 ZGd1 ZHd2 ZId3 ZJej`                  j                  d4      d5        ZLej`                  j                  d4      d6        ZMd7 ZNd8 ZOd9d:d;d<d=id>gg d?d@fdAd:d<d;d=id>dBgg dCd@fdDd:d<d;d=id>gg dCd@fdEd:d;d<d=id>dFgg d?d@fdGi g g dHd@fgZPg dIZQdJ ZRdK ZSdL ZTej`                  j                  dMN      dO        ZVej`                  j                  dMN      dP        ZWdQ ZXdR ZYdS ZZej`                  jk                  dTdUdVedWfdUdXedWfg      dY        Z[ej`                  jk                  dZd[d\d]gd^d_d]gd`dad]dbdcd]gg      dd        Z\ej`                  jc                  de      df        Z]dg Z^ej`                  jk                  dhddXedWfddXedWfg      die6djee_ef   fdk       Z`dl Zay)m    )AnyCallableDictIterableN)assert_equal)registryutil)	ENT_KB_ID)pickle)	CandidateInMemoryLookupKBKnowledgeBaseget_candidatesEnglish)load_kb)build_span_maker)EntityLinker)EntityLinker_v1)DEFAULT_TOK2VEC_MODEL)Scorer)make_tempdir)DocSpan)Example)ensure_path)Vocabc                      t               S Nr        x/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/pipeline/test_entity_linker.pynlpr#      s
    9r!   c                 4    d}| |z
  |cxk  r	| |z   k  sJ  J y )Ng-C6?r    )abdeltas      r"   assert_almost_equalr(      s(    Eu9&QY&&&&&r!   iB  c                  n   t               } t        | j                  d      }g d}g d}t        j                  t
              5  |j                  ddgddg||g       d	d	d	       |j                         d
k(  sJ t               5 }t        |      }|j                         s|j                          |dz  }|j                  t        |             t        | j                  d      }|j                  t        |             d	d	d	       j                         d
k(  sJ y	# 1 sw Y   xY w# 1 sw Y   +xY w)zKTest that setting entities with overlapping identifiers does not mess up IO   entity_vector_length)?g?g)\(?)g?g      @gGz @Q1    o   )entity_list	freq_listvector_listN   kb)r   r   vocabpytestwarnsUserWarningset_entitiesget_size_entitiesr   r   existsmkdirto_diskstr	from_disk)r#   r5   vector1vector2ddir_path	file_pathkb2s           r"   test_issue4674rG   "   s    )C	#))!	<BGG	k	" 

t3i '* 	 	

 !Q&&&	 &1q> NNtO	


3y>"syyqAc)n%&   "a'''!
 
& &s   DA<D+D(+D4iJ  c                 F   ddl m}  || d      }|j                  ddg d       t        j                  t
              5  |j                  d	dgd
g       ddd       |j                  d	      du sJ |j                  ddgdg       |j                  ddgdg       t               5 }|j                  |       |j                  |       ddd       |j                         dk(  sJ t        |j                               ddhk(  sJ y# 1 sw Y   xY w# 1 sw Y   KxY w)zNEnsure that the KB does not accept empty strings, but otherwise IO works fine.r   )r   r*   r+   1   r4      r*   entityfreqentity_vector 皙?aliasentitiesprobabilitiesNFx皙?y皙?rL   )spacy.kb.kb_in_memoryr   
add_entityr7   raises
ValueError	add_aliascontains_aliasr   r>   r@   get_size_aliasessetget_alias_strings)en_vocabr   r5   tmp_dirs       r"   test_issue6730rf   <   s    7	(	;BMM3iM@	z	" D
2cUCDR E)))LLscU3%L@LLscU3%L@	 7


7
W  A%%%r##%&3*444D D s    D*#DDD i  c                     d} t               }|j                  d       |j                  d      }dddiddiddidd	igd
g}|j                  |        ||       }|j                  D cg c]  }| }}t	        |      dk(  sJ |d   }|j
                  d   }|j                  |j                  cxk  r|j                  k  sJ  J |j                  |j                        dk(  sJ y c c}w )NzfKathleen Battle sang in Mahler 's Symphony No. 8 at the Cincinnati Symphony Orchestra 's May Festival.sentencizerentity_rulerTHINGLOWERsymphonyno.8labelpatternrL   r   )
r   add_pipeadd_patternssentslenentsstartendindexsent)	textr#   rulerpatternsdocs	sentencessent0ents	            r"   test_issue7065r   R   s    sD
)CLLLL(E *%$##		

H 
x 
d)CII&q&I&y>QaLE
((1+C99uyy*377*****??388$))) 's   (	C&entity_in_first_sentenceTFc                    t               }dd}dg}ddddi}| r|j                  d       ddd|d	<   g d
} ||      }t        j                  ||||d      }|gfd}|j	                  dd      }	|	j                  |       |j                  fd      }
t        d      D ]  }|j                  |
        |	j                  |j                  g       y)a  Tests if NEL crashes if entities cross sentence boundaries and the first associated sentence doesn't have an
    entity.
    entity_in_prior_sentence (bool): Whether to include an entity in the first sentence associated with the
    sentence-crossing entity.
    r*   z'Mahler 's Symphony No. 8 was beautiful.)
      WORK)r   r                 ?)Q7304Q270853r      PERSONr   r   )	r4   r   r   r   r4   r   r   r   rU   linkssent_startsc                     t        |       }|j                  ddg d       |j                  ddgdg       |j                  d	dg d
       |j                  dd	gdg       |S )Nr+   r      	   r4   irM   zNo. 8r   rS   r   r   r*   Mahlerr   r\   r_   r6   mykbvector_lengths     r"   	create_kbz.test_sentence_crossing_ents.<locals>.create_kb   sx    MJyrL[% 	 	

 	wRzJY% 	 	

 r!   entity_linkerTlastc                       S r   r    train_exampless   r"   <lambda>z-test_sentence_crossing_ents.<locals>.<lambda>       N r!   get_examplesrL   )sgdN)r   appendr   	from_dictrs   set_kb
initializerangeupdatepredict	reference)r   r#   r|   rU   r   r   r   exampler   r   	optimizerir   r   s               @@r"   test_sentence_crossing_entsr   n   s     )CM4D !H56E()"%#6f.K
d)C(U;OG YN$ LLtL<M#,BCI1X 2

>y
12 7,,-.r!   c                    
 ddg difg} t               }dg 
| D ]2  \  }} ||      }
j                  t        j                  ||             4 fd}|j	                  dd      }|j                  |       |j                  
fd	
      }t        d      D ]  }i }	|j                  
||	        |j	                  dd       |j                  
       y )NzThe sky is blue.r   )r4   r   r   r   r   r*   c                 x    t        |       }|j                  ddg d       |j                  ddgdg       |S Nr+   Q2146908r   r   rM   Russ Cochranr-   r   r   s     r"   r   z#test_no_entities.<locals>.create_kb   :    MJz*M~
|cU;r!   r   Tr   c                       S r   r    r   s   r"   r   z"test_no_entities.<locals>.<lambda>   r   r!   r   rL   r   lossesrh   first)
r   r   r   r   rs   r   r   r   r   evaluate)
TRAIN_DATAr#   r|   
annotationr   r   r   r   r   r   r   r   s             @@r"   test_no_entitiesr      s     	
J )CMN& Bj$ig//Z@AB LLtL<M#,BCI1X A

>y
@A
 LLdL+ LL r!   c                  ~   ddddiidgg ddfg} t               }dg | D ]2  \  }} ||      }j                  t        j                  ||             4 fd	}|j	                  d
d      }|j                  |       |j                  fd      }t        d      D ]  }i }	|j                  ||	        |j	                  dd       dddiddigddddiddigdg}
|j	                  dd
      }|j                  |
       |j                        }d|d   v sJ d|d   v sJ d|d   v sJ d|d   vsJ y )N,Russ Cochran his reprints include EC Comics.r   r   r   r   r   r   r   r4   r   r   r   r   r   r   r   r   rU   r   r*   c                 x    t        |       }|j                  ddg d       |j                  ddgdg       |S r   r   r   s     r"   r   z%test_partial_links.<locals>.create_kb   r   r!   r   Tr   c                       S r   r    r   s   r"   r   z$test_partial_links.<locals>.<lambda>   r   r!   r   rL   r   rh   r   r   rk   russcochranrp   ORGeccomicsri   beforeents_per_typenel_f_per_type)r   r   r   r   rs   r   r   r   r   rt   r   )r   r#   r|   r   r   r   r   r   r   r   r~   r}   resultsr   r   s                @@r"   test_partial_linksr      s    ;!J#45./8	
	J )CMN& Bj$ig//Z@AB LLtL<M#,BCI1X A

>y
@A
 LLdL+&(9GY;O'PQgt_w6I$JKH LLL@E	x  ll>*Gw////w/0000GO,,,, 01111r!   c                    t        | j                  d      }|j                  ddg d       |j                  ddg d	       |j                  d
dg d       |j                  ddd
gddg       |j                  ddgdg       |j	                         dk(  sJ |j                         dk(  sJ |j                  d      g dk(  sJ |j                  d      g d	k(  sJ |j                  d
      g dk(  sJ t        |j                  dd      d       t        |j                  d
d      d       t        |j                  dd      d       t        |j                  d
d      d       y)zCTest the valid construction of a KB with 3 entities and two aliasesr*   r+   r.      )      r*   rM   Q2   )rL   r4   r   Q3   )r   ir   douglas皙?rX   rS   adamr-   rL   rN   rT   Q342r   douglassssssN)	r   r6   r\   r_   r;   ra   
get_vectorr(   get_prior_probr#   r   s     r"   test_kb_valid_entitiesr      s^   CIIA>D 	OO4b	OBOO4ayOAOO4bOD 	NNdD\#sNTNN4&NF !!#q(((  "a''' ??4 I---??4 I---??4 K/// ++4y+I3O++4y+I3O++6+KSQ++4~+NPSTr!   c                 <   t        | j                  d      }|j                  dddg       |j                  dddg       |j                  d	d
dg       t        j                  t
              5  |j                  dddgddg       ddd       y# 1 sw Y   yxY w)zSTest the invalid construction of a KB with an alias linked to a non-existing entityr4   r+   r.   r   rM   r   r   rL   r   r   r*   r   r   r   rX   rS   Nr   r6   r\   r7   r]   r^   r_   r   s     r"   test_kb_invalid_entitiesr     s    CIIA>D 	OO4bO<OO4asO;OO4bO< 
z	" 
tVnS#J 	 	

 
 
   0BBc                 <   t        | j                  d      }|j                  dddg       |j                  dddg       |j                  d	d
dg       t        j                  t
              5  |j                  ddd	gddg       ddd       y# 1 sw Y   yxY w)zDTest the invalid construction of a KB with wrong prior probabilitiesr4   r+   r.   r   rM   r   r   rL   r   r   r*   r   r   rR   rS   Nr   r   s     r"   test_kb_invalid_probabilitiesr   *  s    CIIA>D 	OO4bO<OO4asO;OO4bO< 
z	" YY$cSVZXY Y Yr   c                 <   t        | j                  d      }|j                  dddg       |j                  dddg       |j                  d	d
dg       t        j                  t
              5  |j                  ddd	gg d       ddd       y# 1 sw Y   yxY w)zTTest the invalid construction of a KB with non-matching entity and probability listsr4   r+   r.   r   rM   r   r   rL   r   r   r*   r   )333333?rR   rZ   rS   Nr   r   s     r"   test_kb_invalid_combinationr   8  s    CIIA>D 	OO4bO<OO4asO;OO4bO< 
z	" 
tTl/ 	 	

 
 
r   c                     t        | j                  d      }|j                  ddg d       t        j                  t
              5  |j                  ddd	g       d
d
d
       y
# 1 sw Y   y
xY w)zMTest the invalid construction of a KB with non-matching entity vector lengthsr*   r+   r.   r   rK   rM   r   r   rL   N)r   r6   r\   r7   r]   r^   r   s     r"   test_kb_invalid_entity_vectorr   H  sa    CIIA>D 	OO4b	OB 
z	" @t!A3?@ @ @s   A&&A/c                    | j                  di       }t        |j                        dk(  sJ t        j                  t
        d      5  |j                          ddd       |j                  j                         dk(  sJ |j                  j                         dk(  sJ |j                  j                  dk(  sJ y# 1 sw Y   cxY w)z<Test that the default (empty) KB is loaded upon constructionr   configr   E139)matchN@   )
rs   rv   r5   r7   r]   r^   validate_kbr;   ra   r,   r#   r   s     r"   test_kb_defaultr   T  s    LLL<M} A%%%	z	0 $!!#$ --/1444,,.!33300B666$ $s   	B<<Cc                    | j                  dddi      }t        |j                        dk(  sJ |j                  j                         dk(  sJ |j                  j	                         dk(  sJ |j                  j
                  dk(  sJ y)zNTest that the default (empty) KB can be configured with a custom entity lengthr   r,   #   r   r   N)rs   rv   r5   r;   ra   r,   r   s     r"   test_kb_custom_lengthr  a  s    LL:PRT9ULVM} A%%%--/1444,,.!33300B666r!   c                     | j                  d      }t        j                  t              5  |j	                  d        ddd       y# 1 sw Y   yxY w)z2Test that the EL can't initialize without examplesr   c                      g S r   r    r    r!   r"   r   z*test_kb_initialize_empty.<locals>.<lambda>n  s     r!   N)rs   r7   r]   	TypeErrorr   r   s     r"   test_kb_initialize_emptyr  j  s?    LL1M	y	! -  ,- - -s   AAc                    t        | j                  d      }t               5 }|j                  |dz         |j	                  |dz         |j                  |dz  dz         |j	                  |dz  dz         |j                  |dz         t        j                  t              5  |j	                  |dz  dz         ddd       ddd       y# 1 sw Y   xY w# 1 sw Y   yxY w)zTest serialization of the KBr4   r+   r5   newunknownN)r   r6   r   r>   r@   r7   r]   r^   )r#   r   rC   s      r"   test_kb_serializer  q  s    CIIA>D	 
11QXq4x QY%&q5y4'(QX]]:& 	1NN1y=4/0	1
1 
1	1 	1
1 
1s$   BC&C>CC	CC$i#  c                 |   g d}t        | j                  d      }|j                  dgdg|g       |j                  d      |k(  sJ t	               5 }|j                  |dz         t        | j                  d      }|j                  |dz         |j                  d      |k(  sJ 	 d d d        y # 1 sw Y   y xY w)Nr   r      r   r   r6   r,   E1r4   r5   )r   r6   r:   r   r   r>   r@   )r#   vkb1rC   rF   s        r"   test_kb_serialize_2r    s    A

CCdVaS1#&>>$1$$$	 )1AHSYYQGa$h~~d#q(((	) ) )s   AB22B;c                    g d}g d}g d}t        | j                  d      }|j                  dgdg|g       |j                         dgk(  sJ |j                  dd	gdd
g||g       t	        |j                               dd	hk(  sJ |j                  d      |k(  sJ |j                  d	      |k(  sJ t               5 }|j                  |dz         t        | j                  d      }|j                  |dz         t	        |j                               dd	hk(  sJ |j                  d      |k(  sJ |j                  d	      |k(  sJ 	 ddd       y# 1 sw Y   yxY w)zGTest that set_entities entirely overwrites the previous set of entitiesr  )r4   r4   r4   r   )rL   rL   rL   r*   r   r  E0r4   r  E2r   r5   N)	r   r6   r:   get_entity_stringsrb   r   r   r>   r@   )r#   r  v1v2r  rC   rF   s          r"   test_kb_set_entitiesr    sX   A	B	B

CCdVaS1#&!!#v---dD\Aq6B84s%%'(T4L888>>$2%%%>>$2%%%	 *1AHSYYQGa$h3))+,t<<<~~d#r)))~~d#r)))* * *s   ?BEEc                    d}|| j                   j                  vsJ t        | j                   d      }|j                  |      rJ |j	                  |ddg       |j                  |      sJ ||j                   j                  v sJ t               5 }|j                  |dz         t        t               d      }|j                  |dz         ||j                   j                  v sJ 	 ddd       y# 1 sw Y   yxY w)	z/Test serialization of the KB and custom strings	MyFunnyIDr4   r+   iV  r*   )rO   rP   r5   N)	r6   stringsr   contains_entityr\   r   r>   r   r@   )r#   rN   r   rC   mykb_news        r"   test_kb_serialize_vocabr     s    F*****CIIA>D##F+++OOFQCO8'''TZZ'''''	 01QX#EG!D1t8$/////0 0 0s   AC55C>c                    t        | j                  d      } | d      }|dd }|dd }|dd }|dd }|j                  dd	dg
       |j                  dddg
       |j                  dddg
       |j                  dddgddg       |j                  ddgdg       t	        t        ||            dk(  sJ t	        t        ||            dk(  sJ t	        t        ||            dk(  sJ t	        t        ||            dk(  sJ t        ||      d   j                  dk(  sJ t        ||      d   j                  dk(  sJ t        t        ||      d   j                  d       t        t        ||      d   j                  d       y)z!Test correct candidate generationr4   r+   zdouglas adam Adam shrubberyr   rL   r*   r   r.      rM   r   r   r   r   r   r   rZ   rS   r   r-   N)r   r6   r\   r_   rv   r   entity_alias_r(   entity_freq
prior_prob)r#   r   r   douglas_entadam_entAdam_entshrubbery_ents          r"   test_candidate_generationr+    s   CIIA>D
+
,Ca(K1QxH1QxH!HM 	OO4bO<OO4bO<OO4asO; 	NNdD\#sNTNN4&NF ~dK01Q666~dH-.!333~dH-.!333~dM23q888 $)!,44<<<$)!,33v===tX6q9EErJtX6q9DDcJr!   c                 T  	
 | j                  d       dddigd}| j                  d      }|j                  |g       d }| j                  dd	d
i      }|j                  |       d} | |      }|d   j                  dk(  sJ |d   j                  dk(  sJ |d   j                  dk(  sJ d 		fd
t	        j
                  d      dt        t        dgt        t           f   f	fd       }t	        j
                  d      dt        t        t        d   gt        t        t              f   f
fd       }| j                  ddd
ddiddid      }|j                  |        | |      }|d   j                  dk(  sJ |d   j                  dk(  sJ |d   j                  dk(  sJ y)z8Test correct candidate generation as part of the EL piperh   r   rk   r   rp   ri   c                     t        | d      }|j                  dddg       |j                  ddd	g       |j                  d
ddgddg       |S )Nr4   r+   r   r   rL   rM   r   r   r*   r   r   rZ   rS   r   r6   r5   s     r"   r   z-test_el_pipe_configuration.<locals>.create_kb  sV    e!<
T1#>
T!=
9d|CQT:V	r!   r   incl_contextFr   z%Douglas and douglas are not the same.r   NILr4   rQ   rL   r   c                 T    | j                  |j                  j                               S r   )get_alias_candidatesr|   lower)r5   spans     r"   get_lowercased_candidatesz=test_el_pipe_configuration.<locals>.get_lowercased_candidates  s    &&tyy'899r!   c                 <    |D cg c]  } | |       c}S c c}w r   r    )r5   spansr4  r5  s      r"   get_lowercased_candidates_batchzCtest_el_pipe_configuration.<locals>.get_lowercased_candidates_batch  s    @EF)"d3FFFs   z$spacy.LowercaseCandidateGenerator.v1returnr   c                       S r   r    )r5  s   r"   create_candidatesz5test_el_pipe_configuration.<locals>.create_candidates  s     )(r!   z)spacy.LowercaseCandidateBatchGenerator.v1c                       S r   r    )r8  s   r"   create_candidates_batchz;test_el_pipe_configuration.<locals>.create_candidates_batch  s     /.r!   z@misc)r/  r   get_candidates_batchN)rs   rt   r   
ent_kb_id_r   miscr   r   r   r   replace_pipe)r#   rr   r}   r   r   r|   r   r;  r=  r5  r8  s            @@r"   test_el_pipe_configurationrB    s   LL w	.B-CDGLL(E	y! LL.%9PLQM#2D
d)Cq6%%%q6"""q6$$$:G ]]9:)x	6"HY$77  ) ;)
 ]]>?/X	8F+,hx	7J.KK& / @/ $$!&(NOD%
 % 
M #
d)Cq6$$$q6"""q6$$$r!   c                     | j                  di       }|j                  dk(  sJ | j                  ddddi      }|j                  dk(  sJ y)z6Test that n_sents can be set through the configurationr   r   r   n_sentsrL   N)rs   rD  rA  r   s     r"   test_nel_nsentsrE    s^    LLL<M  A%%%$$)Q % M   A%%%r!   c                    t        | j                  d      }|j                  dddg       |j                  dddg      }|j                  d	d
dg       |j                  ddd	gddg       |j                  ddgdg      }|j	                  d      }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  dk(  sJ |d   j                  |k(  sJ |d   j                  dk(  sJ t               5 }|j                  |dz         t        t               d      }|j                  |dz         |j	                  d      }t        |      dk(  sJ |d   j                  |k(  sJ |d   j                  dk(  sJ |d   j                  |k(  sJ |d   j                  dk(  sJ |j                  d      dgk(  sJ t        |j!                  dd      d       ddd       y# 1 sw Y   yxY w)z7Test that string information is retained across storager4   r+   r.   r"  rM   r   r   rL   r   r   r*   r   rR   rZ   rS   r   r-   r   r5   N)r   r6   r\   r_   r2  rv   rN   r#  rT   r$  r   r>   r   r@   r   r(   r   )r#   r   q2_hash	adam_hash
candidatesrC   kb_new_vocabs          r"   test_vocab_serializationrK    s   CIIA>D 	OO4bO<ooT1#oFGOO4asO; 	NNdD\#sNTVtfSERI**62Jz?aa=7***a=  D(((a=)+++a=6)))	 O1QX'aHq4x(!66v>
:!###!}##w...!}$$,,,!}""i///!}##v---&&t,333L77iH#NO O Os    C"G++G4c                 r   t        | j                  d      }|j                  dddg       |j                  dddg       |j                  d	d
dg       |j                  ddd	gddg       |j                  ddgdg       t	        |j                  d            dk(  sJ |j                  ddd       t	        |j                  d            dk(  sJ t        j                  t              5  |j                  ddd       ddd       t	        |j                  d            dk(  sJ y# 1 sw Y   )xY w)z5Test that we can append additional alias-entity pairsr4   r+   r.   r"  rM   r   r   rL   r   r   r*   r   rR   rZ   rS   r   r-   rX   rT   rN   r&  r   N)
r   r6   r\   r_   rv   r2  append_aliasr7   r8   r9   r   s     r"   test_append_aliasrO  ?  s5   CIIA>D 	OO4bO<OO4bO<OO4asO; 	NNdD\#sNTNN4&NF t((34999 	IdsC t((34999 
k	" H	$3GH t((34999	H Hs   0D--D6zignore:\[W036c                    t        | j                  d      }|j                  dddg       |j                  dddg       |j                  d	d
dg       |j                  ddd	gddg       |j                  ddgdg       t	        j
                  t              5  |j                  ddd       ddd       y# 1 sw Y   yxY w)zLTest that append an alias will throw an error if prior probs are exceeding 1r4   r+   r.   r"  rM   r   r   rL   r   r   r*   r   r   rZ   rS   r   r-   rX   rM  N)r   r6   r\   r_   r7   r]   r^   rN  r   s     r"   test_append_invalid_aliasrQ  ]  s     CIIA>D 	OO4bO<OO4bO<OO4asO; 	NNdD\#sNTNN4&NF 
z	" H	$3GH H Hs   B<<Cc                 >   dfd}| j                  d       ddddddg}| j                  d      }|j                  |       d	d
i}| j                  d|d      }|j                  |       | j                          |j                  j                  d      k(  sJ d} | |      }|j                  D ]f  }|j                  }	|j                  }
|j                  j                         }|j                  D ]#  }|j                  |	k(  s|j                  |
k(  r#J  h y)z9Test that Span.as_doc preserves the existing entity linksr4   c                     t        |       }|j                  dddg       |j                  dddg       |j                  ddgd	g
       |j                  ddgdg
       |S )Nr+   r.   r   r4   rM   r   r   Bostongffffff?rS   Denverg333333?r   r   s     r"   r   z.test_preserving_links_asdoc.<locals>.create_kbu  sh    MJt"QC@t!A3?XseLXseLr!   rh   GPErT  rp   rU  ri   
incl_priorFr   Tr   r   nO(She lives in Boston. He lives in Denver.N)rs   rt   r   r   modelget_dimrw   r|   kb_id_r{   as_doc)r#   r   r~   r}   r   r   r|   r   r   	orig_text
orig_kb_idsent_docs_entr   s                @r"   test_preserving_links_asdocrc  p  s    M LLH-H-H LL(E	x E"FLLdLKM#NN&&t,=== 6D
d)Cxx 2HH	ZZ
88??$]] 	2EzzY&||z111	2	2r!   c                 p   d} | |      }t        t        |j                              dk(  sJ t        |dddd      }|g|_        t        t        |j                              dk(  sJ t        |j                        d   j                  dk(  sJ t        |j                        d   j
                  dk(  sJ y	)
+Test that doc.ents preserves KB annotationsrZ  r   r*   r   LOCr.   rq   kb_idr4   N)rv   listrw   r   label_r]  )r#   r|   r   
boston_ents       r"   test_preserving_links_entsrl    s    5D
d)CtCHH~!###c1auD9J|CHtCHH~!###>!##u,,,>!##t+++r!   c                    d} | |      }t        t        |j                              dk(  sJ |j                  j                  j                  d      }|j                  j                  j                  d      }||ddfg|_        t        t        |j                              dk(  sJ t        |j                        d   j                  dk(  sJ t        |j                        d   j                  dk(  sJ y)	re  rZ  r   rf  r.   r*   r   r4   N)rv   ri  rw   r6   r  addrj  r]  )r#   r|   r   locq1s        r"   test_preserving_links_ents_2rq    s    5D
d)CtCHH~!###
))




&C					t	$Bb!Q CHtCHH~!###>!##u,,,>!##t+++r!   CRuss Cochran captured his first major title with his son as caddie.r   r   r   )Q7381115r   r   )r4   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )"   +   ARTr   +Russ Cochran has been publishing comic art.@Russ Cochran was a member of University of Kentucky's golf team.)ru  3   rf  zThe weather is nice today.)r4   r   r   r   r   r   )r   rs  rs  r   c                     t               } dd| j                  j                  vsJ g t        D ]2  \  }} | |      }j	                  t        j                  ||             4 fd}| j                  ddddi      }t        |t              sJ |j                  |       d|j                  j                  v sJ d|j                  j                  j                  v sJ | j                  fd	      }|j                  j                  d
      k(  sJ |j                  j                  d
      |j                  j                  k(  sJ t!        d      D ]  }i }| j#                  ||        d   dk  sJ | j                  dd       dddiddigdg}	| j                  dd      }
|
j%                  |	       g }t        D ]9  \  }} | |      }|j&                  D ]  }|j	                  |j(                          ; |t*        k(  sJ t-               5 }| j/                  |       t1        j2                  |      }|j4                  | j4                  k(  sJ d|j                  j                  v sJ |j7                  d      }d|j                  j                  v sJ d|j                  j                  j                  v sJ g }t        D ]9  \  }} ||      }|j&                  D ]  }|j	                  |j(                          ; |t*        k(  sJ 	 d d d        g d}| j9                  |      D cg c]  }|j;                  t<        g       }}| j9                  |      D cg c]  }|j;                  t<        g       }}|D cg c]
  } | |       c}D cg c]  }|j;                  t<        g       }}t?        ||       t?        ||       | jA                        }d|v sJ d|v sJ d|v sJ d|v sJ d|v sJ d|v sJ d|v sJ d|d   v sJ |d   dkD  sJ |d   dkD  sJ y # 1 sw Y   xY wc c}w c c}w c c}w c c}w ) Nr*   r   c                     t        |       }|j                  ddg d       |j                  ddg d       |j                  dddgd	d	g
       |S Nr+   r   r   r   rM   rs  r   r         ?rS   r   r   s     r"   r   z4test_overfitting_IO_gold_entities.<locals>.create_kb  _      MJz*Mz*M  *-* 	 	

 r!   r   Tuse_gold_entsr   r   c                       S r   r    r   s   r"   r   z3test_overfitting_IO_gold_entities.<locals>.<lambda>  r   r!   r   rY  2   r   MbP?rh   r   r   rk   r   r   rp   ri   r   )rr  r   rw  rx  nel_macro_pnel_macro_rnel_macro_fnel_micro_pnel_micro_rnel_micro_fr   r   )!r   r6   r  r   r   r   r   rs   
isinstancer   r   r5   r   r[  r\  r,   r   r   rt   rw   r]  GOLD_entitiesr   r>   r	   load_model_from_path
pipe_namesget_pipepipeto_arrayr
   r   r   )r#   r|   r   r   r   r   r   r   r   r~   r}   predictionsr   re   nlp2entity_linker2doc2textsbatch_deps_1batch_deps_2no_batch_depsevalr   r   s                         @@r"   !test_overfitting_IO_gold_entitiesr    s   
)CMSYY..... N& Bj$ig//Z@AB LLdOT+B ! M m\222#,,44444))//77777 ,BCI&&t,===&&t,0@0@0U0UUUU2Y A

>y
@A /"U*** LLdL+ &(9GY;O'PQH LLL@E	x  K& +j$i88 	+Cszz*	++ -''' 
 ,7G((1#..000TZZ/////7^1199999^..44<<<<< * 	/D*:Dyy /""3::./	/ m+++, E :=%I#CLL)-ILI9<%I#CLL)-ILIQV:W3t9:WX3S\\9+.XMX|,}-<<'DD   D   D   D   D   D   t###t,----""""""O, ,, JI:WXs%   C:P*1P7#P<QQ*P4c                     t               } dd| j                  j                  vsJ g }t        D ]2  \  }} | |      }|j	                  t        j                  ||             4 fd}| j                  dd      }| j                  dddd	i
      }|j                  |       g }t        D ]c  \  }}|j	                  t        j                  | j                  |      |             |j                  d      D ]  }	|j                  |	d           e | j                         }
t        d      D ]  }i }| j                  ||
|        d   dk  sJ |d   dk  sJ | j                  dd       d} | |      }|j                  }t!        |      dk(  sJ |d   j"                  dk(  sJ |d   j$                  dk(  sJ |d   j&                  dk7  sJ t)               5 }| j+                  |       t-        j.                  |      }|j0                  | j0                  k(  sJ  ||      }|j                  }t!        |      dk(  sJ |d   j"                  dk(  sJ |d   j$                  dk(  sJ |d   j&                  dk7  sJ 	 d d d        | j3                  |      }d|v sJ d|v sJ d|v sJ d|v sJ d|v sJ d|d   v sJ d|d   v sJ |d   dkD  sJ |d   dkD  sJ |d   dkD  sJ y # 1 sw Y   ixY w)Nr*   r   c                     t        |       }|j                  ddg d       |j                  ddg d       |j                  dddgd	d	g
       |S r|  r   r   s     r"   r   z/test_overfitting_IO_with_ner.<locals>.create_kbF  r~  r!   nerTr   r   r  Fr  rU   rL   r  r   r  rh   rr  r4   r   r   r   r0  r  r  ents_fr   r   )r   r6   r  r   r   r   r   rs   r   make_docget	add_labelr   r   r   rw   rv   r|   rj  r]  r   r>   r	   r  r  r   )r#   r   r|   r   r   r   r  r   annotationsr   r   r   r   	test_textrw   re   r  r  ents2r  r   s                       @r"   test_overfitting_IO_with_nerr  :  s_   
)CMSYY..... N& Bj$ig//Z@AB ,,uD,
)CLLdOU+C ! M #N' "kg//T0BKPQ??:. 	"CMM#a&!	""  I 2Y A

>y
@A %=5   /"U*** LLdL+ VI
i.C88Dt9>>7<<>)))7>>X%%%7>>U""" 
 	(7G((1#..000I		5zQQx}}...Qx(***Qx%'''	( <<'DD   D   tt###d"""t,----tO,,,,"""""">A-	( 	(s   B"K&&K/c                  2   d} t               5 }|dz  }t               }d|j                  j                  vsJ t	        |j                  |       }|j                  ddg d       |j                  ddgd	g
       d|j                  j                  v sJ |j                  |       t               }d|j                  j                  vsJ |j                  j                  j                  d       d|j                  j                  v sJ d|j                  j                  vsJ |j                  dd      }|j                  t        |             d|j                  j                  v sJ d|j                  j                  v sJ 	 d d d        y # 1 sw Y   y xY w)Nr*   r5   r   r+   r   r   rM   r   r   rS   
RandomWordr   Tr   )r   r   r6   r  r   r\   r_   r>   rn  rs   r   r   )r   re   kb_dirnlp1r   r  r   s          r"   test_kb_serializationr    sf   M	 274y!3!3333

Oz*M^zlSVRWXTZZ/////Vy4::#5#5555

|,tzz11111!3!3333 oDAWV_-TZZ/////tzz11111)2 2 2s   E6FFzNeeds fixing)reasonc                  d   t               } t        | j                  d      }|j                  ddg d       |j	                  d      rJ |j                  ddgdg	       |j	                  d      sJ t        j                  |      }t        j                  |      }|j	                  d      sJ y 
Nr*   r+   r   r   r   rM   r   r   rS   )	r   r   r6   r\   r`   r_   r   dumpsloads)r#   kb_1datakb_2s       r"   test_kb_pickler    s     )CCIIA>DOO:BjOI"">222NN:,seNT~...<<D<<D~...r!   c                     d } t               }|j                  d       |j                  dd      }|j                  |        |j                  ddgk(  sJ |j                  j                  d      sJ t        j                  |      }t        j                  |      }|j                  ddgk(  sJ |j                  d      }|j                  j                  d      sJ y )Nc                 x    t        | d      }|j                  ddg d       |j                  ddgdg	       |S r  r   r.  s     r"   r   z"test_nel_pickle.<locals>.create_kb  =    e!<
Zb
K
>ZLQTPUV	r!   r  r   Tr   r   )
r   rs   r   r  r5   r`   r   r  r  r  )r   nlp_1entity_linker_1r  nlp_2entity_linker_2s         r"   test_nel_pickler    s     IE	NN5nn_4n@O9%7777,,^<<<<<DLLE7777nn_5O,,^<<<r!   c                     t               } t        | j                  d      }|j                  ddg d       |j                  ddg d	       |j	                  d
dgdg       |j	                  ddgdg       |j	                  dddgddg       |j                  d
      sJ |j                         }t        | j                  d      }|j                  d
      rJ |j                  |      }|j                         |j                         k(  sJ |j                  |j                  k(  sJ |j                         |j                         k(  sJ |j                  d      |j                  d      k(  sJ |j                  d      |j                  d      k(  sJ |j                  d
      sJ |j                         |j                         k(  sJ |j                         |j                         k(  sJ t        |j                  d
            t        |j                  d
            k(  sJ t        |j                  d            t        |j                  d            k(  sJ y )Nr*   r+   r   r   r   rM   Q66r   rK   r   r   rS   Boeingr}  
RandomnessrZ   rX   )r   r   r6   r\   r_   r`   to_bytes
from_bytesr;   r,   r  r   ra   rc   rv   r2  )r#   r  kb_bytesr  s       r"   test_kb_to_bytesr    s;   
)CCIIA>DOO:BjOIOO5q	OBNN:,seNTNNUGC5NINNeZ%8c
   ~...}}HCIIA>D"">222??8$D!!#t'='='????$$(A(AAAA""$(?(?(AAAA??:&$//**EEEE??5!T__U%;;;;~...  "d&;&;&====!!#t'='='????t((89S!!.1>    t((673!!,/<   r!   c                     d } t               }|j                  d       |j                  dd      }|j                  |        |j                  j	                  d      sJ |j
                  ddgk(  sJ |j                         }t               }|j                  d       |j                  dd       |j
                  ddgk(  sJ |j                  d      j                  j	                  d      rJ |j                  |      }|j                  d      j                  }|j	                  d      sJ |j                  d      g dk(  sJ t        |j                  dd	      d
       y )Nc                 x    t        | d      }|j                  ddg d       |j                  ddgdg	       |S r  r   r.  s     r"   r   z$test_nel_to_bytes.<locals>.create_kb  r  r!   r  r   Tr   r   r   r   r   r   )r   rs   r   r5   r`   r  r  r  r  r   r(   r   )r   r  r  	nlp_bytesr  r  s         r"   test_nel_to_bytesr    sF    IE	NN5nn_4n@O9%,,^<<<7777 IIE	NN5	NN?N.7777~~o.11@@PPPY'E>>/*--D~...??:&*444:^Dcr!   c                  ~   g } t               } |d      }t        |dddd      t        |ddd	d
      g|_         |d      }t        |dddd      t        |ddd	d
      g|_        | j                  t	        ||              |d      }t        |dddd      t        |ddd	d      g|_         |d      }t        |dddd      t        |ddd	d      g|_        | j                  t	        ||              |d      }t        |ddd	d      g|_         |d      }t        |ddd	d      g|_        | j                  t	        ||             t               j                  | dg      }|d   d   d   dk(  sJ |d   d   d   dk(  sJ |d   d	   d   dk(  sJ |d   d	   d   dk(  sJ |d   dk(  sJ |d   dk(  sJ y )NzJulia lives in London happily.r   r4   r   r   rg  r*   r   rf  r   Q70zShe loves London.rL   Q13r0  zLondon is great.)negative_labelsr   pr}  rr   r  gUUUUUU?r  )r   r   rw   r   r   r   score_links)	r   r#   ref1pred1ref2pred2ref3pred3scoress	            r"   test_scorer_linksr    s    N
)C/0DT1axt4T1auD1DI 01EUAq6UAqT2EJ '%./"#DT1axt4T1auE2DI #$EUAq5UAqU3EJ '%./!"DdAqU;<DI"#Eua%u=>EJ'%./X!!.5'!JF"#H-c2e;;;"#H-c2e;;;"#E*3/5888"#E*3/5888- E)))- E)))r!   zname,configr   spacy.EntityLinker.v1)@architecturestok2veczspacy.EntityLinker.v2c                    dt               }g t        D ];  \  }}|j                  |      }j                  t	        j
                  ||             = fd}|j                  | d|i      }|d   dk(  rt        |t              sJ t        |t              sJ |j                  |       |j                  fd      }t        d	      D ]  }	i }
|j                  ||

        y )Nr*   c                     t        |       }|j                  ddg d       |j                  ddg d       |j                  dddgd	d	g
       |S r|  r   r   s     r"   r   z,test_legacy_architectures.<locals>.create_kbN  s]    MJz*Mz*M  *-* 	 	

 r!   r[  r   r  r  c                       S r   r    r   s   r"   r   z+test_legacy_architectures.<locals>.<lambda>_  r   r!   r   rL   r   )r   r   r  r   r   r   rs   r  r   r   r   r   r   r   )namer   r#   r|   r   r   r   r   r   r   r   r   r   s              @@r"   test_legacy_architecturesr  <  s     M
)CN& Bjll4 g//Z@AB	 LLw.?L@M#::-999-666#,BCI1X A

>y
@Ar!   r~   	CHARACTERKirbyrp   r   KorbyISisCOLORpinkc                    ddddiidgg ddfg}t               }dg |D ]2  \  }} ||      }j                  t        j                  ||             4 |j	                  d	      }|j                  |        D ]  } ||j                        |_         |j                  d	       fd
}|j	                  dddid      }	|	j                  |       |	j                  du sJ |j                  fd      }
t        d      D ]  }i }|j                  |
|        |j	                  dd       |j                         y )NzKirby is pink)r   r   Q613241r   )r   r   r  )r4   r   r   r   r*   ri   c                     t        |       }|j                  ddg d       |j                  ddgdg       |j                  ddg d	       |j                  ddgdg       |S )
Nr+   r  r   r   rM   r  r-   r  )r  rL   r   r   s     r"   r   z$test_no_gold_ents.<locals>.create_kb  s`    MJyrLwcU3vBjIvx#/r!   r   r  FTrX  c                       S r   r    r   s   r"   r   z#test_no_gold_ents.<locals>.<lambda>  r   r!   r   rL   r   rh   r   )r   r   r   r   rs   rt   	predictedremove_piper   r  r   r   r   r   )r~   r   r#   r|   r   r   r}   egr   r   r   r   r   r   r   s                @@r"   test_no_gold_entsr  f  sp     9c"2301(	
	J )CMN& Bj$ig//Z@AB
 LL(E	x   +R\\*+
 OON# LL% 8t ! M #&&%///,BCI1X A

>y
@A
 LLdL+ LL r!   ig%  c                    	
 t               } t        | j                  ddgddgddg      }t        | j                  g dg d	g d
      }t        ||      }|g	d

fd}| j	                  dd      }|j                  |       | j                  	fd      }t        d      D ]  }i }| j                  	||        | j	                  dd       | j                  	       y )Nr  123456TFB-CHARACTER
B-CARDINAL)wordsspacesrw   )r  123456)TFF)r  r  r  r*   c                 x    t        |       }|j                  ddg d       |j                  ddgdg       |S )Nr+   r  r   r   rM   r  r-   r   r   s     r"   r   z-test_tokenization_mismatch.<locals>.create_kb  s:    MJyrLwcU3r!   r   r   c                       S r   r    r   s   r"   r   z,test_tokenization_mismatch.<locals>.<lambda>  r   r!   r   rL   r   rh   r   )
r   r   r6   r   rs   r   r   r   r   r   )r#   doc1r  r  r   r   r   r   r   r   r   s            @@r"   test_tokenization_mismatchr    s    
)C		!e}\*	D 		%#8	D 
t	BTNM LLtL<M#,BCI1X A

>y
@A LLdL+LL r!   c                  x    t        j                  t              5  t        dd       ddd       y# 1 sw Y   yxY w)z;Test whether instantiation of abstract KB base class fails.Nr*   )r7   r]   r  r   r    r!   r"   test_abstract_kb_instantiationr    s-    	y	! dA  s   09zmeet_threshold,configmeet_thresholdr   c                 .    t               }|j                  d       d}dg}dddii}g d}d ||      }t        j                  ||||d      g fd	}|j                  d
dd|d      }	|	j	                  |       |j                  fd       |j                  dd
      }
|
j                  dddigdg        ||      }t        |j                        dk(  sJ  r|j                  d   j                  k(  sJ t        j                  sJ yy)zTests abstention threshold.
    meet_threshold (bool): Whether to configure NEL setup so that confidence threshold is met.
    config (Dict[str, Any]): NEL architecture config.
    rh   z&Mahler's Symphony No. 8 was beautiful.r   r   r   r   )	r4   r   r   r   r   r   r   r   r   r   c                     t        | d      }|j                  dg d       |j                  dgrdndg	       |S )
Nr*   r+   r   r   rM   r   r4   g{Gz?rS   r   )r6   r   	entity_idr  s     r"   r   z!test_threshold.<locals>.create_kb  sK    A>yrL[ .1D9 	 	

 r!   r   TgGz?)	thresholdr[  r  c                       S r   r    r   s   r"   r   z test_threshold.<locals>.<lambda>	  s     r!   r   ri   r   r   rk   mahlerrp   r4   r   N)r   rs   r   r   r   r   rt   rv   rw   r]  r   r0  )r  r   r#   r|   rU   r   r   r   r   r   r}   r   r   s   `          @@r"   test_thresholdr    s1    )CLL3D !Hgs^$E.KI
d)Ch{S	
N	 LL!F3 ! M
 #NN 6N7 LLL@E	((8K7LMNO
d)Csxx=A.<388A;*RR,BRBRRRBR*r!   c                      t               }  | d      }|dd }d|_        |g|_         | d      }t               } |||gd       y)zCThe forward pass of the span maker may have a doc with no entities.za b cr   r4   Xzx y zFN)r   rj  rw   r   )r#   r  r   r  
span_makers        r"   "test_span_maker_forward_with_emptyr    sN    
)Cw<D
q)CCJDIw<D "#Jd|U#r!   )btypingr   r   r   r   r7   numpy.testingr   spacyr   r	   spacy.attrsr
   spacy.compatr   spacy.kbr   r   r   r   spacy.lang.enr   spacy.mlr   spacy.ml.models.entity_linkerr   spacy.pipeliner   spacy.pipeline.legacyr   spacy.pipeline.tok2vecr   spacy.scorerr   spacy.tests.utilr   spacy.tokensr   r   spacy.trainingr   
spacy.utilr   spacy.vocabr   fixturer#   r(   markissuerG   rf   r   parametrizeboolr   r   r   r   r   r   r   r   r   r  r  r  r  r  r   r+  rB  rE  rK  rO  filterwarningsrQ  rc  rl  rq  r   r  r  r  r  xfailr  r  r  r  r  r  r  r  r  r?   r  r  r    r!   r"   <module>r"     sX   0 0  &   !  O O !  : ' 1 8  ) " " "   '
 4( (2 45 5* 4* *6 43dE]C0/$ 0/ D 0/f$!N02fU:
 Y
 	@
77-1  4	) 	)**0"K@:%z&!OH:< ,-H .H$ ,-%2 .%2P
,," K#>?'(@	BC 4#>?'91	34 3#>?'(1	34 H#>?'9@	BC
 ""+	-.%
, Ag#TUp24 .)
/ *
/ .)= *=,B<(*X 	-DQfgh	-DQfghAAD  7	34	01D	)W+PQ
9!
9!x 4$! $!N 	#:G\]^	"9F[\]-S4 -Sc3h -S-S`$r!   