
    iq                     d   d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZmZ d dlm Z  ddlm!Z! dddgifddddgifgZ"ejF                  d        Z$ejF                  d        Z%ejF                  d        Z&ejF                  d        Z'ejF                  d        Z(ejF                  d        Z)ejT                  jW                  ddg      ejT                  jY                  d      d                Z-ejT                  jY                  d!      d"        Z.ejT                  jY                  d#      d$        Z/ejT                  jY                  d%      d&        Z0ejT                  jY                  d'      d(        Z1d) Z2ejT                  jY                  d*      d+        Z3ejT                  jY                  d,      d-        Z4d. Z5d/ Z6d0 Z7d1 Z8d2 Z9ejT                  ju                  d34      d5        Z;ejT                  ju                  d34      d6        Z<d7 Z=d8 Z>d9 Z?d: Z@d; ZAd< ZBd= ZCd> ZDd? ZEejT                  jW                  d@dAdBg      dC        ZFdD ZGdE ZHdF ZIdG ZJdH ZKdI ZL ej                  dJ       G dK dL             ZNy)M    N)assert_equal)registryutil)ENT_IOB)English)Italian)Language)Lookups)EntityRecognizer)BiluoPushDown)DEFAULT_NER_MODEL)DocSpan)Exampleiob_to_biluosplit_bilu_labelVocab   )make_tempdirWho is Shaka Khan?entities      PERSONzI like London and Berlin.)r      LOC)      r   c                       y)Nnon_entities r#       l/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/parser/test_ner.pyneg_keyr&      s    r$   c                      t               S Nr   r#   r$   r%   vocabr)   !   s	    7Nr$   c                      t        | g d      S )N)CaseywenttoNewYork.words)r   )r)   s    r%   docr3   &   s    uGHHr$   c                 z    | dd }| dd }|j                   |j                  df|j                   |j                  dfgS )Nr            r   GPE)
start_charend_char)r3   caseynys      r%   entity_annotsr=   +   sH    !HE	QqB			5>>84	U+ r$   c                 d    t        t        | D cg c]  \  }}}|
 c}}}            S c c}}}w r(   )sortedset)r=   selabels       r%   entity_typesrD   5   s)    #-@@!Qu@ABB@s   +c                 Z    t        j                  |      }t        | j                  |      S )NrD   )r   get_actionsstrings)r)   rD   actionss      r%   tsysrJ   :   s#    ''\BG00r$   rC   z
U-JOB-NAMEi  c           
          t               }i }|j                  d|      }t        j                  t	        |j
                  dg      dgdgdgdgdg| gd      }d	|j                  j                  |g
      d   v sJ y )Nnerconfigwordr1   r   tagdep)idsr2   tagsheadsdepsr   zJOB-NAME)examplesr5   )r	   create_piper   	from_dictr   r)   movesrG   )rC   nlprN   rL   examples        r%   test_issue1967r\   @   s     *CF
//%/
/CCIIfX&3XGSG	

G ..	.B1EEEEr$   i  c                  h   t               } | j                  d      }|j                  d       | j                          t               }|j                  d       t	        |j                  d      j                        dk(  sJ |j                  d      j                  } |j                  d   || j                  d      j                  j                         |j                  | j                                d|j                  d      j                  vsJ |j                  d      j                  dk(  sJ y)zGTest that spurious 'extra_labels' aren't created when initializing NER.rL   CITIZENSHIPr   resize_outputextra_labels)r^   N)r   add_pipe	add_label
initializelenget_pipelabelsmodelattrsrY   n_moves
from_bytesto_bytescfg)rZ   rL   nlp2rg   s       r%   test_issue2179rn   T   s     )C
,,u
CMM- NN9DMM%t}}U#**+q000MM% &&E EKK U(;(A(A(I(IJOOCLLN#u!5!9!9999==&&*::::r$   iQ	  c                      d} t        |       g dk(  sJ d}t        |      g dk(  sJ d}t        |      g dk(  sJ d}t        |      g dk(  sJ y	)
z9Test that IOB tags are correctly converted to BILUO tags.)	B-BRAWLER	I-BRAWLERrq   )rp   rq   z	L-BRAWLER)I-ORGrr   B-ORG)rs   L-ORGzU-ORG)B-PERSONzI-PERSONru   )ru   L-PERSONU-PERSON)B-MULTI-PERSONzI-MULTI-PERSONrx   )rx   zL-MULTI-PERSONzU-MULTI-PERSONN)r   )tags1tags2tags3tags4s       r%   test_issue2385r}   e   sh     4E"IIII'E"====0E"FFFFBE"XXXXr$   i
  c                     t               } g }|j                  t        j                  | j	                  d      dg i      g       t        d      D cg c]  }t        |       }}| j                  d      }t        |      D ]  }|j                  |        | j                         }t        d      D ]6  }i }t        j                  |       |D ]  }| j                  |g||d        8 yc c}w )	zdTest issue that arises when too many labels are added to NER model.
    Used to cause segfault.
    zOne sentencer   i  rL      g      ?)sgdlossesdropN)r   extendr   rX   make_docrangestrra   listrb   rc   randomshuffleupdate)	rZ   
train_datairD   rL   entity_type	optimizerr   r[   s	            r%   test_issue2800r   v   s    
 )CJ			3<<7*b9I	JK %*$K0qCF0L0
,,u
CL) #k"# I2Y Jz"! 	JGJJyiSJI	JJ 1s   C,i  c                     t               } | j                  d      }|j                  d       | j                          g d}|j                  |k(  sJ t               }|j                  d      }|j
                  } |j                  d   ||j                  j                         |j                  | j                                |j                  |k(  sJ y)zTest issue that occurred in spaCy nightly where NER labels were being
    mapped to classes incorrectly after loading the model, when the labels
    were added using ner.add_label().
    rL   ANIMAL)OzB-ANIMALzI-ANIMALzL-ANIMALzU-ANIMALr_   N)r   ra   rb   rc   
move_namesrg   rh   rY   ri   rj   rk   )rZ   rL   r   rm   ner2rg   s         r%   test_issue3209r      s     )C
,,u
CMM(NNFJ>>Z'''9D==DJJE EKK 		(9(9:OOCLLN#??j(((r$   c                      t               } | j                  d      }|j                  d       | j                          g d}dh}|j                  |k(  sJ t        |j                        |k(  sJ y)zBTest that labels are inferred correctly when there's a - in label.rL   zLARGE-ANIMAL)r   zB-LARGE-ANIMALzI-LARGE-ANIMALzL-LARGE-ANIMALzU-LARGE-ANIMALN)r   ra   rb   rc   r   r@   rf   )rZ   rL   r   rf   s       r%   test_labels_from_BILUOr      sf    
)C
,,u
CMM.!NNJ F>>Z'''szz?f$$$r$   i  c                     t               } | j                  d      }|j                  d       | j                          d| j                  v sJ  | d      }|j                  d      sJ |D ]  }|j                  dk(  rJ  dddg}| j                  d	      }|j                  |       d	| j                  v sJ d| j                  v sJ  | d      }|j                  d      sJ |D ]  }|j                  dk(  rJ  y
)zDTest that running an entity_ruler after ner gives consistent resultsrL   PEOPLEhir   r   SOFTWAREspacyrC   patternentity_rulerN)r   ra   rb   rc   
pipe_nameshas_annotationent_iobadd_patterns)rZ   rL   doc1tokenpatternsrulerdoc2s          r%   test_issue4267r      s    )C
,,u
CMM(NNCNN"""t9Dy))) "}}!!!" %9:HLL(E	x S^^+++CNN"""t9Dy))) "}}!!!"r$   i  c                     d} d}t               }| |d}|j                  d|      }|j                  d       |j                           |d      }t	        |j
                        dk(  sJ d|j
                  v sJ t        |d	d
d      }t        |j                        |gz   |_        |g}|j                  |d| |       t	        |j
                        dk(  sJ d|j
                  v sJ y)z:This should not crash or exit with some strange error code   -C6?
beam_widthbeam_densitybeam_nerrM   
SOME_LABELzWhat do you think about Apple ?r5   r7      MY_ORGrC           )r   r   r   r   N)
r   ra   rb   rc   rd   rf   r   r   ents
beam_parse)r   r   rZ   rN   rL   r3   	apple_entdocss           r%   test_issue4313r      s     JL
)C $F ,,z&,
1CMM,NN
/
0Cszz?a3::%%%S!Qh/ICHH~+CH 5DNN4cj|NTszz?aszz!!!r$   c                     t        j                  |d|i      }| j                  |d      }|D cg c]  }| j                  |       }}|g dk(  sJ y c c}w )Nr   F)_debug)rw   r   r   B-GPEL-GPEr   )r   rX   get_oracle_sequenceget_class_name)rJ   r3   r=   r[   act_classesactnamess          r%   test_get_oracle_movesr      s`    j-%@AG**75*AK1<=#T  %=E=AAAA >s   Ac                    || j                   d<   t        |ddg      }ddg}t        j                  |d|i      }t	        |j
                  ddd	
      t	        |j
                  ddd
      g|j
                  j                  |<   | j                  |      }|D cg c]  }| j                  |       }}|sJ |d   d	k7  sJ |d   dk7  sJ |d   dk7  sJ yc c}w )zTest that we don't get stuck in a two word input when we have a negative
    span. This could happen if we don't have the right check on the B action.
    r&   ABr1   Nr   r   r5   r   r   r   r   ru   rv   	rl   r   r   rX   r   yspansr   r   	rJ   r)   r&   r3   r=   r[   r   r   r   s	            r%   $test_negative_samples_two_word_inputr      s     "DHHY
eC:
&C4LMj-%@AG 	WYY1C(WYY1H- GIIOOG **73K1<=#T  %=E=L58s??8z!!!8z!!!	 >s   Cc                    || j                   d<   t        |g d      }g d}t        j                  |d|i      }t	        |j
                  ddd	      t	        |j
                  dd
d	      g|j
                  j                  |<   | j                  |      }|D cg c]  }| j                  |       }}|sJ |d   dk7  sJ |d   dk7  sJ yc c}w )HTest that we exclude a 2-word entity correctly using a negative example.r&   )r   r   Cr1   )NNNr   r   r5   r   r   r   r   ru   Nr   r   s	            r%   &test_negative_samples_three_word_inputr     s    !DHHY
e?
+C&Mj-%@AG 	WYY1C(WYY1H- GIIOOG **73K1<=#T  %=E=L58s??8z!!! >s   Cc                    || j                   d<   t        |dg      }dg}t        j                  |d|i      }t	        |j
                  ddd	      t	        |j
                  ddd
	      g|j
                  j                  |<   | j                  |      }|D cg c]  }| j                  |       }}|sJ |d   dk7  sJ |d   dk7  sJ yc c}w )r   r&   r   r1   Nr   r   r5   r   r   r   rw   r   r   s	            r%   test_negative_samples_U_entityr     s    !DHHY
eC5
!CFMj-%@AG 	WYY1C(WYY1H- GIIOOG **73K1<=#T  %=E=L58s??8z!!! >s   C
c                     t        j                  |      }t        | j                  |d      }|j                  d   dk(  sJ y )NrF   r"   )incorrect_spans_keyr&   )r   rG   rH   rl   )r)   rD   rI   rJ   s       r%   %test_negative_sample_key_is_in_configr   *  s;    ''\BG^TD88I.000r$   zNo longer supported)reasonc                 6   g d}g d}t        | |      }t        j                  |||d      }t        | j                        }d}|D ]  }||dk(  r"|j                  |j                  d      d       -t        |      \  }}	|j                  |j                  d      |	       |j                  |j                  d	      |	       |j                  |j                  d
      |	       |j                  |j                  d      |	        |j                  |       y )N)r   52Bomber)NNz	L-PRODUCTr1   )r2   r   Mr   ILUr   r    r   r   r   r   	r   r   rX   r   rH   
add_actionindexr   r   
en_vocabr2   
biluo_tagsr3   r[   rY   
move_typesrP   actionrC   s
             r%   test_oracle_moves_missing_Br   2  s   !E*J
he
$Cu*%MNG(**+E/J 
;;CZZ--c2B7,S1MFEZ--c2E:Z--c2E:Z--c2E:Z--c2E:
; 
g&r$   c                 n   g d}g d}t        | |      }t        j                  |d|i      }t        | j                        }d}|D ][  }||dk(  r"|j                  |j                  d      d       -t        |      \  }}	|j                  |j                  |      |	       ] |j                  |       y )N)	
production
ofNorthropr   zCorp.r   z'sradar)	r   r   r   rs   Nrr   rt   r   r   r1   r   r   r   r   r   r   s
             r%   test_oracle_moves_whitespacer   L  s    VEKJ
he
$Cj*%=>G(**+E/J >;CZZ--c2B7,S1MFEZ--f5u=> 
g&r$   c                     t               }  | d      }i }| j                  d|      }|D cg c]  }|j                   c}g dk(  sJ |D cg c]  }|j                   c}g dk(  sJ |j                  j                  dd       |j                  d       |j                  j                  |g      d   }|j                  j                  |d	       |j                  j                  |d	       |j                  j                  |d	       |j                  j                  |d
      sJ t               } |d      }i }|j                  d|      }|j                  g |dd gd       |D cg c]  }|j                   c}g dk(  sJ |D cg c]  }|j                   c}g dk(  sJ |j                  j                  dd       |j                  j                  dd       |j                  d       |j                  j                  |g      d   }	|j                  j                  |	d	       |j                  j                  |	d	       |j                  j                  |	d	       |j                  j                  |	d
      rJ |j                  j                  |	d      sJ |j                  j                  |	d       |j                  j                  |	d
      rJ |j                  j                  |	d      sJ yc c}w c c}w c c}w c c}w )z5Test succesful blocking of tokens to be in an entity.I live in New YorkrL   rM   r   r   r   r   r   r7   r   r8   r   r   r   r6   
unmodifiedblockeddefault)r   r   r   r   r      zU-N)r   rW   ent_iob_	ent_type_rY   r   rb   
init_batchapply_transitionis_validset_ents)
nlp1r   rN   ner1r   state1rm   r   r   state2s
             r%   test_accept_blocked_tokenr  a  s    9D$%DFE&1D(,-uENN-1EEEE)-.EOO.2FFFF 	JJ!R NN5ZZ""D6*1-FJJ,JJ,JJ,::vw/// 9D$%DFE&1D 	MM"tAayk<M@(,-uENN-1GGGG)-.EOO.2FFFF 	JJ!R JJ!R NN5ZZ""D6*1-FJJ,JJ,JJ,zz""67333::vt,,,JJ-zz""67333::vt,,,O ..* ..s   K:K?+LL	c            	         dddgifddg ifg} t               }g }| D ]<  }|j                  t        j                  |j	                  |d         |d                > |j                  dd	      }|j                  d
       |j                          t        d      D ]5  }i }t        j                  |d      }|D ]  }|j                  ||        7 y)z7Test that training an empty text does not throw errors.r   r   r   r   r   r5   rL   Tlastr   r      sizer   N)r   appendr   rX   r   ra   rb   rc   r   r   	minibatchr   	r   rZ   train_examplestrL   itnr   batchesbatchs	            r%   test_train_emptyr    s     

->,?@A	j"J
 )CN Kg//QqT0BAaDIJK
,,u4,
(CMM(NNQx -..a8 	-EJJuVJ,	--r$   c            	         dddgifg} t               }g }| D ]<  }|j                  t        j                  |j	                  |d         |d                > |j                  dd      }|j                  d	       |j                          t        d
      D ]W  }i }t        j                  |d      }|D ]7  }t        j                  t              5  |j                  ||       ddd       9 Y y# 1 sw Y   FxY w)zFTest that the deprecated negative entity format raises a custom error.r   r   )r   r   z!PERSONr   r5   rL   Tr  r   r   r  r  r	  N)r   r
  r   rX   r   ra   rb   rc   r   r   r  pytestraises
ValueErrorr   r  s	            r%   test_train_negative_deprecatedr    s     

-?,@ABJ )CN Kg//QqT0BAaDIJK
,,u4,
(CMM(NNQx 1..a8 	1Ez* 1

5
01 1	111 1s   C00C9c                     t               } | j                  d       | j                           | d      }|D cg c]  }|j                   c}g dk(  sJ |D cg c]  }|j                   c}g dk(  sJ i }| j                  d|      }|j                  j                  dd       |j                  d       |j                  j                  |g      d	   }|j                  j                  |d
      sJ |j                  j                  |d      sJ |j                  j                  |d
       |j                  j                  |d      sJ |j                  j                  |d      sJ y c c}w c c}w )NrL   r   )r   r   r   r   r   r   rM   r7   r   r8   r   r   zU-GPEzI-GPEr   )r   ra   rc   r   r   rW   rY   r   rb   r   r   r   )rZ   r3   r   rN   r   states         r%   test_overwrite_tokenr    s<   
)CLLNN
"
#C(+,uENN,0IIII),-EOO-1EEEEF??5?0DJJ!R NN5JJ!!3%(+E::ug...::ug...JJw/::ug...::ug... --s   E,E1c                      t               } | j                  d      }|j                  d       | j                           | d      }g d}|D cg c]  }|j                   c}|k(  sJ y c c}w )NrL   MY_LABELz3John is watching the news about Croatia's elections)	r   r   r   r   r   r   r   r   r   )r   ra   rb   rc   r   )rZ   rL   r3   resultr   s        r%   test_empty_nerr    s]    
)C
,,u
CMM*NN
C
DC:F(+,uENN,666,s   A)c                  |   t               } dddg}| j                  d      }| j                  d      }|j                  d       | j                          |j	                  |        | d      }g d}g d	}|D cg c]  }|j
                   c}|k(  sJ |D cg c]  }|j                   c}|k(  sJ y
c c}w c c}w )zLTest that an NER works after an entity_ruler: the second can add annotationsTHINGThisr   r   rL   r  *This is Antti Korhonen speaking in Finlandr   r   r   r   r   r   r   r   r   r   r   r   r   r   Nr   ra   rb   rc   r   r   r   )rZ   r   r   untrained_nerr3   expected_iobsexpected_typesr   s           r%   test_ruler_before_nerr)    s    
)C "f56HLL(E LL'MJ'NN	x 
:
;C7M6N(+,uENN,===),-EOO-??? --s   9B4B9c                     ddi}dt         i}t        j                  |d      d   }t        | |fi | t        | |       y )Nupdate_with_oracle_cut_sized   rg   T)validate)r   r   resolver   )r   rN   rl   rg   s       r%   test_ner_constructorr/    sK    %sF %
&CS409EXu//Xu%r$   c                     t               } | j                  dd      }|j                  d       | j                          dddg}| j                  d      }|j	                  |        | d	      }g d
}g d}|D cg c]  }|j
                   c}|k(  sJ |D cg c]  }|j                   c}|k(  sJ yc c}w c c}w )zTTest that an entity_ruler works after an NER: the second can overwrite O annotationsrL   uner)namer  r   r!  r   r   r"  r#  r$  Nr%  )rZ   r&  r   r   r3   r'  r(  r   s           r%   test_ner_before_rulerr3    s    
)C LLVL4MJ'NN "f56HLL(E	x 
:
;C7M6N(+,uENN,===),-EOO-??? --s   ;B6B;c                  X   t               } | j                  dddd       | j                  d      }|j                  d       | j                           | d      }g d	}g d
}|D cg c]  }|j                   c}|k(  sJ |D cg c]  }|j
                   c}|k(  sJ yc c}w c c}w )zITest functionality for blocking tokens so they can't be in a named entityblockerr   r7   )startendrM   rL   r  z,This is Antti L Korhonen speaking in Finland)r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   N)r   ra   rb   rc   r   r   )rZ   r&  r3   r'  r(  r   s         r%   test_block_nerr8    s     )CLLQq#9L:LL'MJ'NN
<
=C<M5N(+,uENN,===),-EOO-??? --s   'B"B'	use_upperTFc                 j   t               }|j                  ddd| ii      }g }t        D ]c  \  }}|j                  t	        j
                  |j                  |      |             |j                  d      D ]  }|j                  |d           e |j                         }t        d      D ]  }i }	|j                  |||	        	d   d	k  sJ d
}
 ||
      }|j                  }t        |      dk(  sJ |d   j                  dk(  sJ |d   j                  dk(  sJ t!               5 }|j#                  |       t%        j&                  |      } ||
      }|j                  }t        |      dk(  sJ |d   j                  dk(  sJ |d   j                  dk(  sJ |j)                  d      }|j*                  j,                  d   | k(  sJ |j                  d        ||
      }|j                  }t        |      dk(  sJ |d   j                  dk(  sJ |d   j                  dk(  sJ 	 d d d        g d}|j/                  |      D cg c]  }|j1                  t2        g       }}|j/                  |      D cg c]  }|j1                  t2        g       }}|D cg c]
  } ||       c}D cg c]  }|j1                  t2        g       }}t5        ||       t5        ||       d}
|j                  |
      }t7        |dddd      g|_        |j                  }t        |      dk(  sJ |d   j                  dk(  sJ |d   j                  dk(  sJ |d   j8                  dk(  sJ  |j)                  d      |      }|j                  }t        |      dk(  sJ |d   j                  dk(  sJ |d   j                  dk(  sJ |d   j8                  dk(  sJ |d   j                  dk(  sJ |d   j                  dk(  sJ |d   j8                  dk(  sJ y # 1 sw Y   xY wc c}w c c}w c c}w c c}w )NrL   rg   r9  rM   r   r   2   r   r   h㈵>I like London.r5   r   Londonr   	has_upperRANDOM_NEW_LABEL)zJust a sentence.z$Then one more sentence about London.zHere is another one.r>  zI like London and London.r6   i  )rC   kb_id)r   ra   
TRAIN_DATAr
  r   rX   r   getrb   rc   r   r   r   rd   textlabel_r   to_diskr   load_model_from_pathre   rg   rh   pipeto_arrayr   r   r   rB  )r9  rZ   rL   r  rE  annotationsentr   r   r   	test_textr3   r   tmp_dirrm   r   ents2r   doc3ents3textsbatch_deps_1batch_deps_2no_batch_depss                           r%   test_overfitting_IOrV    s    )C
,,ugY/G%H,
ICN' "kg//T0BKPQ??:. 	"CMM#a&!	""  I2Y A

>y
@A %=7""" !I
i.C88Dt9>>7<<8###7>>U""" 
 (7G((1I		5zQQx}}(((Qx%'''}}U#zz,	999)*I		5zQQx}}(((Qx%'''!(&E 8;xxGCLL'+GLG7:xxGCLL'+GLGOT8UtT8UVS\\7),VMV|,}- ,I
,,y
!CS!Qe489CH88Dt9>>7<<8###7>>U"""7==D   
#,,u
c
"C88Dt9>>7<<8###7>>U"""7==D   7<<8###7>>U"""7==Ac( (2 HG8UVs%   (C?PP!9P&P+0P0Pc                     d} d}t               }| |d}|j                  d|      }g }t        D ]c  \  }}|j                  t	        j
                  |j                  |      |             |j                  d      D ]  }|j                  |d           e |j                         }	i }
|j                  ||	|
       d	}|j                  |      }|g}|j                  |      }|j                  |      d
   }t        t        |            D ]6  }|j                  D ]%  }|||dz   |f   }d}d
|z
  |cxk  r
d|z   k  r"J  J  8 y )Nr   r   r   r   rM   r   r   r<  r>  r   r5   r=  )r   ra   rC  r
  r   rX   r   rD  rb   rc   r   predictscored_entsr   rd   rf   )r   r   rZ   rN   rL   r  rE  rK  rL  r   r   rM  r3   r   beamsentity_scoresjrC   scoreepss                       r%   test_beam_ner_scoresr_  l  sb   JL
)C $F ,,z&,
1CN' "kg//T0BKPQ??:. 	"CMM#a&!	""  I FJJ~9VJ< !I
,,y
!C5DKKEOOE*1-M3s8_ /ZZ 	/E!1a!eU"34ECs7e.q3w.....	//r$   c                    t               }d}d}||| d}|j                  d|      }g }t        D ]c  \  }}|j                  t	        j
                  |j                  |      |             |j                  d      D ]  }	|j                  |	d           e |j                         }
t        d      D ]  }i }|j                  ||
|	        d   dk  sJ d
}|j                  |      g}|j                  |      }|j                  |      d   }|d   dk(  sJ |d   dk(  sJ t         ||      j                        dk(  sJ t!               5 }|j#                  |       t%        j&                  |      }|j                  |      g}|j)                  d      }|j                  |      }|j                  |      d   }|d   dk(  sJ |d   dk(  sJ 	 d d d        |j                  |      }t	        ||      }t+        |ddd      g|j,                  j.                  | <   |g}t        d      D ]  }i }|j                  ||
|	        t         ||      j                        dk(  sJ y # 1 sw Y   xY w)Nr   r   r   r   r   r   rM   r   r   r;  r<  zI like Londonr   )r   r6   r   g      ?)r   r6   r   r   r5   r6   r   r   )r   ra   rC  r
  r   rX   r   rD  rb   rc   r   r   rX  rY  rd   r   r   rG  r   rH  re   r   	referencer   )r&   rZ   r   r   rN   rL   r  rE  rK  rL  r   r   r   rM  r   rZ  r[  rN  rm   docs2r   beams2entity_scores2neg_docneg_exneg_train_exampless                             r%   test_beam_overfitting_IOri    s   
)CJL $&F
 ,,z&,
1CN' "kg//T0BKPQ??:. 	"CMM#a&!	""  I 2Y A

>y
@A *&&&  ILL#$DKKEOOE*1-M'3...)*c111s9~""#q((( 
 77G((1y)*}}Z(e$))&1!4m,333./36667 ll9%GWg&F'+GQ5'A&BF7# 2Y E

%9V
DE
 s9~""#q(((+7 7s   BI&&I/c                    t               }d}d}||| d}|j                  d|      }d}|j                  |      }|j                  d       |j                  d       t	        j
                  |d	d
gi      }t        |j                  ddd      t        |j                  ddd      t        |j                  ddd      g|j                  j                  | <   |j                         }	t        d      D ]  }
i }|j                  |g|	|        y)zCheck that the NER update works with a negative annotation that is a different label of the correct one,
    or partly overlapping, etcr   r   ra  r   rM   r   r   ORGr   r   r   r   r6   r5   r<  N)r   ra   r   rb   r   rX   r   rb  r   rc   r   r   r&   rZ   r   r   rN   rL   
train_textrf  r[   r   r   r   s               r%   test_neg_annotationrn    s    )CJL $&F
 ,,z&,
1C%Jll:&GMM(MM%*7H6I)JKGW1e,W1h/W1h/(GG$  I1X <

G9)F
;<r$   c                    t               }d}d}||| d}|j                  d|      }d}|j                  |      }|j                  d       |j                  d       t	        j
                  |d	d
gi      }t        |j                  ddd      g|j                  j                  | <   t        |j                  j                        dk(  sJ |j                  j                  d   j                  dk(  sJ |j                  j                  d   j                  dk(  sJ t        |j                  j                  |          dk(  sJ |j                  j                  |    d   j                  dk(  sJ |j                  j                  |    d   j                  dk(  sJ |j                         }	t        d      D ];  }
i }t        j                   t"              5  |j%                  |g|	|       d d d        = y # 1 sw Y   HxY w)Nr   r   ra  r   rM   r   r   r   r   r   r   r   r5   r   z
Shaka Khanr<  )r   ra   r   rb   r   rX   r   rb  r   rd   r   rE  rF  rc   r   r  r  r  r   rl  s               r%   test_neg_annotation_conflictrp    s   
)CJL $&F
 ,,z&,
1C%Jll:&GMM(MM%*7H6I)JKG(,W->->1h(O'PGG$w  %%&!+++!!!$))\999!!!$++x777w  &&w/0A555""7+A.33|CCC""7+A.55AAA I1X @]]:& 	@JJyiJ?	@ 	@@	@ 	@s   G11G:	c                    t               }d}d}||| d}|j                  d|       g d}g d}t        |j                  |      }t	        j
                  |d	|i      }t        |j                  d
dd      }	|	g|j                  j                  | <   |j                         }
t        d      D ]  }i }|j                  |g|
|        dv sJ y)z/Regression test for previously flakey behaviourr   r   ra  r   rM   )5FEDERALNATIONALMORTGAGEASSOCIATION(FannieMaez):Postedyieldson30yearmortgagecommitmentsfordeliverywithinr|  daysrv  pricedatpar)z9.75%,standardconventionalfixed-rate	mortgages;z8.70r  r  z6/2r  cappedoner  r}  
adjustabler  r  r0   Source:TelerateSystemszInc.)5rs   rr   rr   rt   r   rs   rt   r   r   r   r   B-DATEL-DATEr   r   r   r   r   r  r  r   r   r   r   r   	B-PERCENT	L-PERCENTr   r   r   r   r   r   r   r   r  r  r   z
U-CARDINALr   r   r  zI-DATEr  r   r   r   r   r   r   r   r   r   r1   rL   r;  5   rk  r7   r<  N)r   ra   r   r)   r   rX   r   rb  r   rc   r   r   )r&   rZ   r   r   rN   tokensiobr3   r[   neg_spanr   r   r   s                r%   test_beam_valid_parser    s    
)CJL $&F
 LLFL+ VF xC ciiv
&CeS\2GG%%r2u5H(0zGG$ I1X <

G9)F
;< r$   c                    t               }|j                  t        j                  v sJ t	               |j
                  _        t        |j
                  j                        rJ |j                  d       | j                  t        j                        5  |j                          d| j                  v sJ 	 d d d        | j                          |j
                  j                  j                  d       d|j
                  j                  j!                  d      d<   | j                  t        j                        5  |j                          d| j                  vsJ 	 d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)NrL   W033lexeme_normr   a)r   langr   LEXEME_NORM_LANGSr
   r)   lookupsrd   ra   at_levelloggingDEBUGrc   rE  clear	add_table	get_table)caplogrZ   s     r%   test_ner_warns_no_lookupsr  "  s   
)C88t-----	CII399$$%%%LL		' %$$$% LLNII.69CII.s3		' )V[[((() )% %) )s   !E$9!E0$E-0E9r5  c                       e Zd ZddZd Zy)BlockerComponent1c                 .    || _         || _        || _        y r(   )r6  r7  r2  )selfrZ   r6  r7  r2  s        r%   __init__zBlockerComponent1.__init__5  s    
	r$   c                 ^    |j                  g || j                  | j                   gd       |S )Nr   r   )r   r6  r7  )r  r3   s     r%   __call__zBlockerComponent1.__call__:  s+    R#djj488"<!=|T
r$   N)
my_blocker)__name__
__module____qualname__r  r  r#   r$   r%   r  r  3  s    
r$   r  )Or  r   r  numpy.testingr   r   r   r   spacy.attrsr   spacy.lang.enr   spacy.lang.itr   spacy.languager	   spacy.lookupsr
   spacy.pipeliner   $spacy.pipeline._parser_internals.nerr   spacy.pipeline.nerr   spacy.tokensr   r   spacy.trainingr   r   r   spacy.vocabr   r   rC  fixturer&   r)   r3   r=   rD   rJ   markparametrizeissuer\   rn   r}   r   r   r   r   r   r   r   r   r   r   skipr   r   r  r  r  r  r  r)  r/  r3  r8  rV  r_  ri  rn  rp  r  r  factoryr  r#   r$   r%   <module>r     s"      &    ! ! # ! + > 0 " B B   J):(;<= :/P"QR
     I I   C C 1 1
 <.14F  2F$ 4; ;  4Y Y  4J J* 4) )&%$ 4" "2 4" "4B","&"&1 ./' 0'2 ./' 0'(.-b-*1*/*7@(&@*@ tUm4J 5JZ /F7)t<:@> :)" )  r$   