
    i                     `   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZmZmZ d
ZddedgiifddeddgiifgZddedgiifddeg diifddeg iifgZefdZe j.                  j1                  dg dg dg dfg dg dg dfg dg dg dfg dg dg df e j2                  ddgg dd d!g       e j2                  g d"g dg d#       e j2                  d$d%gg dd!d&g      g      d'        Zd( Zd) Ze j.                  j1                  d*g d+      d,        Zd- Zy).    N)Config)util)English)Language)span_finder_default_config)Doc)Example)fix_random_seedmake_tempdirregistrypytestzWho is Shaka Khan?spans)      zI like London and Berlin.r            I like London and Berlin)r   r   )r   r    c                     g }|D ]>  }t        j                  | j                  |d         |d         }|j                  |       @ |S )Nr      )r	   	from_dictmake_docappend)nlpdatatrain_examplestegs        v/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/pipeline/test_span_finder.pymake_examplesr#      sO    N "s||AaD11Q48b!"     z4tokens_predicted, tokens_reference, reference_truths)Mon.-June16)Mon.r'   r(   r)   )r   r   r+   r+   r   r   r+   )r*   r'   Juner)   )r+   r+   r   r   r   r   r+   )r*   r'   r(   16)r*   z-Junze 16)r+   r+   r+   r+   z	Mon.-Juner)   r0   r+   )Mon.-r(   r)   )r+   r,   r+   r4   zJune 16r/   c                 H   t               }t        |j                  | dgt        |       z        }t        |j                  |dgt        |      z        }t	        ||      }|j
                  j                  dd      g|j
                  j                  t        <   |j                  ddt        i      }|j                          |j                  j                  }|j                  |j                  k7  r8t        j                  t         d	      5  |j#                  |g|       d d d        y |j#                  |g|      \  }	}
t        |	      t        |       k(  sJ |j$                  j&                  j)                  |	|j$                  j+                  |             y # 1 sw Y   y xY w)
NF)wordsspaces   	   span_finder	spans_keyconfigz*must match between reference and predictedmatch)r   r   vocablenr	   	reference	char_spanr   	SPANS_KEYadd_pipe
initializemodelopstextr   raises
ValueError_get_aligned_truth_scoresxptestingassert_array_equalasarray)tokens_predictedtokens_referencereference_truthsr   	predictedrB   exampler:   rH   truth_scoresmaskss              r"   test_loss_alignment_examplerX   '   sk   R *C		)5'C@P<Q2QI 		)5'C@P<Q2QI i+G*1*;*;*E*Ea*K)LGI&,,}k95M,NKNN




C~~']]J
 	B 117)SA	B 	%??	3OL%|$4 5555FFNN%%lCFFNNCS4TU	B 	s   FF!c                     t               }  | d       | d      g}|d   dd g|d   j                  t        <   |d   dd g|d   j                  t        <   d}|D ]  }|t        |      z  } t	               j                  t              j                         }t        j                  |      d   }|j                  |	       |j                  |      }t        |      |k(  sJ t        |d         d
k(  sJ y )NThis is an example.This is the second example.r         r   r8   rG   )X   )r   r   rD   rA   r   from_strr   interpolater   resolverF   predict)r   docstotal_tokensdocr=   rG   predictionss          r"   test_span_finder_modelrh   g   s    
*C%&,I(JKD $Q!~DGMM) $Q!~DGMM)L !C ! X9:FFHFV$W-E	t--%K{|+++{1~!###r$   c                  `   t               }  | d       | d      g}|d   dd g|d   j                  t        <   |d   dd g|d   j                  t        <   | j                  dd	t        i
      }| j	                          t        |j                  |            }t        |d   j                  v sJ y )NrZ   r[   r   r\   r]   r   r8   r:   r;   r<   )r   r   rD   rE   rF   listpipe)r   rd   r:   s      r"   test_span_finder_componentrl   |   s    
*C%&,I(JKD $Q!~DGMM) $Q!~DGMM),,}k95M,NKNN  &'DQ%%%r$   z"min_length, max_length, span_count))r   r   r   )NN   )r_   N   )Nr   r_   )r_   r\   r_   c                 :    t               } |d      } dk(  rEdk(  r@t        j                  t        d      5  |j	                  d t
        d      }d d d        y |j	                  d t
        d      }|j                          g d}|j                  |g|       |j                  t
           sJ t        |j                  t
                 |k(  sJ t        d	       d
 t         fd|j                  t
           D              sJ y # 1 sw Y   y xY w)Nz1Me and Jenny goes together like peas and carrots.r   z"Both 'min_length' and 'max_length'r>   r:   )
max_length
min_lengthr;   r<   )
r/   r+   r,   r+   r+   r+   r,   r+   r0   r+   infr   c              3   P   K   | ]  }t        |      cxk  xr k  nc   y wN)rA   ).0spanrp   rq   s     r"   	<genexpr>z4test_set_annotations_span_lengths.<locals>.<genexpr>   s"     VzSY4*44Vs   #&)r   r   rJ   rK   rE   rD   rF   set_annotationsr   rA   floatall)rq   rp   
span_countr   rf   r:   scoress   ``     r"   !test_set_annotations_span_lengthsr}      s$   
 *C
A
BCQ:?]]:-QR 	,,",",!* ' K	 	,,$$"
  K NNF v.99Ysyy#$
222 5\

V9AUVVVVY	 	s   DDc                     t        d       t               } | j                  ddt        i      }t	        |       | j                  fd      }|j                  j                  d      dk(  sJ t        d	      D ]  }i }| j                  ||
        d   dk  sJ d} | |      }|j                  t           }t        |      dk(  sJ t        |D cg c]  }|j                   c}      h dk(  sJ t               5 }	| j                  |	       t!        j"                  |	      }
 |
|      }|j                  t           }t        |      dk(  sJ t        |D cg c]  }|j                   c}      h dk(  sJ 	 d d d        | j%                        }dt         d|v sJ |dt         d   dk(  sJ |dt         d   dk(  sJ  | d      }t        |j                  t                 dk(  sJ y c c}w c c}w # 1 sw Y   xY w)Nr   r:   r;   r<   c                       S rt    )r   s   r"   <lambda>z%test_overfitting_IO.<locals>.<lambda>   s    N r$   )get_examplesnOr_   2   )sgdlossesgMbP?r   r\   >   London and BerlinBerlinLondonspans__f_pg      ?_rg      ?r   r   )r
   r   rE   rD   r#   rF   rG   get_dimrangeupdater   rA   setrI   r   to_diskr   load_model_from_pathevaluate)r   r:   	optimizerir   	test_textrf   r   rv   tmp_dirnlp2doc2spans2r|   r   s                 @r"   test_overfitting_IOr      s   A
)C,,}k95M,NK"3'N,BCI$$T*a///2Y A

>y
@A - 5((( +I
i.CIIi Eu:??e,d		,- 2    
 

7G((1II&6{a&1$DII12 7
 
 	
 


 \\.)FI;b!V+++F9+R()T111F9+R()S000 h-Csyy#$)))9 - 2

 

s%   G3AG=G8
/G=8G==H)r   	thinc.apir   spacyr   spacy.lang.enr   spacy.languager   spacy.pipeline.span_finderr   spacy.tokensr   spacy.trainingr	   
spacy.utilr
   r   r   rD   
TRAIN_DATATRAIN_DATA_OVERLAPPINGr#   markparametrizeparamrX   rh   rl   r}   r   r   r$   r"   <module>r      s      ! # A  " > >	Gi'%;<=#	9w123
 Gi'%;<="	9:;< 'Ir?	#$  '  : ,'4	
 ,'4	
 ,+4	
 )',	

 	$'V	

 	#+$	

 	i 'V	
?$'PVQ'PV0$*& (G0W	0Wf/*r$   