
    i9                     Z   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZmZmZmZ d dlmZmZ ddlmZ dd	lmZ dd
lmZ ddlm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z. dZ/dZ0 e       jc                  e0      d   Z2 G d de.      Z3d Z4y)    N)Counter)islice)AnyCallableDictIterableListOptionalTuplecast)ConfigModelNumpyOpsSequenceCategoricalCrossentropy)Floats2dInts2d   )util)Errors)Language)Doc)Examplevalidate_examplesvalidate_get_examples)Vocab   )	EditTrees)validate_edit_tree)lemmatizer_score)TrainablePipe   z
[model]
@architectures = "spacy.Tagger.v2"

[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true
modelc                      e Zd ZdZ	 d,ddddeddeded	ed
ee   de	de
de	dee   fdZdee   dee   deeee   f   fdZdee   dee   fdZd Zd Zd Zdee   fdZedee	df   fd       Zede
fd       Zedefd       Zdddd eg ee   f   d!ee    d"ee   fd#Z! e"       d$d%Z# e"       d$d&Z$ e"       fd'Z% e"       fd(Z&d"efd)Z'd eg ee   f   fd*Z(d-d+Z)y).EditTreeLemmatizerzK
    Lemmatizer that lemmatizes each word using a predicted edit tree.
    orth   Fr   )backoffmin_tree_freq	overwritetop_kscorervocabr"   namer'   r(   r)   r*   r+   c                    || _         || _        || _        || _        || _        || _        || _        t        | j                   j                        | _	        i | _
        dg i| _        || _        t               | _        y)a  
        Construct an edit tree lemmatizer.

        backoff (Optional[str]): backoff to use when the predicted edit trees
            are not applicable. Must be an attribute of Token or None (leave the
            lemma unset).
        min_tree_freq (int): prune trees that are applied less than this
            frequency in the training data.
        overwrite (bool): overwrite existing lemma annotations.
        top_k (int): try to apply at most the k most probable edit trees.
        labelsN)r,   r"   r-   r'   r(   r)   r*   r   stringstrees
tree2labelcfgr+   r   	numpy_ops)	selfr,   r"   r-   r'   r(   r)   r*   r+   s	            t/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/pipeline/edit_tree_lemmatizer.py__init__zEditTreeLemmatizer.__init__0   sn    . 

	*"
tzz112
*,$,b>!    examplesscoresreturnc                 z   t        |d       t        dd      }g }|D ]  }g }t        |j                  |j	                  dd            D ]b  \  }}||dk(  rd}	nB| j
                  j                  |j                  |      }
| j                  j                  |
d	      }	|j                  |	       d |j                  |         |||      \  }}| j                  j                  j                  j                  |      r3t        t         j"                  j%                  | j&                  
            t)        |      |fS )NzEditTreeLemmatizer.get_lossF)	normalizemissing_valueLEMMAT)	as_string r   r-   )r   r   zip	predictedget_alignedr1   addtextr2   getappendr"   opsxpisnan
ValueErrorr   E910formatr-   float)r5   r9   r:   	loss_functruthseg	eg_truthsrE   
gold_lemmalabeltree_idd_scoreslosss                r6   get_losszEditTreeLemmatizer.get_lossV   s     	($AB3eSUV	 	%BI),bnnWnE* (%	: %r)9E"jjnnY^^ZHG OO//;E  '( MM)$	% #662$::>>""4(V[[//TYY/?@@T{H$$r8   docsc                 .   | j                   dk(  r| j                  }n,| j                   t        k  r| j                  }n| j                  }t        t        |            }t        d |D              s]t        | j                  d         }|D cg c](  }| j                  j                  j                  d|      * }}t        |      |k(  sJ |S | j                  j                  |      }t        |      |k(  sJ  |||      }t        |      |k(  sJ |S c c}w )Nr   c              3   2   K   | ]  }t        |        y wN)len).0docs     r6   	<genexpr>z-EditTreeLemmatizer.predict.<locals>.<genexpr>   s     ,3s8,s   r/   r   )r*   _scores2guesses_top_k_equals_1TOP_K_GUARDRAIL_scores2guesses_top_k_greater_1_scores2guesses_top_k_guardrailr`   listanyr3   r"   rK   alloc2ipredict)r5   r\   scores2guessesn_docsn_labels_guessesr:   s           r6   rk   zEditTreeLemmatizer.predictq   s    ::?!@@NZZ?*!AAN!AAN T$Z,t,,488H-.HRV$WQTZZ^^%;%;Ax%H$WG$Ww<6)))N##D)6{f$$$ v.7|v%%% %Xs   -Dc                    g }t        ||      D ]  \  }}|j                  d      }| j                  j                  |      }g }t	        |      D ]c  \  }}	| j
                  d   ||      }
| j                  j                  |
|	j                        |j                  |
       S|j                  d       e |j                  t        j                  |              |S )Nr   )axisr/   r=   )rD   argmaxr4   asarray	enumerater3   r1   applyrH   rJ   nparray)r5   r\   r:   rp   rb   
doc_scoresdoc_guessesdoc_compat_guessesitokenrX   s              r6   rd   z1EditTreeLemmatizer._scores2guesses_top_k_equals_1   s    "40 	9OC$+++3K..00=K!#%cN 25((8,[^<::##GUZZ8D&--g6&--b12 NN288$678	9 r8   c                    g }t        | j                  t        | j                              }t	        ||      D ]  \  }}| j
                  j                  |      }g }t        |      D ]  \  }}	t        |      D ]  }
t        ||   j                               }| j                  d   |   }| j                  j                  ||	j                        |j                  |        {t!        j"                  t         j$                        j                   |||f<    |j                  d        |j                  t!        j&                  |              |S )Nr/   r=   )minr*   r`   r/   rD   r4   rt   ru   rangeintrs   r3   r1   rv   rH   rJ   rw   finfofloat32rx   )r5   r\   r:   rp   r*   rb   ry   r{   r|   r}   ro   	candidatecandidate_tree_ids                r6   rf   z2EditTreeLemmatizer._scores2guesses_top_k_greater_1   s'   DJJDKK 01"40 	9OC//
;J!#%cN 	25u 2A #JqM$8$8$: ;I(,(:9(E%zz''(95::FR*112CD/1xx

/C/G/GJq)|,2 '--b1	2 NN288$678	9 r8   c                    g }t        ||      D ]  \  }}t        j                  |      dd | j                   dz
  df   }| j                  j                  |      }g }t        ||      D ][  \  }}	d}
|	D ]>  }| j                  d   |   }| j                  j                  ||j                        <|}
 n |j                  |
       ] |j                  t        j                  |              |S )N.r   r=   r/   )rD   rw   argsortr*   r4   rt   r3   r1   rv   rH   rJ   rx   )r5   r\   r:   rp   rb   ry   rz   r{   r}   
candidatesrX   r   r   s                r6   rg   z2EditTreeLemmatizer._scores2guesses_top_k_guardrail   s    "40 	9OC**Z06La"6L1LMK..00=K!#%(k%: 3!z!+ I(,(:9(E%zz''(95::FR"3 #))'23 NN288$678	9" r8   c                    t        |      D ]  \  }}||   }t        |d      r|j                         }t        |      D ]  \  }}| j                  s||   j                  dk(  s%|dk(  r/| j
                  7t        ||   | j
                        ||   _        Y| j                  j                  |||   j                        }|||   _
          y )NrI   r   r=   )ru   hasattrrI   r)   lemmar'   getattrr1   rv   rH   lemma_)	r5   r\   batch_tree_idsr|   rb   doc_tree_idsjrX   r   s	            r6   set_annotationsz"EditTreeLemmatizer.set_annotations   s    o 	.FAs)!,L|U++//1'5 
.
7>>SV\\Q%6 "}<<3+23q64<<+HCFL $

 0 0#a&++ F(-A
.		.r8   .c                 2    t        | j                  d         S )z4Returns the labels currently added to the component.r/   )tupler3   r5   s    r6   r/   zEditTreeLemmatizer.labels   s     TXXh'((r8   c                      y)NT r   s    r6   hide_labelszEditTreeLemmatizer.hide_labels   s    r8   c                 ^   g }t        t        | j                              D ]h  }| j                  |   }d|v r| j                  j                  |d      |d<   d|v r| j                  j                  |d      |d<   |j                  |       j t        |t        | j                  d               S )Norigsubstr/   )r1   r/   )	r   r`   r1   r,   r0   rJ   dictr   r3   )r5   r1   rX   trees       r6   
label_datazEditTreeLemmatizer.label_data   s    S_- 	G::g&D~#zz11$v,?V$ $

 2 24= AWLL	 %dhhx.@(ABBr8   N)nlpr/   get_examplesr   r/   c          	         t        |d       || j                  |       n| j                  |       g }g }t         |       d      D ]  }|j	                  |j
                         g }|j                  D ]m  }|j                  dk(  rd }	n&| j                  |j                  |j                        }	|j	                  | j                  d   D 
cg c]  }
|
|	k(  rdnd c}
       o t        t        |      }|j	                  | j                  j                  j!                  |d              | j#                          t%        |      dkD  s/J t&        j(                  j+                  | j,                  	             t%        |      dkD  s/J t&        j(                  j+                  | j,                  	             | j                  j/                  ||
       y c c}
w )NzEditTreeLemmatizer.initialize
   r   r/   g      ?g        r   )dtyperC   )XY)r   _labels_from_data_add_labelsr   rJ   x	referencer   _pair2labelrH   r   r3   r   r   r"   rK   rt   _require_labelsr`   r   E923rP   r-   
initialize)r5   r   r   r/   
doc_samplelabel_sampleexamplegold_labelsr}   
gold_labelrW   s              r6   r   zEditTreeLemmatizer.initialize   s    	l,KL>""<0V$ 
lnb1 	VGgii(-/K ** ;;!#!%J!%!1!1%**ell!KJ"" &*XXh%7!  %
2; x5K

 6 6{) 6 TU#	V& 	:"FFKK$6$6DII$6$FF"< 1$Hfkk&8&8dii&8&HH$


l;s   
Gexcludec                `      fd fd fd fdd}t        j                  ||        S )Nc                 `    j                   j                  t        j                  |             S r_   )r3   updatesrsly
json_loadsbr5   s    r6   <lambda>z/EditTreeLemmatizer.from_bytes.<locals>.<lambda>  s    TXX__U-=-=a-@A r8   c                 :    j                   j                  |       S r_   )r"   
from_bytesr   s    r6   r   z/EditTreeLemmatizer.from_bytes.<locals>.<lambda>      tzz44Q7 r8   c                 >    j                   j                  |       S Nr   )r,   r   )r   r   r5   s    r6   r   z/EditTreeLemmatizer.from_bytes.<locals>.<lambda>  s    tzz44Q4H r8   c                 :    j                   j                  |       S r_   )r1   r   r   s    r6   r   z/EditTreeLemmatizer.from_bytes.<locals>.<lambda>  r   r8   r3   r"   r,   r1   )r   r   )r5   
bytes_datar   deserializerss   ` ` r6   r   zEditTreeLemmatizer.from_bytes  s-    A7H7	
 	
M7;r8   c                Z      fd fd fd fdd}t        j                  |      S )Nc                  B    t        j                   j                        S r_   )r   
json_dumpsr3   r   s   r6   r   z-EditTreeLemmatizer.to_bytes.<locals>.<lambda>#  s    5++DHH5 r8   c                  8     j                   j                         S r_   )r"   to_bytesr   s   r6   r   z-EditTreeLemmatizer.to_bytes.<locals>.<lambda>$      TZZ002 r8   c                  <    j                   j                         S r   )r,   r   )r   r5   s   r6   r   z-EditTreeLemmatizer.to_bytes.<locals>.<lambda>%  s    TZZ000A r8   c                  8     j                   j                         S r_   )r1   r   r   s   r6   r   z-EditTreeLemmatizer.to_bytes.<locals>.<lambda>&  r   r8   r   )r   r   )r5   r   serializerss   `` r6   r   zEditTreeLemmatizer.to_bytes!  s)    52A2	
 }}['22r8   c                      t        j                  |      } fd fd fd fdd}t        j                  ||       y )Nc                 D    t        j                  | j                        S r_   )r   
write_jsonr3   pr5   s    r6   r   z,EditTreeLemmatizer.to_disk.<locals>.<lambda>.  s    U--a: r8   c                 :    j                   j                  |       S r_   )r"   to_diskr   s    r6   r   z,EditTreeLemmatizer.to_disk.<locals>.<lambda>/      tzz11!4 r8   c                 >    j                   j                  |       S r   )r,   r   r   r   r5   s    r6   r   z,EditTreeLemmatizer.to_disk.<locals>.<lambda>0  s    tzz11!W1E r8   c                 :    j                   j                  |       S r_   )r1   r   r   s    r6   r   z,EditTreeLemmatizer.to_disk.<locals>.<lambda>1  r   r8   r   )r   ensure_pathr   )r5   pathr   r   s   ` ` r6   r   zEditTreeLemmatizer.to_disk+  s8    %:4E4	
 	T;0r8   c                 d      fd} fd| fd fdd}t        j                  ||        S )Nc                     	 t        | d      5 }j                  j                  |j                                d d d        y # 1 sw Y   y xY w# t        $ r t        t        j                        d w xY w)Nrb)openr"   r   readAttributeErrorrN   r   E149)r   mfiler5   s     r6   
load_modelz0EditTreeLemmatizer.from_disk.<locals>.load_model6  s_    8!T] 8eJJ))%**,78 8 8! 8 -478s'   A *AA AA A $A2c                 `    j                   j                  t        j                  |             S r_   )r3   r   r   	read_jsonr   s    r6   r   z.EditTreeLemmatizer.from_disk.<locals>.<lambda>>  s    TXX__U__Q-?@ r8   c                 >    j                   j                  |       S r   )r,   	from_diskr   s    r6   r   z.EditTreeLemmatizer.from_disk.<locals>.<lambda>@  s    tzz33Aw3G r8   c                 :    j                   j                  |       S r_   )r1   r   r   s    r6   r   z.EditTreeLemmatizer.from_disk.<locals>.<lambda>A  s    tzz33A6 r8   r   )r   r   )r5   r   r   r   r   s   ` `  r6   r   zEditTreeLemmatizer.from_disk5  s5    	8 AG6	
 	t]G4r8   c                    d|vr)t        t        j                  j                  d            d|vr)t        t        j                  j                  d            t	        |d         | j
                  d<   g }|d   D ]  }t        |      }|r8t        t        j                  j                  dj                  |                  t        |      }d|v r+| j                  j                  j                  |d         |d<   d|v r+| j                  j                  j                  |d         |d<   |j                  |        | j                  j                  |       t!        | j"                        D ]  \  }}|| j$                  |<    y )Nr/   rC   r1   
)errorsr   r   )rN   r   E857rP   rh   r3   r   E1026joinr   r,   r0   rG   rJ   r1   	from_jsonru   r/   r2   )r5   r/   r1   r   r   rW   s         r6   r   zEditTreeLemmatizer._add_labelsG  sT   6!V[[//X/>??& V[[//W/=>>!&"237O 	D'-F !4!4DIIf<M!4!NOO:D~#zz1155d6lCV~ $

 2 2 6 6tG} EWLL	 	

U#$T[[1 	*KE4$)DOOD!	*r8   c                    t               }t        |j                        }t               }i } |       D ]q  }|j                  D ]`  }|j
                  dk7  s|j                  |j                  |j                        }||xx   dz  cc<   |j                  |j                  f||<   b s |j                         D ]1  \  }}	|	| j                  k\  s||   \  }
}| j                  |
|d       3 y )Nr   r   T)	add_label)r   r   r0   r   r   r   rG   rH   r   itemsr(   r   )r5   r   r,   r1   
tree_freqs
repr_pairsr   r}   rX   freqformr   s               r6   r   z$EditTreeLemmatizer._labels_from_dataa  s     %--(%i

#~ 	EG ** E;;!##ii

ELLAGw'1,'+0::u||*DJw'	E	E (--/ 	>MGTt)))(1e  u =	>r8   c                     | j                   j                  ||      }|| j                  vrF|syt        | j                  d         | j                  |<   | j                  d   j                  |       | j                  |   S )z
        Look up the edit tree identifier for a form/label pair. If the edit
        tree is unknown and "add_label" is set, the edit tree will be added to
        the labels.
        Nr/   )r1   rG   r2   r`   r3   rJ   )r5   r   r   r   rX   s        r6   r   zEditTreeLemmatizer._pair2labelv  sl     **..u-$//)'*488H+='>DOOG$HHX%%g.w''r8   )trainable_lemmatizer)F)*__name__
__module____qualname____doc__r   r   r   strr
   r   boolr   r7   r   r   r	   r   r   rQ   r[   r   r   rk   rd   rf   rg   r   propertyr/   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r8   r6   r$   r$   +   s    +	$$ "(%5$$$$ $$ 	$$ #$$ $$ $$ $$ "$$L% )%37>%	ud8n$	%%6HSM d6l 4"(*.HSM ." )c3h ) ) T   	CD 	C 	C #'!%(<r8G#445(< h	(<
 (<T 16 
 #(' 3 %*G 1 ',g $*$ *4>hr8G;L7L.M >*(r8   r$   c                 x    | dk(  r!t        j                  d      }|j                  S t        dt         d|        )Nmake_edit_tree_lemmatizerzspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_moduler  r   r   )r-   modules     r6   __getattr__r    sA    **(()CD///
78*,>tfE
FFr8   )5r  collectionsr   	itertoolsr   typingr   r   r   r   r	   r
   r   r   numpyrw   r   	thinc.apir   r   r   r   thinc.typesr   r   rB   r   r   r   languager   tokensr   trainingr   r   r   r,   r   _edit_tree_internals.edit_treesr   _edit_tree_internals.schemasr   
lemmatizerr   trainable_piper    re   default_model_configfrom_str"DEFAULT_EDIT_TREE_LEMMATIZER_MODELr$   r  r   r8   r6   <module>r     s       M M M   N N (     H H  6 < ( )   &,X%6%67K%LW%U "X( X(x
Gr8   