
    i#                     *   d dl mZ d dlmZmZmZmZmZmZm	Z	 d dl
Z
d dlmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZmZ ddlmZmZ dd	l m!Z! dd
l"m#Z# ddl$m%Z& erddl'm(Z( ddl)m*Z* de+de+de,dedegef   fdZ-de+de+de+dedegef   fdZ.d Z/d Z0	 d,dede+de+dee+   def
dZ1dddede+de+def
dZ2dddede+de+de+defdZ3	 d-ddd ed!e4defd"Z5 G d# d$      Z6	 d-d%ed&   d'e6d!e4dee
jn                  ed&   f   fd(Z8d.d)e,d'e6d*e,de,fd+Z9y)/    )partial)TYPE_CHECKINGCallableIterableListOptionalTuplecastN)CosineDistance
L2Distance	LayerNormLinearMaxoutModelMultiSoftmaxSoftmaxchain
list2arrayto_categorical	zero_init)Loss)Floats2dInts1d   )IDORTH)Errors)OOV_RANK)ModeDoc)Vocabmaxout_pieceshidden_sizelossreturnr"   c                 R     dddt         dt         f fd}dt        ffd|S )Nvocabr"   tok2vecr&   c                     | j                   j                  d   dk(  rt        t        j                        t        | |      }        |j                  d<   |S )N   r   )r$   r#   r%   )vectorsshape
ValueErrorr   E875build_cloze_multi_task_modelattrs)r(   r)   modelcreate_vectors_lossr$   r#   s      k/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/ml/models/multi_task.pycreate_vectors_objectivez9create_pretrain_vectors.<locals>.create_vectors_objective$   sT    ==q!Q&V[[)),7=
 23F    c                      dk(  rt        dd      } t        t        |       S dk(  rt        d      } t        t        |       S t	        t
        j                  j                  d            )	NcosineT)	normalizeignore_zeros)distanceL2)r9   z'cosine', 'L2')found	supported)r   r   get_vectors_lossr   r.   r   E906format)r;   r%   s    r4   r3   z4create_pretrain_vectors.<locals>.create_vectors_loss-   sd    8%4HH+h??T\!D1H+h??V[[//dFV/WXXr6   )r   r   )r#   r$   r%   r5   r3   s   ``` @r4   create_pretrain_vectorsrB   !   s3     % E 	Y 	Y $#r6   n_charactersc                 8     dddt         dt         f fd}|S )Nr(   r"   r)   r&   c                 d    t        | |      }t        t              |j                  d<   |S )N)r$   r#   nr_charrF   r%   )'build_cloze_characters_multi_task_modelr   get_characters_lossr1   )r(   r)   r2   r$   r#   rC   s      r4   create_characters_objectivez?create_pretrain_characters.<locals>.create_characters_objective>   s9    7#' 
 &&9<PFr6   )r   )r#   r$   rC   rJ   s   ``` r4   create_pretrain_charactersrK   ;   s%    	7 	U 	u 	 '&r6   c                 F   |d   j                   }|j                  j                  t        j                  k(  r| j                  |D cg c]%  }|j                  t              j                         ' c}      }|d   j                   j                  j                  |   }d||t        k(  <    |||      \  }}	|	|fS |j                  j                  t        j                  k(  r|| j                  |D cg c]%  }t        t        |j                  t                    ' c}      }
|j                  j                  |
      }| j!                  |      } |||      \  }}	|	|fS t#        t$        j&                  j)                  |j                  j                              c c}w c c}w )z^Compute a loss based on a distance between the documents' vectors and
    the prediction.
    r   )mode)r(   r,   rM   VectorsModedefaultflattento_arrayr   raveldatar   floretr
   r   r   	get_batch	as_contigr.   r   E850rA   )opsdocs
predictionr;   r(   docidstargetd_targetr%   keyss              r4   r?   r?   L   sT    GMME}}[000
 kktD3<<+113DEa&&++C0"#sh!*f5$ > 
		{11	1{{MDd);<MN((.v&!*f5$ > ++1C1C+DEE E
 Ns   *F/*Fc                 j   t        j                  |D cg c]  }|j                  |       c}      }|j                  d      }| j	                  t        |d      d      }|j                  dd|z  f      }||z
  }|dz  j                         }|t        |j                  d	         z  }	||	fS c c}w )
zGCompute a loss based on a number of characters predicted from the docs.rG   )   )	n_classesfdtypera      r   )	numpyvstackto_utf8_arrayreshapeasarrayr   sumfloatr-   )
rX   rY   rZ   rF   r[   
target_idsr]   diffr%   r^   s
             r4   rI   rI   d   s    TRcs000ARSJ##E*J[[
cB#[NF^^Rw/0FD!G==?DeJ,,Q/00H> Ss   B0r)   token_vector_widthnOc           	          t        ||dz        }t        | t        |dz  ||d      t        |dz        |      }|j	                  d|        |j	                  d|       |S )Nrg   )rr   nI        )rr   rt   nPdropoutr)   output_layer)r   r   r   r   set_ref)r)   r#   rq   rr   softmaxr2   s         r4   build_multi_task_modelr{   p   sr      2Q 67G!A%!		
 	$q()
E 
MM)W%	MM.'*Lr6   r(   c           
      p   | j                   j                  d   }t        t        t        t
        d   t        f   t                     t        ||j                  d      |dd      t        ||t                    }t        ||      }t        | |      }|j                  d|       |j                  d	|       |S )
Nr+   r   rr   Tru   )rr   rt   rv   r9   rw   )rr   rt   init_Wr)   rx   )r,   r-   r   r
   r   r   r   r   r   get_dimr   r   build_masked_language_modelry   )r(   r)   r#   r$   rr   rx   r2   s          r4   r0   r0      s     
		Q	BU4
#X-.
=t$	
 	"Y7
L '<(E'u5E	MM)W%	MM.,/Lr6   rF   c           	      *   t        t        t        t        d   t        f   t                     t        ||      t        |      t        dg|z  |            }t        | t        ||            }|j                  d|       |j                  d|       |S )Nr   )rr   rv   )rt   rb   r)   rx   )r   r
   r   r   r   r   r   r   r   r   ry   )r(   r)   r#   r$   rF   rx   r2   s          r4   rH   rH      s     U4
#X-.
=+-0[!cUW_5	L (uWl/KLE	MM)W%	MM.,/Lr6   wrapped_model	mask_probc           
          t        |       fd}ddt        fd}t        d||g|d|it        j                  |j                              }|j                  d|       |S )z7Convert a model into a BERT-style masked language modelc                     t        |      \  }| j                  j                        j                  j                  d   df       | j
                  d   ||      \  }fd}||fS )N)r   r   r+   c                 $    | dz
  z  }  |       S )Nr+    )d_outputbackpropmasks    r4   mlm_backwardzFbuild_masked_language_model.<locals>.mlm_forward.<locals>.mlm_backward   s    D HH%%r6   )_apply_maskrX   rl   rk   r-   layers)	r2   rY   is_trainoutputr   r   r   r   random_wordss	        @@r4   mlm_forwardz0build_masked_language_model.<locals>.mlm_forward   sn     |yI
dyy  &..

1q/AB*5<<?4:	& |##r6   r2   c                     | j                   d   }|j                  ||       |j                  D ]5  }|j                  |      s| j	                  ||j                  |             7 y )Nr   )XY)r   
initialize	dim_nameshas_dimset_dimr~   )r2   r   r   wrappeddims        r4   mlm_initializez3build_masked_language_model.<locals>.mlm_initialize   sZ    ,,q/Q!$$$ 	9Cs#c7??3#78	9r6   zmasked-language-modelr   )r   initrefsdims)NN)_RandomWordsr   dictfromkeysr   ry   )r(   r   r   r   r   	mlm_modelr   s     `   @r4   r   r      sl      &L	$9e 9 ']]=223I i/r6   c                        e Zd ZddZdefdZy)r   r&   Nc                    |D cg c]  }|j                   dk7  s|j                    c}| _        | j                  d d | _        |D cg c]  }|j                   dk7  s|j                     }}|d d }t        j                  t        j
                  |d            }||j                         z  }|| _        g | _        y c c}w c c}w )Nru   '  rd   re   )	probtextwordsrh   exparrayrm   probs_cache)selfr(   lexr   s       r4   __init__z_RandomWords.__init__   s    */C3388s?chhC
ZZ'
 &+>cchh#o>>fu$yyU#)FG
 "$ D ?s   B>B>CCc                    | j                   sX| j                   j                  t        j                  j	                  t        | j                        d| j                               | j                   j                         }| j                  |   S )Nr   )p)	r   extendrh   randomchoicelenr   r   pop)r   indexs     r4   nextz_RandomWords.next   sa    {{KK##C

OUdjj#I !zz%  r6   )r(   r"   r&   N)__name__
__module____qualname__r   strr   r   r6   r4   r   r      s    $!c !r6   r   rY   r!   r   c                    ddl m} t        d | D              }t        j                  j                  dd|f      }||k\  }d}g }| D ]  }g }	|D ]@  }
||   st        |
j                  |      }n|
j                  }|	j                  |       |dz  }B |D cg c]  }t        |j                         }}|j                   ||j                  |	|              ||fS c c}w )	Nr   r    c              3   2   K   | ]  }t        |        y wN)r   ).0r[   s     r4   	<genexpr>z_apply_mask.<locals>.<genexpr>   s     %CH%s   ru   g      ?r   r+   )r   spaces)
tokens.docr!   rm   rh   r   uniform_replace_wordr   appendboolwhitespace_r(   )rY   r   r   r!   Nr   imasked_docsr[   r   tokenwordwr   s                 r4   r   r      s     "%%%A<<S1$/D9D	AK G 	E7$UZZ>zzLLFA	 033!$q}}%33 	3syyfEFG   4s   Cr   r   c                 z    t         j                  j                         }|dk  r|S |dk  r|j                         S | S )Ng?g?)rh   r   r   )r   r   r   rolls       r4   r   r     s;    << Dcz	  ""r6   r   )g333333?)z[MASK]):	functoolsr   typingr   r   r   r   r   r	   r
   rh   	thinc.apir   r   r   r   r   r   r   r   r   r   r   r   
thinc.lossr   thinc.typesr   r   r1   r   r   errorsr   utilr   r,   r   rN   r   r!   r(   r"   intr   rB   rK   r?   rI   r{   r0   rH   rn   r   r   ndarrayr   r   r   r6   r4   <module>r      s    Q Q Q      (    *!$$%($03$w%&$4''%('8;'w%&'"0	  	  		
 ."36EH
,"36EHSV
  >B!!#(!5:!
!H! !4 KO
5/)5BG
5==$u+%&< < s RU r6   