
    ig.                        U d dl Z d dlmZ d dlmZmZmZmZmZm	Z	 ddl
mZ ddlmZ ddlmZmZ eeef   Zeeee   ef   Zeeee   ef   Zdd	d
dZeeeeeef   f   ed<   dddded   ed   d	dZeeeeeef   f   ed<    ej6                  d      ed   fed   ed   ed   ed    e	eed         ed   d	ddedededededededefd       Z ej6                  d      ed   fed   ed   ed   ed   ed    e	eed         d	ddedededededededefd        Z ej6                  d!      ed   ed    e	eed         d	d"dededededef
d#       Z G d$ d%e      Z g d&Z!y)'    N)defaultdict)DictListOptionalTupleUnioncast   )get_array_ops)registry)FloatsXd	Generator        T      ?)L2L2_is_weight_decay	grad_clipSGD_DEFAULTSgMbP?g?g+?g:0yE>r   r   )
learn_ratebeta1beta2epsr   r   r   ADAM_DEFAULTSzRAdam.v1r   r   r   r   r   )r   r   r   r   r   r   use_averagesr   c                *    t        | |||||||d	      S )NT)r   r   r   r   r   r   r   	use_radam	Optimizer)r   r   r   r   r   r   r   r   s           a/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/thinc/optimizers.pyRAdamr       s,     -!
 
    zAdam.v1)r   r   r   r   r   r   r   c                *    t        | |||||||d	      S )NF)r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   s           r   Adamr#   8   s,     -!
 
r!   zSGD.v1)r   r   r   r   c          	      &    t        | |||dd|      S )Nr   )r   r   r   r   r   r   r   )r   r   r   r   r   s        r   SGDr%   Q   s&     -! r!   c                      e Zd ZU dZeeef   ed<   eeef   ed<   eeeef      ed<   ee	e
f   ed<   eeef   ed<   eeef   ed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eed<   eeee         ed<   g dZed   ed   ed   ed   ed   ddddd	ededededededededefdZd Zd Zdddeee	f   deded efd!Zd" Zd# Zy$)%r   zDo various flavours of stochastic gradient descent, with first and
    second order momentum. Currently support 'vanilla' SGD, Adam, and RAdam.
    mom1mom2averages	schedules	nr_update	last_seenr   r   b1b2r   r   r   r   _radam_buffer)r'   r(   r)   r*   r+   r,   r   r   r-   r.   r   r   r   r   r/   r   r   TF)r   r   r   r   r   r   r   r   r   c                   i | _         i | _        |ri | _        nd| _        i | _        t	        t
              | _        t	        t
              | _        | j                  d|       | j                  d|       | j                  d|       | j                  d|       | j                  d|       | j                  d|       || _	        |	| _
        t        d      D 
cg c]  }
g d	 c}
| _        yc c}
w )
al  
        Initialize an optimizer.

        learn_rate (float): The initial learning rate.
        L2 (float): The L2 regularization term.
        beta1 (float): First-order momentum.
        beta2 (float): Second-order momentum.
        eps (float): Epsilon term for Adam etc.
        grad_clip (float): Gradient clipping.
        use_averages (bool): Whether to track moving averages of the parameters.
        use_radam (bool): Whether to use the RAdam optimizer.
        L2_is_weight_decay (bool): Whether to interpret the L2 parameter as a
            weight decay term, in the style of the AdamW optimizer.
        Nr   r   r-   r.   r   r   
   )NNN)r'   r(   r)   r*   r   intr+   r,   _set_attr_or_scheduler   r   ranger/   )selfr   r   r   r   r   r   r   r   r   _s              r   __init__zOptimizer.__init__   s    6 		DM DM$S)$S)"";	:""<<""4/""4/""5#.""4,""4:?)DQ0DDs   C*c                 D   t        |t        t        t        f      rt	        | ||       y t        |t
              rt        |      }|| j                  |<   	 t	        | |t        |             y # t        t        f$ r$}d| dt        |       d| }t        |      d }~ww xY w)NzInvalid schedule for 'z' (z)
)
isinstancefloatboolr2   setattrlistiterr*   nextStopIteration	TypeErrortype
ValueError)r5   namevalueeerrs        r   r3   zOptimizer._set_attr_or_schedule   s    eeT3/0D$&%&U#(DNN4 &dDK0!9- &.tfCU}CsK o%&s   A, ,B;BBc                     | j                   j                         D ]  \  }}	 t        |      }t        | ||         y # t        $ r t	        | |      }Y 'w xY w)N)r*   itemsr?   r@   getattrr<   )r5   keyschedulerE   s       r   step_scheduleszOptimizer.step_schedules   s[    !^^113 	&MC+X D#u%	& ! +c*+s   =AAr   )lr_scalerK   weightsgradientrN   c                   t        |      dk  r||fS t        |      }| j                  |xx   dz  cc<   | j                  |   }| j                  dk7  r| j                  s|| j                  |z  z  }| j
                  r|j                  || j
                        }| j                  r| j                  ||||||      \  }}nb| j                  dkD  r)| j                  dkD  r| j                  ||||||      \  }}n*| j                  dkD  rt        ||| j                  z  |z  z  }|dz  }| j                  dk7  r.| j                  r"||| j                  z  | j                  z  |z  z  }| j                  X|| j                  vr*|j                  |j                   d      | j                  |<   |j#                  | j                  |   ||       ||fS )zCall the optimizer with weights and a gradient. The key is the
        identifier for the parameter, usually the node ID and parameter name.
        r
   r   r   float32)dtype)lenr   r+   r   r   r   clip_gradientr   _radamr-   r.   _adamNotImplementedErrorr   r)   allocshapeupdate_averages)r5   rK   rO   rP   rN   opsnr_upds          r   __call__zOptimizer.__call__   s    x=1H$$G$sq $77a< 7 7'))H>>((4>>BH>> $Wh#v!GX WWs]tww} $

Wh#v!GX WWs]%%x$//1H<<GA77a<D33x$//1DGG;gEEG==$$--'%(YYw}}IY%Nc"c 2GVD  r!   c                    || j                   vr(|j                  |j                        | j                   |<   || j                  vr(|j                  |j                        | j                  |<   |j	                  ||j                        }|j	                  ||j                        }| j
                  |   | j                   |   | j                  |   d}	| j                  | j                  | j                  g| j                  d| j                  d}
d}|	d   |	d   }}|
d   \  }}||z  }|d|z
  |d	z  z  z  }||z  }|d|z
  |z  z  }|	d
xx   dz  cc<   |
d   t        |	d
   dz           }|	d
   |d   k(  r|d   |d	   }}n|	d
   |d<   ||	d
   z  }d	d|z
  z  dz
  }|d	|	d
   z  |z  d|z
  z  z
  }||d<   |dk\  rCt        j                  d|z
  |dz
  z  |dz
  z  |d	z
  z  |z  |z  |d	z
  z        d||	d
   z  z
  z  }n|rdd||	d
   z  z
  z  }nd}||d	<   |dk\  rN|
d   dk7  r||
d    |
d   z  |z  z  }|j                  j                  |      |
d   z   }|| |
d   z  ||z  z  z  }n.|dkD  r)|
d   dk7  r||
d    |
d   z  |z  z  }|| |
d   z  |z  z  }|j                  ||j                         |j                  ||j                         fS )N)stepexp_avg
exp_avg_sqr   )lrbetasr   weight_decaybufferTra   rb   rd   r
      r`   rf   r1   r         r   re   rc   r   )r'   alloc1fsizer(   	reshape1fr+   r   r-   r.   r   r/   r2   mathsqrtxp	reshape_frZ   )r5   r\   rO   gradrN   rK   r]   
weights_1Dgradient_1Dstategroupdegenerated_to_sgdra   rb   r   r   bufferedN_sma	step_sizebeta2_t	N_sma_maxdenoms                         r   rV   zOptimizer._radam   sr   dii [[6DIIcNdii [[6DIIcN]]7GLL9
mmD$))4
 NN3'yy~))C.
 //ggtww'88((
 "#I.l0CW~u 	e
q5y[!^44
5AI,,f?3uV}r'9#:;=HQK''{HQK9E-HQKuV},GQY!+IE&M 1G ;q7{ KKEHQK z II[qy" 1}& qy" 	
    !1}& %-//1	 $1uf'=#=>		#HQK A:^$)u^44uT{BZOO
FFKK
+eEl:E9*uT{2goFFJ]^$)u^44uT{BZOO
9*uT{2W<<JMM*gmm4MM+tzz2
 	
r!   c                    |j                  ||j                        }|j                  ||j                        }|| j                  vr(|j                  |j                        | j                  |<   || j                  vr(|j                  |j                        | j                  |<   | j                  |   }	| j                  |   }
| j
                  }| j                  }d||z  z
  }d||z  z
  }| j                  |dz  z  |z  }| j                  }|j                  |||	|
|||||z        \  }}}	}
|	| j                  |<   |
| j                  |<   |j                  ||j                        |j                  ||j                        fS )Nr   g      ?)rm   rl   r'   rk   r(   r-   r.   r   r   adamrq   rZ   )r5   r\   rO   rP   rN   rK   r]   rs   rt   r'   r(   r-   r.   fix1fix2rc   r   s                    r   rW   zOptimizer._adamH  sT   ]]7GLL9
mmHhmm<dii [[6DIIcNdii [[6DIIcNyy~yy~WWWWb&j!b&j!__tSy(4/hh.1hhT4Rb8m/
+
Kt 		#		#MM*gmm4MM+x~~6
 	
r!   N)__name__
__module____qualname____doc__r   KeyTr   __annotations__r   strr   r2   r:   r;   r   	__slots__r   
FloatOrSeqr7   r3   rM   r   r^   rV   rW    r!   r   r   r   e   s    tX~

tX~
tD(N+,,CN##D#ID#III	JIOXh/011I, 't,)'2)'2'. -k :!#',E,E 	,E
 ,E ,E ,E ,E ,E ,E !,E\&& '!38_'! '! 	'! '!RL
\
r!   r   )r#   r    r%   r   r   r   )"rn   collectionsr   typingr   r   r   r   r   r	   backendsr   configr   typesr   r   r2   r   r   r:   r   IntOrSeqr   r;   r   r   
optimizersr    r#   r%   objectr   __all__r   r!   r   <module>r      s    # ; ; #  &S#X5$u+y01
d3i*+ 4d3eT3.//0  
t
k*5tCudC/001  Z *<8 &g.%g.#E*"4(#D-8L*MN)+6  	
 
 	    !0 Y*<8 #4(%g.%g.#E*)+6#D-8L*MN 	 	
  
     0 X "$'(5#D,7K*LM 	 	
   &{
 {
| Qr!   