
    i7*                        d dl mZ d dlmZ d dlZd dlmZ ddlmZm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' ddl(m)Z)  ejT                  e+      Z,dZ-dZ. G d de%      Z/ G d de      Z0 G d de      Z1 G d de      Z2 G d de$      Z3 G d d e#      Z4 G d! d"e       Z5 G d# d$e!      Z6 G d% d&e"      Z7g d'Z8y)(    )Callable)OptionalN   )CacheDynamicCache)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)merge_with_config_defaults)capture_outputs   )CLIPMLP)	LlamaAttentionLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassification
LlamaModelLlamaPreTrainedModelLlamaRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )	PhiConfigzmicrosoft/phi-1r   c                   R    e Zd Ze	 	 	 d	dedz  ded   dedz  dedef   fd       Z	y)
PhiRotaryEmbeddingNconfigdeviceztorch.deviceseq_lenreturnztorch.Tensorc                 n   | j                   d   }| j                   j                  dd      }t        | dd      xs | j                  | j                  z  }t        ||z        }d}d|t        j                  d|dt        j                        j                  |t        j                  	      |z  z  z  }||fS )
a  
        Computes the inverse frequencies according to the original RoPE implementation
        Args:
            config ([`~transformers.PreTrainedConfig`]):
                The model configuration.
            device (`torch.device`):
                The device to use for initialization of the inverse frequencies.
            seq_len (`int`, *optional*):
                The current sequence length. Unused for this type of RoPE.
        Returns:
            Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
            post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
        
rope_thetapartial_rotary_factorg      ?head_dimNr   r   )dtype)r"   r)   )rope_parametersgetgetattrhidden_sizenum_attention_headsinttorcharangeint64tofloat)	r!   r"   r#   baser'   r(   dimattention_factorinv_freqs	            t/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/phi/modular_phi.pycompute_default_rope_parametersz2PhiRotaryEmbedding.compute_default_rope_parameters(   s    & %%l3 & 6 6 : :;RTW X6:t4h8J8JfNhNh8h(223 U\\!S!5;;?BB&X]XcXcBdgjjk
 )))    )NNN)
__name__
__module____qualname__staticmethodr   r   r/   tupler4   r:    r;   r9   r    r    '   sY    #'+/"*D *(* t* 
~u$	%	* *r;   r    c                        e Zd Zdedef fdZ	 ddej                  deej                  ej                  f   dej                  dz  de	dz  d	eej                  ej                  dz  f   f
d
Z
 xZS )PhiAttentionr!   	layer_idxc                    t         |   ||       t        j                  |j                  |j
                  | j                  z  d      | _        t        j                  |j                  |j                  | j                  z  d      | _	        t        j                  |j                  |j                  | j                  z  d      | _
        t        j                  |j
                  | j                  z  |j                  d      | _        | `t        | j                  |j                  d   z        | _        |j                   | _        | j                   r}t        j"                  |j                  |j
                  z  |j$                  d      | _        t        j"                  |j                  |j
                  z  |j$                  d      | _        y y )NTbiasr'   )epselementwise_affine)super__init__nnLinearr-   r.   r(   q_projnum_key_value_headsk_projv_projdenseo_projr/   r*   rotary_ndimsqk_layernorm	LayerNormlayer_norm_epsq_layernormk_layernormselfr!   rD   	__class__s      r9   rK   zPhiAttention.__init__J   sh   +ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijYYv99DMMI6K]K]dhi
K0F0FG^0_ _`"//!||""f&@&@@fF[F[pt D  "||""f&@&@@fF[F[pt D	 r;   Nhidden_statesposition_embeddingsattention_maskpast_key_valuesr$   c                 b   |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  r"| j                  |      }| j                  |	      }	|\  }}|dd | j                  f   |d| j                  d f   }}|	dd | j                  f   |	d| j                  d f   }}t        ||||      \  }}t        j                  ||fd      }t        j                  ||fd      }	| |j                  |	|
| j                        \  }	}
t!        j"                  | j$                  j&                  t(              } || ||	|
|f| j*                  sdn| j,                  | j.                  d|\  }} |j0                  g |d j3                         }| j5                  |      }||fS )Nr   r   .)r6   g        )dropoutscaling)shaper(   rN   view	transposerP   rQ   rU   rX   rY   rT   r   r0   catupdaterD   r   get_interfacer!   _attn_implementationr   trainingattention_dropoutrd   reshape
contiguousrR   )r[   r]   r^   r_   r`   kwargsinput_shapehidden_shapequery_states
key_statesvalue_statescossin	query_rot
query_passkey_rotkey_passattention_interfaceattn_outputattn_weightss                       r9   forwardzPhiAttention.forward[   sV    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST++L9L))*5J&S 1 1 1112d//112 	
 s/d////0sD--//0 
 2)Wc3O	7 yy)Z!8bAYY2;
&'6'='=j,X\XfXf'g$J(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFHjj-L((r;   )N)r<   r=   r>   r   r/   rK   r0   Tensorr@   r   r   __classcell__r\   s   @r9   rC   rC   I   s    y S , )-8)||8) #5<<#=>8) t+	8)
 8) 
u||U\\D00	18)r;   rC   c                       e Zd Zy)PhiMLPNr<   r=   r>   rA   r;   r9   r   r          r;   r   c                       e Zd Zdedef fdZ	 	 	 	 	 ddej                  dej                  dz  dej                  dz  de	dz  d	e
dz  d
eej                  ej                  f   dz  dee   dej                  fdZ xZS )PhiDecoderLayerr!   rD   c                    t         |           t        ||      | _        t	        |      | _        t        j                  |j                  |j                        | _
        t        j                  |j                        | _        y )N)rD   rH   )rJ   rK   rC   	self_attnr   mlprL   rV   r-   rW   input_layernormDropoutresid_pdropresid_dropoutrZ   s      r9   rK   zPhiDecoderLayer.__init__   s]    %f	B&>!||F,>,>FDYDYZZZ(:(:;r;   Nr]   r_   position_idsr`   	use_cacher^   rp   r$   c           
          |}| j                  |      } | j                  d||||||d|\  }	}
| j                  |	      }	| j                  | j                  |            }|	|z   |z   }|S )N)r]   r_   r   r`   r   r^   rA   )r   r   r   r   )r[   r]   r_   r   r`   r   r^   rp   residualattn_outputs_feed_forward_hidden_statess               r9   r   zPhiDecoderLayer.forward   s     !,,];($.. 
')%+ 3
 
a )),7%)%7%78O%P"$'AAHLr;   )NNNFN)r<   r=   r>   r   r/   rK   r0   r   
LongTensorr   boolr@   r   r   r   r   r   s   @r9   r   r      s    <y <S < /304(,!&HL|| t+ &&-	
  $; #5<<#=>E +, 
r;   r   c                       e Zd ZeedZy)PhiPreTrainedModel)r]   
attentionsN)r<   r=   r>   r   rC   _can_record_outputsrA   r;   r9   r   r      s    ("r;   r   c                        e Zd Zdef fdZeee	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dedz  dej                  dz  d	edz  d
ee   defd                     Z xZS )PhiModelr!   c           	      d   t         |   |       t        j                  t	        |j
                        D cg c]  }t        ||       c}      | _        t        j                  |j                        | _
        t        j                  |j                  |j                        | _        | `y c c}w )Nr   )rJ   rK   rL   
ModuleListrangenum_hidden_layersr   layersr   
embd_pdropembed_dropoutrV   r-   rW   final_layernormnormrZ   s      r9   rK   zPhiModel.__init__   s     mmAFvG_G_A`aI_VY/a
  ZZ(9(9:!||F,>,>FDYDYZI	 bs   B-N	input_idsr_   r   r`   inputs_embedsr   rp   r$   c           
      ^   |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|V||j	                         nd}t        j                  |j                  d   |j                        |z   }|j                  d      }t        | j                  ||||      }	| j                  |      }|}
| j                  |
|      }| j                  d | j                  j                   D ]  } ||
f|	||||d|}
 | j                  |
      }
t!        |
|	      S )
Nz:You must specify exactly one of input_ids or inputs_embeds)r!   r   r   )r"   )r!   r   r_   r`   r   )r   )r_   r   r`   r   r^   )last_hidden_stater`   )
ValueErrorembed_tokensr   r!   get_seq_lengthr0   r1   re   r"   	unsqueezer   r   
rotary_embr   r   r   r
   )r[   r   r_   r   r`   r   r   rp   past_seen_tokenscausal_maskr]   r^   decoder_layers                r9   r   zPhiModel.forward   s^    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L(;;')+%
 **=9%"oom,oW![[)H4;;+H+HI 		M)*) /#$7 M		 ,,];&++
 	
r;   )NNNNNN)r<   r=   r>   r   rK   r   r   r   r0   r   r   r   FloatTensorr   r   r   r
   r   r   r   s   @r9   r   r      s    y    .2.204(,26!%4
##d*4
 t+4
 &&-	4

 4
 ((4/4
 $;4
 +,4
 
!4
    4
r;   r   c                        e Zd Z fdZ xZS )PhiForCausalLMc                     t         |   |       t        j                  |j                  |j
                  d      | _        y )NTrF   )rJ   rK   rL   rM   r-   
vocab_sizelm_head)r[   r!   r\   s     r9   rK   zPhiForCausalLM.__init__  s0     yy!3!3V5F5FTRr;   )r<   r=   r>   rK   r   r   s   @r9   r   r     s    S Sr;   r   c                       e Zd Zy)PhiForSequenceClassificationNr   rA   r;   r9   r   r     r   r;   r   c                       e Zd Zy)PhiForTokenClassificationNr   rA   r;   r9   r   r     r   r;   r   )r   r   r   r   r   )9collections.abcr   typingr   r0   torch.nnrL   cache_utilsr   r   masking_utilsr   modeling_layersr	   modeling_outputsr
   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   clip.modeling_clipr   llama.modeling_llamar   r   r   r   r   r   r   r   r   configuration_phir   
get_loggerr<   logger_CHECKPOINT_FOR_DOC_CONFIG_FOR_DOCr    rC   r   r   r   r   r   r   r   __all__rA   r;   r9   <module>r      s    $    . / 9 6 & @ @ 7 5 (
 
 
 ) 
		H	%' *- *DJ)> J)Z	W 	$0 $N- A
z A
HS% S	#A 		 ; 	r;   