
    i                     v    d Z ddlmZ ddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Z	d
gZ
y)zPyTorch Phi-MoE model.    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzmicrosoft/Phi-3.5-MoE-instruct)
checkpointc                       e Zd ZU dZdZdgZdZdZee	d<   dZ
ee	d<   d	Zee	d
<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZee	d<   dZedz  e	d<   dZedz  e	d<   dZeee   z  dz  e	d<   d Zee	d!<   dZeez  dz  e	d"<   dZedz  e	d#<   d$Z eez  e	d%<   dZ!ee	d&<   d'Z"ee	d(<   d Z#ee	d)<   d*Z$ee	d+<   d,Z%ee	d-<   d$Z&ee	d.<   d Z'ee	d/<   d Z(ee	d0<    fd1Z) fd2Z* xZ+S )3PhimoeConfiga  
    num_local_experts (`int`, *optional*, defaults to 16):
        Number of experts per Sparse MLP layer.
    input_jitter_noise (`float`, *optional*, defaults to 0.0):
        Input jitter noise
    lm_head_bias (`bool`, *optional*, defaults to `False`):
        LM head bias

    Example:

    ```python
    >>> from transformers import PhimoeModel, PhimoeConfig
    >>> # Initializing a Phi-3 style configuration
    >>> configuration = PhimoeConfig.from_pretrained("microsoft/Phi-3.5-MoE-instruct")
    >>> # Initializing a model from the configuration
    >>> model = PhimoeModel(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```phimoepast_key_valuesg    .Ai@}  
vocab_sizei   hidden_sizei   intermediate_size    num_hidden_layersnum_attention_heads   num_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cacheNpad_token_id   bos_token_id   eos_token_idFtie_word_embeddingsrope_parameterssliding_windowg        attention_dropoutnum_experts_per_tok   num_local_expertsoutput_router_logitsgMbP?router_aux_loss_coefg{Gz?router_jitter_noiseinput_jitter_noiseattention_biaslm_head_biasc                 ^    | j                   | j                  | _         t        |   di | y )N )r   r   super__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/phimoe/configuration_phimoe.pyr0   zPhimoeConfig.__post_init__N   s-    ##+'+'?'?D$''    c                    t         |           | j                  d   dk7  rd| j                  v r| j                  d   | _        | j                  j	                  dd      }| j                  j	                  dd      }t        |t        t        f      st        d|       t        |t        t        f      st        d|       yy)	z?
        Validate the `rope_parameters` configuration.
        	rope_typedefault original_max_position_embeddingsshort_mscaleNlong_mscalez=`rope_parameters`'s short_mscale field must be a number, got z<`rope_parameters`'s long_mscale field must be a number, got )	r/   validate_roper!   r9   get
isinstanceintfloat	TypeError)r1   rope_parameters_short_mscalerope_parameters_long_mscaler3   s      r4   r<   zPhimoeConfig.validate_ropeS   s     	 ,	91T5I5II8<8L8LMo8p5+/+?+?+C+CNTX+Y(*.*>*>*B*B=RV*W':S%LISTpSqr  9C<HRSnRop  I :r5   ),__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencedefault_thetar   r?   __annotations__r   r   r   r   r   r   strr   r   r@   r   r   boolr   r   r   listr    r!   r   dictr"   r#   r$   r&   r'   r(   r)   r*   r+   r,   r0   r<   __classcell__)r3   s   @r4   r
   r
      se   ( J#4"5MJK!s!s!!  J#,S,#u#L%It#L#*# L#* +,L#S	/D(, %%48O^d*T18!%NC$J%%(us{(  s!&$&"'%'!%% ## ND L$(
 r5   r
   N)rG   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r.   r5   r4   <module>rV      sN     . 3 1 # ;<L# L  =L^ 
r5   