
    i                     v    d Z ddlmZ ddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Z	d
gZ
y)zDiffLlama model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzkajuma/DiffLlama-0.3B-handcut)
checkpointc                       e Zd ZU dZdZdgZdZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   dZeed<   dZedz  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZedz  ed<   dZedz  ed<   dZeee   z  dz  ed<   dZeed<   dZeez  dz  ed <   dZeed!<   d"Zeez  dz  ed#<   d$Z edz  ed%<   dZ!edz  ed&<    fd'Z" xZ#S )(DiffLlamaConfiga+  
    lambda_std_dev (`float`, *optional*, defaults to 0.1):
        The standard deviation for initialization of parameter lambda in attention layer.

    ```python
    >>> from transformers import DiffLlamaModel, DiffLlamaConfig

    >>> # Initializing a DiffLlama diffllama-7b style configuration
    >>> configuration = DiffLlamaConfig()

    >>> # Initializing a model from the diffllama-7b style configuration
    >>> model = DiffLlamaModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    	diffllamapast_key_valuesi }  
vocab_sizei   hidden_sizei    intermediate_size   num_hidden_layers    num_attention_headsNnum_key_value_headssilu
hidden_actmax_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idFtie_word_embeddingsrope_parametersattention_biasg        attention_dropoutg?lambda_std_devhead_dimc                     | j                   | j                  | _         | j                  | j                  n| j                  | j                  z  | _        t	        |   di | y )N )r   r   r%   r   super__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/diffllama/configuration_diffllama.pyr)   zDiffLlamaConfig.__post_init__G   sU    ##+'+'?'?D$)-)BHXHX\`\t\tHt''    )$__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencer   int__annotations__r   r   r   r   r   r   strr   r   floatr   r   boolr   r   r   listr    r!   r   dictr"   r#   r$   r%   r)   __classcell__)r,   s   @r-   r
   r
      s&   $ J#4"5JK!s!s!!&*t*J#'S'#u#L%It#L#*# L#* +,L#S	/D(, %%48O^d*T18 ND ,/us{T)/#&NEDL&HcDj( (r.   r
   N)r2   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r'   r.   r-   <module>rB      sK   " $ . 3 1 # :;1(& 1(  <1(h 
r.   