
    i8                         d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
  ed	      e G d
 de                    ZdgZy)zLLaMA model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstring)intervalzmeta-llama/Llama-2-7b-hf)
checkpointc                       e Zd ZU dZdZdgZddddddddZdgdgfd	d
gd	gfd	gd	gfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	dz  e
d<   dZee
d<   dZe	e
d<     edd      d      Zee
d <   d!Zee
d"<   d#Zee
d$<   dZe	dz  e
d%<   d&Ze	dz  e
d'<   d(Ze	ee	   z  dz  e
d)<   d&Ze	dz  e
d*<   d+Zee
d,<   dZe e!z  dz  e
d-<   d+Z"ee
d.<   dZ#e	ez  dz  e
d/<   d+Z$ee
d0<   dZ%e	dz  e
d1<    fd2Z&d3 Z' xZ(S )4LlamaConfigat  
    ```python
    >>> from transformers import LlamaModel, LlamaConfig

    >>> # Initializing a LLaMA llama-7b style configuration
    >>> configuration = LlamaConfig()

    >>> # Initializing a model from the llama-7b style configuration
    >>> model = LlamaModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```llamapast_key_valuescolwiserowwise)zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi }  
vocab_sizei   hidden_sizei +  intermediate_size    num_hidden_layersnum_attention_headsNnum_key_value_headssilu
hidden_acti   max_position_embeddingsg        g      ?)minmaxg{Gz?)defaultinitializer_rangegư>rms_norm_epsT	use_cachepad_token_id   bos_token_id   eos_token_idpretraining_tpFtie_word_embeddingsrope_parametersattention_biasattention_dropoutmlp_biashead_dimc                     | j                   | j                  | j                  z  | _         | j                  | j                  | _        t	        |   di | y )N )r2   r   r   r   super__post_init__)selfkwargs	__class__s     ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/llama/configuration_llama.pyr6   zLlamaConfig.__post_init__V   sO    ==  ,,0H0HHDM##+'+'?'?D$''    c                     | j                   | j                  z  dk7  r&t        d| j                    d| j                   d      y)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   zThe hidden size (z6) is not a multiple of the number of attention heads (z).N)r   r   
ValueError)r7   s    r:   validate_architecturez!LlamaConfig.validate_architecture^   sS    d666!;#D$4$4#5 622327  <r;   ))__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r   r   r   strr    r   r$   floatr%   r&   boolr'   r)   r+   listr,   r-   r.   r   dictr/   r0   r1   r2   r6   r>   __classcell__)r9   s   @r:   r   r      s    J#4"5 &/%.%.%."+ )"+ &(9:#%568IJ!"_$56 JK"s"s!!&*t*J#'S'9xCS9$GuGL%It#L#*# L#* +,L#S	/D(,!"NC$J" %%48O^d*T18 ND ,/sU{T)/HdHcDj(r;   r   N)rB   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   utils.type_validatorsr   r   __all__r4   r;   r:   <module>rU      sP   &   . 3 1 # - 56E" E  7EP /r;   