
    i=                     v    d Z ddlmZ ddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Z	d
gZ
y)zFalcon configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringztiiuae/falcon-7b)
checkpointc                       e Zd ZU dZdZdgZdZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   dZedz  ed<   dZedz  ed<   dZeed<   dZeed<   dZeez  dz  ed<   dZeez  dz  ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZeed<   dZeez  dz  ed <   d!Zedz  ed"<   d!Zee e   z  dz  ed#<   dZ!edz  ed$<   dZ"edz  ed%<   d&Z#e$dz  ed'<   dZ%eed(<    fd)Z&e'd*        Z(e'd+        Z) xZ*S ),FalconConfiga  
    num_ln_in_parallel_attn (`int`, *optional*):
        Set to 2 if separate layer norms are to be used for the MLP and the attention output when using parallel
        attention, otherwise, 1.
    alibi (`bool`, *optional*, defaults to `False`):
        Whether to use ALiBi positional biases during self-attention.
    new_decoder_architecture (`bool`, *optional*, defaults to `False`):
        Whether to use the new (Falcon-40B) decoder architecture. If `True`, the `multi_query` and `parallel_attn`
        arguments are ignored, as the new decoder always uses parallel attention.
    multi_query (`bool`, *optional*, defaults to `True`):
        Whether to use multi-query attention in the decoder. Ignored when `new_decoder_architecture` is `True`.
    parallel_attn (`bool`, *optional*, defaults to `True`):
        Whether to compute attention in parallel with the feedforward layer. If False, they are consecutive
        instead, as in the original Transformer architecture. Ignored when `new_decoder_architecture` is `True`.
    bias (`bool`, *optional*, defaults to `False`):
        Whether to use bias on Linear layers.
    ffn_hidden_size (`int`, *optional*):
        The hidden size of the feedforward layer in the Transformer decoder.
        defaults to 4x hidden dim
    activation (`str`, *optional*, defaults to `"gelu"`):
        The activation function used in the feedforward layer.

    Example:

    ```python
    >>> from transformers import FalconModel, FalconConfig

    >>> # Initializing a small (2-layer) Falcon configuration
    >>> configuration = FalconConfig(num_hidden_layers=2)

    >>> # Initializing a model from the small configuration
    >>> model = FalconModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```falconpast_key_valuesi   
vocab_sizei  hidden_size    num_hidden_layersG   num_attention_headsNnum_ln_in_parallel_attngh㈵>layer_norm_epsilong{Gz?initializer_rangeT	use_cacheg        hidden_dropoutattention_dropoutnum_kv_headsFalibinew_decoder_architecturemulti_queryparallel_attnbiasi   max_position_embeddingsrope_parameters   bos_token_ideos_token_idpad_token_idffn_hidden_sizegelu
activationtie_word_embeddingsc                    |j                  dd       }|| j                  n|| _        | j                  | j                  n| j                  | _        | j                  | j                  dz  | _        t        |   di | y )Nn_embed    )popr   r   r   r%   super__post_init__)selfkwargsr*   	__class__s      /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/falcon/configuration_falcon.pyr/   zFalconConfig.__post_init__[   sx    **Y-/64++G8<8I8I8QD44W[WhWh'#'#3#3a#7D ''    c                 4    | j                   | j                  z  S N)r   r   r0   s    r3   head_dimzFalconConfig.head_dime   s    4#;#;;;r4   c                     | j                    S r6   )r   r7   s    r3   rotaryzFalconConfig.rotaryi   s    ::~r4   )+__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencer   int__annotations__r   r   r   r   r   floatr   r   boolr   r   r   r   r   r   r   r   r   r    r   dictr"   r#   listr$   r%   r'   strr(   r/   propertyr8   r:   __classcell__)r2   s   @r3   r
   r
      s   #J J#4"5JKs!!*.S4Z.'++#u#It),NECK$&,,/us{T)/#L#*#E4$;,1dTk1#K#!%M4$;%D$+#'S'48O^d*T18!L#*!+-L#S	/D(-#L#*#"&OS4Z&#Jd
# $$( < <  r4   r
   N)r>   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r,   r4   r3   <module>rO      sN     . 3 1 # -.R# R  /Rj 
r4   