
    i	                     v    d Z ddlmZ ddlmZ ddlmZ ddlmZ  ed      e G d	 d
e                    Z	d
gZ
y)zStableLM model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringzstabilityai/stablelm-3b-4e1t)
checkpointc                       e Zd ZU dZdZdgZdZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   d
Zeed<   d
Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeez  dz  ed<   dZeed<   dZeed<   dZeed<   dZeez  ed <   dZeez  ed!<   d"Zedz  ed#<   d"Z ee!e   z  dz  ed$<   dZ"edz  ed%<    fd&Z# xZ$S )'StableLmConfiga'  
    use_parallel_residual (`bool`, *optional*, defaults to `False`):
        Whether to use a "parallel" formulation in each Transformer layer, which can provide a slight training
        speedup at large scales.
    hidden_dropout (`float`, *optional*, defaults to 0.0):
        The dropout ratio after applying the MLP to the hidden states.

    Example:

    ```python
    >>> from transformers import StableLmModel, StableLmConfig

    >>> # Initializing a StableLM stablelm-3b style configuration
    >>> configuration = StableLmConfig()
    ```stablelmpast_key_valuesi  
vocab_sizei   intermediate_sizei 
  hidden_size    num_hidden_layersnum_attention_headsnum_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>layer_norm_epsT	use_cacheFtie_word_embeddingsNrope_parametersuse_qkv_biasqk_layernormuse_parallel_residualg        hidden_dropoutattention_dropoutr   bos_token_ideos_token_idpad_token_idc                 H    |j                  dd       t        |   di | y )Npartial_rotary_factorg      ? )
setdefaultsuper__post_init__)selfkwargs	__class__s     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/stablelm/configuration_stablelm.pyr)   zStableLmConfig.__post_init__C   s$    148''    )%__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencer   int__annotations__r   r   r   r   r   r   strr   r   floatr   r   boolr   r   r   dictr   r   r   r   r    r!   r"   listr#   r)   __classcell__)r,   s   @r-   r
   r
      s$     J#4"5J!s!Ks!!!!J#'S'#u#"NE"It %%48O^d*T18L$L$"'4'"%NECK%%(us{( L#* +,L#S	/D(,#L#*#( (r.   r
   N)r2   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r&   r.   r-   <module>rB      sK    # . 3 1 # 9:,(% ,(  ;,(^ 
r.   