
    i(                         d dl mZ ddlmZ ddlmZ ddlmZmZ ddl	m
Z
  ej                  e      Z ed	      e G d
 de                    Z ed	      e G d de                    ZddgZy)    )strict   )PreTrainedConfig)RopeParameters)auto_docstringlogging   )
AutoConfigzsesame/csm-1b)
checkpointc                       e Zd ZU dZdZdZdgZddiZdZdZ	e
d	z  ed
<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
ed<   dZe
d	z  ed<   dZeed<   dZe
ed<   dZeed<   dZeed<   d Zeed!<   d	Ze
d	z  ed"<   d	Ze
d	z  ed#<   d	Ze
ee
   z  d	z  ed$<   d	Ze e!z  d	z  ed%<   d&Z"eed'<   d(Z#ee
z  d	z  ed)<   d&Z$eed*<   d	Z%e
d	z  ed+<    fd,Z& xZ'S )-CsmDepthDecoderConfiga  
    backbone_hidden_size (`int`, *optional*, defaults to 2048):
        Dimension of the hidden representations of the backbone model used with this depth decoder.

    Example:

    ```python
    >>> from transformers import CsmDepthDecoder, CsmDepthDecoderConfig

    >>> # Initializing a CsmDepthDecoder
    >>> configuration = CsmDepthDecoderConfig()
    >>> model = CsmDepthDecoderModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```csm_depth_decoder_modeldepth_decoder_configpast_key_valuescodebook_size
vocab_size    A    Nnum_codebooks   backbone_hidden_size  i   hidden_size    intermediate_size   num_hidden_layers   num_attention_headsr	   num_key_value_headssilu
hidden_act!   max_position_embeddings{Gz?initializer_rangeh㈵>rms_norm_epsT	use_cachepad_token_idbos_token_ideos_token_idrope_parametersFattention_bias        attention_dropoutmlp_biashead_dimc                    |j                  dd      rt        d      | j                  | j                  | _        | j                  | j                  n| j
                  | j                  z  | _        t        |   di | y )Ntie_word_embeddingsFzE`tie_word_embeddings=True` is not supported for CsmDepthDecoderConfig )pop
ValueErrorr    r   r2   r   super__post_init__selfkwargs	__class__s     z/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/csm/configuration_csm.pyr9   z#CsmDepthDecoderConfig.__post_init__M   sr    ::+U3dee ##+'+'?'?D$)-)BHXHX\`\t\tHt''    )(__name__
__module____qualname____doc__
model_typebase_config_keykeys_to_ignore_at_inferenceattribute_mapdefault_thetar   int__annotations__r   r   r   r   r   r   r    r"   strr$   r&   floatr(   r)   boolr*   r+   r,   listr-   r   dictr.   r0   r1   r2   r9   __classcell__r=   s   @r>   r   r      sH   " +J,O#4"5M M "M3:" $#$JK!s!s  &'t'J#%S%#u#L%It#L#*##L#*#+/L#S	/D(/48O^d*T18 ND ,/us{T)/HdHcDj( (r?   r   c                   f    e Zd ZU dZdZdZdgZdZee	dZ
ddiZd	Zed
z  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d	Zeed<   dZed
z  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed <   d!Zed
z  ed"<   d#Zed
z  ed$<   d%Z ee!e   z  d
z  ed&<   d'Z"ed
z  ed(<   d
Z#ee!e   z  d
z  ed)<   d!Z$ed
z  ed*<   d+Z%ee!e   z  d
z  ed,<   d
Z&e'e(z  d
z  ed-<   d.Z)eed/<   d0Z*eez  d
z  ed1<   d.Z+eed2<   d
Z,ed
z  ed3<   dZ-ed
z  ed4<   d
Z.e(e/z  d
z  ed5<   d
Z0e(e/z  d
z  ed6<    fd7Z1 xZ2S )8	CsmConfiga  
    codebook_pad_token_id (`int`, *optional*, defaults to 2050):
        Padding token id for codebook tokens.
    codebook_eos_token_id (`int`, *optional*, defaults to 0):
        End of stream token id for codebook tokens.
    audio_token_id (`int`, *optional*, defaults to 128002):
        Audio token id in the text input.
    audio_eos_token_id (`int`, *optional*, defaults to 128003):
        End of stream token id for audio in the text input.
    tie_codebooks_embeddings (`bool`, *optional*, defaults to `True`):
        Whether to tie the codebook tokens embeddings of the backbone model to the codebook tokens embeddings of the depth decoder.
    depth_decoder_config (`CsmDepthDecoderConfig`, *optional*):
        Configuration for the depth decoder.
    codec_config (`PreTrainedConfig`, *optional*):
        Configuration for the codec.

    ```python
    >>> from transformers import CsmForConditionalGeneration, CsmConfig

    >>> # Initializing a CsmConfig
    >>> configuration = CsmConfig()

    >>> # Initializing a model
    >>> model = CsmForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    csm
csm_configr   r   )codec_configr   r   r   r   Nr   r   i  text_vocab_sizer   r   r   r      r   r   r   r    r!   r"   r$   r%   r&   r'   r(   Tr)   i r*   i  codebook_pad_token_idr   codebook_eos_token_idi  r+   r,   audio_token_idi audio_eos_token_idr-   Fr.   r/   r0   r1   r2   tie_codebooks_embeddingsr   rV   c                    |j                  dd      rt        d      | j                  %t               | _        t        j                  d       n4t        | j                  t              rt        di | j                  | _        | j                  0t        j                  d      | _        t        j                  d       n>t        | j                  t              r$t        j                  di | j                  | _        | j                  | j                  | _        | j                  | j                  n| j                  | j                  z  | _        d| _        t!        | D  di | y )Nr4   Fz9`tie_word_embeddings=True` is not supported for CsmConfigzAdepth_decoder_config is None, using default depth decoder config.mimiz9codec_config is None, using default audio encoder config.r5   )r6   r7   r   r   loggerinfo
isinstancerO   rV   r
   	for_modelr    r   r2   r   r4   r8   r9   r:   s     r>   r9   zCsmConfig.__post_init__   s   ::+U3XYY$$,(=(?D%KK[\1148(=(Z@Y@Y(ZD%$ * 4 4V <DKKST))40 * 4 4 It7H7H ID##+'+'?'?D$)-)BHXHX\`\t\tHt#( ''r?   )3r@   rA   rB   rC   rD   rE   rF   rH   r
   r   sub_configsrG   r   rI   rJ   r   rW   r   r   r   r   r    r"   rK   r$   r&   rL   r(   r)   rM   r*   rY   rZ   rN   r+   r,   r[   r\   r-   r   rO   r.   r0   r1   r2   r]   r   r   rV   r9   rP   rQ   s   @r>   rS   rS   X   s   < J"O#4"5M" 5K
 	M !#M3:"J!OS!K!s!s!!&'t'J#'S'#u#L%It%L#*%(,3:,453c?T15%L#*%+/L#S	/D(/!'NC$J'17d3i$.748O^d*T18 ND ,/us{T)/HdHcDj,0dTk0;?$!11D8?37L$))D07( (r?   rS   N)huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r   auto.configuration_autor
   
get_loggerr@   r`   r   rS   __all__r5   r?   r>   <module>rl      s     / 3 1 , 0 
		H	% ?+8(, 8(  ,8(v ?+](  ](  ,](B r?   