
    iH)                        d Z ddlZddlmZ ddlmZ ddlmZmZ  ej                  e
      Z ed      e G d	 d
e                    Z ed      e G d de                    Z ed      e G d de                    Zg dZy)zCLVP model configuration    N)strict   )PreTrainedConfig)auto_docstringloggingzsusnato/clvp_dev)
checkpointc                   n   e Zd ZU dZdZddgZdZeed<   dZ	eed<   d	Z
eed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeez  ed<   dZeez  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZed z  ed!<   d"Zeee   z  d z  ed#<   d Zed z  ed$<   e	 d(d%eej>                  z  d&efd'       Z y ))ClvpEncoderConfiga  
    use_rotary_embedding (`bool`, *optional*, defaults to `True`):
        Whether to use rotary_embedding or not.
    use_attention_bias (`bool`, *optional*, defaults to `False`):
        Whether to use bias in Query, Key and Value layers during self attention.
    summary_type (`str`, *optional*, defaults to `"mean"`):
        What strategy to use to get pooler_output from the last_hidden_state. `"last"`, `"first"`, `"mean"` and
        `"cls_index"` are supported.

    Example:

    ```python
    >>> from transformers import ClvpEncoderConfig, ClvpEncoder

    >>> # Initializing a ClvpEncoderConfig with susnato/clvp_dev style configuration
    >>> encoder_configuration = ClvpEncoderConfig()

    >>> # Initializing a ClvpEncoder (with random weights) from the susnato/clvp_dev style configuration
    >>> model = ClvpEncoder(encoder_configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```clvp_encodertext_configspeech_config   
vocab_size   hidden_sizei   intermediate_sizeprojection_dim   num_hidden_layers   num_attention_headsgelu
hidden_acth㈵>layer_norm_eps皙?attention_dropoutdropoutTuse_rotary_embeddingFuse_attention_biasmeansummary_type      ?initializer_factor   Nbos_token_idr   eos_token_idpad_token_idpretrained_model_name_or_pathconfig_typec                 T    | j                   |fi |\  }}|| j                  vrt        d|       |j                  d      dk(  r||   }d|v rGt	        | d      r;|d   | j
                  k7  r)t        j                  d|d    d| j
                   d        | j                  |fi |S )NzSWe can only load either 'text_config' or 'speech_config' but you are trying to load
model_typeclvpzYou are using a model of type z  to instantiate a model of type zN. This is not supported for all configurations of models and can yield errors.)	get_config_dictbase_config_key
ValueErrorgethasattrr,   loggerwarning	from_dict)clsr)   r*   kwargsconfig_dicts        |/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/clvp/configuration_clvp.pyfrom_pretrainedz!ClvpEncoderConfig.from_pretrainedK   s     2c112OZSYZV c111efqers 
 ??<(F2%k2K;&73+E+VbJcgjguguJuNN0\1J0KKk>>""pr
 s}}[3F33    )r   )!__name__
__module____qualname____doc__r,   r/   r   int__annotations__r   r   r   r   r   r   strr   floatr   r   r   boolr    r"   r$   r&   r'   listr(   classmethodosPathLiker:    r;   r9   r
   r
      s   0  J$o6OJK!s!NCs!!J NE %(us{(GUS[!%$%$$L# ##"L#*"+,L#S	/D(,#L#*#R_4,/"++,=4LO4 4r;   r
   c                      e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   dZeed<   dZeed<   dZedz  ed<   dZeed<   dZeed<   dZeez  ed<   dZeez  ed<   dZeez  ed<   dZeed<   dZeed<   dZeed<   d Zeed!<   dZedz  ed"<   d Zeed#<   dZeez  ed$<   d Zeed%<   d&Zedz  ed'<   d(Z ee!e   z  dz  ed)<   dZ"edz  ed*<   d+Z#eed,<   d Z$eed-<   d.Z%eed/<   d0Z&e!e   e'ed1f   z  ed2<   d3Z(eed4<   y)5ClvpDecoderConfiga%
  
    max_text_tokens (`int`, *optional*, defaults to 404):
        The maximum sequence length of text tokens that this model might ever be used with. Similar to
        `n_positions` in `GPT2Config`.
    n_inner (`int`, *optional*):
        Dimensionality of the inner feed-forward layers. `None` will set it to 4 times `hidden_size`.
    num_mel_attn_blocks (`int`, *optional*, defaults to 6):
        Denotes the number of self attention layers in [`ClvpConditioningEncoder`].
    summary_type (`string`, *optional*, defaults to `"cls_index"`):
        Argument used when doing sequence summary.
        Has to be one of the following options:
            - `"last"`: Take the last token hidden state (like XLNet).
            - `"first"`: Take the first token hidden state (like BERT).
            - `"mean"`: Take the mean of all tokens hidden states.
            - `"cls_index"`: Supply a Tensor of classification token position (like GPT/GPT-2).
            - `"attn"`: Not implemented now, use multi-head attention.
    summary_use_proj (`bool`, *optional*, defaults to `True`):
        Whether or not to add a projection after the vector extraction.
    summary_activation (`str`, *optional*):
        Pass `"tanh"` for a tanh activation to the output, any other value will result in no activation.
    summary_proj_to_labels (`bool`, *optional*, defaults to `True`):
        Whether the projection outputs should have `config.num_labels` or `config.hidden_size` classes.
    summary_first_dropout (`float`, *optional*, defaults to 0.1):
        The dropout ratio to be used after the projection and activation.
    feature_size (`int`, *optional*, defaults to 80):
        The feature dimension of the extracted mel features. This value is used in [`ClvpConditioningEncoder`].
    use_attention_bias (`bool`, *optional*, defaults to `True`):
        Whether to use bias in Query, Key and Value layers during self attention.
    decoder_fixing_codes (`list`, *optional*, defaults to `[83, 45, 45, 248]`):
        These values are used in the method `fix_speech_decoder_output` to fix decoder generated outputs.

    Example:

    ```python
    >>> from transformers import ClvpDecoderConfig, ClvpDecoder

    >>> # Initializing a ClvpDecoderConfig with susnato/clvp_dev style configuration
    >>> decoder_configuration = ClvpDecoderConfig()

    >>> # Initializing a ClvpDecoder (with random weights) from the susnato/clvp_dev style configuration
    >>> model = ClvpDecoder(decoder_configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```clvp_decoderdecoder_configi   r   i`  max_position_embeddingsi  max_text_tokensi   r      r      r   Nn_inner   num_mel_attn_blocksgelu_newactivation_functionr   resid_pdrop
embd_pdropr   r   layer_norm_epsilong{Gz?initializer_range	cls_indexr"   Tsummary_use_projsummary_activationsummary_proj_to_labelssummary_first_dropout	use_cachei    r&   i   r'   r(   P   feature_sizer    r#   r$   )S   -   rd      .decoder_fixing_codesFadd_cross_attention))r<   r=   r>   r?   r,   r/   r   r@   rA   rN   rO   r   r   r   rR   rT   rV   rB   rW   rC   rX   r   rY   rZ   r"   r\   rD   r]   r^   r_   r`   r&   r'   rE   r(   rb   r    r$   rf   tuplerg   rI   r;   r9   rK   rK   e   sz   ,\  J&OJ#&S&OSKs!!GS4Z  ))"K"!J!%(us{( $$#u##L##!d!%)d
)#'D'),53;,It#L#*#+/L#S	/D(/#L#*#L### ##8I$s)eCHo5I %%r;   rK   c                        e Zd ZU dZdZeeedZdZe	e
z  dz  ed<   dZe	e
z  dz  ed<   dZe	e
z  dz  ed<   dZeed	<   d
Zeed<   dZeed<    fdZ xZS )
ClvpConfiga  
    speech_config (`dict`, *optional*):
        Dictionary of configuration options used to initialize CLVP speech encoder.
    decoder_config (`dict`, *optional*):
        Dictionary of configuration options used to initialize [`ClvpDecoderConfig`].

    Example:

    ```python
    >>> from transformers import ClvpConfig, ClvpModelForConditionalGeneration

    >>> # Initializing a ClvpConfig with susnato/clvp_dev style configuration
    >>> configuration = ClvpConfig()

    >>> # Initializing a ClvpModelForConditionalGeneration (with random weights) from the susnato/clvp_dev style configuration
    >>> model = ClvpModelForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config

    >>> # We can also initialize a CLVPConfig from a CLVPTextConfig, CLVPSpeechConfig and a CLVPAutoRegressiveConfig
    >>> from transformers import ClvpEncoderConfig, ClvpDecoderConfig

    >>> # Initializing a CLVP text, CLVP speech and CLVP decoder configuration
    >>> config_text = ClvpEncoderConfig()
    >>> config_speech = ClvpEncoderConfig()
    >>> decoder_config = ClvpDecoderConfig()

    >>> config = ClvpConfig(config_text, config_speech, decoder_config)
    ```r-   )r   r   rM   Nr   r   rM   r   r   g/L
F@logit_scale_init_valuer#   r$   c                    | j                   %t               | _         t        j                  d       n4t	        | j                   t
              rt        di | j                   | _         | j                  %t               | _        t        j                  d       n4t	        | j                  t
              rt        di | j                  | _        | j                  %t               | _        t        j                  d       n4t	        | j                  t
              rt        di | j                  | _        t        | (  di | y )NzR`text_config` is `None`. initializing the `ClvpEncoderConfig` with default values.zT`speech_config` is `None`. initializing the `ClvpEncoderConfig` with default values.zS`image_config` is `None`. initializing the `ClvpDecoderConfig` with default values.rI   )r   r
   r3   info
isinstancedictr   rM   rK   super__post_init__)selfr7   	__class__s     r9   rq   zClvpConfig.__post_init__   s    #02DKKlm(($/0D43C3CDD%!2!4DKKno**D1!2!HT5G5G!HD&"3"5DKKmn++T2"3"Jd6I6I"JD''r;   )r<   r=   r>   r?   r,   r
   rK   sub_configsr   ro   r   rA   r   rM   r   r@   rk   rC   r$   rq   __classcell__)rs   s   @r9   rj   rj      s    > J(*+K 37K((4/648M4**T1859ND++d29NC$*E* ##( (r;   rj   )rj   rK   r
   )r?   rG   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r   
get_loggerr<   r3   r
   rK   rj   __all__rI   r;   r9   <module>r{      s     	 . 3 , 
		H	% -.E4( E4  /E4P -.M&( M&  /M&` -.A(! A(  /A(H Cr;   