
    iH-                        d Z ddlmZ ddlmZ ddlmZmZ ddlm	Z	m
Z
  ej                  e      Z ed	      e G d
 de                    Z ed	      e G d de                    Z ed	      e G d de                    Z ed	      e G d de                    Z ed	      e G d de                    Zg dZy)zBARK model configuration    )strict   )PreTrainedConfig)auto_docstringlogging   )CONFIG_MAPPING
AutoConfigz	suno/bark)
checkpointc                       e Zd ZU dZdgZdddddZdZeed<   d	Z	eed<   d	Z
eed
<   dZeed<   dZeed<   dZeed<   dZeez  ed<   dZeed<   dZeed<   dZeed<   y)BarkSubModelConfiga  
    block_size (`int`, *optional*, defaults to 1024):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    input_vocab_size (`int`, *optional*, defaults to 10_048):
        Vocabulary size of a Bark sub-model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`{model}`]. Defaults to 10_048 but should be carefully thought with
        regards to the chosen sub-model.
    output_vocab_size (`int`, *optional*, defaults to 10_048):
        Output vocabulary size of a Bark sub-model. Defines the number of different tokens that can be represented
        by the: `output_ids` when passing forward a [`{model}`]. Defaults to 10_048 but should be carefully thought
        with regards to the chosen sub-model.
    bias (`bool`, *optional*, defaults to `True`):
        Whether or not to use bias in the linear layers and layer norm layers.
    past_key_values	num_heads
num_layersinput_vocab_size
block_size)num_attention_headsnum_hidden_layers
vocab_sizewindow_sizei   i@'  output_vocab_size   i   hidden_sizeg        dropoutTbias{Gz?initializer_range	use_cacheN)__name__
__module____qualname____doc__keys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   r   r   r   r   r   floatr   boolr   r        |/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/bark/configuration_bark.pyr   r      s      $5"5  +)(#	M J"c"#s#JIsKGUS[D$#u#Itr*   r   c                       e Zd ZdZdZdZy)BarkSemanticConfiga  
    block_size (`int`, *optional*, defaults to 1024):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    input_vocab_size (`int`, *optional*, defaults to 10_048):
        Vocabulary size of a Bark sub-model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`{model}`]. Defaults to 10_048 but should be carefully thought with
        regards to the chosen sub-model.
    output_vocab_size (`int`, *optional*, defaults to 10_048):
        Output vocabulary size of a Bark sub-model. Defines the number of different tokens that can be represented
        by the: `output_ids` when passing forward a [`{model}`]. Defaults to 10_048 but should be carefully thought
        with regards to the chosen sub-model.
    bias (`bool`, *optional*, defaults to `True`):
        Whether or not to use bias in the linear layers and layer norm layers

    Example:

    ```python
    >>> from transformers import BarkSemanticConfig, BarkSemanticModel

    >>> # Initializing a Bark sub-module style configuration
    >>> configuration = BarkSemanticConfig()

    >>> # Initializing a model (with random weights) from the suno/bark style configuration
    >>> model = BarkSemanticModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```semanticsemantic_configNr   r    r!   r"   
model_typebase_config_keyr)   r*   r+   r-   r-   B   s    < J'Or*   r-   c                       e Zd ZdZdZdZy)BarkCoarseConfiga  
    block_size (`int`, *optional*, defaults to 1024):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    input_vocab_size (`int`, *optional*, defaults to 10_048):
        Vocabulary size of a Bark sub-model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`{model}`]. Defaults to 10_048 but should be carefully thought with
        regards to the chosen sub-model.
    output_vocab_size (`int`, *optional*, defaults to 10_048):
        Output vocabulary size of a Bark sub-model. Defines the number of different tokens that can be represented
        by the: `output_ids` when passing forward a [`{model}`]. Defaults to 10_048 but should be carefully thought
        with regards to the chosen sub-model.
    bias (`bool`, *optional*, defaults to `True`):
        Whether or not to use bias in the linear layers and layer norm layers

    Example:

    ```python
    >>> from transformers import BarkCoarseConfig, BarkCoarseModel

    >>> # Initializing a Bark sub-module style configuration
    >>> configuration = BarkCoarseConfig()

    >>> # Initializing a model (with random weights) from the suno/bark style configuration
    >>> model = BarkCoarseModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```coarse_acousticscoarse_acoustics_configNr0   r)   r*   r+   r4   r4   g   s    < $J/Or*   r4   c                   D    e Zd ZU dZdZdZdZeed<   dZ	e
ed<   dZe
ed	<   y
)BarkFineConfiga  
    block_size (`int`, *optional*, defaults to 1024):
        The maximum sequence length that this model might ever be used with. Typically set this to something large
        just in case (e.g., 512 or 1024 or 2048).
    input_vocab_size (`int`, *optional*, defaults to 10_048):
        Vocabulary size of a Bark sub-model. Defines the number of different tokens that can be represented by the
        `inputs_ids` passed when calling [`{model}`]. Defaults to 10_048 but should be carefully thought with
        regards to the chosen sub-model.
    output_vocab_size (`int`, *optional*, defaults to 10_048):
        Output vocabulary size of a Bark sub-model. Defines the number of different tokens that can be represented
        by the: `output_ids` when passing forward a [`{model}`]. Defaults to 10_048 but should be carefully thought
        with regards to the chosen sub-model.
    bias (`bool`, *optional*, defaults to `True`):
        Whether or not to use bias in the linear layers and layer norm layers
    n_codes_total (`int`, *optional*, defaults to 8):
        The total number of audio codebooks predicted. Used in the fine acoustics sub-model.
    n_codes_given (`int`, *optional*, defaults to 1):
        The number of audio codebooks predicted in the coarse acoustics sub-model. Used in the acoustics
        sub-models.

    Example:

    ```python
    >>> from transformers import BarkFineConfig, BarkFineModel

    >>> # Initializing a Bark sub-module style configuration
    >>> configuration = BarkFineConfig()

    >>> # Initializing a model (with random weights) from the suno/bark style configuration
    >>> model = BarkFineModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```fine_acousticsfine_acoustics_configTtie_word_embeddings   n_codes_total   n_codes_givenN)r   r    r!   r"   r1   r2   r;   r(   r&   r=   r%   r?   r)   r*   r+   r8   r8      s3    !F "J-O $$M3M3r*   r8   c                        e Zd ZU dZdZeeeedZ	dZ
eez  dz  ed<   dZeez  dz  ed<   dZeez  dz  ed<   dZeez  dz  ed<   d	Zeed
<    fdZ xZS )
BarkConfigaf  
    semantic_config ([`BarkSemanticConfig`], *optional*):
        Configuration of the underlying semantic sub-model.
    coarse_acoustics_config ([`BarkCoarseConfig`], *optional*):
        Configuration of the underlying coarse acoustics sub-model.
    fine_acoustics_config ([`BarkFineConfig`], *optional*):
        Configuration of the underlying fine acoustics sub-model.
    codec_config ([`AutoConfig`], *optional*):
        Configuration of the underlying codec sub-model.

    Example:

    ```python
    >>> from transformers import (
    ...     BarkSemanticConfig,
    ...     BarkCoarseConfig,
    ...     BarkFineConfig,
    ...     BarkModel,
    ...     BarkConfig,
    ...     AutoConfig,
    ... )

    >>> # Initializing Bark sub-modules configurations.
    >>> semantic_config = BarkSemanticConfig()
    >>> coarse_acoustics_config = BarkCoarseConfig()
    >>> fine_acoustics_config = BarkFineConfig()
    >>> codec_config = AutoConfig.from_pretrained("facebook/encodec_24khz")


    >>> # Initializing a Bark module style configuration
    >>> configuration = BarkConfig(
    ...     semantic_config, coarse_acoustics_config, fine_acoustics_config, codec_config
    ... )

    >>> # Initializing a model (with random weights)
    >>> model = BarkModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    bark)r/   r6   r:   codec_configNr/   r6   r:   rC   r   r   c                    | j                   %t               | _         t        j                  d       n4t	        | j                   t
              rt        di | j                   | _         | j                  %t               | _        t        j                  d       n4t	        | j                  t
              rt        di | j                  | _        | j                  %t               | _        t        j                  d       n4t	        | j                  t
              rt        di | j                  | _        | j                  (t        d          | _
        t        j                  d       nSt	        | j                  t
              r9| j                  j                  dd      }t        |   di | j                  | _
        t        | 8  di | y )NzW`semantic_config` is `None`. Initializing the `BarkSemanticConfig` with default values.z]`coarse_acoustics_config` is `None`. Initializing the `BarkCoarseConfig` with default values.zY`fine_acoustics_config` is `None`. Initializing the `BarkFineConfig` with default values.encodeczN`codec_config` is `None`. Initializing the `codec_config` with default values.r1   r)   )r/   r-   loggerinfo
isinstancedictr6   r4   r:   r8   rC   r	   getsuper__post_init__)selfkwargscodec_model_type	__class__s      r+   rL   zBarkConfig.__post_init__   sc   '#5#7D KKqr,,d3#5#M8L8L#MD ''/+;+=D(KKo 44d;+;+[d>Z>Z+[D(%%-)7)9D&KKst22D9)7)U$:T:T)UD&$ .y 9 ;DKKhi))40#0044\9M ./? @ U4CTCT UD''r*   )r   r    r!   r"   r1   r-   r4   r8   r
   sub_configsr/   rI   r   r&   r6   r:   rC   r   r'   rL   __classcell__)rP   s   @r+   rA   rA      s    (T J-#3!/"	K 7;OT,,t3:>BT$44t;B<@4"22T9@37L$))D07#u#( (r*   rA   )r4   rA   r8   r-   N)r"   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r   autor	   r
   
get_loggerr   rF   r   r-   r4   r8   rA   __all__r)   r*   r+   <module>rY      s    . 3 , - 
		H	% ;'#) #  (#L ;' (+  (  ( (F ;' 0)  0  ( 0F ;')' )  ()X ;'T(! T(  (T(n Ur*   