
    i-                        d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	  e	j                  e      Z ed      e G d	 d
e                    Z ed      e G d de                    Z ed      e G d de                    Z ed      e G d de                    Z ed      e G d de                    Zg dZy)zBlt model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringloggingzitazap/blt-1b-hf)
checkpointc                   4    e Zd ZU dZdZdZdZeed<   dZ	e
dz  ed<   d	Zedz  ed
<   dZedz  ed<   dZeed<   dZeed<   dZedz  ed<   dZeed<   dZeed<   dZeez  dz  ed<   dZeed<   dZeez  dz  ed<   dZeed<   dZedz  ed<   dZeed<    fd Z xZS )!BltLocalEncoderConfigb  
    cross_attn_all_layers (`bool`, *optional*, defaults to `True`):
        Whether all attention layers have cross attention.
    cross_attn_k (`int`, *optional*, defaults to 2):
        Number of cross-attention heads used in the model.
    hidden_size_global (`int`, *int*, defaults to 2048):
        Hidden size of the global transformer layer.
    blt_local_encoder    A  
vocab_sizeFNcross_attn_all_layers   cross_attn_k   hidden_size_global   hidden_size   num_attention_headsnum_key_value_heads   num_hidden_layersh㈵>rms_norm_eps        dropout `  max_position_embeddingsrope_parameterssilu
hidden_actintermediate_size{Gz?initializer_rangec                     | j                   xs | j                  | _         | j                  xs t        d| j                  z  dz        | _        d| _        t        |   di | y N   r   F )r   r   r&   intr   tie_word_embeddingssuper__post_init__selfkwargs	__class__s     z/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/blt/configuration_blt.pyr0   z#BltLocalEncoderConfig.__post_init__9   s]    #'#;#;#Wt?W?W !%!7!7!X3q4CSCS?SVW?W;X#( ''    )__name__
__module____qualname____doc__
model_typedefault_thetar   r-   __annotations__r   boolr   r   r   r   r   r   r   floatr    r"   r#   r   dictr%   strr&   r(   r0   __classcell__r4   s   @r5   r   r      s     %JMJ).4$;. L#* %)d
)K!!&*t*sL%"%GUS[4%#(S(48O^d*T18J$(sTz(#u#( (r6   r   c                       e Zd ZU dZdZdZdZeed<   dZ	e
dz  ed<   d	Zedz  ed
<   dZedz  ed<   dZeed<   dZeed<   dZedz  ed<   dZeed<   dZeed<   dZeez  dz  ed<   dZeed<   dZeez  dz  ed<   dZeed<   dZeed<   dZeed <   dZedz  ed!<   dZedz  ed"<   dZeee   z  dz  ed#<   d$Z e
ed%<    fd&Z! xZ"S )'BltLocalDecoderConfigr   blt_local_decoderr   r   r   TNr   r   r   r   r   r   r   r   r   r   	   r   r   r   r   r    r!   r"   r#   r$   r%   i   r&   r'   r(   pad_token_idbos_token_ideos_token_idFr.   c                    | j                   xs | j                  | _         | j                  | j                  z  | _        | j                  xs t        d| j                  z  dz        | _        d| _        t        |    di | y r*   	r   r   r   head_dimr&   r-   r.   r/   r0   r1   s     r5   r0   z#BltLocalDecoderConfig.__post_init__c   su    #'#;#;#Wt?W?W ((D,D,DD!%!7!7!X3q4CSCS?SVW?W;X#( ''r6   )#r7   r8   r9   r:   r;   r<   r   r-   r=   r   r>   r   r   r   r   r   r   r   r?   r    r"   r#   r   r@   r%   rA   r&   r(   rH   rI   rJ   listr.   r0   rB   rC   s   @r5   rE   rE   @   s    %JMJ)-4$;- L#* %)d
)K!!&*t*sL%"%GUS[4%#(S(48O^d*T18J!s!#u##L#*##L#*#+/L#S	/D(/ %%( (r6   rE   c                        e Zd ZU dZdZdZeed<   dZeed<   dZ	edz  ed<   d	Z
eed
<   dZeed<   dZeez  dz  ed<   dZeed<   dZeez  dz  ed<   dZeed<   dZeed<   dZeed<   dZeed<    fdZ xZS )BltGlobalTransformerConfigblt_global_transformerr   r   r   r   r   Nr      r   r   r   r   r       r"   r#   r$   r%   i   r&   r'   r(   Fr.   c                    | j                   xs | j                  | _         | j                  | j                  z  | _        | j                  xs t        d| j                  z  dz        | _        d| _        t        |    di | y r*   rL   r1   s     r5   r0   z(BltGlobalTransformerConfig.__post_init__~   su    #'#;#;#Wt?W?W ((D,D,DD!%!7!7!X3q4CSCS?SVW?W;X#( ''r6   )r7   r8   r9   r;   r<   r   r-   r=   r   r   r   r   r?   r    r"   r#   r   r@   r%   rA   r&   r(   r.   r>   r0   rB   rC   s   @r5   rP   rP   k   s     *JMK!!&*t*sL%"%GUS[4%#'S'48O^d*T18J!s!#u# %%( (r6   rP   c                        e Zd ZU dZdZeed<   dZeed<   dZeed<   dZ	eed	<   d
Z
ed
z  ed<   dZeed<   dZeed<   dZeez  d
z  ed<   dZeed<   d
Zeez  d
z  ed<   dZeed<   dZeed<    fdZ xZS )BltPatcherConfigblt_patcherr   r   i   r      r      r   Nr   i    r"   r   r   r   r    r   r&   r#   r'   r(   Fr.   c                    | j                   xs | j                  | _         | j                  | j                  z  | _        | j                  xs t        d| j                  z  dz        | _        d| _        d| _        t        | $  di | y )Nr+   r   Fr$   r,   )
r   r   r   rM   r&   r-   r.   r%   r/   r0   r1   s     r5   r0   zBltPatcherConfig.__post_init__   s|    #'#;#;#Wt?W?W ((D,D,DD!%!7!7!X3q4CSCS?SVW?W;X#(  ''r6   )r7   r8   r9   r;   r   r-   r=   r   r   r   r   r"   r   r?   r    r&   r#   r   r@   r(   r.   r>   r0   rB   rC   s   @r5   rV   rV      s     JJKs!!&*t*#'S'L%"%GUS[4%!s!48O^d*T18#u# %%( (r6   rV   c                       e Zd ZU dZdZdgZdZeee	e
dZdZeed<   dZeed	<   d
Zedz  ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZee   dz  ed<   dZedz  ed<   dZedz  ed<   dZee z  dz  ed<   dZ!ee z  dz  ed<   dZ"ee z  dz  ed<   dZ#ee z  dz  ed<   d Z$eed!<   dZ%edz  ed"<   dZ&edz  ed#<   dZ'eee   z  dz  ed$<   d%Z(eed&<   dZ)e*ez  dz  ed'<    fd(Z+ xZ,S ))	BltConfiga  
    patch_in_forward (`bool`, *optional*, defaults to `True`):
        Whether to perform patching during the forward pass.
    patch_size (`int`, *optional*, defaults to 4):
        Size of the patches used in the patching mechanism.
    patching_mode (`str`, *optional*, defaults to `"entropy"`):
        The mode used for patching, such as entropy-based patching.
    patching_threshold (`float`, *optional*, defaults to 1.34):
        Threshold value used for determining when to apply patches.
    patching_batch_size (`int`, *optional*, defaults to 1):
        Batch size used during the patching process.
    max_patch_length (`int`, *optional*):
        Maximum length of patches that can be generated.
    cross_attn_k (`int`, *optional*, defaults to 2):
        Number of cross-attention heads used in the model.
    encoder_hash_byte_group_size (`list`, *optional*):
        List of byte group sizes used in the encoder hash function.
    encoder_hash_byte_group_vocab (`int`, *optional*, defaults to 500002):
        Vocabulary size for the encoder hash byte groups.
    encoder_hash_byte_group_nb_functions (`int`, *optional*, defaults to 1):
        Number of hash functions used in the encoder byte grouping.
    patcher_config (`BltPatcherConfig`, *optional*):
        Configuration for the patcher component of the model.
    global_config (`BltGlobalTransformerConfig`, *optional*):
        Configuration for the global transformer component of the model.

    Example:
    ```python
    >>> from transformers import BltModel, BltConfig

    >>> # Initializing a Blt configuration
    >>> configuration = BltConfig()

    >>> # Initializing a model from the configuration
    >>> model = BltModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```bltpast_key_valuesr   )patcher_configencoder_configdecoder_configglobal_configr   r   rS   r"   TNpatch_in_forward   
patch_sizeentropypatching_modeg   ]?patching_thresholdr   patching_batch_sizemax_patch_lengthr   r   encoder_hash_byte_group_sizei" encoder_hash_byte_group_vocab$encoder_hash_byte_group_nb_functionsr_   r`   ra   rb   Fr.   rH   rI   rJ   r'   r(   r#   c                    | j                   xs g d| _         | j                  1t        | j                        | _        t        j                  d       nZt        | j                  t              r@| j                  j                  d| j                         t        di | j                  | _        | j                  1t        | j                        | _	        t        j                  d       nZt        | j                  t              r@| j                  j                  d| j                         t        di | j                  | _	        | j                  1t        | j                        | _        t        j                  d       nZt        | j                  t              r@| j                  j                  d| j                         t        di | j                  | _        | j                  1t        | j                        | _        t        j                  d       nZt        | j                  t              r@| j                  j                  d| j                         t        di | j                  | _        | j                  j                  | j                   z  }|| j                  j                  k7  r|nd | j                  _        t%        | L  di | y )	N)r   rd            r+   )r(   z8patcher_config is None, using default Blt patcher configr(   z8encoder_config is None, using default Blt encoder configz8decoder_config is None, using default Blt decoder configz6global_config is None, using default Blt global configr,   )rk   r_   rV   r(   loggerinfo
isinstancer@   
setdefaultr`   r   ra   rE   rb   rP   r   r   encoder_cross_output_sizer/   r0   )r2   r3   rv   r4   s      r5   r0   zBltConfig.__post_init__   s.   ,0,M,M,cQc) &"2TE[E["\DKKRS++T2**+>@V@VW"2"IT5H5H"ID&"7$J`J`"aDKKRS++T2**+>@V@VW"7"N$:M:M"ND&"7$J`J`"aDKKRS++T2**+>@V@VW"7"N$:M:M"ND%!;dNdNd!eDKKPQ**D1))*=t?U?UV!;!Qd>P>P!QD %)$7$7$C$CdFWFW$W!)BdFXFXFdFd)d%jn 	4 	''r6   )-r7   r8   r9   r:   r;   keys_to_ignore_at_inferencer<   rV   r   rE   rP   sub_configsr   r-   r=   r"   rc   r>   re   rg   rA   rh   r?   ri   rj   r   rk   rN   rl   rm   r_   r@   r   r`   ra   rb   r.   rH   rI   rJ   r(   r#   r   r0   rB   rC   s   @r5   r\   r\      s   &P J#4"5M*//3	K J#'S'$(dTk(Jd
 )M3:)'88&'t'#'cDj' L#* 59 $s)d"2906!3:678(#*859ND++d2959ND++d2959ND++d2948M4**T18 %%#L#*##L#*#+/L#S	/D(/#u#48O^d*T18&( &(r6   r\   )r\   rV   r   rE   rP   N)r:   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r   
get_loggerr7   rr   r   rE   rP   rV   r\   __all__r,   r6   r5   <module>r      s	    . 3 1 , 
		H	% -.!(, !(  /!(H -.&(, &(  /&(R -.(!1 (  /(4 -.(' (  /(4 -.p(  p(  /p(fr6   