
    i&                         d dl mZ d dlmZ ddlmZ ddlmZmZ ddl	m
Z
  ej                  e      Z ed	      e G d
 de                    Z ed	      e G d de                    ZddgZy)    )Any)strict   )PreTrainedConfig)auto_docstringlogging   )SiglipVisionConfigzgoogle/gemma-3-4b-it)
checkpointc            
       4    e Zd ZU dZdZdgZdddddddddd	Zdgd	gfd
dgd
gfd
gd
gfdZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZee
d<   dZe	e
d<   dZee
d <   d!Zee
d"<   d#Zee
d$<   d%Ze	d&z  e
d'<   d(Ze	ee	   z  d&z  e
d)<   d*Ze	d&z  e
d+<   d#Zee
d,<   d&Zed&z  e
d-<   d.Z ee
d/<   d0Z!e	ez  d&z  e
d1<   dZ"e	e
d2<   d3Z#e	d&z  e
d4<   d&Z$ee   d&z  e
d5<   d&Z%ed&z  e
d6<   d&Z&ed&z  e
d7<   d.Z'ed&z  e
d8<   d9d:d;Z( fd<Z)d= Z*d> Z+ xZ,S )?Gemma3TextConfiga  
    query_pre_attn_scalar (`float`, *optional*, defaults to 256):
        scaling factor used on the attention scores
    final_logit_softcapping (`float`, *optional*):
        Scaling factor when applying tanh softcapping on the logits.
    attn_logit_softcapping (`float`, *optional*):
        Scaling factor when applying tanh softcapping on the attention scores.
    use_bidirectional_attention (`bool`, *optional*, defaults to `False`):
        If True, the model will attend to all text tokens instead of using a causal mask. This does not change
        behavior for vision tokens.

    ```python
    >>> from transformers import Gemma3TextModel, Gemma3TextConfig
    >>> # Initializing a Gemma3Text gemma3_text-7b style configuration
    >>> configuration = Gemma3TextConfig()
    >>> # Initializing a model from the gemma3_text-7b style configuration
    >>> model = Gemma3TextModel(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    gemma3_textpast_key_valuescolwisereplicated_with_grad_allreducerowwise)	zlayers.*.self_attn.q_projzlayers.*.self_attn.k_projzlayers.*.self_attn.v_projzlayers.*.self_attn.q_normzlayers.*.self_attn.k_normzlayers.*.self_attn.o_projzlayers.*.mlp.gate_projzlayers.*.mlp.up_projzlayers.*.mlp.down_proj	input_idsinputs_embedshidden_statesattention_mask)embed_tokenslayersnormi@  
vocab_sizei 	  hidden_sizei $  intermediate_size   num_hidden_layers   num_attention_heads   num_key_value_heads   head_dimgelu_pytorch_tanhhidden_activationi   max_position_embeddings{Gz?initializer_rangegư>rms_norm_epsT	use_cacher   Npad_token_id   eos_token_idr	   bos_token_idtie_word_embeddingsrope_parametersFattention_biasg        attention_dropoutquery_pre_attn_scalari   sliding_windowlayer_typesfinal_logit_softcappingattn_logit_softcappinguse_bidirectional_attentiong    .Ag     @)globallocalc                 D   | j                   r| j                  dz  dz   | _        |j                  dd      | _        | j                  Et        | j                        D cg c]!  }t        |dz   | j                  z        rdnd# c}| _        t        | $  di | y c c}w )Nr	   r-   sliding_window_pattern   sliding_attentionfull_attention )
r9   r5   get_sliding_window_patternr6   ranger   boolsuper__post_init__)selfkwargsi	__class__s      /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/gemma3/configuration_gemma3.pyrG   zGemma3TextConfig.__post_init__h   s    ++#'#6#6!#;q"@D (.zz2JA'N$# t556  (,QUd6R6R,R'S#Yii D
 	'' s   &Bc                     | j                   | j                  z  dk7  r&t        d| j                    d| j                   d      y)zOPart of `@strict`-powered validation. Validates the architecture of the config.r   zThe hidden size (z6) is not a multiple of the number of attention heads (z).N)r   r    
ValueError)rH   s    rL   validate_architecturez&Gemma3TextConfig.validate_architecturew   sS    d666!;#D$4$4#5 622327  <    c                 z   |j                  dd       }ddiddid}| j                  | j                  n|| _        || j                  d   j                  |       | j                  j                  d      ddi| j                  d<   | j                  d   j	                  d|j                  d| j
                  d                | j                  j                  d      ddi| j                  d<   | j                  d   j	                  d|j                  d	| j
                  d
                | j                          |S )Nrope_scaling	rope_typedefault)r?   r@   r@   
rope_thetar:   r?   rope_local_base_freqr;   )popr1   updaterB   
setdefaultdefault_thetastandardize_rope_params)rH   rI   rR   default_rope_paramss       rL   convert_rope_params_to_dictz,Gemma3TextConfig.convert_rope_params_to_dict   sI   zz.$7
 #.y!9*I6
 8<7K7K7Wt33]p#  !1299,G ##$45=6A95MD  !12-.99&**\43E3Eh3OP	
 ##$78@9Di8PD  !4501<<&**%;T=O=OPW=XY	

 	$$&rP   )-__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencebase_model_tp_planbase_model_pp_planr   int__annotations__r   r   r   r    r"   r$   r&   strr'   r)   floatr*   r+   rE   r,   r.   listr/   r0   r1   dictr2   r3   r4   r5   r6   r7   r8   r9   rZ   rG   rO   r]   __classcell__rK   s   @rL   r   r   !   s   , J#4"5%.%.%.%E%E%."+ )"+
 &(9:#%568IJ!"_$56 JK!s!s    Hc0s0#*S*#u#L%It L#* +,L#S	/D(, L#*  $$#'OTD[' ND ,/sU{T)/!$3$!%NC$J%$(KcT!(,0UT\0+/EDL//44*X>M(rP   r   c                        e Zd ZU dZdZddddZeedZdZ	ee
eef   z  dz  ed	<   dZee
eef   z  dz  ed
<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<    fdZ xZS )Gemma3Configa  
    mm_tokens_per_image (`int`, *optional*, defaults to 256):
        The number of tokens per image embedding.
    boi_token_index (`int`, *optional*, defaults to 255999):
        The begin-of-image token index to wrap the image prompt.
    eoi_token_index (`int`, *optional*, defaults to 256000):
        The end-of-image token index to wrap the image prompt.

    Example:

    ```python
    >>> from transformers import Gemma3ForConditionalGeneration, Gemma3Config, SiglipVisionConfig, Gemma3TextConfig

    >>> # Initializing a Siglip-like vision config
    >>> vision_config = SiglipVisionConfig()

    >>> # Initializing a Gemma3 Text config
    >>> text_config = Gemma3TextConfig()

    >>> # Initializing a Gemma3 gemma-3-4b style configuration
    >>> configuration = Gemma3Config(vision_config, text_config)

    >>> # Initializing a model from the gemma-3-4b style configuration
    >>> model = Gemma3TextConfig(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```gemma3image_token_indexboi_token_indexeoi_token_index)image_token_idboi_token_ideoi_token_id)text_configvision_configNrw   rx   r#   mm_tokens_per_imagei i  i   r(   r)   Tr0   c                    | j                   %t               | _         t        j                  d       n4t	        | j                   t
              rt        di | j                   | _         t	        | j                  t
              rt        di | j                  | _        n0| j                  $t               | _        t        j                  d       t        | $  di | y )Nz@text_config is None, using default Gemma3TextConfig text config.zFvision_config is None, using default SiglipVisionConfig vision config.rA   )
rw   r   loggerinfo
isinstancerk   rx   r
   rF   rG   )rH   rI   rK   s     rL   rG   zGemma3Config.__post_init__   s    #/1DKKZ[(($//C$2B2BCDd(($/!3!Id6H6H!ID'!3!5DKK`a''rP   )r^   r_   r`   ra   rb   attribute_mapr   r
   sub_configsrw   rk   rh   r   rg   rx   ry   rf   rr   rs   rq   r)   ri   r0   rE   rG   rl   rm   s   @rL   ro   ro      s    : J-))M (+K
 =AK!DcN2T9@@DM%S#X6=D&)t)")OS4Z)")OS4Z)$+sTz+&*ut|*'++( (rP   ro   N)typingr   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r   siglipr
   
get_loggerr^   r{   r   ro   __all__rA   rP   rL   <module>r      s   *  . 3 , ' 
		H	% 12w' w  3wt 12?(# ?(  3?(D -
.rP   