
    i                         d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZ  e	j                  e      Z ed	
      e G d de                    Z ed	
      e G d de                    Z ed	
      e G d de                    Zg dZy)z InstructBLIP model configuration    )strict   )PreTrainedConfig)!MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)auto_docstringlogging   )CONFIG_MAPPING
AutoConfigz"Salesforce/instructblip-flan-t5-xl)
checkpointc                       e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed	<   d
Zeed<   dZeee   z  eeef   z  ed<   dZeee   z  eeef   z  ed<   dZeed<   dZeed<   dZeez  ed<   dZeed<   dZeed<   y)InstructBlipVisionConfiga*  
    Example:

    ```python
    >>> from transformers import InstructBlipVisionConfig, InstructBlipVisionModel

    >>> # Initializing a InstructBlipVisionConfig with Salesforce/instructblip-flan-t5-xl style configuration
    >>> configuration = InstructBlipVisionConfig()

    >>> # Initializing a InstructBlipVisionModel (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
    >>> model = InstructBlipVisionModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```instructblip_vision_modelvision_config  hidden_sizei   intermediate_size'   num_hidden_layers   num_attention_heads   
image_size   
patch_sizegelu
hidden_actgư>layer_norm_epsg        attention_dropoutg|=initializer_rangeTqkv_biasN)__name__
__module____qualname____doc__
model_typebase_config_keyr   int__annotations__r   r   r   r   listtupler   r   strr   floatr   r    r!   bool     /var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/instructblip/configuration_instructblip.pyr   r      s      -J%OK!s!s!!47Jd3i%S/1746Jd3i%S/16J NE %(us{($u$Hdr0   r   c                       e Zd ZU dZdZdZdZeed<   dZ	eed<   dZ
eed	<   dZeed
<   dZeed<   dZeed<   dZeez  ed<   dZeez  ed<   dZeed<   dZeed<   dZeed<   dZedz  ed<   dZeed<   dZeed<   y)InstructBlipQFormerConfiga  
    cross_attention_frequency (`int`, *optional*, defaults to 2):
        The frequency of adding cross-attention to the Transformer layers.
    encoder_hidden_size (`int`, *optional*, defaults to 1408):
        The hidden size of the hidden states for cross-attention.

    Examples:

    ```python
    >>> from transformers import InstructBlipQFormerConfig, InstructBlipQFormerModel

    >>> # Initializing a InstructBLIP Salesforce/instructblip-flan-t5-xl style configuration
    >>> configuration = InstructBlipQFormerConfig()

    >>> # Initializing a model (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
    >>> model = InstructBlipQFormerModel(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```instructblip_qformerqformer_configi:w  
vocab_sizei   r      r   r   i   r   r   r   g?hidden_dropout_probattention_probs_dropout_probi   max_position_embeddings{Gz?r    g-q=r   r   Npad_token_idr	   cross_attention_frequencyr   encoder_hidden_size)r"   r#   r$   r%   r&   r'   r6   r(   r)   r   r   r   r   r   r,   r8   r-   r9   r:   r    r   r<   r=   r>   r/   r0   r1   r3   r3   >   s    ( (J&OJKs!!!s!J'**03 %#+3#&S&#u#!NE! L#* %&s&##r0   r3   c                        e Zd ZU dZdZddiZeeedZ	dZ
eez  dz  ed<   dZeez  dz  ed<   dZeez  dz  ed	<   d
Zeed<   dZedz  ed<   dZeed<   dZeed<    fdZ xZS )InstructBlipConfiga  
    qformer_config (`dict`, *optional*):
        Dictionary of configuration options used to initialize [`InstructBlipQFormerConfig`].
    num_query_tokens (`int`, *optional*, defaults to 32):
        The number of query tokens passed through the Transformer.

    Example:

    ```python
    >>> from transformers import (
    ...     InstructBlipVisionConfig,
    ...     InstructBlipQFormerConfig,
    ...     OPTConfig,
    ...     InstructBlipConfig,
    ...     InstructBlipForConditionalGeneration,
    ... )

    >>> # Initializing a InstructBlipConfig with Salesforce/instructblip-flan-t5-xl style configuration
    >>> configuration = InstructBlipConfig()

    >>> # Initializing a InstructBlipForConditionalGeneration (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
    >>> model = InstructBlipForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config

    >>> # We can also initialize a InstructBlipConfig from a InstructBlipVisionConfig, InstructBlipQFormerConfig and any PreTrainedConfig

    >>> # Initializing InstructBLIP vision, InstructBLIP Q-Former and language model configurations
    >>> vision_config = InstructBlipVisionConfig()
    >>> qformer_config = InstructBlipQFormerConfig()
    >>> text_config = OPTConfig()

    >>> config = InstructBlipConfig(vision_config=vision_config, qformer_config=qformer_config, text_config=text_config)
    ```instructblipimage_token_idimage_token_index)text_configr5   r   Nr   r5   rD       num_query_tokensg      ?initializer_factorr;   r    c                 R   | j                   (t        d          | _         t        j                  d       nSt	        | j                   t
              r9| j                   j                  dd      }t        |   di | j                   | _         | j                  %t               | _        t        j                  d       n4t	        | j                  t
              rt        di | j                  | _        | j                  %t               | _	        t        j                  d       n4t	        | j                  t
              rt        di | j                  | _	        | j                  j                  | j                  _        | j                   j                  t        v | _        t!        | D  di | y )NoptzTtext_config is None. Initializing the text config with default values (`OPTConfig`).r&   zWqformer_config is None. Initializing the InstructBlipQFormerConfig with default values.z[`vision_config` is `None`. initializing the `InstructBlipVisionConfig` with default values.r/   )rD   r
   loggerinfo
isinstancedictgetr5   r3   r   r   r   r>   r&   r   use_decoder_only_language_modelsuper__post_init__)selfkwargstext_model_type	__class__s      r1   rQ   z InstructBlipConfig.__post_init__   sG   #-e46DKKno(($/"..22<GO-o>RAQAQRD&";"=DKKqr++T2";"Rd>Q>Q"RD%!9!;DKKuv**D1!9!OD<N<N!OD262D2D2P2P//3/?/?/J/JNo/o,''r0   )r"   r#   r$   r%   r&   attribute_mapr   r3   r   sub_configsr   rM   r   r)   r5   rD   rF   r(   rC   rG   r-   r    rQ   __classcell__)rU   s   @r1   r@   r@   h   s    "H  J-M "31K 59M4**T1859ND++d2926K((4/6c$(sTz( ###u#( (r0   r@   )r@   r3   r   N)r%   huggingface_hub.dataclassesr   configuration_utilsr   models.auto.modeling_autor   utilsr   r   autor
   r   
get_loggerr"   rJ   r   r3   r@   __all__r/   r0   r1   <module>r`      s    ' . 3 J , - 
		H	% ?@/   AB ?@%$ 0 %$  A%$P ?@M() M(  AM(` Zr0   