
    iH                     v    d Z ddlmZ ddlmZ ddlmZ ddlmZ  ed	      e G d
 de                    Z	dgZ
y)zRAG model configuration    )strict   )PreTrainedConfig)auto_docstring   )
AutoConfig )
checkpointc                       e Zd ZU dZdZdZdZedz  ed<   dZ	e
ed<   dZedz  ed<   dZedz  ed<   dZedz  ed	<   dZeee   z  dz  ed
<   dZedz  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZedz  ed<   dZedz  ed<   d Ze
ed!<   d Ze
ed"<   d#Ze ed$<   dZ!e
ed%<   d Z"e
ed&<   d Z#e
ed'<   d Z$e
ed(<   dZ%e
ed)<   dZ&edz  ed*<    fd+Z'e(d,e)d-e)d.e)fd/       Z* xZ+S )0	RagConfiga%  
    prefix (`str`, *optional*):
        A string prefix prepended to every input before passing to the generator model.
    title_sep (`str`, *optional*, defaults to  `" / "`):
        Separator inserted between the title and the text of the retrieved document when calling [`RagRetriever`].
    doc_sep (`str`, *optional*, defaults to  `" // "`):
        Separator inserted between the text of the retrieved document and the original input when calling
        [`RagRetriever`].
    n_docs (`int`, *optional*, defaults to 5):
        Number of documents to retrieve.
    max_combined_length (`int`, *optional*, defaults to 300):
        Max length of contextualized input returned by [`~RagRetriever.__call__`].
    retrieval_vector_size (`int`, *optional*, defaults to 768):
        Dimensionality of the document embeddings indexed by [`RagRetriever`].
    retrieval_batch_size (`int`, *optional*, defaults to 8):
        Retrieval batch size, defined as the number of queries issues concurrently to the faiss index encapsulated
        [`RagRetriever`].
    dataset (`str`, *optional*, defaults to `"wiki_dpr"`):
        A dataset identifier of the indexed dataset in HuggingFace Datasets (list all available datasets and ids
        using `datasets.list_datasets()`).
    dataset_split (`str`, *optional*, defaults to `"train"`):
        Which split of the `dataset` to load.
    index_name (`str`, *optional*, defaults to `"compressed"`):
        The index name of the index associated with the `dataset`. One can choose between `"legacy"`, `"exact"` and
        `"compressed"`.
    index_path (`str`, *optional*):
        The path to the serialized faiss index on disk.
    passages_path (`str`, *optional*):
        A path to text passages compatible with the faiss index. Required if using
        [`~models.rag.retrieval_rag.LegacyIndex`]
    use_dummy_dataset (`bool`, *optional*, defaults to `False`):
        Whether to load a "dummy" variant of the dataset specified by `dataset`.
    reduce_loss (`bool`, *optional*, defaults to `False`):
        Whether or not to reduce the NLL loss using the `torch.Tensor.sum` operation.
    label_smoothing (`float`, *optional*, defaults to 0.0):
        Only relevant if `return_loss` is set to `True`. Controls the `epsilon` parameter value for label smoothing
        in the loss calculation. If set to 0, no label smoothing is performed.
    do_deduplication (`bool`, *optional*, defaults to `True`):
        Whether or not to deduplicate the generations from different context documents for a given input. Has to be
        set to `False` if used while training with distributed backend.
    exclude_bos_score (`bool`, *optional*, defaults to `False`):
        Whether or not to disregard the BOS token when computing the loss.
    do_marginalize (`bool`, *optional*, defaults to `False`):
        If `True`, the logits are marginalized over all documents by making use of
        `torch.nn.functional.log_softmax`.
    output_retrieved (`bool`, *optional*, defaults to `False`):
        If set to `True`, `retrieved_doc_embeds`, `retrieved_doc_ids`, `context_input_ids` and
        `context_attention_mask` are returned. See returned tensors for more detail.
    dataset_revision (`str`, *optional*,):
        The revision (commit hash, tag, or branch) of the Hugging Face dataset used for retrieval.
    ragTN
vocab_sizeis_encoder_decoderprefixbos_token_idpad_token_ideos_token_iddecoder_start_token_idz / 	title_sepz // doc_sep   n_docsi,  max_combined_lengthi   retrieval_vector_size   retrieval_batch_sizewiki_dprdatasettraindataset_split
compressed
index_name
index_pathpassages_pathFuse_dummy_datasetreduce_lossg        label_smoothingdo_deduplicationexclude_bos_scoredo_marginalizeoutput_retrieved	use_cachedataset_revisionc                 ^   d|vsd|vrt        d| j                   d|       |j                  d      }|j                  d      }|j                  d      }|j                  d      }t        j                  |fi || _        t        j                  |fi || _        t        |    di | y )Nquestion_encoder	generatorzA configuration of type zt cannot be instantiated because not both `question_encoder` and `generator` sub-configurations are passed, but only 
model_type )	
ValueErrorr1   popr   	for_modelr/   r0   super__post_init__)selfkwargsquestion_encoder_configquestion_encoder_model_typedecoder_configdecoder_model_type	__class__s         z/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/rag/configuration_rag.pyr7   zRagConfig.__post_init__m   s    V+{&/H*4??*; <HHNxQ 
 #)**-?"@&=&A&A,&O#K0+//= * 4 45P lTk l#--.@SNS''    r:   generator_configreturnc                 P     | d|j                         |j                         d|S )a  
        Instantiate a [`EncoderDecoderConfig`] (or a derived class) from a pre-trained encoder model configuration and
        decoder model configuration.

        Returns:
            [`EncoderDecoderConfig`]: An instance of a configuration object
        )r/   r0   r2   )to_dict)clsr:   rA   r9   s       r?   'from_question_encoder_generator_configsz1RagConfig.from_question_encoder_generator_configs~   s.     v$;$C$C$EQaQiQiQkvouvvr@   ),__name__
__module____qualname____doc__r1   has_no_defaults_at_initr   int__annotations__r   boolr   strr   r   r   listr   r   r   r   r   r   r   r   r    r"   r#   r$   r%   r&   r'   floatr(   r)   r*   r+   r,   r-   r7   classmethodr   rF   __classcell__)r>   s   @r?   r   r      s   2h J"!Jd
!##FC$J#L#*##L#*#+/L#S	/D(/)-C$J-IsGSFCO""!$3$ !#!GS M3 "J"!Jd
! $M3:$#t#K OU !d!#t# ND "d"It#'cDj'(" 
w&6
wJZ
w	
w 
wr@   r   N)rJ   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   auto.configuration_autor   r   __all__r2   r@   r?   <module>rY      sO     . 3 # 0 2pw  pw  pwf -r@   