
    #i                    2   d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlZd d	lmZ d d
lmZmZmZmZ d dlmZmZmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3m4Z4 d dl5m6Z6 d dl7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z= d dl>m?Z?m@Z@mAZA d dlBmCZCmDZDmEZEmFZF d dlGmHZH  eE       rd dlImJZJmKZKmLZLmMZMmNZN  ej                  eP      ZQ	 d dl#mRZR  G d dee      ZTy# eS$ r dZRY w xY w)    )annotationsN)ABCabstractmethod)OrderedDict)Callable)nullcontext)partial)Any)nn)BatchSamplerConcatDataset
DataLoaderRandomSampler)EvalPredictionPreTrainedTokenizerBaseTrainerTrainerCallback)FeatureExtractionMixin)BaseImageProcessor)WandbCallback)ProcessorMixin)TRAINING_ARGS_NAME)EvalLoopOutput)BaseDataCollator)BaseEvaluatorSequentialEvaluator	BaseModel)BaseModelCardCallbackBaseModelCardData)Router)DefaultBatchSamplerGroupByLabelBatchSamplerMultiDatasetDefaultBatchSamplerNoDuplicatesBatchSamplerProportionalBatchSamplerRoundRobinBatchSampler)BaseTrainingArgumentsBatchSamplersMultiDatasetBatchSamplers)disable_loggingfullnameis_datasets_availableis_training_available)deprecated_kwargs)DatasetDatasetDictIterableDatasetIterableDatasetDictValue)TrackioCallbackc                      e Zd ZdZeZeZeZ	e
ZeZ ed      	 	 	 	 	 	 	 	 	 	 	 	 	 	 d"	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d# fd       Z	 d$	 	 	 	 	 	 	 d%dZd&dZd$d' fdZd(d	Z	 	 	 	 	 	 d)d
Zed*d       Z	 	 d+	 	 	 	 	 	 	 d,dZd-dZd$d. fdZ	 	 	 	 d/dZ	 	 	 d0	 	 	 	 	 	 	 d1 fdZ	 	 	 d0	 	 	 	 	 	 	 	 	 	 	 d2 fdZd3dZd$d4dZ	 	 	 d5	 	 	 	 	 	 	 	 	 	 	 	 	 d6dZ	 	 d7	 	 	 	 	 	 	 	 	 d8dZ 	 	 	 	 	 	 	 	 d9dZ!d:dZ"d$d;dZ#d<dZ$dd=dZ%d>dZ&	 d	 	 	 	 	 d?dZ'	 	 	 	 	 	 	 	 d@dZ(	 d$	 	 	 	 	 dAdZ)e*	 	 d	 	 	 	 	 	 	 dBd       Z+	 	 	 	 	 	 	 	 	 dC	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dDd Z,	 d$	 	 	 	 	 dE fd!Z- xZ.S )FBaseTrainerup  
    BaseTrainer is a simple but feature-complete training and eval loop for PyTorch
    based on the 🤗 Transformers :class:`~transformers.Trainer`.

    This trainer integrates support for various :class:`transformers.TrainerCallback` subclasses, such as:

    - :class:`~transformers.integrations.WandbCallback` to automatically log training metrics to W&B if `wandb` is installed
    - :class:`~transformers.integrations.TensorBoardCallback` to log training metrics to TensorBoard if `tensorboard` is accessible.
    - :class:`~transformers.integrations.CodeCarbonCallback` to track the carbon emissions of your model during training if `codecarbon` is installed.

        - Note: These carbon emissions will be included in your automatically generated model card.

    See the Transformers `Callbacks <https://huggingface.co/docs/transformers/main/en/main_classes/callback>`_
    documentation for more information on the integrated callbacks and how to write your own callbacks.

    Args:
        model (:class:`~sentence_transformers.base.model.BaseModel`, *optional*):
            The model to train, evaluate or use for predictions. If not provided, a `model_init` must be passed.
        args (:class:`~sentence_transformers.base.training_args.BaseTrainingArguments`, *optional*):
            The arguments to tweak for training. Will default to a basic instance of
            :class:`~sentence_transformers.base.training_args.BaseTrainingArguments` with the
            `output_dir` set to a directory named *tmp_trainer* in the current directory if not provided.
        train_dataset (Union[:class:`datasets.Dataset`, :class:`datasets.DatasetDict`, :class:`datasets.IterableDataset`, Dict[str, :class:`datasets.Dataset`]], *optional*):
            The dataset to use for training. Must have a format accepted by your loss function.
        eval_dataset (Union[:class:`datasets.Dataset`, :class:`datasets.DatasetDict`, :class:`datasets.IterableDataset`, Dict[str, :class:`datasets.Dataset`]], *optional*):
            The dataset to use for evaluation. Must have a format accepted by your loss function.
        loss (Optional[Union[:class:`torch.nn.Module`, Dict[str, :class:`torch.nn.Module`],            Callable[[:class:`~sentence_transformers.base.model.BaseModel`], :class:`torch.nn.Module`],            Dict[str, Callable[[:class:`~sentence_transformers.base.model.BaseModel`]]]], *optional*):
            The loss function to use for training. Can either be a loss class instance, a dictionary mapping
            dataset names to loss class instances, a function that returns a loss class instance given a model,
            or a dictionary mapping dataset names to functions that return a loss class instance given a model.
            In practice, the latter two are primarily used for hyper-parameter optimization.
        evaluator (Union[:class:`~sentence_transformers.base.evaluation.BaseEvaluator`,            List[:class:`~sentence_transformers.base.evaluation.BaseEvaluator`]], *optional*):
            The evaluator instance for useful evaluation metrics during training. You can use an ``evaluator`` with
            or without an ``eval_dataset``, and vice versa. Generally, the metrics that an ``evaluator`` returns
            are more useful than the loss value returned from the ``eval_dataset``. A list of evaluators will be
            wrapped in a :class:`~sentence_transformers.base.evaluation.SequentialEvaluator` to run them sequentially.
        callbacks (List of [:class:`transformers.TrainerCallback`], *optional*):
            A list of callbacks to customize the training loop. Will add those to the list of default callbacks
            detailed in [here](callback).

            If you want to remove one of the default callbacks used, use the [`Trainer.remove_callback`] method.
        optimizers (`Tuple[:class:`torch.optim.Optimizer`, :class:`torch.optim.lr_scheduler.LambdaLR`]`, *optional*, defaults to `(None, None)`):
            A tuple containing the optimizer and the scheduler to use. Will default to an instance of :class:`torch.optim.AdamW`
            on your model and a scheduler given by :func:`transformers.get_linear_schedule_with_warmup` controlled by `args`.

    Important attributes:

        - **model** -- Always points to the core model. If using a transformers model, it will be a [`PreTrainedModel`]
          subclass.
        - **model_wrapped** -- Always points to the most external model in case one or more other modules wrap the
          original model. This is the model that should be used for the forward pass. For example, under `DeepSpeed`,
          the inner model is wrapped in `DeepSpeed` and then again in `torch.nn.DistributedDataParallel`. If the inner
          model hasn't been wrapped, then `self.model_wrapped` is the same as `self.model`.
        - **is_model_parallel** -- Whether or not a model has been switched to a model parallel mode (different from
          data parallelism, this means some of the model layers are split on different GPUs).
        - **place_model_on_device** -- Whether or not to automatically place the model on the device - it will be set
          to `False` if model parallel or deepspeed is used, or if the default
          `TrainingArguments.place_model_on_device` is overridden to return `False` .
        - **is_in_train** -- Whether or not a model is currently running `train` (e.g. when `evaluate` is called while
          in `train`)

    processing_class)	tokenizerNNc                
   t               s#t        d| j                  j                   d      |Ed}t        j                  d| j                  j                   d| d       | j                  |      }n8t        || j                        s"t        dt        | j                         d	      |=|	|	| _
        | j                         }nYt        d
| j                  j                   d      |	-t        j                  d
| j                  j                   d       |	| _
        |
-t        j                  d| j                  j                   d       | j                  dddi      j                         }|j                  r;|j                   j"                  s%|j                   j%                  |j                         |Bt'        |d      r6t        |j(                  t*        t,        t.        t0        f      r|j(                  }|| j3                  |||      }t5        ddg||g      D ]  \  }}t        |t6              s|j8                  $t;        t=        |            }t>        dt@        dtB        dtD        di}|jG                         D ci c]*  \  }}|tI        |jK                  tM        |      d            , }}}t        d| d| d| d| d 	       t        |tN              rt        |tP              stQ        |      }t        |tN              rt        |tP              stQ        |      }|A|?|jR                  d!k7  r0t        d"|jR                   d#| j                  j                   d$      tT        |   | j                  rd n|||||||nd%||	|
||||&       | jX                  d%k(  rd | _,        i i d'| _-        d| _.        |  |  |  t_        d( | j`                  jb                  D              r td        jf                  ji                  d)d*       tj        Ft_        d+ | j`                  jb                  D              r td        jf                  ji                  d,d*       || jm                  | jn                        }t        |tN              r|jG                         D ci c]  \  }}|| jq                  ||       c}}| _9        t5        ddg||g      D ]  \  }}|	t        |tN              st        d-| d.      tu        |jw                               tu        |jw                               z
  x}s^t        d/| d0ty        |       d1t{        |      d2k(  rd3nd4 d5| d6	       n| jq                  ||      | _9        |t        |t|              st        |      }|| _@        | j                  | j                  |d7      | _A        | jX                  | j                  |d7      | _,        | j                  |       y c c}}w c c}}w )8NzTo train a z model, you need to install the `accelerate` and `datasets` modules. You can do so with the `train` extra:
pip install -U "sentence-transformers[train]"tmp_trainerzNo `args` passed, using `z(output_dir=z)`.)
output_dirzPlease pass an instance of `z` as the `args` argument.`z4` requires either a `model` or `model_init` argumentz` requires either a `model` or `model_init` argument, but not both. `model_init` will overwrite your model when calling the `train` method.z7`compute_metrics` is currently not compatible with the z. Please use the `evaluator` argument instead for detailed evaluation metrics, or the `eval_dataset` argument for the evaluation loss.unuseduse_configured_stateT)r=   accelerator_config	processor)modelargsr8   trainevalstringint64float32boolnullzThe provided `z6_dataset` must have Features. Specify them with e.g.:
z_dataset = z_dataset.cast(Features(z))
or by providing the Features to the IterableDataset initialization method. See the Datasets documentation for more information on dataset Features: https://huggingface.co/docs/datasets/en/about_dataset_featuresnoz%You have set `args.eval_strategy` to zu, but you didn't provide an `eval_dataset` or an `evaluator`. Either provide an `eval_dataset` or an `evaluator` to `z7`, or set `args.eval_strategy='no'` to skip evaluation.dummy)rC   rD   data_collatortrain_dataseteval_datasetr8   
model_initcompute_metrics	callbacks
optimizersoptimizer_cls_and_kwargspreprocess_logits_for_metrics)rE   rF   c              3  <   K   | ]  }t        |t                y wN)
isinstancer   .0callbacks     s/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/sentence_transformers/base/trainer.py	<genexpr>z'BaseTrainer.__init__.<locals>.<genexpr>  s     cxz(M2c   WANDB_PROJECTzsentence-transformersc              3  <   K   | ]  }t        |t                y wrX   )rY   r5   rZ   s     r]   r^   z'BaseTrainer.__init__.<locals>.<genexpr>  s      /
6>Jx1/
r_   TRACKIO_PROJECTz,If the provided `loss` is a dict, then the `z"_dataset` must be a `DatasetDict`.z:If the provided `loss` is a dict, then all keys from the `z;_dataset` dictionary must occur in `loss` also. Currently, z occur   s z in `z_dataset` but not in `loss`.dataset_name)Dr.   RuntimeErrormodel_class__name__loggerinfotraining_args_classrY   
ValueErrorr,   rQ   call_model_init	__class__warningto_dicthub_model_idmodel_card_datamodel_idset_model_idhasattrrB   r   r   r   r   get_data_collatorzipr2   column_namesnextiterstrintfloatrJ   itemsr4   gettypedictr1   eval_strategysuper__init__rP   accum_loss_componentscan_return_lossanycallback_handlerrS   osenviron
setdefaultr5   get_default_lossrC   prepare_losslosssetkeyssortedlenr   r   	evaluatorrO   preprocess_datasetadd_model_card_callback)selfrC   rD   rO   rP   r   r   rN   r8   rQ   rR   rS   rT   rU   rV   r=   default_args_dictrg   datasetsamplenaive_type_mappingkeyvalueexample_featuresloss_fnmissingrp   s                             r]   r   zBaseTrainer.__init__   sJ   4 %&d..778 9@ @  <&JKK3D4L4L4U4U3VVbcmbnnqrs++z+BDD$":":;.x8P8P/Q.RRkl  =%",,,."Qt~~'>'>&??s#tuu%//0 1^ ^ )DO&NNI$..JaJaIb c' ' !445KT4R 5 

') 	 U%:%:%C%C!!..t/@/@A ${+"9;MOegu!v  %  22T\l2mM%('6):]L<Y%Z 	!L''?38L8L8Td7m,&)8S'5)UY[a%b"^d^j^j^l$PZPSUZC155d5k6JKK$  $ !$\N2i#nK~=TUeTf gUU 	 mT*:m[3Y'6MlD)*\;2W&|4L I$5$:L:LPT:T78J8J7K LJJN..JaJaIb cGG  	//$u'')5)AYEV\c-!+!%=*G 	 	
 ' $D 022%>"  $ c4CXCXCbCbccJJ!!/3JK&3 /
BFBWBWBaBa/
 ,
 JJ!!"35LM<((4DdD!fjfpfpfrsMb\[bt'8'8%'HHsDI),gv->P\@])^ %g?!'40$F|nTvw  "',,.1C		4DDD7D$TUaTb c&&,Wo%6fCLTUDUS[]<^^cdpcq  rNO  ))$6DI  I})M+I6I")!%!8!8U\!8!]D( $ 7 7SY 7 ZD$$%67y$F ts   /W9W?c                    t         |j                         D cg c]  }|j                   c}v r|j                  st	        d      | j                  |j                  |j                  |j                        S c c}w )as  
        Load the data collator for the trainer.

        Args:
            model (:class:`~sentence_transformers.base.model.BaseModel`):
                The model to train, evaluate or use for predictions.
            args (:class:`~sentence_transformers.base.training_args.BaseTrainingArguments`):
                The arguments to tweak for training.
            processing_class (Union[:class:`transformers.PreTrainedTokenizerBase`, :class:`transformers.BaseImageProcessor`, :class:`transformers.FeatureExtractionMixin`, :class:`transformers.ProcessorMixin`], *optional*):
                The processing class to use for tokenization or image processing.
        Returns:
            :class:`BaseDataCollator`: The data collator to use for the trainer

        .. note::

            This method can be overridden by subclassing the trainer to use a custom data collator.
        al  You are using a Router module in your model, but you did not provide a `router_mapping` in the training arguments. This means that the Router module will not be able to route the inputs to the correct submodules. Please provide a `router_mapping` that maps column names to routes, e.g. {'column_one': 'query', 'column_two': 'document', 'column_three': 'document'}.)preprocess_fnrouter_mappingprompts)r!   childrenrp   r   rn   data_collator_class
preprocessr   )r   rC   rD   r8   modules        r]   rx   zBaseTrainer.get_data_collator8  sx    6 U^^5EF6f&&FFtObObf  ''**..LL ( 
 	
 Gs   A8c                    | j                  |      }| j                  |       |j                  | j                  | j                  | j
                  | j                  |        y)a)  
        Add a callback responsible for automatically tracking data required for the automatic model card generation

        This method is called in the ``__init__`` method of the trainer subclass.

        Args:
            default_args_dict (Dict[str, Any]): A dictionary of the default training arguments, so we can determine
                which arguments have been changed for the model card.

        .. note::

            This method can be overridden by subclassing the trainer to remove/customize this callback in custom uses cases
        )rC   trainerN)model_card_callback_classadd_callbackon_init_endrD   statecontrolrC   )r   r   model_card_callbacks      r]   r   z#BaseTrainer.add_model_card_callbacka  sR     #<<=NO-.''		4::t||SWS]S]gk'l    c                   t         |   |      }t        | d      s|S t        | j                  t
              r| j                  j                         D ]k  \  }}t        |t        j                  j                        s ||      | j                  |<   @t        |d      sM| j                  ||      | j                  |<   m |S t        | j                  t        j                  j                        s| j	                  |      | _        |S t        | j                  d      r!| j                  | j                  |      | _        |S )N)trialr   rC   )r   ro   rw   rY   r   r   r   torchr   Moduleoverride_model_in_loss)r   r   rC   r   r   rp   s        r]   ro   zBaseTrainer.call_model_initt  s    'e'4tV$L dii& $		 1 QW!'588??;%,U^DIIcNWg.%)%@%@%%PDIIcNQ  DIIuxx7		%(DI
  TYY(33DIIuEDIr   c           	         ddl m} |j                         D ]`  \  }}|dk(  rt        ||      r||_        t        |t
        j                  j                        sDt        ||| j                  ||             b |S )Nr   r   rC   )
 sentence_transformers.base.modelr   named_childrenrY   rC   r   r   r   setattrr   )r   r   rC   r   namechilds         r]   r   z"BaseTrainer.override_model_in_loss  sl    >..0 	OKD%w:eY#?"
E588??3dD$?$?u$MN		O
 r   c                   t        |t        j                  j                        r|j	                  |j
                        }n! ||      j	                  |j
                        }t        |dd      rbt        |d   t              r.|d   j                  j                         D cg c]  }|d   	 }}n|d   g}|D ]  }t        |d      sd|_         |S c c}w )Nrequires_media_countsFr   track_media_countsT)rY   r   r   r   todevicegetattrr!   sub_modulesvaluesrw   r   )r   r   rC   routeinput_modulesr   s         r]   r   zBaseTrainer.prepare_loss  s    
 dEHHOO,775<<(D;>>%,,/D 40%8%(F+7<Qx7K7K7R7R7T Ueq U U!&q
' 56#7804F-5  !Vs   !Cc                     y rX    )r   rC   s     r]   r   zBaseTrainer.get_default_loss  s    r   c                   |j                  dd      }| j                  |      \  }}| j                  }t        |t              r|r||   }|| j
                  k(  r-t        |d      r!|j                  |k7  r| j                  ||      } |||      }	t        |	t              rK| j                  |	       t        j                  t        |	j                                     j                         }	|r|	i fS |	S )a  
        Computes the loss for the BaseModel model.

        It uses ``self.loss`` to compute the loss, which can be a single loss function or a dictionary of loss functions
        for different datasets. If the loss is a dictionary, the dataset name is expected to be passed in the inputs
        under the key "dataset_name". This is done automatically in the ``add_dataset_name_column`` method.
        Note that even if ``return_outputs = True``, the outputs will be empty, as the BaseModel losses do not
        return outputs.

        Args:
            model (BaseModel): The BaseModel model.
            inputs (Dict[str, Union[torch.Tensor, Any]]): The input data for the model.
            return_outputs (bool, optional): Whether to return the outputs along with the loss. Defaults to False.
            num_items_in_batch (int, optional): The number of items in the batch. Defaults to None. Unused, but required by the transformers Trainer.

        Returns:
            Union[torch.Tensor, Tuple[torch.Tensor, Dict[str, Any]]]: The computed loss. If `return_outputs` is True, returns a tuple of loss and outputs. Otherwise, returns only the loss.
        rg   NrC   )popcollect_featuresr   rY   r   model_wrappedrw   rC   r   track_loss_componentsr   stacklistr   sum)
r   rC   inputsreturn_outputsnum_items_in_batchrg   featureslabelsr   r   s
             r]   compute_losszBaseTrainer.compute_loss  s    2 zz.$7008&))gt$l+G
 T''')&11'5AGx(dD!&&t,;;tDKKM23779D
 8Or   c                .   | j                   j                  rdnd}|j                         D ]  \  }}| j                  j                  rt        j                  |      st        j                  |      rw|| j                  |   vr-t        j                  d|j                  |j                        }n9| j                  |   |   d| j                  j                  z   | j                  z
  z  }|| j                  |   vr|| j                  |   |<   | j                  |   |   |z   | j                  |   |<   	 d| j                  |   vr6t        j                  dt        j                        | j                  |   d<   | j                  |   dxx   dz  cc<   y )NrE   rF           dtyper   rc   stepsr   )rC   trainingr   rD   logging_nan_inf_filterr   isnanisinfr   tensorr   r   r   global_step_globalstep_last_loggedr~   )r   r   training_typer   r   s        r]   r   z!BaseTrainer.track_loss_components  si   #'::#6#6F**, 	xJCyy//U[[5G5;;W\K]d88GG!LLEKKUE 66}EcJDJJ222T5Q5QQE $44]CCAF**=9#>AEA[A[\iAjknAorwAw**=9#>	x $44]CCAFaWZchcocoApD&&}5g>""=1':a?:r   c                   d }d|v rd}nd|v rd}|rM|j                         }t        | d      r| j                  | j                  |         }n1ddlm}  || j                  |   | j                  j                        }d	|v r|j                  d	      j                         j                         }| j                  |   d	xx   dz  cc<   |j                         D ]  \  }}|d	k(  r|dk(  r| d
| n|}	t        |j                         |z  j                         d      ||	<   t        j                  d|j                  |j                         | j                  |   |<    |t"        
| I  ||      S t"        
| I  |      S )Nr   rE   	eval_lossrF   _nested_gatherr   )nested_gather)parallel_moder   _   r   r   )copyrw   r   r   transformers.trainer_pt_utilsr   rD   r   r   r   itemr   roundr   r   r   r   r   log)r   logs
start_timer   accum_lossesr   r   r   r   log_keyrp   s             r]   r   zBaseTrainer.log  s   T>#MD "M 99;D t-.#2243M3Mm3\]G,..}=TYYMdMd  ,&$((1557<<>**=9'BaGB"."4"4"6 JCg~ :G6:Qq6WZG$)599;+>*D*D*F$JDMEJ\\5;;u||FD..}=cB !7;tZ007;t$$r   c                   d}g }t               }|D ]  }d}|D ]'  }|j                  d|z         s|dt        |        } n |||v r8|j                  |       |j	                  |j                         D 	ci c]&  \  }}	|j                  |      s|t        |      d |	( c}	}        |j                  dd      }
||
fS c c}	}w )a  Turn the inputs from the dataloader into the separate model inputs & the labels.

        Example::

            >>> list(inputs.keys())
            ['return_loss', 'label', 'sentence_0_input_ids', 'sentence_0_token_type_ids', 'sentence_0_attention_mask', 'sentence_1_input_ids', 'sentence_1_token_type_ids', 'sentence_1_attention_mask']
            >>> features, labels = self.collect_features(inputs)
            >>> len(features)
            2
            >>> list(features[0].keys())
            ['input_ids', 'token_type_ids', 'attention_mask']
            >>> list(features[1].keys())
            ['input_ids', 'token_type_ids', 'attention_mask']
            >>> torch.equal(labels, inputs["label"])
            True
        )	input_idssentence_embeddingpixel_valuesinput_featuresinput_valuespixel_values_videosNr   label)r   endswithr   addappendr   
startswithr   )r   r   feature_suffixesr   seen_prefixescolumnprefixsuffixr   r   r   s              r]   r   zBaseTrainer.collect_features#  s    (
  		tFF* ??3<0#Ns6{l3F ~=!8f%OOr:3[^[i[ijp[qSV/6rs		t GT* ss   8CCc                j    || j                  |d      }n| j                  }t        |   |||      S )NrF   rf   )r   rP   r   evaluate)r   rP   ignore_keysmetric_key_prefixrp   s       r]   r  zBaseTrainer.evaluateN  s@     #22<f2UL,,Lwk;LMMr   c                   t         
|   |||||      }| j                  |S | j                  r\t	        | j
                  t              rB|j                  d      r1|dd  t        | j
                  j                               d   k(  rd}n|S | j                         r
t               nt        t        j                        5  | j                  j                   }|7t"        j$                  j'                  |d      }t#        j(                  |d       | j                  | j*                  || j,                  j.                  | j,                  j0                        }d d d        t	        t              sd	|i}t        |j                               D ]0  }	|	j                  | d
      r|j3                  |	      || d
|	 <   2 |j4                  j7                  |       |S # 1 sw Y   xY w)N)
dataloaderdescriptionprediction_loss_onlyr  r	  eval_   r   rF   Texist_ok)output_pathepochr   r   r   )r   evaluation_loopr   is_in_trainrY   rP   r   r   r   r   is_local_process_zeror   r+   loggingINFOrD   r=   r   pathjoinmakedirsrC   r   r  r   r   metricsupdate)r   r  r  r  r  r	  outputr  evaluator_metricsr   rp   s             r]   r  zBaseTrainer.evaluation_loopZ  s    (!#!5#/ ) 
 >>!M
 
4+<+<d CHYHdHdelHm $T->->-C-C-E(Fq(II$*!"88:[]PWP\P\@] 	))..K& ggll;?K$7 $

4::;K;KSWS]S]SiSi !/ !	 +T2!,.? @ )..01 	]C>>%6$7q"9:BSBWBWX[B\!%6$7q">?	] 	/0%	 	s   ?BGG$c           	     J   t         j                  d| j                  j                   d| j                  j                   d       	 | j                  j                  x}rC|j                  dd      d   }| j                  j                  j                  t        |             	 | j                  | j                  j                         y # t        $ r Y 2w xY w# t        $ rB}t         j                  d| j                  j                   dt        |              Y d }~y d }~ww xY w)	NzLoading best model from z	 (score: z).-rc   z#Could not load the best model from z	. Error: )rk   rl   r   best_model_checkpointbest_metricrsplitrC   rt   set_best_model_stepr~   	Exception_load_from_checkpointerrorr}   )r   
checkpointstepexcs       r]   _load_best_modelzBaseTrainer._load_best_model  s    .tzz/O/O.PPYZ^ZdZdZpZpYqqstu	!ZZ===z=!((a04

**>>s4yI	&&tzz'G'GH	  		
  	LL>tzz?_?_>``ijmnqjristu	s+   AC "%C 	CC	D" 8DD"c                    t        |t              r,|j                         D ]  \  }}| j                  ||        y t	        |j
                        ddhz  x}r"t        d|r|dz   nd dt        |       d      y )	Nrf   return_lossrg   z/The following column names are invalid in your  re   z	dataset: zH. Avoid using these column names, as they are reserved for internal use.)rY   r   r   validate_column_namesr   rz   rn   r   )r   r   rg   overlaps       r]   r1  z!BaseTrainer.validate_column_names  s    gt$)0 O%g**7*NO'../=.2QQQ7QAXd,QTBTjlAmmvw{  }D  xE  wF FZ Z  Rr   c                   |||||d}t        j                  | j                  j                        rAt	        | j                  j                  t
              r | j                  j                  |fi |S t        | j                  j                        r | j                  j                  |fi |S t        |t              r=| j                  j                  t        j                  k7  rt        j                  d       y| j                  j                  t        j                  k(  rt        |fi |S | j                  j                  t        j                  k(  rt        |fddi|S | j                  j                  t        j                   k(  rt#        |fi |S | j                  j                  t        j                  k(  rt        t%        ||      fi |S y)aS  
        Returns the appropriate batch sampler based on the ``batch_sampler`` argument in ``self.args``.
        This batch sampler class supports ``__len__`` and ``__iter__`` methods, and is used as the ``batch_sampler``
        to create the :class:`torch.utils.data.DataLoader`.

        .. note::
            Override this method to provide a custom batch sampler.

        Args:
            dataset (Dataset): The dataset to sample from.
            batch_size (int): Number of samples per batch.
            drop_last (bool): If True, drop the last incomplete batch if the dataset size
                is not divisible by the batch size.
            valid_label_columns (List[str]): List of column names to check for labels.
                The first column name from ``valid_label_columns`` found in the dataset will
                be used as the label column.
            generator (torch.Generator, optional): Optional random number generator for shuffling
                the indices.
            seed (int): Seed for the random number generator to ensure reproducibility. Defaults to 0.
        )
batch_size	drop_lastvalid_label_columns	generatorseedBWhen using an IterableDataset, you cannot specify a batch sampler.Nprecompute_hashesT)r7  )inspectisclassrD   batch_sampler
issubclassr"   callablerY   r2   r)   BATCH_SAMPLERrk   rq   NO_DUPLICATESr%   NO_DUPLICATES_HASHEDGROUP_BY_LABELr#   r   )r   r   r4  r5  r6  r7  r8  batch_sampler_kwargss           r]   get_batch_samplerzBaseTrainer.get_batch_sampler  s   > %"#6" 
 ??499223
499CZCZ\o8p*499**7K6JKK DII++,*499**7K6JKK g/yy&&-*E*EEcd 99""m&A&AA+GL7KLL99""m&H&HH+GdtdOcdd99""m&B&BB+GL7KLL99""m&A&AA&}W	'RkVjkk Br   c                ,   |||d}t        j                  | j                  j                        rAt	        | j                  j                  t
              r | j                  j                  |fi |S t        | j                  j                        r | j                  j                  |fi |S | j                  j                  t        j                  k(  rt        dd|i|S | j                  j                  t        j                  k(  rt        dd|i|S y)a/  
        Returns the appropriate multi-dataset batch sampler based on the ``multi_dataset_batch_sampler`` argument
        in ``self.args``. This batch sampler class supports ``__len__`` and ``__iter__`` methods, and is used as the
        ``batch_sampler`` to create the :class:`torch.utils.data.DataLoader`.

        .. note::
            Override this method to provide a custom multi-dataset batch sampler.

        Args:
            dataset (ConcatDataset): The concatenation of all datasets.
            batch_samplers (List[BatchSampler]): List of batch samplers for each dataset in the concatenated dataset.
            generator (torch.Generator, optional): Optional random number generator for shuffling the indices.
            seed (int, optional): Optional seed for the random number generator
        )batch_samplersr7  r8  r   Nr   )r;  r<  rD   multi_dataset_batch_samplerr>  r$   r?  r*   ROUND_ROBINr'   PROPORTIONALr&   )r   r   rG  r7  r8  multi_batch_sampler_kwargss         r]   get_multi_dataset_batch_samplerz+BaseTrainer.get_multi_dataset_batch_sampler  s    . -"&
" ??499@@AjII113RG
 949988_D^__ DII99:849988_D^__ 99004M4Y4YY)X'X=WXX99004M4Z4ZZ+ZGZ?YZZ [r   c           
        | j                   }t        j                         }| j                  j                  %|j                  | j                  j                         || j                  j                  | j                  j                  | j                  j                  | j                  j                  d}t        |t              rg|j                  || j                  j                  d       | j                  j                  t        j                   k7  ret"        j%                  d       nNt        |t&              rt)        d      t        |t*              r|j-                         D ]  }t        |t              st)        d       |j-                         D cg c]6  }| j/                  ||| j                  j                  |j0                  |      8 }}t3        |j-                               }| j5                  |||| j                  j                        }	|	|d<   n\t        |t6              r:| j/                  ||| j                  j                  |j0                  |      }	|	|d<   nt)        d	| d
| d      t9        |fi |S c c}w )a[  Shared logic for building train/eval/test DataLoaders.

        Args:
            dataset: The dataset to build a DataLoader for.
            batch_size: The batch size to use.
            dataset_kind: A label for error messages, e.g. "train", "eval", or "test".

        Returns:
            A prepared DataLoader for the given dataset.
        )
collate_fnnum_workers
pin_memorypersistent_workersprefetch_factor)r4  r5  r9  zcSentence Transformers is not compatible with IterableDatasetDict. Please use a DatasetDict instead.zYSentence Transformers is not compatible with a DatasetDict containing an IterableDataset.)r4  r5  r6  r7  )r   rG  r7  r8  r=  zUnsupported `zC_dataset` type. Use a Dataset, DatasetDict, or IterableDataset for .)rN   r   	GeneratorrD   r8  manual_seeddataloader_num_workersdataloader_pin_memorydataloader_persistent_workersdataloader_prefetch_factorrY   r2   r  dataloader_drop_lastr=  r)   r@  rk   rq   r3   rn   r1   r   rE  r6  r   rL  r0   r   )
r   r   r4  dataset_kindrN   r7  dataloader_paramssub_datasetrG  r=  s
             r]   _build_dataloaderzBaseTrainer._build_dataloader  sX     **OO%	99>>%!!$))..1 (99;;))99"&))"I"I#yyCC
 g/$$",!%!?!? yy&&-*E*EEcd!45u  -&~~/ k?;$s  $+>>#3	   &&)"ii<<(5(I(I' ' 	N 	 $GNN$45G @@-#YY^^	 A M 2?o.) 22%))88$1$E$E# 3 M 2?o.~-pq}p~~  A  '7%677E	s   ;Jc                :   | j                   #t        d| j                  j                   d      d| j                  _        | j                  j                  | j                  | j                   | j                  j                  d            | _
        | j                  S )a@  
        Returns the training [`~torch.utils.data.DataLoader`].

        Will use no sampler if `train_dataset` does not implement `__len__`, a random sampler (adapted to distributed
        training if necessary) otherwise.

        Subclass and override this method if you want to inject some custom behavior.
        z4Training requires specifying a train_dataset to the rS  FrE   r[  )rO   rn   rp   rj   acceleratoreven_batchespreparer^  rD   train_batch_size_train_dataloader)r   s    r]   get_train_dataloaderz BaseTrainer.get_train_dataloaderm  s     %STXTbTbTkTkSllmnoo
 ).%!%!1!1!9!9""4#5#5tyy7Q7Q`g"h"
 %%%r   c                V   |F| j                   :| j                  t        g       S t        d| j                  j
                   d      ||n| j                   }d| j                  _        | j                  j                  | j                  || j                  j                  d            S )a  
        Returns the evaluation [`~torch.utils.data.DataLoader`].

        Subclass and override this method if you want to inject some custom behavior.

        Args:
            eval_dataset (`torch.utils.data.Dataset`, *optional*):
                If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted
                by the `model.forward()` method are automatically removed. It must implement `__len__`.
        z6Evaluation requires specifying an eval_dataset to the rS  TrF   r`  )rP   r   r   rn   rp   rj   ra  rb  rc  r^  rD   eval_batch_size)r   rP   s     r]   get_eval_dataloaderzBaseTrainer.get_eval_dataloader  s     D$5$5$=~~)!"~%UVZVdVdVmVmUnnopqq'3'?|TEVEV
 )-%''""<1J1JY_"`
 	
r   c                    d| j                   _        | j                   j                  | j                  || j                  j
                  d            S )a  
        Returns the test [`~torch.utils.data.DataLoader`].

        Subclass and override this method if you want to inject some custom behavior.

        Args:
            test_dataset (`torch.utils.data.Dataset`, *optional*):
                The test dataset to use. If it is a [`~datasets.Dataset`], columns not accepted by the
                `model.forward()` method are automatically removed. It must implement `__len__`.
        Ttestr`  )ra  rb  rc  r^  rD   rh  )r   test_datasets     r]   get_test_dataloaderzBaseTrainer.get_test_dataloader  sK     )-%''""<1J1JY_"`
 	
r   c                .   ||n| j                   j                  }t        j                  |d       t        j                  d|        t        | j                   d      r2| j                  j                  || j                   j                         n| j                  j                  |       | j                  | j                  j                  |       t        j                  | j                   t        j                  j                  |t                     y )NTr  zSaving model checkpoint to save_safetensors)safe_serialization)rD   r=   r   r  rk   rl   rw   rC   save_pretrainedro  r8   r   saver  r  r   )r   r=   
state_dicts      r]   _savezBaseTrainer._save  s    #-#9Ztyy?S?S

J.1*>? 49901JJ&&zdiiF`F`&aJJ&&z2   ,!!11*= 	

499bggll:7IJKr   c                    | j                   j                  } ||| j                   j                        }| j                   j                  |j	                                y )N)trust_remote_code)rC   rp   rv  load_state_dictrs  )r   checkpoint_pathri   loaded_models       r]   r(  z!BaseTrainer._load_from_checkpoint  sB    jj**"?djjFbFbc

""<#:#:#<=r   c                    t        |d      s||S | j                  ||       | j                  || j                  | j                        r| j                  |      }d|_        |S )aE  
        Preprocess the dataset by optionally lazily adding a dataset name column, required for multi-dataset training
        with multiple losses or for dataset-specific router mappings.

        Args:
            dataset (DatasetDict | Dataset | None): The dataset to preprocess. If None, no preprocessing is done.
            dataset_name (str | None): The name of the dataset, used for multi-dataset training with multiple losses.

        Returns:
            DatasetDict | Dataset | None: The preprocessed dataset, perhaps with dataset names added as a lazy column.
        #_sentence_transformers_preprocessedrf   T)rw   r1  #should_dataset_name_column_be_addedrD   r   add_dataset_name_columnr{  )r   r   rg   s      r]   r   zBaseTrainer.preprocess_dataset  sf     7ABgoN 	""7"F33GTYY		R227;G 7;3r   c                r   t        |t        t        f      xr t        |t              xs |j                  xr t        |j                  t              xsd |j
                  xrV t        |j
                  t              xr: t        t        t        |j
                  j                                     t              S )a  
        We should add a dataset name column to the dataset, if the dataset is a DatasetDict, *and* one of:

        a. The loss is a dictionary, or
        b. The prompts contain a mapping of dataset names, or
        c. The router_mapping contains a mapping of dataset names.
        )	rY   r1   r3   r   r   r   r{   r|   r   )r   r   rD   r   s       r]   r|  z/BaseTrainer.should_dataset_name_column_be_added  s     'K1D#EF 
tT" ?DLL$!? ## Ot22D9OtD)<)<)C)C)E$FGN	
r   c                   t        |t        t        f      r0|j                         D ]  \  }}| j	                  ||      ||<    |S ||S t        |t
              r3|j                  t        | j                  fd|i|j                         |S t        |t              rG|j                  }|rt        d      |d<   |j                  t        | j                  |      d|      }|S t        d      )N)r   rg   rg   rG   rf   T)batchedr   z`Unsupported `dataset` type. Use a Dataset, DatasetDict, IterableDataset, or IterableDatasetDict.)rY   r3   r1   r   r}  r0   set_transformr	   add_dataset_name_transform_format_kwargsr2   r   r4   maprn   )r   r   rg   inner_datasetr   s        r]   r}  z#BaseTrainer.add_dataset_name_column  s   
 g 3[AB/6}} +m(,(D(D)!- )E )%
 N N gw'!!33!- ,,6 % 1''H+0?(kk33!- ! " G  r r   c                    |r ||       } | rt        | j                               d   r|| S t        t        | j                               d         }|g|z  | d<   | S )a  A transform/map function that adds the dataset name to the batch.

        Args:
            batch (dict[str, list[Any]]): The batch of data, where each key is a column name and each value
                is a list of values.
            dataset_name (str | None, optional): The name of this dataset, only if there are multiple datasets
                that use a different loss. Defaults to None.
            transform (Callable[[dict[str, list[Any]]], dict[str, list[Any]]], optional): An optional transform
                function to apply on the batch before adding the dataset name. Defaults to None.

        Returns:
            dict[str, list[Any]]: The "just-in-time" transformed batch with the dataset name added.
        r   rg   )r   r   r   )batchrg   	transformkwargsr4  s        r]   r  z&BaseTrainer.add_dataset_name_transform.  sd    , e$E D03|7KL elln-a01
!- ;nr   c
                r   | j                         sy |r%| j                  j                  j                  |       |r%| j                  j                  j	                  |       |r%| j                  j                  j                  |       | j                  j                  | j                  j                  |       y )N)
model_name)	is_world_process_zerorC   rt   set_languageset_licenseadd_tags_create_model_cardrD   r=   )r   languagelicensetagsr  finetuned_fromtasksdataset_tagsr   dataset_argsr  s              r]   create_model_cardzBaseTrainer.create_model_cardP  s     ))+JJ&&33H=JJ&&227;JJ&&//5

%%dii&:&:z%Rr   c           
        t        | j                  t              r)t        j                  t        | j                              }n| j                  }t        |   ||      \  }}| j                  |      }h dt        |j                               z  s|j                         D cg c]  \  }}||v s|j                  s| c}}| j                  j                  d|j                         D cg c]  \  }}||vs|j                  s| c}}ddg|d<   |j                  j!                         D ]  \  }	}
t        |j                               h dz  }|r|j#                         nd}|j                         D ci c]  \  }}t%        j&                  |	|      s||! }}}|rJ||   D ]A  }d|v s|d   D cg c]'  t)        fd|j+                         D              s&) c}|d<   C nt-        d|	 d      |j!                         D ci c]  \  }}||v s|| }}}|j!                         D ci c]  \  }}||vs|| }}}|rC||   j/                  t1        |j+                               |
| j                  j                  d	       |sg||   j/                  t1        |j+                               |
dd	        ||fS c c}}w c c}}w c c}}w c c}w c c}}w c c}}w )
a5  
        We have to override the optimizer_grouped_parameters because the Trainer superclass bases it on the `model`
        itself, but the BaseModel losses can have weights that should be updated as well, e.g.
        SoftmaxLoss (see #2872).

        This method requires `transformers` >= 4.43.0.
        >   rC   paramsoptimizer_dict)r  weight_decayr   r  r  c              3  &   K   | ]  }|u 
 y wrX   r   )r[   paramps     r]   r^   z;BaseTrainer.get_optimizer_cls_and_kwargs.<locals>.<genexpr>  s     =sQVaun=ss   z*No parameters found matching the pattern 'z^' in the model. Please check the pattern and ensure it matches some of the model's parameters.)r  lrr  )rY   r   r   r   
Sequentialr   r   get_optimizer_cls_and_kwargsget_decay_parameter_namesr   r   named_parametersrequires_gradrD   r  learning_rate_mappingr   r   researchallr   rn   r   r   )r   rD   rC   
loss_modeloptimizer_clsoptimizer_kwargsdecay_parametersnr  parameter_patternlearning_rateoptimizer_param_keysoptimizer_param_keymatching_paramsgroupmatching_params_with_decaymatching_params_without_decayrp   s           `        r]   r  z(BaseTrainer.get_optimizer_cls_and_kwargsi  s    dii&{499'=>JJ*/'*NtU_*`''  99*E4s;K;P;P;R7SS '1&A&A&C"aM]H]bcbqbq %)II$:$:	 '1&A&A&C"aQaHafgfufu %(	2-.  150J0J0P0P0R *	,}#&'7'<'<'>#?Bg#g @T"6":":"<Zj 1;0K0K0Mq1QSQZQZ[lnoQpq!tqOq-.AB E5(',X+"##=sZiZpZpZr=s:sA+h !@AR@S Te e  <K;P;P;R)l41aVW[kVk!Q$)l&)l>M>S>S>U,sdaYZbrYrQT,s),s) !45<<"&'A'H'H'J"K+(,		(>(> - !45<<"&'D'K'K'M"N+(+I*	X ...u r+ *m,ssT   'K4KK2K?KKK K?'K$'K$K)#K)>K/K/)NNNNNNNNNNNr:   NN)rC   BaseModel | NonerD   zBaseTrainingArguments | NonerO   CDataset | DatasetDict | IterableDataset | dict[str, Dataset] | NonerP   r  r   znn.Module | dict[str, nn.Module] | Callable[[BaseModel], torch.nn.Module] | dict[str, Callable[[BaseModel], torch.nn.Module]] | Noner   z*BaseEvaluator | list[BaseEvaluator] | NonerN   zBaseDataCollator | Noner8   ]PreTrainedTokenizerBase | BaseImageProcessor | FeatureExtractionMixin | ProcessorMixin | NonerQ   zCallable[[], BaseModel] | NonerR   z'Callable[[EvalPrediction], dict] | NonerS   zlist[TrainerCallback] | NonerT   z?tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]rU   z9tuple[type[torch.optim.Optimizer], dict[str, Any]] | NonerV   z;Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | NonereturnNonerX   )rC   r   rD   r(   r8   r  r  r   )r   zdict[str, Any]r  r  )r  r   )r   torch.nn.ModulerC   r   r  r  )r   z8Callable[[BaseModel], torch.nn.Module] | torch.nn.ModulerC   r   r  r  )rC   r   r  r  )FN)rC   r   r   dict[str, torch.Tensor | Any]r   rJ   r  z2torch.Tensor | tuple[torch.Tensor, dict[str, Any]])r   zdict[str, torch.Tensor]r  r  )r   dict[str, float]r   zfloat | Noner  r  )r   r  r  z9tuple[list[dict[str, torch.Tensor]], torch.Tensor | None])NNrF   )rP   z#Dataset | dict[str, Dataset] | Noner  list[str] | Noner	  r}   r  r  )r  r   r  r}   r  zbool | Noner  r  r	  r}   r  r   )r  r  )r   r0   rg   
str | Noner  r  )NNr   )r   r0   r4  r~   r5  rJ   r6  r  r7  torch.Generator | Noner8  r~   r  zBatchSampler | None)Nr   )
r   r   rG  zlist[BatchSampler]r7  r  r8  z
int | Noner  r   )r   'Dataset | DatasetDict | IterableDatasetr4  r~   r[  r}   r  r   )r  r   )rP   z.Dataset | DatasetDict | IterableDataset | Noner  r   )rl  r  r  r   )r=   r  r  r  )rx  r}   r  r  )r   DatasetDict | Dataset | Nonerg   r  r  r  )r   r  rD   r(   r   z nn.Module | dict[str, nn.Module]r  rJ   )r   z=DatasetDict | IterableDatasetDict | Dataset | IterableDatasetrg   r  r  r  )r  dict[str, list[Any]]rg   r  r  z=Callable[[dict[str, list[Any]]], dict[str, list[Any]]] | Noner  r  )	NNNNNNNNN)r  r  r  r  r  str | list[str] | Noner  r  r  r  r  r  r  r  r   r  r  r  r  r  )rD   r(   rC   r  r  ztuple[Any, Any])/rj   
__module____qualname____doc__r   ri   r    model_card_data_classr   r   r   r   r(   rm   r/   r   rx   r   ro   r   r   r   r   r   r   r   r   r  r  r-  r1  rE  rL  r^  rf  ri  rm  rt  r(  r   r|  r}  staticmethodr  r  r  __classcell__)rp   s   @r]   r7   r7   7   s   @D K- 5*/!34 #'-1]a\`
 @D15
 59CG26Vb^bei/u8u8 +u8 [	u8
 Zu8u8 >u8 /u8u8$ 3%u8& A'u8( 0)u8* T+u8, #\-u8. (c/u80 
1u8 5u8~ '
'
 $'
	'
 
'
Rm&2F  
	,    %22 .2 	2 
<2h@*'%R) 3) 	B) Z =A(,!'	
N9
N &
N 	
N
 

N  -1(,!'// / *	/
 &/ / 
/b"
" 15,0?l?l ?l 	?l
 .?l *?l ?l 
?lJ -1*[*[ +*[ *	*[
 *[ 
*[XV88V8 V8 	V8
 
V8p&*
6
&L(> X\3JT	%<
-
 $
 /	

 

4 $(.N. !. 
&	.`  $(SW#  Q
 
 F  $"'+!%%)(,/3*./3SS S %	S
 S #S &S -S (S -S 
S4 FJQ/)Q/2BQ/	Q/ Q/r   r7   )U
__future__r   r;  r  r   r  abcr   r   collectionsr   collections.abcr   
contextlibr   	functoolsr	   typingr
   r   r   torch.utils.datar   r   r   r   transformersr   r   r   r   %transformers.feature_extraction_utilsr   #transformers.image_processing_utilsr   transformers.integrationsr   transformers.processing_utilsr   transformers.trainerr   transformers.trainer_utilsr   (sentence_transformers.base.data_collatorr   %sentence_transformers.base.evaluationr   r   r   r   %sentence_transformers.base.model_cardr   r    "sentence_transformers.base.modulesr!   "sentence_transformers.base.samplerr"   r#   r$   r%   r&   r'   (sentence_transformers.base.training_argsr(   r)   r*   sentence_transformers.utilr+   r,   r-   r.   %sentence_transformers.util.decoratorsr/   datasetsr0   r1   r2   r3   r4   	getLoggerrj   rk   r5   ImportErrorr7   r   r   r]   <module>r     s    "   	 	 # # $ "     S S Z Z H B 3 8 3 5 E T 6 Z 5  u t n n CZZ			8	$9
C/'3 C/	  Os   9D DD