
    #i                    d   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZ d dlZd dlZd dlmZ d dlmZ d d	lmZmZmZmZ d d
lmZ d dlmZ d dlmZm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z-m.Z. d dl/m0Z0m1Z1  ejd                  e3      Z4 G d de"e&      Z5y)    )annotationsN)OrderedDict)Callable)Queue)AnyLiteraloverload)nn)trange)
AutoConfigPretrainedConfigPreTrainedModelis_datasets_available)logging)
deprecated)PairableInput	PairInput)	BaseModel)Transformer)FitMixin)CrossEncoderModelCardData)
LogitScore)batch_to_devicefullnameimport_from_string)!cross_encoder_init_args_decorator)cross_encoder_predict_rank_args_decoratorc                      e Zd ZU dZeZdZded<   dZe		 d#dddddddddddddddddd		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d$ fd
       Z
	 	 	 	 	 	 d%	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d&dZ	 	 	 	 d'	 	 	 	 	 	 	 	 	 d(dZe	 	 	 	 	 	 	 	 	 	 d)d       Zd*dZd+dZed,d       Zed-d       Zed.d       Zd/ fdZe ed      d.d              Zej0                   ed      d0d              Ze ed      d+d              Ze	 	 	 	 	 	 	 	 	 	 	 d1	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d2d       Ze	 	 	 	 	 	 	 	 	 	 	 d3	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d4d       Ze	 	 	 	 	 	 	 	 	 	 	 d1	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d5d       Ze	 	 	 	 	 	 	 	 	 	 	 d1	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d6d       Z ej:                         e	 	 	 	 	 	 	 	 	 	 	 d7	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d8d              Ze	 	 	 	 	 	 	 	 	 	 	 	 	 d9	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d:d       Zd;dZ d< fd Z!d= fd!Z"d>d"Z# xZ$S )?CrossEncoderaL  
    Loads or creates a CrossEncoder model that takes a sentence pair as input and outputs a score or label.

    A CrossEncoder does not produce sentence embeddings. Instead, it processes both sentences jointly through the
    transformer and outputs a score (regression) or class probabilities (classification). This makes it more
    accurate for pairwise tasks like reranking or semantic textual similarity, but it cannot pre-compute embeddings
    for individual sentences.

    Args:
        model_name_or_path (str, optional): If a filepath on disk, loads the model from that path. Otherwise, tries
            to download a pre-trained CrossEncoder model. If that fails, tries to construct a model from the
            Hugging Face Hub with that name. Defaults to None.
        modules (list[nn.Module], optional): A list of torch modules that are called sequentially. Can be used to
            create custom CrossEncoder models from scratch. Defaults to None.
        device (str, optional): Device (like ``"cuda"``, ``"cpu"``, ``"mps"``, ``"npu"``) that should be used for
            computation. If None, checks if a GPU can be used. Defaults to None.
        prompts (dict[str, str], optional): A dictionary with prompts for the model. The key is the prompt name,
            the value is the prompt text. The prompt text will be prepended before any text to encode. For example:
            ``{"query": "query: ", "passage": "passage: "}``. If a model has saved prompts, you can override
            them by passing your own, or pass ``{"query": "", "document": ""}`` to disable them.
            Defaults to None.
        default_prompt_name (str, optional): The name of the prompt that should be used by default. If not set,
            no prompt will be applied. Defaults to None.
        cache_folder (str, optional): Path to store models. Can also be set by the ``SENTENCE_TRANSFORMERS_HOME``
            environment variable. Defaults to None.
        trust_remote_code (bool, optional): Whether to allow for custom models defined on the Hub in their own
            modeling files. Only set to ``True`` for repositories you trust and in which you have read the code,
            as it will execute code present on the Hub on your local machine. Defaults to False.
        revision (str, optional): The specific model version to use. It can be a branch name, a tag name, or a
            commit id, for a stored model on Hugging Face. Defaults to None.
        local_files_only (bool, optional): Whether to only look at local files (i.e., do not try to download
            the model). Defaults to False.
        token (bool or str, optional): Hugging Face authentication token to download private models.
            Defaults to None.
        model_kwargs (dict[str, Any], optional): Keyword arguments passed to the underlying Hugging Face
            Transformers model via ``AutoModel.from_pretrained``. Particularly useful options include:

            - ``torch_dtype``: Override the default ``torch.dtype`` and load the model under a specific
              dtype. Can be ``torch.float16``, ``torch.bfloat16``, ``torch.float32``, or ``"auto"`` to
              use the dtype from the model's ``config.json``.
            - ``attn_implementation``: The attention implementation to use. For example ``"eager"``,
              ``"sdpa"``, or ``"flash_attention_2"``. If you ``pip install kernels``, then
              ``"flash_attention_2"`` should work without having to install ``flash_attn``. It is
              frequently the fastest option. Defaults to ``"sdpa"`` when available (torch>=2.1.1).
            - ``device_map``: Device map for model parallelism, e.g. ``"auto"``.
            - ``provider``: For ``backend="onnx"``, the ONNX execution provider
              (e.g. ``"CUDAExecutionProvider"``).
            - ``file_name``: For ``backend="onnx"`` or ``"openvino"``, the filename to load
              (e.g. for optimized or quantized models).
            - ``export``: For ``backend="onnx"`` or ``"openvino"``, whether to export the model to the
              backend format. Also set automatically if the exported file doesn't exist.

            See the `PreTrainedModel.from_pretrained
            <https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.from_pretrained>`_
            documentation for more details. Defaults to None.
        processor_kwargs (dict[str, Any], optional): Keyword arguments passed to the Hugging Face Transformers
            processor/tokenizer via ``AutoProcessor.from_pretrained``. See the `AutoTokenizer.from_pretrained
            <https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoTokenizer.from_pretrained>`_
            documentation for more details. Defaults to None.
        config_kwargs (dict[str, Any], optional): Keyword arguments passed to the Hugging Face Transformers
            config via ``AutoConfig.from_pretrained``. See the `AutoConfig.from_pretrained
            <https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoConfig.from_pretrained>`_
            documentation for more details. For example, you can set ``classifier_dropout`` or ``num_labels``
            via this parameter. Defaults to None.
        model_card_data (:class:`~sentence_transformers.cross_encoder.model_card.CrossEncoderModelCardData`, optional):
            A model card data object that contains information about the model. Used to generate a model card
            when saving the model. If not set, a default model card data object is created. Defaults to None.
        backend (str, optional): The backend to use for inference. Can be ``"torch"`` (default), ``"onnx"``,
            or ``"openvino"``. Defaults to ``"torch"``.
        num_labels (int, optional): Number of labels of the classifier. If 1, the CrossEncoder is a regression
            model that outputs a continuous score. If > 1, it outputs several scores that can be soft-maxed to
            get probability scores for the different classes. Defaults to None.
        max_length (int, optional): Max length for input sequences. Longer sequences will be truncated. If None,
            the max length of the model will be used. Defaults to None.
        activation_fn (Callable, optional): Activation function applied on top of the model's logits during
            :meth:`predict`. If None, ``nn.Sigmoid()`` is used when ``num_labels=1``, else ``nn.Identity()``.
            Defaults to None.

    Example:
        ::

            from sentence_transformers import CrossEncoder

            # Load a pre-trained CrossEncoder model
            model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

            # Predict scores for sentence pairs
            pairs = [
                ("How many people live in Berlin?", "Berlin had a population of 3,520,031 in 2019."),
                ("How many people live in Berlin?", "Berlin is well known for its museums."),
            ]
            scores = model.predict(pairs)
            print(scores)
            # [8.607  1.133]

            # Rank documents by relevance to a query
            results = model.rank(
                "How many people live in Berlin?",
                ["Berlin had a population of 3,520,031 in 2019.", "Berlin is well known for its museums."],
            )
            print(results)
            # [{'corpus_id': 0, 'score': 8.607317}, {'corpus_id': 1, 'score': 1.1329174}]
    zcross-encoder
str | None default_huggingface_organizationcross_encoder_model_idNFtorch)modulesdevicepromptsdefault_prompt_namecache_foldertrust_remote_coderevisionlocal_files_onlytokenmodel_kwargsprocessor_kwargsconfig_kwargsmodel_card_databackend
num_labels
max_lengthactivation_fnc                   d | _         |	|i }||d<   |	|i }||d<   t        | 	  |||||||	|
|||||||       |  ||| _         y | j                   | j                         | _         y y )Nr2   model_max_length)model_name_or_pathr$   r%   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r&   r'   )r4   super__init__get_default_activation_fn)selfr7   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   	__class__s                      z/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/sentence_transformers/cross_encoder/model.pyr9   zCrossEncoder.__init__   s    2 "!$ "*4M,'!'#% 3=/01%/-%-'+ 3 	 	
" 	 $!.D'!%!?!?!AD (    c
           	     \   ||||d}
|i |
ni |
|}|i |
ni |
|}|	i |
ni |
|	}	|s| j                   j                  ||       t        j                  |fd|i|	}t	        |d      r|j
                  |j
                  d   j                  d      rtt        |d||||	| j                        }|j                  j                  d	      }|j                  j                  d
      }||t        d      t        ||      }||gi fS t        |d||||	| j                        }|gi fS )N)r,   r)   r*   r+   )r*   	cache_dirarchitecturesr   ForCausalLMztext-generation)transformer_taskr@   r-   r.   r/   r1   yesnoa  The tokenizer does not have 'yes' and/or 'no' tokens, which are used as the default true/false tokens for the LogitScore post-processing module. Please provide custom modules with your desired LogitScore configuration, or use a model with a tokenizer that supports these tokens.)true_token_idfalse_token_idzsequence-classification)r0   set_base_modelr   from_pretrainedhasattrrA   endswithr   r1   	tokenizerconvert_tokens_to_ids
ValueErrorr   )r;   r7   r,   r(   r*   r)   r+   r-   r.   r/   shared_kwargsconfigtransformer_modelrF   rG   post_processings                   r=   _load_default_modulesz"CrossEncoder._load_default_modules   s    !2  0	
 -9,@(-(FgFgZfFg0@0H,m,NsQ^NsbrNs-:-B)=)Hj=Hj\iHj  //0BX/V#-#=#=$
"$
 $
 FO,$$0$$Q'00? +"!2&)!1+! .77MMeTM.88NNtTN$(> I  )+-O &7;; (6"%-'LL
 ""B&&r>   c           	         |j                  dd      }|j                  dd      }d|d<   d}	|1t        |t              r!t        |      dkD  r| j	                  |      }d}		 |Ft        t        j                  t        |      t        |d         z  dz        d	      }t        |d
      }|d   }
|d   }d}t        t        dt        |      |            D ]!  \  }}||||z    }|
j                  |||g       # t        t        |d
z   d|       D cg c]  }|j                          c}d       }|D cg c]  }|d
   	 }}t        d |D              r#t        d |D              }t!        d|d          |rt        |d   t"        j$                        rt#        j&                  |      }nt        |d   t(        j*                        rt)        j,                  |d      }nut        |d   t              rt/        |g       }nUt/        |g       }nH|r,t#        j0                  g | j2                  j4                        }n|rt)        j6                  g       }ng }||	r| j9                  |       S S c c}w c c}w # |	r| j9                  |       w w xY w)Nconvert_to_tensorFconvert_to_numpyTshow_progress_barr   	processes
   i     inputoutputChunksdescdisablec                    | d   S Nr    xs    r=   <lambda>z-CrossEncoder._multi_process.<locals>.<lambda>;  s
    ad r>   )keyc              3  J   K   | ]  }t        |      d kD  xr |d    du  yw   Nlen.0r\   s     r=   	<genexpr>z.CrossEncoder._multi_process.<locals>.<genexpr>C  s)     W3v;?<vay'<<Ws   !#c              3  J   K   | ]  }t        |      d kD  s|d    s|  ywrj   rl   rn   s     r=   rp   z.CrossEncoder._multi_process.<locals>.<genexpr>E  s$     #fvFVW\bcd\eF#fs   ###zError in worker process: rk   )axisr%   )get
isinstancelistrm   start_multi_process_poolminmathceilmax	enumeraterangeputsortedr   anynextRuntimeErrorr#   Tensorcatnpndarrayconcatenatesumtensormodelr%   arraystop_multi_process_pool)r;   inputsrW   poolr%   
chunk_sizepredict_kwargsrU   rV   created_poolinput_queueoutput_queuechunk_idchunk_startchunk_output_listr\   scoreserror_outputs                       r=   _multi_processzCrossEncoder._multi_process  sx    +../BEJ)--.@$G.3*+<Jvt4Vq008DL4	3! 3v;T+=N9O+ORT+T!UW[\
 Q/
7;G}K8<XL H)25CK3T)U C%+{[:-EF5. ABC
 !-3HqLx]nYn-op!!#p"K /::FfQi:F: W;WW##f#ff"%>|A>O#PQQfQi6"YYv.Fq	2::6^^F;Fq	40 _F _F"b1B1BC!" ,,T2 E q ;8 ,,T2 s,    B0I7 I-'I7 5I2DI7 -
I7 7Jc                "   	 d}	 |j                         \  }}} |j                  |fd| i|}t        |t        j                        r*|j
                  j                  dk7  r|j                         }nt        |t        j                        rt        j                  |      }nbt        |t              rR|D cg c]G  }t        |t        j                        r)|j
                  j                  dk7  r|j                         n|I }}|j                  ||g       c c}w # t        j                  $ r Y yt        $ rS}	t         j#                  d|  d|	        	 |j                  |dt%        |	      g       n# t        $ r Y nw xY wY d}	~	yd}	~	ww xY w)z\
        Internal working process to predict input pairs in a multi-process setup.

        Nr%   cpuzError in worker process on z: )rt   predictru   r#   r   r%   typer   r   r   asarrayrv   r~   queueEmpty	Exceptionloggererrorstr)
target_devicer   r   results_queuer   sentence_pairskwargsr   scorees
             r=   _multi_process_workerz"CrossEncoder._multi_process_worker_  sg    H3>??3D0.&&~VmVvV fell38J8Je8S#ZZ\F

3ZZ/F- &,! (2%'F5<<K\K\`eKe		kppF  !!8V"45!  ;;  :=/A3OP!%%xs1v&>?  sU   B1D 6ADD D F3F;F	E54F	5	F>F	 FF		Fc                    | j                   s|j                  d      r t        |             S t        j	                  d| d       y)z[Instantiate an activation function from a dotted path string, respecting trust_remote_code.ztorch.zActivation function path 'z' is not trusted, using default activation function instead. Please load the CrossEncoder with `trust_remote_code=True` to allow loading custom activation functions via the configuration.N)r)   
startswithr   r   warning)r;   activation_fn_paths     r=   _resolve_activation_fnz#CrossEncoder._resolve_activation_fn  sO    !!%7%B%B8%L9%&89;;();(< =/ /	

 r>   c                   d }t        | j                  d      r2d| j                  j                  v r| j                  j                  d   }nNt        | j                  d      r8| j                  j                  "| j                  j                  }| j                  `|| j	                  |      }||S | j                  j
                  dk(  rt        j                         S t        j                         S )Nsentence_transformersr4   $sbert_ce_default_activation_functionrZ   )	rJ   rP   r   r   r   r2   r
   SigmoidIdentity)r;   r   resolveds      r=   r:   z&CrossEncoder.get_default_activation_fn  s    !4;; 78_PTP[P[PqPq=q!%!B!B?!S DKK!GH@@L!%!Q!Q@)223EFH#;;!!Q&::<{{}r>   c                4    | d   j                   j                  S rc   )r   rP   r;   s    r=   rP   zCrossEncoder.config  s    Aw}}###r>   c                     | d   j                   S rc   )r   r   s    r=   r   zCrossEncoder.model  s    Aw}}r>   c                    t        |       D ]F  }t        |t              r"|j                  j                  j
                  c S t        |t              sF y y)NrZ   )reversedru   r   r   rP   r2   r   )r;   modules     r=   r2   zCrossEncoder.num_labels  sG    tn 	F&+.||**555&*-		 r>   c                ~    |dk(  r(t         t        j                  j                  |   ||      S t         |   ||      S Nr4   )r8   r#   r
   Module__setattr__)r;   namevaluer<   s      r=   r   zCrossEncoder.__setattr__  s;     ?"$;D%HHw"4//r>   zaThe `max_length` property was renamed and is now deprecated. Please use `max_seq_length` instead.c                    | j                   S Nmax_seq_lengthr   s    r=   r3   zCrossEncoder.max_length  s     """r>   c                    || _         y r   r   )r;   r   s     r=   r3   zCrossEncoder.max_length  s     $r>   zqThe `default_activation_function` property was renamed and is now deprecated. Please use `activation_fn` instead.c                    | j                   S r   )r4   r   s    r=   default_activation_functionz(CrossEncoder.default_activation_function  s     !!!r>   c                     y r   rd   r;   r   prompt_nameprompt
batch_sizerW   r4   apply_softmaxrV   rU   r%   r   r   r   s                 r=   r   zCrossEncoder.predict        r>   c                     y r   rd   r   s                 r=   r   zCrossEncoder.predict  s      r>   c                     y r   rd   r   s                 r=   r   zCrossEncoder.predict  r   r>   c                     y r   rd   r   s                 r=   r   zCrossEncoder.predict  s      !r>   c                l   |Lt         j                         t        j                  k(  xs% t         j                         t        j                  k(  }|dk  rt        d| d      | j                  |      }|r.t        |t        j                        r|j                         }|g}nEt        |t              s5t        |t        j                        r|j                         n
t        |      }|t        |
t              r6t        |
      dkD  r( | j                  d||||
||||||||	d|}|r|d   }|S | j                  ||      }|
| j                  j                   }
| j#                  |
       | j%                          |xs | j&                  }| j(                  }g }t        j*                  |D cg c]  }| j-                  |        c}      }| j/                         r| j1                  |      }|D cg c]  }||   	 }}t3        dt        |      |d|       D ]  }||||z    } | j4                  |fd|i|}t7        ||
      } | j8                  |fi |}|d   }| ||      }|r:|j:                  d	kD  r+t<        j>                  j@                  jC                  |d	
      }|d	k(  r |j:                  d	kD  r|jE                  d      }|jG                  |        t        j*                  |      D cg c]  }||   	 }}|	r9t        |      rt=        jH                  |      }nut=        jJ                  g |
      }n]|r[t        jL                  |D cg c]<  }|jO                         jQ                         jS                         jU                         > c}      }|r|d   }|S c c}w c c}w c c}w c c}w )a  
        Performs predictions with the CrossEncoder on the given input pairs.

        .. tip::

            Adjusting ``batch_size`` can significantly improve processing speed. The optimal value depends on your
            hardware, model size, precision, and input length. Benchmark a few batch sizes on a small subset of your
            data to find the best value.

        Args:
            inputs (Union[List[PairInput], PairInput]): A list of input pairs or one input pair, where each element
                can be a string, image, or multimodal dict.
            prompt_name (Optional[str], optional): The name of the prompt to use for encoding.
            prompt (Optional[str], optional): The prompt to use for encoding.
            batch_size (int, optional): Batch size for encoding. Defaults to 32.
            show_progress_bar (bool, optional): Output progress bar. Defaults to None.
            activation_fn (callable, optional): Activation function applied on the logits output of the CrossEncoder.
                If None, the ``model.activation_fn`` will be used, which defaults to :class:`torch.nn.Sigmoid` if num_labels=1, else
                :class:`torch.nn.Identity`. Defaults to None.
            apply_softmax (bool, optional): If set to True and `model.num_labels > 1`, applies softmax on the logits
                output such that for each sample, the scores of each class sum to 1. Defaults to False.
            convert_to_numpy (bool, optional): Whether the output should be a list of numpy vectors. If False, output
                a list of PyTorch tensors. Defaults to True.
            convert_to_tensor (bool, optional): Whether the output should be one large tensor. Overwrites `convert_to_numpy`.
                Defaults to False.
            device (Union[str, List[str]], optional): Device(s) to use for computation. Can be a single device string
                (e.g., "cuda:0", "cpu") or a list of devices (e.g., ["cuda:0", "cuda:1"]). If a list is provided,
                multiprocessing will be used automatically. Defaults to None.
            pool (Dict[str, Any], optional): A pool of workers created with :meth:`start_multi_process_pool`. If provided,
                multiprocessing will be used. If None and ``device`` is a list, a pool will be created automatically.
                Defaults to None.
            chunk_size (int, optional): Size of chunks for multiprocessing. If None, a sensible default is calculated.
                Only used when ``pool`` is not None or ``device`` is a list. Defaults to None.

        Returns:
            Union[List[torch.Tensor], np.ndarray, torch.Tensor]: Predictions for the passed input pairs.
            The return type depends on the ``convert_to_numpy`` and ``convert_to_tensor`` parameters.
            If ``convert_to_tensor`` is True, the output will be a :class:`torch.Tensor`.
            If ``convert_to_numpy`` is True, the output will be a :class:`numpy.ndarray`.
            Otherwise, the output will be a list of :class:`torch.Tensor` values.

        Examples:
            ::

                from sentence_transformers import CrossEncoder

                model = CrossEncoder("cross-encoder/stsb-roberta-base")
                sentences = [["I love cats", "Cats are amazing"], ["I prefer dogs", "Dogs are loyal"]]
                model.predict(sentences)
                # => array([0.6912767, 0.4303499], dtype=float32)

                # Using multiprocessing with automatic pool
                scores = model.predict(sentences, device=["cuda:0", "cuda:1"])

                # Using multiprocessing with manual pool
                pool = model.start_multi_process_pool()
                scores = model.predict(sentences, pool=pool)
                model.stop_multi_process_pool(pool)
        r   z+batch_size must be a positive integer, got .)r   rW   r   r%   r   r   r   r   r4   r   rV   rU   Batchesr_   r   r   rZ   )dimr]   rs   rd   )+r   getEffectiveLevelr   INFODEBUGrN   is_singular_inputru   r   r   tolistrv   rm   r   _resolve_promptr   r%   toevalr4   r2   argsort_input_length_can_flatten_inputs_interleave_sorted_indicesr   
preprocessr   forwardndimr#   r
   
functionalsoftmaxsqueezeextendstackr   r   r   detachfloatnumpy)r;   r   r   r   r   rW   r4   r   rV   rU   r%   r   r   r   r   pred_scoresr2   pairlength_sorted_idxidxinputs_sortedstart_indexbatchfeaturesout_featuresr   r   s                              r=   r   zCrossEncoder.predict  s   Z $((*gll:if>V>V>X\c\i\i>i  ?J:,VWXYY !226:&"**-XFFD)(262::(FV]]_DQWLF 
64 8S[1_-$-- "3%'%++!1"3  !K$ !)!n%%fk: >ZZ&&F		%;););__
JJf'Ud););D)A(A'UV##% $ ? ?@Q R0ABBB!!S%7)ar]rs 	'K!+j0HIE&tuFVFvFH&x8H'4<<;F;L!(+F(&v.q,,44V4C Q6;;?+v&#	'& 46::>O3PQC{3'QQ;#kk+6#ll2f=**Xc%dueiik&8&8&:&@&@&B&H&H&J%deK%a.KI (V C( R &es   -N"2N'3N,AN1c                N   | j                   dk7  rt        d      |D cg c]  }||g }}| j                  ||||||	|
|||||      }g }t        |      D ]5  \  }}|j	                  ||d       |s|d   j                  d||   i       7 t        |d d	      }|d
| S c c}w )a]  
        Performs ranking with the CrossEncoder on the given query and documents. Returns a sorted list with the document indices and scores.

        .. tip::

            Adjusting ``batch_size`` can significantly improve processing speed. The optimal value depends on your
            hardware, model size, precision, and input length. Benchmark a few batch sizes on a small subset of your
            data to find the best value.

        Args:
            query (PairableInput): A single query, e.g. a string, image, or multimodal dict.
            documents (List[PairableInput]): A list of documents, e.g. strings, images, or multimodal dicts.
            top_k (Optional[int], optional): Return the top-k documents. If None, all documents are returned. Defaults to None.
            return_documents (bool, optional): If True, also returns the documents. If False, only returns the indices and scores. Defaults to False.
            prompt_name (Optional[str], optional): The name of the prompt to use for encoding.
            prompt (Optional[str], optional): The prompt to use for encoding.
            batch_size (int, optional): Batch size for encoding. Defaults to 32.
            show_progress_bar (bool, optional): Output progress bar. Defaults to None.
            activation_fn ([type], optional): Activation function applied on the logits output of the CrossEncoder. If None, nn.Sigmoid() will be used if num_labels=1, else nn.Identity. Defaults to None.
            convert_to_numpy (bool, optional): Convert the output to a numpy matrix. Defaults to True.
            apply_softmax (bool, optional): If there are more than 2 dimensions and apply_softmax=True, applies softmax on the logits output. Defaults to False.
            convert_to_tensor (bool, optional): Convert the output to a tensor. Defaults to False.
            device (Union[str, List[str]], optional): Device(s) to use for computation. Can be a single device string
                (e.g., "cuda:0", "cpu") or a list of devices (e.g., ["cuda:0", "cuda:1"]). If a list is provided,
                multiprocessing will be used automatically. Defaults to None.
            pool (Dict[str, Any], optional): A pool of workers created with :meth:`start_multi_process_pool`. If provided,
                multiprocessing will be used. If None and ``device`` is a list, a pool will be created automatically.
                Defaults to None.
            chunk_size (int, optional): Size of chunks for multiprocessing. If None, a sensible default is calculated.
                Only used when ``pool`` is not None or ``device`` is a list. Defaults to None.

        Returns:
            List[Dict[Literal["corpus_id", "score", "text"], Union[int, float, str]]]: A sorted list with the "corpus_id", "score", and optionally "text" of the documents.

        Example:
            ::

                from sentence_transformers import CrossEncoder
                model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

                query = "Who wrote 'To Kill a Mockingbird'?"
                documents = [
                    "'To Kill a Mockingbird' is a novel by Harper Lee published in 1960. It was immediately successful, winning the Pulitzer Prize, and has become a classic of modern American literature.",
                    "The novel 'Moby-Dick' was written by Herman Melville and first published in 1851. It is considered a masterpiece of American literature and deals with complex themes of obsession, revenge, and the conflict between good and evil.",
                    "Harper Lee, an American novelist widely known for her novel 'To Kill a Mockingbird', was born in 1926 in Monroeville, Alabama. She received the Pulitzer Prize for Fiction in 1961.",
                    "Jane Austen was an English novelist known primarily for her six major novels, which interpret, critique and comment upon the British landed gentry at the end of the 18th century.",
                    "The 'Harry Potter' series, which consists of seven fantasy novels written by British author J.K. Rowling, is among the most popular and critically acclaimed books of the modern era.",
                    "'The Great Gatsby', a novel written by American author F. Scott Fitzgerald, was published in 1925. The story is set in the Jazz Age and follows the life of millionaire Jay Gatsby and his pursuit of Daisy Buchanan."
                ]

                model.rank(query, documents, return_documents=True)

            ::

                [{'corpus_id': 0,
                'score': 10.67858,
                'text': "'To Kill a Mockingbird' is a novel by Harper Lee published in 1960. It was immediately successful, winning the Pulitzer Prize, and has become a classic of modern American literature."},
                {'corpus_id': 2,
                'score': 9.761677,
                'text': "Harper Lee, an American novelist widely known for her novel 'To Kill a Mockingbird', was born in 1926 in Monroeville, Alabama. She received the Pulitzer Prize for Fiction in 1961."},
                {'corpus_id': 1,
                'score': -3.3099542,
                'text': "The novel 'Moby-Dick' was written by Herman Melville and first published in 1851. It is considered a masterpiece of American literature and deals with complex themes of obsession, revenge, and the conflict between good and evil."},
                {'corpus_id': 5,
                'score': -4.8989105,
                'text': "'The Great Gatsby', a novel written by American author F. Scott Fitzgerald, was published in 1925. The story is set in the Jazz Age and follows the life of millionaire Jay Gatsby and his pursuit of Daisy Buchanan."},
                {'corpus_id': 4,
                'score': -5.082967,
                'text': "The 'Harry Potter' series, which consists of seven fantasy novels written by British author J.K. Rowling, is among the most popular and critically acclaimed books of the modern era."}]
        rZ   z|CrossEncoder.rank() only works for models with num_labels=1. Consider using CrossEncoder.predict() with input pairs instead.)r   r   r   r   rW   r4   r   rV   rU   r%   r   r   )	corpus_idr   r]   textc                    | d   S )Nr   rd   re   s    r=   rg   z#CrossEncoder.rank.<locals>.<lambda>:  s
    '
 r>   T)rh   reverseN)r2   rN   r   r|   appendupdater   )r;   query	documentstop_kreturn_documentsr   r   r   rW   r4   r   rV   rU   r%   r   r   docquery_doc_pairsr   resultsir   s                         r=   rankzCrossEncoder.rank  s    r ??aR  EN+NSUCL+N+N"#!/''-/!  
 !&) 	;HAuNNU;<""FIaL#9:		; &:DIv1 ,Os   B"c                b   t         t        f}t               r	 ddlm} ||fz  }t        ||      r t        |      dkD  xr t        |d   |       S t        |t        j                        r7|j                  j                  dv r|j                  dk(  ry|j                  dk  S y# t
        $ r Y w xY w)z
        Check if the input represents a single example or a batch of examples.

        Args:
            inputs: The input to check.
        Returns:
            bool: True if the input is a single example, False if it is a batch.
        r   )Column)UOFrk   T)rv   tupler   datasetsr  ImportErrorru   rm   r   r   dtypekindsizer   )r;   r   
list_typesr  s       r=   r   zCrossEncoder.is_singular_input=  s     E]
 "+vi'
 fj)v;?L:fQi+L'LLfbjj)fll.?.?:.M{{a;;?"  s   B" "	B.-B.c                P    t         |          dt        | j                        iz  S r   )r8   _get_model_configr   r4   )r;   r<   s    r=   r  zCrossEncoder._get_model_configW  s-    w(*Xd&8&89.
 
 	
r>   c                t    t         |   |       d|v r$|d   }|| j                  |      }||| _        y y y y r   )r8   _parse_model_configr   r4   )r;   model_configr   r   r<   s       r=   r  z CrossEncoder._parse_model_config\  sW    #L1l*!-o!>!-667IJ')1D& ( . +r>   c           	     l    | j                   j                  }| j                         }d| d| d| d| d	S )Nz## Testing this pull request
You can test this pull request before merging by loading the model from this PR with the `revision` argument:
```python
from sentence_transformers import zO

# NOTE: Update this to the number of your pull request
pr_number = 2
model = z(
    "z5",
    revision=f"refs/pr/{pr_number}",
    backend="a  ",
)

# Verify that everything works as expected
scores = model.predict([("The weather is lovely today.", "It's so sunny outside!")])
print(scores)

rankings = model.rank("The weather is lovely today.", ["It's so sunny outside!", "He drove to the stadium."])
print(rankings)
```

---
*This PR was auto-generated with [`push_to_hub`](https://sbert.net/docs/package_reference/cross_encoder/cross_encoder.html#sentence_transformers.cross_encoder.CrossEncoder.push_to_hub).*
)r<   __name__get_backend)r;   repo_id
class_namer1   s       r=   _push_to_hub_usage_tipz#CrossEncoder._push_to_hub_usage_tipe  sZ    ^^,,
""$# $., /	 
 Y Y  	r>   r   )&r7   r    r$   z4list[nn.Module] | OrderedDict[str, nn.Module] | Noner%   r    r&   zdict[str, str] | Noner'   r    r(   r    r)   boolr*   r    r+   r   r,   bool | str | Noner-   dict | Noner.   r"  r/   r"  r0   z CrossEncoderModelCardData | Noner1   z$Literal['torch', 'onnx', 'openvino']r2   
int | Noner3   r#  r4   Callable | NonereturnNone)NFFNNN)r7   r   r,   r!  r(   r    r*   r    r)   r   r+   r   r-   dict[str, Any] | Noner.   r'  r/   r'  r%  zDtuple[list[nn.Module] | OrderedDict[str, nn.Module], dict[str, Any]])TNNN)
r   list[PairInput]rW   bool | Noner   9dict[Literal['input', 'output', 'processes'], Any] | Noner%   %str | list[str | torch.device] | Noner   r#  )
r   r   r   r   r   r   r   r   r%  r&  )r   r   r%  r$  )r%  r   )r%  r   )r%  r   )r%  int)r   r   r   r   r%  r&  )r   r,  r%  r&  )........NNN)r   r   r   r    r   r    r   r,  rW   r)  r4   r$  r   r)  rV   Literal[False]rU   r-  r%   r+  r   r*  r   r#  r%  torch.Tensor)......TFNNN)r   list[PairInput] | PairInputr   r    r   r    r   r,  rW   r)  r4   r$  r   r)  rV   Literal[True]rU   r-  r%   r+  r   r*  r   r#  r%  z
np.ndarray)r   r/  r   r    r   r    r   r,  rW   r)  r4   r$  r   r)  rV   r   rU   r0  r%   r+  r   r*  r   r#  r%  r.  )r   r(  r   r    r   r    r   r,  rW   r)  r4   r$  r   r)  rV   r-  rU   r-  r%   r+  r   r*  r   r#  r%  zlist[torch.Tensor])NN    NNFTFNNN)r   r/  r   r    r   r    r   r,  rW   r)  r4   r$  r   r)  rV   r   rU   r   r%   r+  r   r*  r   r#  r%  z.list[torch.Tensor] | np.ndarray | torch.Tensor)NFNNr1  NNFTFNNN)r  r   r  zlist[PairableInput]r  r#  r  r   r   r    r   r    r   r,  rW   r)  r4   r$  rV   r   rU   r   r%   r+  r   r*  r   r#  r%  zDlist[dict[Literal['corpus_id', 'score', 'text'], int | float | str]])r   zPairInput | list[PairInput]r%  r   )r%  dict[str, Any])r  r2  r%  r&  )r  r   r%  r   )%r  
__module____qualname____doc__r   model_card_data_classr!   __annotations__ _model_card_model_id_placeholderr   r9   rS   r   staticmethodr   r   r:   propertyrP   r   r2   r   r   r3   setterr   r	   r   r#   inference_moder   r	  r   r  r  r  __classcell__)r<   s   @r=   r   r   #   s   fP 63B$jB'?$& *.<B IM!)-*.#'"'#!&#'$((,%)<@8?!%!%)-+<B&<B F	<B
 <B '<B (<B !<B  <B <B <B !<B "<B &<B #<B  :!<B" 6#<B& '<B( )<B* '+<B, 
-<B '<BF  $"'!&.226/3E'E' !E' !	E'
 E'  E' E' ,E' 0E' -E' 
NE'T *.JN8<!%G3G3 'G3 H	G3
 6G3 G3R $$$ $ 	$
 
$ $L	, $ $    0 st# u # st$ u $ 	."	 
"  #& ),),%(+.,/8<JN!%   	
  ' ' # ) * 6 H  
 "  #& ),),%(*.,18<JN!%+   	
  ' ' # ( * 6 H  
 "  #& ),),%(!$+.8<JN!%+   	
  ' ' #  ) 6 H  
 "  #& ),),%(+.,/8<JN!%!!  ! 	!
 ! '! '! #! )! *! 6! H! ! 
! !" U. #'!)-)-%*!%"'8<JN!%g+g  g 	g
 g 'g 'g #g g  g 6g Hg g 
8g / gR /
 !!&"&!)-)-!%"'8<JN!%!uu 'u 	u
 u  u u u 'u 'u u  u 6u Hu  !u" 
N#u /un4

2r>   r   )6
__future__r   r   ry   r   collectionsr   collections.abcr   multiprocessingr   typingr   r   r	   r   r   r#   r
   tqdm.autonotebookr   transformersr   r   r   r   transformers.utilstransformers_loggingtyping_extensionsr   )sentence_transformers.base.modality_typesr   r    sentence_transformers.base.modelr   "sentence_transformers.base.modulesr   -sentence_transformers.cross_encoder.fit_mixinr   .sentence_transformers.cross_encoder.model_cardr   7sentence_transformers.cross_encoder.modules.logit_scorer   sentence_transformers.utilr   r   r   %sentence_transformers.util.decoratorsr   r   
get_loggerr  r   r   rd   r>   r=   <module>rQ     sz    "    # $ ! ) )    $ ] ] > ( N 6 : B T N T T 
)		(	(	2^9h ^r>   