
    #i                    v   d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlm
Z d dlZd dlmZmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3  ejh                  e5      Z6 G d de!      Z7y)    )annotationsN)Callable)Queue)AnyLiteraloverload)Tensornn)trange)
AutoConfigPretrainedConfig)PreTrainedModel)logging)
deprecated)	BaseModel)	TextInput)Transformer)Pooling)SparseEncoderModelCardData)SparseAutoEncoderSpladePooling)batch_to_deviceselect_max_active_dims)deprecated_kwargs)SimilarityFunctionc                      e Zd ZU dZeZdZded<   dddZded<   d	Z	 e
d
      	 d2ddddddddddddddddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d3 fd       Z e
d      	 	 	 	 	 	 	 	 	 	 	 d4	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d5d       Z e
d      	 	 	 	 	 	 	 	 	 	 	 d4	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d5d       Z e
d      	 	 	 	 	 	 	 	 	 	 	 d4	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d5d       Zd6 fdZd7 fdZed8d       Zej&                  	 	 	 	 d9d       Zd:dZed;d       Zed<d       Zed=d       Zed;d       Ze	 	 	 	 	 	 d<d       Ze	 	 d=d       Z	 	 	 	 d>	 	 	 	 	 	 	 	 	 	 	 d?d Ze	 	 	 	 	 	 	 	 	 	 d@d!       ZdAd"Z ed#e$      dAd%       Z	 	 	 	 	 	 dB	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dCd&Z	 	 	 	 	 	 	 dD	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dEd'Z edFd(       Z!edG fd)       Z"e"j&                  dHd*       Z"edI fd+       Z#dJd,Z$edAd-       Z%e%j&                  dKd.       Z%e	 	 	 	 	 	 dLd/       Z&	 d2	 	 	 	 	 dMd0Z'dNd1Z( xZ)S )OSparseEncodera  
    Loads or creates a SparseEncoder model that can be used to map text to sparse embeddings.

    Args:
        model_name_or_path (str, optional): If a filepath on disk, loads the model from that path. Otherwise, tries
            to download a pre-trained SparseEncoder model. If that fails, tries to construct a model from the
            Hugging Face Hub with that name. Defaults to None.
        modules (list[nn.Module], optional): A list of torch modules that are called sequentially. Can be used to
            create custom SparseEncoder models from scratch. Defaults to None.
        device (str, optional): Device (like ``"cuda"``, ``"cpu"``, ``"mps"``, ``"npu"``) that should be used for
            computation. If None, checks if a GPU can be used. Defaults to None.
        prompts (dict[str, str], optional): A dictionary with prompts for the model. The key is the prompt name,
            the value is the prompt text. The prompt text will be prepended before any text to encode. For example:
            ``{"query": "query: ", "passage": "passage: "}``. If a model has saved prompts, you can override
            them by passing your own, or pass ``{"query": "", "document": ""}`` to disable them.
            Defaults to None.
        default_prompt_name (str, optional): The name of the prompt that should be used by default. If not set,
            no prompt will be applied. Defaults to None.
        cache_folder (str, optional): Path to store models. Can also be set by the ``SENTENCE_TRANSFORMERS_HOME``
            environment variable. Defaults to None.
        trust_remote_code (bool, optional): Whether to allow for custom models defined on the Hub in their own
            modeling files. Only set to ``True`` for repositories you trust and in which you have read the code,
            as it will execute code present on the Hub on your local machine. Defaults to False.
        revision (str, optional): The specific model version to use. It can be a branch name, a tag name, or a
            commit id, for a stored model on Hugging Face. Defaults to None.
        local_files_only (bool, optional): Whether to only look at local files (i.e., do not try to download
            the model). Defaults to False.
        token (bool or str, optional): Hugging Face authentication token to download private models.
            Defaults to None.
        model_kwargs (dict[str, Any], optional): Keyword arguments passed to the underlying Hugging Face
            Transformers model via ``AutoModel.from_pretrained``. Particularly useful options include:

            - ``torch_dtype``: Override the default ``torch.dtype`` and load the model under a specific
              dtype. Can be ``torch.float16``, ``torch.bfloat16``, ``torch.float32``, or ``"auto"`` to
              use the dtype from the model's ``config.json``.
            - ``attn_implementation``: The attention implementation to use. For example ``"eager"``,
              ``"sdpa"``, or ``"flash_attention_2"``. If you ``pip install kernels``, then
              ``"flash_attention_2"`` should work without having to install ``flash_attn``. It is
              frequently the fastest option. Defaults to ``"sdpa"`` when available (torch>=2.1.1).
            - ``device_map``: Device map for model parallelism, e.g. ``"auto"``.
            - ``provider``: For ``backend="onnx"``, the ONNX execution provider
              (e.g. ``"CUDAExecutionProvider"``).
            - ``file_name``: For ``backend="onnx"`` or ``"openvino"``, the filename to load
              (e.g. for optimized or quantized models).
            - ``export``: For ``backend="onnx"`` or ``"openvino"``, whether to export the model to the
              backend format. Also set automatically if the exported file doesn't exist.

            See the `PreTrainedModel.from_pretrained
            <https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.from_pretrained>`_
            documentation for more details. Defaults to None.
        processor_kwargs (dict[str, Any], optional): Keyword arguments passed to the Hugging Face Transformers
            processor/tokenizer via ``AutoProcessor.from_pretrained``. See the `AutoTokenizer.from_pretrained
            <https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoTokenizer.from_pretrained>`_
            documentation for more details. Defaults to None.
        config_kwargs (dict[str, Any], optional): Keyword arguments passed to the Hugging Face Transformers
            config via ``AutoConfig.from_pretrained``. See the `AutoConfig.from_pretrained
            <https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoConfig.from_pretrained>`_
            documentation for more details. Defaults to None.
        model_card_data (:class:`~sentence_transformers.sparse_encoder.model_card.SparseEncoderModelCardData`, optional):
            A model card data object that contains information about the model. Used to generate a model card
            when saving the model. If not set, a default model card data object is created. Defaults to None.
        backend (str, optional): The backend to use for inference. Can be ``"torch"`` (default), ``"onnx"``,
            or ``"openvino"``. Defaults to ``"torch"``.
        similarity_fn_name (str or SimilarityFunction, optional): The name of the similarity function to use.
            Valid options are ``"cosine"``, ``"dot"``, ``"euclidean"``, and ``"manhattan"``. If not set, it is
            automatically set to ``"cosine"`` when :attr:`similarity` or :attr:`similarity_pairwise` are first
            accessed. Defaults to None.
        max_active_dims (int, optional): The maximum number of active (non-zero) dimensions in the output of the
            model. ``None`` means no limit, which can be slow or memory-intensive if your model wasn't (yet)
            finetuned to high sparsity. Defaults to None.

    Example:
        ::

            from sentence_transformers import SparseEncoder

            # Load a pre-trained SparseEncoder model
            model = SparseEncoder('naver/splade-cocondenser-ensembledistil')

            # Encode some texts
            sentences = [
                "The weather is lovely today.",
                "It's so sunny outside!",
                "He drove to the stadium.",
            ]
            embeddings = model.encode(sentences)
            print(embeddings.shape)
            # (3, 30522)

            # Get the similarity scores between all sentences
            similarities = model.similarity(embeddings, embeddings)
            print(similarities)
            # tensor([[   35.629,     9.154,     0.098],
            #         [    9.154,    27.478,     0.019],
            #         [    0.098,     0.019,    29.553]])
    zsparse-encoder
str | None default_huggingface_organizationN)querydocumentzdict[str, str | None]_default_promptssparse_encoder_model_idprocessor_kwargs)tokenizer_kwargsFtorch)modulesdevicepromptsdefault_prompt_namecache_foldertrust_remote_coderevisionlocal_files_onlytokenmodel_kwargsr$   config_kwargsmodel_card_databackendsimilarity_fn_namemax_active_dimsc                  || _         t        | 	  |||||||	|
|||||||       |  ||dk  rt        d| d      || _        |C| j
                  j                         D ]%  }t        |t              s|j                  | _         y  y y )N)model_name_or_pathr'   r(   r+   r,   r-   r.   r/   r0   r$   r1   r2   r3   r)   r*   r   z0max_active_dims must be a positive integer, got .)
r4   super__init__
ValueErrorr5   _modulesvalues
isinstancer   k)selfr7   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r$   r1   r2   r3   r4   r5   module	__class__s                      {/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/sentence_transformers/sparse_encoder/model.pyr:   zSparseEncoder.__init__   s    0 #51%/-%-'+ 3 	 	
$ 	&?a+?OP_O``abcc."--..0 f&78+188D( #    inputs)	sentencesc                j    ||d| j                   v rd} | j                  d|||||||||	|
||dd|S )aG  
        Computes embeddings specifically optimized for query representation.

        This method is a specialized version of :meth:`encode` that differs in exactly two ways:

        1. If no ``prompt_name`` or ``prompt`` is provided, it uses a predefined "query" prompt,
           if available in the model's ``prompts`` dictionary.
        2. It sets the ``task`` to "query". If the model has a :class:`~sentence_transformers.base.modules.Router`
           module, it will use the "query" task type to route the input through the appropriate submodules.

        .. tip::

            Adjusting ``batch_size`` can significantly improve processing speed. The optimal value depends on your
            hardware, model size, precision, and input length. Benchmark a few batch sizes on a small subset of your
            data to find the best value.

        All other parameters are identical to :meth:`encode`. See :meth:`encode` for the full parameter documentation.

        Example:
            ::

                from sentence_transformers import SparseEncoder

                # Load a pre-trained SparseEncoder model
                model = SparseEncoder("naver/splade-cocondenser-ensembledistil")

                # Encode some texts
                queries = [
                    "What are the effects of climate change?",
                    "History of artificial intelligence",
                    "Technical specifications product XYZ",
                ]
                embeddings = model.encode_query(queries)
                print(embeddings.shape)
                # (3, 30522)
        r    rE   prompt_nameprompt
batch_sizeshow_progress_barconvert_to_tensorconvert_to_sparse_tensorsave_to_cpur(   r5   pool
chunk_sizetask r)   encode)r@   rE   rI   rJ   rK   rL   rM   rN   rO   r(   r5   rP   rQ   kwargss                 rC   encode_queryzSparseEncoder.encode_query   sh    j 6>g6M!Kt{{ 
#!//%=#+!
 
 	
rD   c                |    ||dD ]  }|| j                   v s|} n  | j                  d|||||||||	|
||dd|S )a  
        Computes embeddings specifically optimized for document/passage representation.

        This method is a specialized version of :meth:`encode` that differs in exactly two ways:

        1. If no ``prompt_name`` or ``prompt`` is provided, it uses the first available prompt from the following
           candidates: ``"document"``, ``"passage"``, ``"corpus"`` (checked in that order).
        2. It sets the ``task`` to "document". If the model has a :class:`~sentence_transformers.base.modules.Router`
           module, it will use the "document" task type to route the input through the appropriate submodules.

        .. tip::

            Adjusting ``batch_size`` can significantly improve processing speed. The optimal value depends on your
            hardware, model size, precision, and input length. Benchmark a few batch sizes on a small subset of your
            data to find the best value.

        All other parameters are identical to :meth:`encode`. See :meth:`encode` for the full parameter documentation.

        Example:
            ::

                from sentence_transformers import SparseEncoder

                # Load a pre-trained SparseEncoder model
                model = SparseEncoder("naver/splade-cocondenser-ensembledistil")

                # Encode some texts
                sentences = [
                    "This research paper discusses the effects of climate change on marine life.",
                    "The article explores the history of artificial intelligence development.",
                    "This document contains technical specifications for the new product line.",
                ]
                embeddings = model.encode_document(sentences)
                print(embeddings.shape)
                # (3, 30522)
        )r!   passagecorpusr!   rH   rS   rT   )r@   rE   rI   rJ   rK   rL   rM   rN   rO   r(   r5   rP   rQ   rV   candidate_prompt_names                  rC   encode_documentzSparseEncoder.encode_document
  s~    j 6>)J %(DLL8"7K
 t{{ 
#!//%=#+!
 
 	
rD   c                   |4t         j                         t        j                  t        j                  fv }|dk  rt        d| d      | j                  |      }|r|g}nEt        |t              s5t        |t        j                        r|j                         n
t        |      }| j                         }t        |      t        |      z
  dhz
  x}rnt        | j                  j                   dt        |       d|r#d| j                  j                   d	| dz         d| j                  j                   d
z         |t        |	t              r6t!        |	      dkD  r( | j"                  d||||	||||||d|
d|}|r|d   }|S | j%                  ||      }|	| j&                  }	| j)                  |	       | j+                          |
|
n| j,                  }
t/        |      }|
|
|d<   g }t        j0                  |D cg c]  }| j3                  |        c}      }| j5                         r| j7                  |      }|D cg c]  }||   	 }}t9        dt!        |      |d|       D ]  }||||z    } | j:                  |fd|i|}t=        ||	      }t?        j@                         5   | jB                  |fi |d   }|
tE        ||
      }ddd       |rjG                         }|rjI                         }|jK                          t        j0                  |      D cg c]  }||   	 }}|rit!        |      dk(  rFt?        jL                  g | j&                        }|r|jG                         }|r&|jI                         }nt?        jN                  |      }|r|d   }|S c c}w c c}w # 1 sw Y   xY wc c}w )a  
        Computes sparse sentence embeddings.

        .. tip::

            If you are unsure whether you should use :meth:`encode`, :meth:`encode_query`, or :meth:`encode_document`,
            your best bet is to use :meth:`encode_query` and :meth:`encode_document` for Information Retrieval tasks
            with clear query and document/passage distinction, and use :meth:`encode` for all other tasks.

            Note that :meth:`encode` is the most general method and can be used for any task, including Information
            Retrieval, and that if the model was not trained with predefined prompts and/or task types, then all three
            methods will return identical embeddings.

        .. tip::

            Adjusting ``batch_size`` can significantly improve processing speed. The optimal value depends on your
            hardware, model size, precision, and input length. Benchmark a few batch sizes on a small subset of your
            data to find the best value.

        Args:
            inputs (Union[str, List[str]]): The texts to embed.
            prompt_name (str, optional): The name of the prompt to use for encoding. Must be a key in the ``prompts``
                dictionary, which is either set in the constructor or loaded from the model configuration. For example if
                ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, then the sentence "What
                is the capital of France?" will be encoded as "query: What is the capital of France?" because the sentence
                is appended to the prompt. If ``prompt`` is also set, this argument is ignored. Defaults to None.
            prompt (str, optional): The prompt to use for encoding. For example, if the prompt is "query: ", then the
                sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?"
                because the sentence is appended to the prompt. If ``prompt`` is set, ``prompt_name`` is ignored.
                Defaults to None.
            batch_size (int, optional): The batch size used for the computation. Defaults to 32.
            show_progress_bar (bool, optional): Whether to output a progress bar when encoding. Defaults to None,
                in which case the progress bar will be shown if the logger's effective level is INFO or DEBUG.
            convert_to_tensor (bool, optional): Whether the output should be a single stacked tensor (True) or a list
                of individual tensors (False). Sparse tensors may be challenging to slice, so this allows you to
                output lists of tensors instead. Defaults to True.
            convert_to_sparse_tensor (bool, optional): Whether the output should be in the format of a sparse (COO) tensor.
                Defaults to True.
            save_to_cpu (bool, optional): Whether the output should be moved to cpu or stay on the device it has been
                computed on. Defaults to False.
            device (str, torch.device, list, or None, optional): Device(s) to use for computation. Can be:

                - A single device string (e.g., "cuda:0", "cpu") for single-process encoding
                - A list of device strings (e.g., ["cuda:0", "cuda:1"], ["cpu", "cpu", "cpu", "cpu"]) to distribute
                  encoding across multiple processes
                - None to auto-detect available device for single-process encoding

                If a list is provided, multi-process encoding will be used. Defaults to None.
            max_active_dims (int, optional): The maximum number of active (non-zero) dimensions in the output of the
                model. ``None`` means the value from the model's config will be used. Defaults to None. If also None in
                the model's config, there will be no limit on the number of active dimensions, which can be slow or
                memory-intensive if your model wasn't (yet) finetuned to high sparsity.
            pool (dict, optional): A pool created by :meth:`start_multi_process_pool` for multi-process encoding.
                If provided, the encoding will be distributed across multiple processes. This is recommended for large
                datasets and when multiple GPUs are available. Defaults to None.
            chunk_size (int, optional): Size of chunks for multi-process encoding. Only used with multiprocessing, i.e.
                when ``pool`` is not None or ``device`` is a list. If None, a sensible default is calculated.
                Defaults to None.

        Returns:
            Union[list[Tensor], Tensor]: By default, a 2d torch sparse tensor with shape [num_inputs, output_dimension]
            is returned. If only one string input is provided, then the output is a 1d tensor with shape
            [output_dimension]. If ``convert_to_tensor`` is False, a list of individual tensors is returned instead.

        Example:
            ::

                from sentence_transformers import SparseEncoder

                # Load a pre-trained SparseEncoder model
                model = SparseEncoder("naver/splade-cocondenser-ensembledistil")

                # Encode some texts
                sentences = [
                    "The weather is lovely today.",
                    "It's so sunny outside!",
                    "He drove to the stadium.",
                ]
                embeddings = model.encode(sentences)
                print(embeddings.shape)
                # (3, 30522)
        Nr   z+batch_size must be a positive integer, got r8   rR   zZ.encode() has been called with additional keyword arguments that this model does not use: z. zAs per zA.get_model_kwargs(), the valid additional keyword arguments are: zQ.get_model_kwargs(), this model does not accept any additional keyword arguments.T)rE   rL   rP   r(   rQ   rI   rJ   rK   rM   rN   rO   r5   r5   BatchesdescdisablerJ   sentence_embedding)r5   )r(   rS   )(loggergetEffectiveLevelr   INFODEBUGr;   is_singular_inputr>   listnpndarraytolistget_model_kwargssetrB   __name__len_multi_process_resolve_promptr(   toevalr5   dictargsort_input_length_can_flatten_inputs_interleave_sorted_indicesr   
preprocessr   r&   inference_modeforwardr   	to_sparsecpuextendtensorstack)r@   rE   rI   rJ   rK   rL   rM   rN   rO   r(   r5   rP   rQ   rV   rg   r0   unused_kwargs
embeddingsforward_kwargsall_embeddingssenlength_sorted_idxidxinputs_sortedstart_indexinputs_batchfeaturess                              rC   rU   zSparseEncoder.encodeV  s/   F $ & 8 8 :? !
 ?J:,VWXYY !226:XFFD)(262::(FV]]_DQWLF ,,.K#l*;;vhFF=F>>**+  ,F  GK  LY  GZ  F[  []  ^ $ dnn5566w  yE  xF  FG  H 
 #4>>#:#:";  <M  N	  
64 8S[1_,,, "3%'%"3)A  /  !J$ !']
%%fk:>[[F		-<-H/dNbNbf&0?N,-JJF'SS););C)@(@'ST##% $ ? ?@Q R0ABBB!!S[*9ZkVkl 	.K({Z7OPL&t|MFMfMH&x8H%%' e)T\\(EnEFZ[
".!7
Tc!dJ	e ('113
'^^-
!!*-!	.$ :<DU9VW#.-WW>"a'!&b!E+%3%=%=%?N%3%7%7%9N!&^!<+A.NM (T Ce e Xs   N-N2/&N7+O7O 	c                >    t         |          d| j                  iz  S Nr4   )r9   _get_model_config_similarity_fn_namer@   rB   s    rC   r   zSparseEncoder._get_model_config&  s(    w(* $":":.
 
 	
rD   c                l    t         |   |       | j                  |j                  dd       | _        y y r   )r9   _parse_model_configr   getr4   )r@   model_configrB   s     rC   r   z!SparseEncoder._parse_model_config+  s7    #L1##+&2&6&67KT&RD# ,rD   c                \    | j                   t        j                  | _        | j                   S )a  Return the name of the similarity function used by :meth:`SparseEncoder.similarity` and :meth:`SparseEncoder.similarity_pairwise`.

        Returns:
            Literal["cosine", "dot", "euclidean", "manhattan"]: The name of the similarity function.
                Defaults to "dot" when first accessed if not explicitly set.

        Example:
            >>> model = SparseEncoder("naver/splade-cocondenser-ensembledistil")
            >>> model.similarity_fn_name
            'dot'
        )r   r   DOTr4   r@   s    rC   r4   z SparseEncoder.similarity_fn_name0  s*     ##+&8&<&<D#'''rD   c                    t        |t              r|j                  }|| _        |5t        j                  |      | _        t        j                  |      | _        y y N)r>   r   valuer   to_similarity_fn_similarityto_similarity_pairwise_fn_similarity_pairwiser@   r   s     rC   r4   z SparseEncoder.similarity_fn_nameA  sR    
 e/0KKE#( 1BB5ID(:(T(TUZ([D% rD   c                D    | D ]  }t        |t              s||_         y y)a8  
        Sets the ``include_prompt`` attribute in the pooling layer in the model, if there is one.

        This is useful for models where the prompt should be excluded from the pooling strategy,
        e.g. CSR models with a :class:`~sentence_transformers.sentence_transformer.modules.Pooling` layer.
        N)r>   r   include_prompt)r@   r   rA   s      rC   set_pooling_include_promptz(SparseEncoder.set_pooling_include_promptN  s(      	F&'*(6%	rD   c                     y r   rS   r@   embeddings1embeddings2s      rC   
similarityzSparseEncoder.similarityZ  s    NQrD   c                     y r   rS   r   s      rC   r   zSparseEncoder.similarity]  s    psrD   c                2    | j                    | j                  S )aQ  
        Compute the similarity between two collections of embeddings. The output will be a matrix with the similarity
        scores between all embeddings from the first parameter and all embeddings from the second parameter. This
        differs from `similarity_pairwise` which computes the similarity between each pair of embeddings.
        This method supports only embeddings with fp32 precision and does not accommodate quantized embeddings.

        Args:
            embeddings1 (Union[Tensor, ndarray]): [num_embeddings_1, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
            embeddings2 (Union[Tensor, ndarray]): [num_embeddings_2, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.

        Returns:
            Tensor: A [num_embeddings_1, num_embeddings_2]-shaped torch tensor with similarity scores.

        Example:
            ::

                >>> model = SparseEncoder("naver/splade-cocondenser-ensembledistil")
                >>> sentences = [
                ...     "The weather is so nice!",
                ...     "It's so sunny outside.",
                ...     "He's driving to the movie theater.",
                ...     "She's going to the cinema.",
                ... ]
                >>> embeddings = model.encode(sentences)
                >>> model.similarity(embeddings, embeddings)
                tensor([[   30.953,    12.871,     0.000,     0.011],
                        [   12.871,    27.505,     0.580,     0.578],
                        [    0.000,     0.580,    36.068,    15.301],
                        [    0.011,     0.578,    15.301,    39.466]])
                >>> model.similarity_fn_name
                "dot"
                >>> model.similarity_fn_name = "cosine"
                >>> model.similarity(embeddings, embeddings)
                tensor([[    1.000,     0.441,     0.000,     0.000],
                        [    0.441,     1.000,     0.018,     0.018],
                        [    0.000,     0.018,     1.000,     0.406],
                        [    0.000,     0.018,     0.406,     1.000]])
        )r4   r   r   s    rC   r   zSparseEncoder.similarity`  s    R 	rD   c                     y r   rS   r   s      rC   similarity_pairwisez!SparseEncoder.similarity_pairwise  s    WZrD   c                     y r   rS   r   s      rC   r   z!SparseEncoder.similarity_pairwise  s     rD   c                2    | j                    | j                  S )a  
        Compute the similarity between two collections of embeddings. The output will be a vector with the similarity
        scores between each pair of embeddings.
        This method supports only embeddings with fp32 precision and does not accommodate quantized embeddings.

        Args:
            embeddings1 (Union[Tensor, ndarray]): [num_embeddings, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.
            embeddings2 (Union[Tensor, ndarray]): [num_embeddings, embedding_dim] or [embedding_dim]-shaped numpy array or torch tensor.

        Returns:
            Tensor: A [num_embeddings]-shaped torch tensor with pairwise similarity scores.

        Example:
            ::

                >>> model = SparseEncoder("naver/splade-cocondenser-ensembledistil")
                >>> sentences = [
                ...     "The weather is so nice!",
                ...     "It's so sunny outside.",
                ...     "He's driving to the movie theater.",
                ...     "She's going to the cinema.",
                ... ]
                >>> embeddings = model.encode(sentences, convert_to_sparse_tensor=False)
                >>> model.similarity_pairwise(embeddings[::2], embeddings[1::2])
                tensor([12.871, 15.301])
                >>> model.similarity_fn_name
                "dot"
                >>> model.similarity_fn_name = "cosine"
                >>> model.similarity_pairwise(embeddings[::2], embeddings[1::2])
                tensor([0.441, 0.406])
        )r4   r   r   s    rC   r   z!SparseEncoder.similarity_pairwise  s    H 	(((rD   c           	        |j                  dd      }d|d<   d}|#t        |t              r| j                  |      }d}	 |Ft	        t        j                  t        |      t        |d         z  dz        d      }t        |d      }|d	   }	|d
   }
|r!t        j                  t        |      |z        nd}t        |      D ]#  }||z  }||||z    }|	j                  |||g       % t        t        |d|       D cg c]  }|
j                          c}d       }|D ]  }t        |d   t              s|d    |D cg c]  }|d   	 }}|rt        |d   t              r)t        t        j                  j!                  |            }n~t        |d   t"        j$                        rt#        j&                  |      }nKt        |d   t(        j*                        r.t)        j,                  |d      }n|rt#        j$                         }||r| j/                  |       S S c c}w c c}w # |r| j/                  |       w w xY w)a  Internal method for multi-process encoding.

        Distributes encoding across multiple processes using the provided pool or list of devices.
        If a pool is not provided but ``device`` is a list, a pool is created and cleaned up automatically.
        rM   FrL   T	processes
   i     inputoutputr   Chunksr_   c                    | d   S )Nr   rS   )xs    rC   <lambda>z.SparseEncoder._multi_process.<locals>.<lambda>  s
    ad rD   )key)axis)r   r>   rh   start_multi_process_poolminmathceilro   maxrangeputsortedr   	Exception	itertoolschainfrom_iterabler&   r	   catri   rj   concatenatestop_multi_process_pool)r@   rE   rL   rP   r(   rQ   encode_kwargsrM   created_poolinput_queueoutput_queue
num_chunkschunk_idchunk_startchunk_output_listr   r   s                      rC   rp   zSparseEncoder._multi_process  s2    *--.A5I-2)* <Jvt4008DL'	3! 3v;T+=N9O+ORT+T!UW[\
 Q/
7;G}K8<XL@F3v;#;<AJ!*- B&3{[:-EF5- @AB
 !-3JX[lWl-mn!!#n"K & $fQi3 )O$
 3>>&)>J>jmT2!%ioo&C&CJ&O!PJ
1u||<!&:!6J
1rzz:!#
!CJ""\\^
 ,,T2 / o ? ,,T2 s2    B>H, >H""H, 8
H, H'B?H, "
H, ,Ic                   	 	 |j                         \  }}} |j                  |fd| i|}t        |t        j                        r)|j
                  j                  dk7  r|j                         }|j                  ||g       # t        j                  $ r Y yt        $ rI}t        j                  d|  d|        	 |j                  |g       n# t        $ r Y nw xY wY d}~yd}~ww xY w)zInternal working process to encode sentences in multi-process setup.

        Workers are terminated externally via ``stop_multi_process_pool``.
        r(   r}   zError in worker process on z: N)r   rU   r>   r&   r	   r(   typer}   r   queueEmptyr   rc   error)	target_devicemodelr   results_queuer   rE   rV   r   es	            rC   _multi_process_workerz#SparseEncoder._multi_process_worker  s     +6??+<(&&)U\\&QQ&Q
j%,,7J<M<M<R<RV[<[!+!1J!!8Z"89  ;;  :=/A3OP!%%xm4  sB   A?B C)C) C$<CC$	CC$CC$$C)c                    d}t        | j                  j                               D ]/  }dD ]#  }t        ||d      }t	        |      s |       } n |. |S  |S )a  
        Returns the number of dimensions in the output of :meth:`SparseEncoder.encode`.

        Unlike :class:`~sentence_transformers.sentence_transformer.model.SentenceTransformer`, sparse encoders do not support ``truncate_dim``,
        so this returns the raw output dimension from the last module in the pipeline.

        Returns:
            int or None: The number of dimensions in the output of ``encode``. If it's not known, it's ``None``.
        N)get_embedding_dimension get_sentence_embedding_dimensionget_word_embedding_dimension)reversedr<   r=   getattrcallable)r@   
output_dimrA   namemethods        rC   r   z%SparseEncoder.get_embedding_dimension  sv     
t}}3356 	F 
 !t4F#!'J %	 rD   z\The `get_sentence_embedding_dimension` method has been renamed to `get_embedding_dimension`.)categoryc                "    | j                         S r   )r   r   s    rC   r   z.SparseEncoder.get_sentence_embedding_dimension.  s    
 ++--rD   c
           	        ||||d}
i |
|xs i }i |
|xs i }i |
|	xs i }	t        j                  |fd|i|	}t        d t        |dd      xs g D              }|rBt	        |d||||	| j
                        }t        j                  d       t        d	
      }||g}nt        j                  d       t	        |d||||	| j
                        }t        |j                         d      }t        |j                         d|j                         z  dd      }|||g}|s| j                  j                  ||       |i fS )a  
        Creates a simple transformer-based model and returns the modules.
        For models with a ForMaskedLM architecture, uses SpladePooling with 'max' strategy.
        For regular Transformers, uses a CSR implementation (Pooling + SparseAutoEncoder) by default.

        Args:
            model_name_or_path (str): The name or path of the pre-trained model.
            token (Optional[Union[bool, str]]): The token to use for the model.
            cache_folder (Optional[str]): The folder to cache the model.
            revision (Optional[str], optional): The revision of the model. Defaults to None.
            trust_remote_code (bool, optional): Whether to trust remote code. Defaults to False.
            local_files_only (bool, optional): Whether to use only local files. Defaults to False.
            model_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the model. Defaults to None.
            processor_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the processor/tokenizer. Defaults to None.
            config_kwargs (Optional[Dict[str, Any]], optional): Additional keyword arguments for the config. Defaults to None.
            has_modules (bool, optional): Whether the model has modules.json. Defaults to False.

        Returns:
            tuple[list[nn.Module], dict[str, Any]]: The modules and an empty kwargs dict.
        r/   r,   r-   r.   	cache_dirc              3  >   K   | ]  }|j                  d         yw)ForMaskedLMN)endswith).0archs     rC   	<genexpr>z6SparseEncoder._load_default_modules.<locals>.<genexpr>c  s     qD4==7qs   architecturesNz	fill-mask)transformer_taskr   r0   r$   r1   r3   z.Detected MLM architecture, using SpladePoolingr   )pooling_strategyz`No MLM architecture detected, using default Transformer + mean Pooling + SparseAutoEncoder (CSR)zfeature-extractionmean)pooling_mode      i   	input_dim
hidden_dimr?   k_aux)r-   )r   from_pretrainedanyr   r   r3   rc   infor   r   r   r   r2   set_base_model)r@   r7   r/   r+   r-   r,   r.   r0   r$   r1   shared_kwargsconfigis_mlm_modeltransformer_modelpooling_modelr'   poolingsaes                     rC   _load_default_modulesz#SparseEncoder._load_default_modules5  s   B !2  0	
 A-@L,>B@HmH0@0FBHB=B]-@bB#-#=#=$
*6$
:G$
 qGFTceiDjDpnpqq +"!,&)!1+! KKHI)5AM(-8G KKr !,"!5&)!1+! /GGIX^_G#!99;w>>@@	C )'37G  //0BX/V{rD   c                    |
dk7  r| j                  |||||||||		      S ||||d}i ||xs i }i ||xs i }i ||	xs i }	t        j                  d       | j                  |||||||||		      \  }| _        t        |j                               }d}t        |      D ]   }t        |d      s|j                         } n |t        d      t        |d|z  |dz  |d	z  
      }|j                  |       d| _        || j                  fS )a)  Converts a non-SparseEncoder model into a SparseEncoder by appending a SparseAutoEncoder.

        If ``model_type`` is ``"SentenceTransformer"``, loads the SentenceTransformer modules and appends a
        SparseAutoEncoder on top. Otherwise, falls back to :meth:`_load_default_modules`.
        SentenceTransformer)r/   r+   r-   r,   r.   r0   r$   r1   r   zWSentenceTransformer model found, appending SparseAutoEncoder on top to form a CSR modelNr   zCannot determine the embedding dimension from the loaded modules. At least one module must have a `get_embedding_dimension` method.r      r   )r  rc   r   _load_config_modulesmodule_kwargsrh   r=   r   hasattrr   r;   r   append_model_card_text)r@   r7   r/   r+   r-   r,   r.   r0   r$   r1   
model_typer  r'   r   rA   r  s                   rC   _load_converted_modulesz%SparseEncoder._load_converted_modules  s   $ ..--")!"3!1)!1+ . 
 
 !2  0	
 A-@L,>B@HmH0@0FBHB=B]-@bBmn&*&?&?%/-%-' '@ 
'
## w~~'(
w' 	Fv89#;;=
	 T    :~Ao/	
 	s $****rD   c                   t        | t        j                        st        d      | j                  dvrt        d| j                   d      | j                  dk(  r| j                  d      } | j                  \  }}|dk(  s|dk(  rddd	S | j                         } | j                         }|dd
 |d
d z
  }t        j                  |j                               j                         }d||z  z
  }||d	S )a  
        Calculate sparsity statistics for the given embeddings, including the mean number of active
        (non-zero) dimensions and the mean sparsity ratio.

        For a single embedding (1D), the values are for that embedding directly. For a batch of embeddings
        (2D), they are averaged across the batch.

        Args:
            embeddings (torch.Tensor): The embeddings to analyze. Must be a 1D or 2D tensor.

        Returns:
            dict[str, float]: Dictionary with ``"active_dims"`` (mean active dimensions) and
                ``"sparsity_ratio"`` (mean sparsity ratio).

        Example:
            ::

                from sentence_transformers import SparseEncoder

                model = SparseEncoder("naver/splade-cocondenser-ensembledistil")
                embeddings = model.encode(["The weather is so nice!", "It's so sunny outside."])
                stats = model.sparsity(embeddings)
                print(stats)
                # => {'active_dims': 44.0, 'sparsity_ratio': 0.9985584020614624}
        z!Embeddings must be a torch.Tensor)r   r  z Expected a 1D or 2D tensor, got D.r   r   g        g      ?)active_dimssparsity_ratioN)r>   r&   r	   	TypeErrorndimr;   	unsqueezeshapeto_sparse_csrcrow_indicesr   floatitem)r   num_rowsnum_colsr  non_zero_per_rowmean_active_dimsmean_sparsity_ratios          rC   sparsityzSparseEncoder.sparsity  s   6 *ell3?@@??&(?
?PPRSTT ??a#--a0J'--(q=HM""%   --/
!..0'+l3B.?? ::&6&<&<&>?DDF!%5%@A ,1
 	
rD   c                    t         |   S )a  
        Returns the maximal input sequence length for the model. Longer inputs will be truncated.

        Returns:
            int: The maximal input sequence length.

        Example:
            ::

                from sentence_transformers import SparseEncoder

                model = SparseEncoder("naver/splade-cocondenser-ensembledistil")
                print(model.max_seq_length)
                # => 512
        )r9   max_seq_lengthr   s    rC   r'  zSparseEncoder.max_seq_length  s    " w%%rD   c                    || d   _         y)zs
        Property to set the maximal input sequence length for the model. Longer inputs will be truncated.
        r   N)r'  r   s     rC   r'  zSparseEncoder.max_seq_length'  s     "'QrD   c                    t         |   S )a/  
        Property to get the underlying transformers PreTrainedModel instance, if it exists.
        Note that it's possible for a model to have multiple underlying transformers models, but this property
        will return the first one it finds in the module hierarchy.

        Returns:
            PreTrainedModel or None: The underlying transformers model or None if not found.

        Example:
            ::

                from sentence_transformers import SparseEncoder

                model = SparseEncoder("naver/splade-v3")

                # You can now access the underlying transformers model
                transformers_model = model.transformers_model
                print(type(transformers_model))
                # => <class 'transformers.models.bert.modeling_bert.BertForMaskedLM'>
        )r9   transformers_modelr   s    rC   r*  z SparseEncoder.transformers_model/  s    , w))rD   c                j    | j                   j                         D ]  }t        |t              s|c S  y)zVReturns the SpladePooling module if present, or None. Only searches top-level modules.N)r<   r=   r>   r   )r@   rA   s     rC   _get_splade_poolingz!SparseEncoder._get_splade_poolingG  s2    mm**, 	F&-0	 rD   c                j    | j                         }||j                  S t        j                  d       y)a  
        Returns the chunk size of the SpladePooling module, if present.

        This chunk size is along the sequence length dimension (i.e., number of tokens per chunk).
        If None, processes the entire sequence at once. Using smaller chunks reduces memory usage but may
        lower training and inference speed. Default is None.

        This property is only meaningful for SPLADE-architecture models. For CSR-architecture models
        (Transformer + Pooling + SparseAutoEncoder), it returns None.

        Returns:
            int or None: The chunk size, or None if SpladePooling is not found or chunk_size is not set.
        Nz6SpladePooling module not found. Cannot get chunk_size.r,  rQ   rc   warning)r@   splade_poolings     rC   splade_pooling_chunk_sizez'SparseEncoder.splade_pooling_chunk_sizeN  s4     113%!,,,OPrD   c                b    | j                         }|||_        yt        j                  d       y)zN
        Sets the chunk size of the SpladePooling module, if present.
        Nz6SpladePooling module not found. Cannot set chunk_size.r.  )r@   r   r0  s      rC   r1  z'SparseEncoder.splade_pooling_chunk_sizec  s-    
 113%(-N%NNSTrD   c                "   | j                   s| j                         } |j                   s|j                         }| j                  dk7  rt        d| j                   d      | j                  d   |j                  d   k7  r,t        d| j                  d    d|j                  d    d      |j                  dk(  r| |z  }nO|j                  dk(  r't        j                  |D cg c]  }| |z  	 c}      }nt        d	|j                   d      |j                         }|j                         d
kD  }t        j                  |j                         dd|f   |j                         |   |j                         |j                        }|S c c}w )a  
        Compute the intersection of two sparse embeddings via element-wise multiplication.

        For each dimension, the result retains the minimum contribution from both embeddings, keeping only
        dimensions where both inputs are positive (i.e., shared active dimensions). This is useful for
        token-level matching and interpretability when combined with :meth:`decode`.

        Args:
            embeddings_1 (torch.Tensor): First embedding tensor of shape ``(vocab_size,)``.
            embeddings_2 (torch.Tensor): Second embedding tensor of shape ``(vocab_size,)`` or
                ``(batch_size, vocab_size)``.

        Returns:
            torch.Tensor: Sparse intersection tensor with the same shape as ``embeddings_2``.

        Example:
            ::

                from sentence_transformers import SparseEncoder

                model = SparseEncoder("naver/splade-cocondenser-ensembledistil")
                query_emb = model.encode_query("What is AI?")
                doc_emb = model.encode_document("Artificial intelligence is a branch of computer science.")
                shared = model.intersection(query_emb, doc_emb)
                print(model.decode(shared, top_k=5))
        r   z-Expected 1D tensor for embeddings_1, but got z shape.r  z+Vocab dimension mismatch: embeddings_1 has z, embeddings_2 has r8   r  z3Expected 1D or 2D tensor for embeddings_2, but got r   N)sizer(   )	is_sparser|   r  r;   r  r&   r   coalescer=   sparse_coo_tensorindicesr4  r(   )embeddings_1embeddings_2intersection	embeddingr  s        rC   r;  zSparseEncoder.intersectionn  s   > %%'113L%%'113L!L\M_M_L``ghiib!\%7%7%;;=l>P>PQS>T=U V$$0$6$6r$:#;1> 
 !',6L!# ;;R^'_Yy(@'_`LRS_SeSeRffmnoo $,,."))+a/..  "1k>2!+.""$&&	
  (`s   #Fc           
        ||dk  rt        d| d      t        |t        j                        st	        dt        |             |j                  dk(  }|r|j                  d      }n(|j                  dk7  rt        d|j                   d      |j                  s|j                         }|j                         }|j                         }|j                         }|j                         dk(  r0t        |j                  d            D cg c]  }g  }}|r|d   S |S |d   |d   }	}t        j                   ||j                  d      	      j#                         }
g }d}|
D ]  }|dk(  r|j%                  g        ||||z    }|	|||z    }|t'        ||      n|}||k  r!t        j(                  ||      \  }}||   }|}n!t        j*                  |d
      }||   }||   }| j,                  j/                  |j#                               }|j%                  t1        t3        ||j#                                            ||z  } |r|d   S |S c c}w )a  
        Decode a sparse embedding into (token, weight) pairs sorted by descending weight.

        Args:
            embeddings (torch.Tensor): Sparse embedding tensor of shape ``(vocab_size,)``
                for a single embedding or ``(batch_size, vocab_size)`` for a batch.
            top_k (int, optional): Maximum number of top-weighted tokens to return per sample.
                If ``None``, all non-zero tokens are returned. Must be positive. Defaults to ``None``.

        Returns:
            list[tuple[str, float]]: If the input is 1D, a list of ``(token, weight)`` tuples.
            list[list[tuple[str, float]]]: If the input is 2D, a list (one per sample)
                of lists of ``(token, weight)`` tuples.
        r   z&top_k must be a positive integer, got r8   zExpected torch.Tensor, got r   r  z#Input tensor must be 1D or 2D, got r  )	minlengthT)
descending)r;   r>   r&   r	   r  r   r  r  r5  r|   r6  r8  r=   numelr   r4  bincountrk   r  r   topkru   	tokenizerconvert_ids_to_tokensrh   zip)r@   r   top_kwas_1dr8  r=   r   resultssample_indicestoken_indicessample_counts	start_idxcountsample_valuessample_tokenseffective_k
top_valuestop_idx
sorted_idx
token_strss                       rC   decodezSparseEncoder.decode  sc   " !EeWANOO*ell39$z:J9KLMM A%#--a0J__!B:??BSSUVWW ###--/J((*
$$&""$<<>QBG
XYHZB[5\Qb5\G5\!'71:4W4(/
GAJ~QRAST[[]	" 	Ezr""9y5/@AM))i%6GHM/4/@#eU+eKU"&+jj&L#
G -g 6 *"]]=TJ
 -j 9 -j 9==m>R>R>TUJNN4J0D0D0F GHII+	. $wqz00? 6]s   	Ic           	     l    | j                   j                  }| j                         }d| d| d| d| d	S )Nz## Testing this pull request
You can test this pull request before merging by loading the model from this PR with the `revision` argument:
```python
from sentence_transformers import zO

# NOTE: Update this to the number of your pull request
pr_number = 2
model = z(
    "z5",
    revision=f"refs/pr/{pr_number}",
    backend="a  ",
)

# Verify that everything works as expected
embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
print(embeddings.shape)

similarities = model.similarity(embeddings, embeddings)
print(similarities)
```

---
*This PR was auto-generated with [`push_to_hub`](https://sbert.net/docs/package_reference/sparse_encoder/SparseEncoder.html#sentence_transformers.sparse_encoder.SparseEncoder.push_to_hub).*
)rB   rn   get_backend)r@   repo_id
class_namer3   s       rC   _push_to_hub_usage_tipz$SparseEncoder._push_to_hub_usage_tip  sZ    ^^,,
""$# $., /	 
 Y Y  	rD   r   )$r7   r   r'   zlist[nn.Module] | Noner(   r   r)   zdict[str, str] | Noner*   r   r+   r   r,   boolr-   r   r.   r[  r/   bool | str | Noner0   dict[str, Any] | Noner$   r]  r1   r]  r2   z!SparseEncoderModelCardData | Noner3   z$Literal['torch', 'onnx', 'openvino']r4   zstr | SimilarityFunction | Noner5   
int | NonereturnNone)NN    NTTFNNNN)rE   zlist[TextInput] | TextInputrI   r   rJ   r   rK   intrL   bool | NonerM   r[  rN   r[  rO   r[  r(   4str | torch.device | list[str | torch.device] | Noner5   r^  rP   9dict[Literal['input', 'output', 'processes'], Any] | NonerQ   r^  rV   r   r_  list[Tensor] | Tensor)r_  dict[str, Any])r   rg  r_  r`  )r_  z2Literal['cosine', 'dot', 'euclidean', 'manhattan'])r   zNLiteral['cosine', 'dot', 'euclidean', 'manhattan'] | SimilarityFunction | Noner_  r`  )r   r[  r_  r`  )r   r	   r   r	   r_  r	   )r   npt.NDArray[np.float32]r   rh  r_  r	   )r_  zVCallable[[Tensor | npt.NDArray[np.float32], Tensor | npt.NDArray[np.float32]], Tensor])TNNN)rE   zlist[TextInput]rL   rc  rP   re  r(   rd  rQ   r^  r_  rf  )
r   strr   r   r   r   r   r   r_  r`  )r_  r^  )NFFNNN)r7   ri  r/   r\  r+   r   r-   r   r,   r[  r.   r[  r0   r]  r$   r]  r1   r]  r_  &tuple[list[nn.Module], dict[str, Any]])NFFNNNN)r7   ri  r/   r\  r+   r   r-   r   r,   r[  r.   r[  r0   r]  r$   r]  r1   r]  r  r   r_  rj  )r   torch.Tensorr_  zdict[str, float])r_  rb  )r   rb  r_  r`  )r_  zPreTrainedModel | None)r_  zSpladePooling | None)r   r^  r_  r`  )r9  rk  r:  rk  r_  r	   )r   rk  rF  r^  r_  z7list[tuple[str, float]] | list[list[tuple[str, float]]])rX  ri  r_  ri  )*rn   
__module____qualname____doc__r   model_card_data_classr   __annotations__r"    _model_card_model_id_placeholderr   r:   rW   r\   rU   r   r   propertyr4   setterr   r   r   r   rp   staticmethodr   r   r   FutureWarningr   r  r  r%  r'  r*  r,  r1  r;  rU  rZ  __classcell__)rB   s   @rC   r   r   #   sV   _B 73C$jC8<$.O+O'@$(:; *.4 +/!)-*.#'"'#!&#'.226/3=A8?>B&*)4&4 (	4
 4 '4 (4 !4  4 4 4 !4 ,4 04 -4  ;!4" 6#4& <'4( $)4* 
+4 <4l * #'!)-"&)-!GK&*JN!%F
+F
  F
 	F

 F
 'F
  F
 #'F
 F
 EF
 $F
 HF
 F
 F
 
F
 +F
P * #'!)-"&)-!GK&*JN!%I
+I
  I
 	I

 I
 'I
  I
 #'I
 I
 EI
 $I
 HI
 I
 I
 
I
 +I
V * #'!)-"&)-!GK&*JN!%M+M  M 	M
 M 'M  M #'M M EM $M HM M M 
M +M^

S
 ( (  
\]
\ 

\ 
\
 Q Qs s)  ) V Z Z2AX	  $)	_$) $)R *.JNGK!%>3>3 '>3 H	>3
 E>3 >3 
>3@ #0?DUZ	 22 f.	.  $"'!&.226/3WW !W !	W
 W  W W ,W 0W -W 
0W|  $"'!&.226/3!%J+J+ !J+ !	J+
 J+  J+ J+ ,J+ 0J+ -J+ J+ 
0J+X 7
 7
r & &$ ' ' * *.  ( %%U &U >">"> 
> >B =AF1&F1/9F1	@F1PrD   r   )8
__future__r   r   r   r   r   collections.abcr   multiprocessingr   typingr   r   r   numpyri   numpy.typingnptr&   r	   r
   tqdmr   transformersr   r   transformers.modeling_utilsr   transformers.utilstransformers_loggingtyping_extensionsr   sentence_transformers.baser   )sentence_transformers.base.modality_typesr   "sentence_transformers.base.modulesr   2sentence_transformers.sentence_transformer.modulesr   /sentence_transformers.sparse_encoder.model_cardr   ,sentence_transformers.sparse_encoder.modulesr   r   sentence_transformers.utilr   r   %sentence_transformers.util.decoratorsr   %sentence_transformers.util.similarityr   
get_loggerrn   rc   r   rS   rD   rC   <module>r     sy    "     $ ! ) )      5 7 > ( 0 ? : F V Y N C D 
)		(	(	2pI prD   