
    ic                        d dl Z d dlmZ d dlmZmZmZmZmZ d dl	m
Z
mZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ dZ e
       j5                  e      d   ZdZdZdee   deeef   fdZd Z  G d de      Z!d Z"y)    N)islice)AnyCallableDictIterableOptional)ConfigModel   )Errors)Language)Scorer)Examplevalidate_get_examples)Vocab   )TextCategorizeraX  
[model]
@architectures = "spacy.TextCatEnsemble.v2"

[model.tok2vec]
@architectures = "spacy.Tok2Vec.v2"

[model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v2"
width = 64
rows = [2000, 2000, 500, 1000, 500]
attrs = ["NORM", "LOWER", "PREFIX", "SUFFIX", "SHAPE"]
include_static_vectors = false

[model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = ${model.tok2vec.embed.width}
window_size = 1
maxout_pieces = 3
depth = 2

[model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
length = 262144
ngram_size = 1
no_output_layer = false
modelzq
[model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
ngram_size = 1
no_output_layer = false
aa  
[model]
@architectures = "spacy.TextCatReduce.v1"
exclusive_classes = false
use_reduce_first = false
use_reduce_last = false
use_reduce_max = false
use_reduce_mean = true

[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true
examplesreturnc                 4    t        j                  | dfddi|S )Ncatsmulti_labelT)r   
score_cats)r   kwargss     r/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/pipeline/textcat_multilabel.pytextcat_multilabel_scorer   I   s/      	     c                      t         S )N)r    r   r   make_textcat_multilabel_scorerr!   R   s    ##r   c                       e Zd ZdZ	 deddedededede	e
   dd	fd
Zed        Zd	d	dde
g ee   f   de	e   de	ee      fdZdee   fdZy	)MultiLabel_TextCategorizerzlPipeline component for multi-label text classification.

    DOCS: https://spacy.io/api/textcategorizer
    )scorervocabr   name	thresholdr$   r   Nc                t    || _         || _        || _        d| _        g |d}t	        |      | _        || _        y)a  Initialize a text categorizer for multi-label classification.

        vocab (Vocab): The shared vocabulary.
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
        threshold (float): Cutoff to consider a prediction "positive".
        scorer (Optional[Callable]): The scoring method.

        DOCS: https://spacy.io/api/textcategorizer#init
        N)labelsr'   )r%   r   r&   _rehearsal_modeldictcfgr$   )selfr%   r   r&   r'   r$   r,   s          r   __init__z#MultiLabel_TextCategorizer.__init__\   s=    ( 

	 $)49r   c                      y)NTr    )r-   s    r   support_missing_valuesz1MultiLabel_TextCategorizer.support_missing_valuesx   s    r   )nlpr)   get_examplesr1   r)   c                   t        |d       |9 |       D ].  }|j                  j                  D ]  }| j                  |        0 n|D ]  }| j                  |        t	        t         |       d            }| j                  |       |D cg c]  }|j                   }	}| j                  |      \  }
}| j                          t        |	      dkD  s/J t        j                  j                  | j                               t        |
      dkD  s/J t        j                  j                  | j                               | j                  j!                  |	|
       yc c}w )a\  Initialize the pipe for training, using a representative set
        of data examples.

        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
        nlp (Language): The current nlp object the component is part of.
        labels: The labels to add to the component, typically generated by the
            `init labels` command. If no labels are provided, the get_examples
            callback is used to extract the labels from the data.

        DOCS: https://spacy.io/api/textcategorizer#initialize
        z%MultiLabel_TextCategorizer.initializeN
   r   )r&   )XY)r   yr   	add_labellistr   _validate_categories	reference_examples_to_truth_require_labelslenr   E923formatr&   r   
initialize)r-   r2   r1   r)   examplecatlabelsubbatcheg
doc_samplelabel_sample_s               r   rA   z%MultiLabel_TextCategorizer.initialize|   s4   & 	l,ST>'> ("99>> (CNN3'((   &u%&|~r23!!(+-56rbll6
611(;a:"FFKK$6$6DII$6$FF"< 1$Hfkk&8&8dii&8&HH$


l; 7s   E r   c                     |D ]_  }|j                   j                  j                         D ]6  }|dk(  r	|dk(  rt        t        j
                  j                  |             a y)zThis component allows any type of single- or multi-label annotations.
        This method overwrites the more strict one from 'textcat'.g      ?g        )valN)r;   r   values
ValueErrorr   E851r@   )r-   r   exrK   s       r   r:   z/MultiLabel_TextCategorizer._validate_categories   sc      	BB||((//1 Bs
cSj$V[[%7%7C%7%@AAB	Br   )textcat_multilabel)__name__
__module____qualname____doc__r   r   r
   strfloatr   r   r.   propertyr0   r   r   r   rA   r:   r    r   r   r#   r#   V   s     )	 &>  	  " 
8   #'*.#<r8G#445#< h	#<
 #'#<JBXg-> Br   r#   c                 x    | dk(  r!t        j                  d      }|j                  S t        dt         d|        )Nmake_multilabel_textcatzspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_modulerY   AttributeErrorrQ   )r&   modules     r   __getattr__r^      sA    (((()CD---
78*,>tfE
FFr   )#rZ   	itertoolsr   typingr   r   r   r   r   	thinc.apir	   r
   errorsr   languager   r$   r   trainingr   r   r%   r   textcatr   multi_label_default_configfrom_strDEFAULT_MULTI_TEXTCAT_MODELmulti_label_bow_configmulti_label_cnn_configrU   r   r!   r#   r^   r    r   r   <module>rk      s      : : #    5  $ 8 %h//0JKGT   *x'8 tCQTH~ $RB RBlGr   