
    i,)                     >   d dl Z d dlmZmZmZmZmZmZmZ d dl	m
Z
mZmZmZ d dlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ dZ e
       jA                  e      d   Z!d Z"dee   dee#ef   fdZ$dedee%e%f   fdZ& G d de      Z'd Z(y)    N)AnyCallableDictIterableListOptionalTuple)ConfigModel	Optimizerset_dropout_rate)Floats2d   )Errors)Language)Scorer)DocSpan)Example   )DEFAULT_SPANS_KEY)TrainablePipea  
[model]
@architectures = "spacy.SpanFinder.v1"

[model.scorer]
@layers = "spacy.LinearLogistic.v1"
nO = 2

[model.tok2vec]
@architectures = "spacy.Tok2Vec.v2"

[model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v2"
width = 96
rows = [5000, 1000, 2500, 1000]
attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
include_static_vectors = false

[model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = ${model.tok2vec.embed.width}
window_size = 1
maxout_pieces = 3
depth = 4
modelc                      t         S N)span_finder_score     k/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/pipeline/span_finder.pymake_span_finder_scorerr    ,   s    r   examplesreturnc                 R   t        |      }d|d   |j                  d         |j                  dfd       |j                  dfd       |j                  dd	       |j                  d
d       t        j                  | fi |}|j	                  |d    dd        |S )Nspans_	spans_keyattrgetterc                 T    | j                   j                  |t              d  g       S r   )spansgetlen)dockeyattr_prefixs     r   <lambda>z#span_finder_score.<locals>.<lambda>6   s#    399==S5E5G1H"#M r   has_annotationc                      | j                   v S r   )r)   )r,   r-   s    r   r/   z#span_finder_score.<locals>.<lambda>8   s    C3994D r   allow_overlapTlabeledF	_per_type)dict
setdefaultr   score_spanspop)r!   kwargsscoresr.   r-   s      @@r   r   r   0   s    &\FK

C
fcU34
M &(DE
ot,
i'3F3F
JJ&.!+T2Mr   spanc                 d    | d   j                   }| d   j                   t        | d         z   }||fS )Nr   )idxr+   )r;   startends      r   _char_indicesrA   @   s3    GKKE
r(,,T"X
&C#:r   c                   D   e Zd ZdZ	 dedddeddedeee	   e
f   deded	ed
ee   dee   dee   ddfdZdee	   fdZdee	   de
ddfdZdddddee   dedee   deeeef      deeef   f
dZdeee
f   fdZdee
e
f   fdZdddeg ee   f   dee   ddfdZy) 
SpanFinderzUPipeline that learns span boundaries.

    DOCS: https://spacy.io/api/spanfinder
    g      ?N)r%   	threshold
max_length
min_lengthscorernlpr   namer%   rD   rE   rF   rG   r"   c                    |j                   | _         ||dk  s|/|dk  r*t        t        j                  j	                  ||            || _        || _        || _        ||||d| _        y)a  Initialize the span finder.
        model (thinc.api.Model): The Thinc Model powering the pipeline
            component.
        name (str): The component instance name, used to add entries to the
            losses during training.
        threshold (float): Minimum probability to consider a prediction
            positive.
        scorer (Optional[Callable]): The scoring method.
        spans_key (str): Key of the doc.spans dict to save the spans under.
            During initialization and training, the component will look for
            spans on the reference document under the same key.
        max_length (Optional[int]): Maximum length of the produced spans,
            defaults to None meaning unlimited length.
        min_length (Optional[int]): Minimum length of the produced spans,
            defaults to None meaning shortest span length is 1.

        DOCS: https://spacy.io/api/spanfinder#init
        Nr   )rF   rE   )rF   rE   rD   r%   )	vocab
ValueErrorr   E1053formatr   rI   rG   cfg)	selfrH   r   rI   r%   rD   rE   rF   rG   s	            r   __init__zSpanFinder.__init__L   sw    < YY
"zA~"zA~##zj#Q  
	$$""	$
r   docsc                 <    | j                   j                  |      }|S )a  Apply the pipeline's model to a batch of docs, without modifying
        them.

        docs (Iterable[Doc]): The documents to predict.
        RETURNS: The models prediction for each document.

        DOCS: https://spacy.io/api/spanfinder#predict
        )r   predict)rP   rR   r:   s      r   rT   zSpanFinder.predict{   s     ##D)r   r:   c           	         d}t        |      D ]K  \  }}g |j                  | j                  d   <   g }g }|||t        |      z    }t	        ||      D ]f  \  }	}
|
d   | j                  d   k\  r|j                  |	j                         |
d   | j                  d   k\  sL|j                  |	j                         h |D ]  }|D ]  }|dz   |z
  }|dk  r| j                  d   | j                  d   |k  s3| j                  d   || j                  d   k  sU|j                  | j                  d      j                  |||dz             |t        |      z  }N y)a  Modify a batch of Doc objects, using pre-computed scores.
        docs (Iterable[Doc]): The documents to modify.
        scores: The scores to set, produced by SpanFinder predict method.

        DOCS: https://spacy.io/api/spanfinder#set_annotations
        r   r%   rD   r   rF   NrE   )	enumerater)   rO   r+   zipappendi)rP   rR   r:   offsetrY   r,   startsends
doc_scorestokentoken_scorer?   r@   span_lengths                 r   set_annotationszSpanFinder.set_annotations   sr    o 	FAs/1CIIdhh{+,FD#c():;J&)#z&: )"{q>TXXk%::MM%''*q>TXXk%::KK(	)   V VC"%'E/K"Q .688L1[@.6&$((<*@@		$((;"78??ECRSG@TUVV c#hF3	r           )dropsgdlossesr!   rc   rd   re   c                x   |i }|j                  | j                  d       |D cg c]  }|j                   }}t        | j                  |       | j                  j                  |      \  }}| j                  ||      \  }	}
 ||
       || j                  |       || j                  xx   |	z  cc<   |S c c}w )a?  Learn from a batch of documents and gold-standard information,
        updating the pipe's model. Delegates to predict and get_loss.
        examples (Iterable[Example]): A batch of Example objects.
        drop (float): The dropout rate.
        sgd (Optional[thinc.api.Optimizer]): The optimizer.
        losses (Optional[Dict[str, float]]): Optional record of the loss during
            training. Updated using the component name as the key.
        RETURNS (Dict[str, float]): The updated losses dictionary.

        DOCS: https://spacy.io/api/spanfinder#update
        rb   )r6   rI   	predictedr   r   begin_updateget_lossfinish_update)rP   r!   rc   rd   re   egrg   r:   backprop_scoreslossd_scoress              r   updatezSpanFinder.update   s    & >F$))S),45bR\\5	5T*"&**"9"9)"Dx8h!?s#tyyT! 6s   B7c                     | j                  || j                  j                        \  }}|| j                  j                  j                  |      z
  }||z  }t	        |dz  j                               }||fS )ab  Find the loss and gradient of loss for the batch of documents and
        their predicted scores.
        examples (Iterable[Examples]): The batch of examples.
        scores: Scores representing the model's predictions.
        RETURNS (Tuple[float, Floats2d]): The loss and the gradient.

        DOCS: https://spacy.io/api/spanfinder#get_loss
        r   )_get_aligned_truth_scoresr   ops	asarray2ffloatsum)rP   r!   r:   truthsmasksrn   rm   s          r   ri   zSpanFinder.get_loss   sj     66xPDJJNN44V<<Ehk&&()X~r   c                    g }g }|D ]  }|j                   j                  |j                  j                  k7  r)t        t        j
                  j                  d            t        |j                        }|j                  j                  |dfd      }|j                  j                  |dfd      }| j                  d   |j                  j                  v r|j                  j                  | j                  d      D ]  }	t        |	      \  }
}|j                  j!                  |
|d      }t        |      \  }}||
k(  }||k(  }|rd	||d
   j"                  d
f<   nd
||d
   j"                  d
f<   |rd	||d   j"                  d	f<   d
||d   j"                  d	f<    |j%                  |       |j%                  |        |j                  j'                  |d
      }|j                  j'                  |d
      }||fS )z\Align scores of the predictions to the references for calculating
        the loss.
        span_finder)	componentr   float32)dtyper%   expand)alignment_moder   r   r=   )axis)xtextyrL   r   E1054rN   r+   rg   xpzerosonesrO   	referencer)   rA   	char_spanrY   rX   concatenate)rP   r!   rr   rv   rw   rk   n_tokenstruthmaskr;   ref_start_charref_end_char	pred_spanpred_start_charpred_end_charstart_match	end_matchs                    r   rq   z$SpanFinder._get_aligned_truth_scores   s     	BttyyBDDII% !4!4}!4!MNN2<<(HFFLL(AiL@E66;;!}I;>Dxx$(:(::LL..txx/DE 5D3@3F0NL " 6 6&X !7 !I 6C95M2O]"1^"CK - =I"34ilnna/023Yq\^^Q./ 45imooq0134Yr]__a/05  MM% LL1	2 ##F#3""5q"1u}r   )rH   get_examplesc                b   g } |       D ]"  }t        |      dk  s|j                  |       $ |r`|D cg c]  }|j                   }}| j                  || j                  j
                        \  }}| j                  j                  ||       y| j                  j                          yc c}w )a  Initialize the pipe for training, using a representative set
        of data examples.
        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
        nlp (Optional[Language]): The current nlp object the component is part
            of.

        DOCS: https://spacy.io/api/spanfinder#initialize
        
   )XYN)r+   rX   r   rq   r   rr   
initialize)rP   r   rH   subbatchrk   rR   r   _s           r   r   zSpanFinder.initialize   s     #%. 	$B8}r!#	$ +34RBLL4D411(DJJNNKDAqJJ!!DA!.JJ!!#	 5s   B,)ry   )__name__
__module____qualname____doc__r   r   r   r   r   r   r   strrt   r   intr   rQ   rT   ra   r   r   r   ro   r	   ri   rq   r   r   r   r   rC   rC   F   s    "	-
 +$($(%6-
-
 Xc]H,--
 	-
 -
 -
 SM-
 SM-
 "-
 
-
^
HSM 
!HSM !8 ! !N #'-17# 	
 i  c5j)* 
c5j	@E%/,B !%(@R:S !N #'	$r8G#445$ h	$
 
$r   rC   c                 x    | dk(  r!t        j                  d      }|j                  S t        dt         d|        )Nmake_span_finderzspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_moduler   AttributeErrorr   )rI   modules     r   __getattr__r     sA    !!(()CD&&&
78*,>tfE
FFr   ))r   typingr   r   r   r   r   r   r	   	thinc.apir
   r   r   r   thinc.typesr   errorsr   languager   rG   r   tokensr   r   trainingr   spancatr   trainable_piper   span_finder_default_configfrom_strDEFAULT_SPAN_FINDER_MODELr    r   r   r   rA   rC   r   r   r   r   <module>r      s     G G G @ @        & ) 4 #H--.HI'R  1 S#X   sCx P$ P$hGr   