
    i6^                     v   d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZ d dlZd dlmZmZmZmZmZ d dlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z'm(Z( ddlm)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ dZ0dZ1 e       je                  e1      d   Z3d Z4d Z5 G d de/      Z6d Z7y)    N)islice)Path)AnyCallableDictIterableListOptionalUnion)ConfigCosineDistanceModel	Optimizerset_dropout_rate)Floats2d   )util)Errors)	CandidateKnowledgeBase)Language)Scorer)DocSpan)Examplevalidate_examplesvalidate_get_examples)SimpleFrozenList)Vocab   )deserialize_config)TrainablePipeTz
[model]
@architectures = "spacy.EntityLinker.v2"

[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 2
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true
modelc                 P    t        j                  | fdt        j                  gi|S )Nnegative_labels)r   score_linksEntityLinkerNIL)exampleskwargss     m/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/pipeline/entity_linker.pyentity_linker_scorer,   +   s&    hU9I9I8JUfUU    c                      t         S N)r,    r-   r+   make_entity_linker_scorerr1   /   s    r-   c            $          e Zd ZdZdZ	 d7eedddedede	de
e	   d	ed
edededeeege
e   f   deee
e   ge
e
e      f   deeegef   dedee   dededee   ddf"dZde
e   de
e   fdZdeegef   fdZd8dZddddeg e
e   f   dee   deeegef      fdZd  Zd!ddd"de
e   d#ed$ee   d%eee	ef      dee	ef   f
d&Zde
e   d'efd(Zd)e
e    de!e	   fd*Z"d)e
e    d+e!e	   ddfd,Z# e$       d-d.Z% e$       d-d/Z& e'       d-d0e(e	e)f   d1e
e	   ddfd2Z* e'       d-d0e(e	e)f   d1e
e	   dd fd3Z+ddd4d5Z,d6 Z-y)9r'   z^Pipeline component for named entity linking.

    DOCS: https://spacy.io/api/entitylinker
    r(   N)	overwritescorer	thresholdvocabr#   namelabels_discardn_sents
incl_priorincl_contextentity_vector_lengthget_candidatesget_candidates_batchgenerate_empty_kbr3   r4   use_gold_entscandidates_batch_sizer5   returnc                    |8d|cxk  rdk  s-n t        t        j                  j                  dd|            | _        | _        | _        t        |       _        | _	        | _
        | _        |	 _        |
 _        d|i _        t        d       _         | j                  |       _        | _        | _        | _        |dk  rt        t        j*                        dt,        t.           f fd	}| _        y)
aU  Initialize an entity linker.

        vocab (Vocab): The shared vocabulary.
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
        labels_discard (Iterable[str]): NER labels that will automatically get a "NIL" prediction.
        n_sents (int): The number of neighbouring sentences to take into account.
        incl_prior (bool): Whether or not to include prior probabilities from the KB in the model.
        incl_context (bool): Whether or not to include the local context in the model.
        entity_vector_length (int): Size of encoding vectors in the KB.
        get_candidates (Callable[[KnowledgeBase, Span], Iterable[Candidate]]): Function that
            produces a list of candidates, given a certain knowledge base and a textual mention.
        get_candidates_batch (
            Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]],
            Iterable[Candidate]]
            ): Function that produces a list of candidates, given a certain knowledge base and several textual mentions.
        generate_empty_kb (Callable[[Vocab, int], KnowledgeBase]): Callable returning empty KnowledgeBase.
        scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_links.
        use_gold_ents (bool): Whether to copy entities from gold docs or not. If false, another
            component must provide entity annotations.
        candidates_batch_size (int): Size of batches for entity candidate generation.
        threshold (Optional[float]): Confidence threshold for entity predictions. If confidence is below the
            threshold, prediction is discarded. If None, predictions are not filtered by any threshold.
        DOCS: https://spacy.io/api/entitylinker#init
        Nr   r    )range_start	range_endvaluer3   F)	normalizer)   c                     sS j                   s	 | fi |S j                  |       } j                  d | D              }t        | |      D ]  \  }}||_          | fi |S )Nc              3   4   K   | ]  }|j                     y wr/   )	predicted).0egs     r+   	<genexpr>zFEntityLinker.__init__.<locals>._score_with_ents_set.<locals>.<genexpr>   s     5bR\\5s   )r@   _ensure_entspipeziprJ   )r)   r*   docsrL   docr4   selfs        r+   _score_with_ents_setz3EntityLinker.__init__.<locals>._score_with_ents_set   s     %%h1&11,,X6yy5H5  #8T2 'GB#&BL'h1&11r-   )
ValueErrorr   E1043formatr6   r#   r7   listr8   r9   r:   r;   r=   r>   cfgr   distancekbr@   rA   r5   E1044r   r   r4   )rS   r6   r#   r7   r8   r9   r:   r;   r<   r=   r>   r?   r3   r4   r@   rA   r5   rT   s   `            `    r+   __init__zEntityLinker.__init__;   s   b  !y*=A*=## !# $   

	">2$(,$8!$/#;&7#DJJ0DE*%:"" 1$V\\**	28G+< 	2  +r-   r)   c                     | j                   s|S g }|D ]G  }|j                         \  }}|j                         }||j                  _        |j                  |       I |S )zLIf use_gold_ents is true, set the gold entities to (a copy of) eg.predicted.)r@   get_aligned_ents_and_nercopyrJ   entsappend)rS   r)   new_examplesrL   ra   _new_egs          r+   rN   zEntityLinker._ensure_ents   se    !!O 	(B113GD!WWYF$(F!'		(
 r-   	kb_loaderc                     t        |      s2t        t        j                  j	                  t        |                   || j                        | _        y)ziDefine the KB of this pipe by providing a function that will
        create it using this object's vocab.)arg_typeN)callablerU   r   E885rW   typer6   r[   )rS   rf   s     r+   set_kbzEntityLinker.set_kb   s=     	"V[[//i/IJJDJJ'r-   c                 J   | j                   3t        t        j                  j	                  | j
                              t        | j                   d      rN| j                   j                         r3t        t        j                  j	                  | j
                              y y )Nr7   is_empty)	r[   rU   r   E1018rW   r7   hasattrro   E139rS   s    r+   validate_kbzEntityLinker.validate_kb   sr    77?V\\00dii0@AA477J'DGG,<,<,>V[[//TYY/?@@ -?'r-   )nlprf   get_examplesru   c                   t        |d       || j                  |       | j                          | j                  j                  }g }g }| j                  t         |       d            }|D ]S  }|j                  }	|j                  |	       |j                  | j                  j                  j                  |             U t        |      dkD  s/J t        j                  j                  | j                                t        |      dkD  s/J t        j                  j                  | j                                t#        |D 	cg c]  }	|	j$                   c}	      }
|
s|d   }	|	dd }d|_        |f|	_        | j                  j)                  || j                  j                  j+                  |d	      
       |
sg 	_        yyc c}	w )a  Initialize the pipe for training, using a representative set
        of data examples.

        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
        nlp (Language): The current nlp object the component is part of.
        kb_loader (Callable[[Vocab], KnowledgeBase]): A function that creates a KnowledgeBase from a Vocab
            instance. Note that providing this argument will overwrite all data accumulated in the current KB.
            Use this only when loading a KB as-such from file.

        DOCS: https://spacy.io/api/entitylinker#initialize
        zEntityLinker.initializeN
   r   rn   r    XXXfloat32dtype)XY)r   rl   rt   r[   r<   rN   r   xrb   r#   opsalloc1flenr   E923rW   r7   anyra   label_
initializeasarray)rS   rv   ru   rf   nO
doc_samplevector_sampler)   rL   rR   has_annotationsents               r+   r   zEntityLinker.initialize   s   & 	l,EF KK	"WW))
$$VLNB%?@ 	=B$$Cc"  !7!7!;<	= :"FFKK$6$6DII$6$FF"=!A%Iv{{'9'9tyy'9'II% :>Csxx>?Q-Ca(CCJvCH

DJJNN22=	2R 	 	
 CH  ?s   Gc                     |D ]G  }|j                   j                  D ],  }t        | j                  | j                  |            }|s+  y I y)zCheck if a batch contains a learnable example.

        If one isn't present, then the update step needs to be skipped.
        TF)rJ   ra   rX   r=   r[   )rS   r)   rL   r   
candidatess        r+   batch_has_learnable_examplez(EntityLinker.batch_has_learnable_example   sR      	 B||((  !$"5"5dggs"CD
 	  r-           )dropsgdlossesr   r   r   c                   | j                          |i }|j                  | j                  d       |s|S | j                  |      }t	        |d       | j                  |      s|S t        | j                  |       |D cg c]  }|j                   }}| j                  j                  |      \  }}| j                  ||      \  }	}
 ||
       || j                  |       || j                  xx   |	z  cc<   |S c c}w )a.  Learn from a batch of documents and gold-standard information,
        updating the pipe's model. Delegates to predict and get_loss.

        examples (Iterable[Example]): A batch of Example objects.
        drop (float): The dropout rate.
        sgd (thinc.api.Optimizer): The optimizer.
        losses (Dict[str, float]): Optional record of the loss during training.
            Updated using the component name as the key.
        RETURNS (Dict[str, float]): The updated losses dictionary.

        DOCS: https://spacy.io/api/entitylinker#update
        r   zEntityLinker.update)sentence_encodingsr)   )rt   
setdefaultr7   rN   r   r   r   r#   rJ   begin_updateget_lossfinish_update)rS   r)   r   r   r   rL   rQ   r   
bp_contextlossd_scoress              r+   updatezEntityLinker.update   s    ( 	>F$))S)M$$X.($9: //9MT*'/000)-)@)@)F&J1H ' 
h 	8?s#tyyT! 1s   ?C<r   c                 |   t        |d       g }d}g }|D ]}  }|j                  dd      }|j                         D ]U  }||j                     }	|	r=| j                  j                  |	      }
|j                  |
       |j                  |       |dz  }W  | j                  j                  j                  |d      }||   }|s1 | j                  j                  j                  |j                   }d|fS |j                  |j                  k7  r,t        j                  j                  d	d
      }t        |      | j                   j#                  ||      } | j                  j                  j                  |j                   }|||<   | j                   j%                  ||      }|t'        |      z  }t)        |      |fS )NzEntityLinker.get_lossr   	ENT_KB_IDT)	as_stringr    rz   r{   r   zgold entities do not match upmethodmsg)r   get_alignedget_matching_entsstartr[   
get_vectorrb   r#   r   	asarray2falloc2fshaper   E147rW   RuntimeErrorrZ   get_gradr   r   float)rS   r)   r   entity_encodingseidx	keep_entsrL   kb_idsr   kb_identity_encodingselected_encodingsouterr	gradientsr   s                   r+   r   zEntityLinker.get_loss*  s   ($;<	 
	B^^K4^@F++- syy)&*gg&8&8&?O$++O<$$T*	
	  ::>>334DI3V/	: ($**..((*<*B*BCCc6M##'7'='==++$$!'F % C s##MM**+=?OP	$djjnn$$&8&>&>?"I}}%%&8:JKc*++T{Cr-   rQ   c                    | j                          d}g }| j                  j                  j                  }|s|S t	        |t
              r|g}t        |      D ]  \  }}t        |      dk(  r|j                  D cg c]  }| }}t        dt        |j                        | j                        D ]q  }	|j                  |	|	| j                  z    }
t        t        |
            D cg c]  }|
|   j                  | j                  vr|! }}t        | j                  dkD  r-| j                  | j                   |D cg c]  }|
|   	 c}      n,|D cg c]!  }| j#                  | j                   |
|         # c}      }t        |
      D ]  \  }}t%        |d      sJ t        |j                        }|j'                  |d         |j'                  |d         f}|d   |d   cxk\  rdk\  sJ  J | j(                  rt+        d|d   | j,                  z
        }t/        t        |      dz
  |d   | j,                  z         }||   j0                  }||   j2                  }||| j5                         }| j                  j7                  |g      d   }|j8                  }|j:                  j=                  |      }|dz  }|j                  | j                  v r|j?                  | j@                         it        ||         }|s|j?                  | j@                         t        |      dk(  r,| jB                   |j?                  |d   jD                         tG        jH                  |       |jK                  |D cg c]  }|jL                   c}      }| jN                  s|jK                  |D cg c]  }d c}      }|}| j(                  r|jK                  |D cg c]  }|jP                   c}      }|j:                  j=                  |d      } t        |      t        |      k7  r*tS        tT        jV                  jY                  dd	            |j[                  |      | z  z  }!|!j\                  |j\                  k7  rt_        tT        j`                        ||!z   ||!z  z
  }|j?                  | jB                  |j+                         | jB                  k\  r+||jc                         je                            jD                  ntf        j@                          t  t        |      |k(  s,tT        jV                  jY                  dd
	      }"tS        |"      |S c c}w c c}w c c}w c c}w c c}w c c}w c c}w )ap  Apply the pipeline's model to a batch of docs, without modifying them.
        Returns the KB IDs for each entity in each doc, including NIL if there is
        no prediction.

        docs (Iterable[Doc]): The documents to predict.
        RETURNS (List[str]): The models prediction for each document.

        DOCS: https://spacy.io/api/entitylinker#predict
        r   r    sentsr   )axispredictzvectors not of equal lengthr   z$result variables not of equal length)4rt   r#   r   xp
isinstancer   	enumerater   r   rangera   rA   r   r8   rX   r>   r[   r=   rq   indexr;   maxr9   minr   endas_docr   Tlinalgnormrb   r(   r5   entity_randomshuffler   
prior_probr:   entity_vectorr   r   r   rW   dotr   rU   E161argmaxitemr'   )#rS   rQ   entity_countfinal_kb_idsr   irR   s	sentencesent_idx	ent_batchidxvalid_ent_idxbatch_candidatesjr   r   sent_indicesstart_sentenceend_sentencestart_token	end_tokensent_docsentence_encodingsentence_encoding_tsentence_normr   cprior_probsrd   scoresr   entity_normsimsr   s#                                      r+   r   zEntityLinker.predictR  s    	"$ZZ^^dC 6Do _	FAs3x1}$'II.q.I. !CM43M3MN YHHWw9S9S/ST	
  %S^4! ~,,D4G4GG ! ! $( 11A5 --M!JS)C.!J $1 ++DGGYs^D	$  (	2 CFAs"3000 OE!a1!b	2$L (?l1oBBBBBB((),QQ$,,0N)O'*	NQ.Q$,,0N( '0&?&E&E$-l$;$?$?	#&{9#=#D#D#F -1JJ,>,>z,J1,M).?.A.A+(*		7J(K A%LzzT%8%88$++DHH5%)*:1*=%>
)(//9 _1dnn6L(//
10E0EF"NN:6*,**J5Wqall5W*XK#'??.0jjz9R!#9R.S%0F#0035::>H$IQ__$I4" 0 /1iinn=MTUn.V#&'7#8C<L#L*6(.(:(:3<0M ); )*+& %& (*vv.>@S'T$1K$?(" $(::1B1B#B*4V[[*A$A)4t);{T?Q)R(//#'>>#9#)::<4>>#A !+6==?+?+?+A B J J &2%5%5	}C-Y_	B L!\1++$$ &L % C s##G /! "KZ 6X9R
 %Js*   ;	U$&$U):U.&U3=U82	U=Vr   c                    t        |D cg c]  }|j                  D ]  }|  c}}      }|t        |      k7  r3t        t        j                  j                  |t        |                  d}| j                  d   }|D ]=  }|j                  D ],  }||   }|dz  }|D ]  }	|	j                  dk(  s|s||	_         . ? yc c}}w )a  Modify a batch of documents, using pre-computed scores.

        docs (Iterable[Doc]): The documents to modify.
        kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict.

        DOCS: https://spacy.io/api/entitylinker#set_annotations
        )ra   idsr   r3   r    N)	r   ra   rU   r   E148rW   rY   	ent_kb_id
ent_kb_id_)
rS   rQ   r   rR   r   
count_entsr   r3   r   tokens
             r+   set_annotationszEntityLinker.set_annotations  s     B#B##B#BC
V$V[[//ZS[/QRRHH[)	 	1Cxx 1q	Q  1E!+y+0(11	1 Cs   C
excludec                     j                          i }t         d      r j                   fd|d<    fd|d<    j                  j                  |d<    j
                  j                  |d<   t        j                  |      S )zSerialize the pipe to a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (bytes): The serialized object.

        DOCS: https://spacy.io/api/entitylinker#to_bytes
        rY   c                  B    t        j                   j                        S r/   )srsly
json_dumpsrY   rs   s   r+   <lambda>z'EntityLinker.to_bytes.<locals>.<lambda>  s    u'7'7'A r-   c                  <    j                   j                         S Nr   )r6   to_bytes)r   rS   s   r+   r   z'EntityLinker.to_bytes.<locals>.<lambda>  s    TZZ%8%8%8%I r-   r6   r[   r#   )_validate_serialization_attrsrq   rY   r[   r   r#   r   )rS   r   	serializes   `` r+   r   zEntityLinker.to_bytes  su     	**,	4DHH$8AIeI	'''**	$!ZZ00	'}}Y00r-   c                      j                           fd}i }t         d      r j                   fd|d<    fd|d<    fd|d<   ||d<   t        j                  ||        S )	zLoad the pipe from a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (TrainablePipe): The loaded object.

        DOCS: https://spacy.io/api/entitylinker#from_bytes
        c                     	 j                   j                  |        y # t        $ r t        t        j
                        d w xY wr/   )r#   
from_bytesAttributeErrorrU   r   E149brS   s    r+   
load_modelz+EntityLinker.from_bytes.<locals>.load_model  s:    8

%%a(! 8 -478s	    $ArY   c                 `    j                   j                  t        j                  |             S r/   )rY   r   r   
json_loadsr  s    r+   r   z)EntityLinker.from_bytes.<locals>.<lambda>  s    488??5;K;KA;N+O r-   c                 >    j                   j                  |       S r   )r6   r  )r  r   rS   s    r+   r   z)EntityLinker.from_bytes.<locals>.<lambda>  s    )>)>q')>)R r-   r6   c                 :    j                   j                  |       S r/   )r[   r  r  s    r+   r   z)EntityLinker.from_bytes.<locals>.<lambda>  s    dgg&8&8&; r-   r[   r#   )r   rq   rY   r   r  )rS   
bytes_datar   r  deserializes   ` `  r+   r  zEntityLinker.from_bytes  sm     	**,	8 4DHH$8!OKRG;D)G
K9r-   pathr   c                |     i } fd|d<    fd|d<    fd|d<    fd|d<   t        j                  ||       y	)
zSerialize the pipe to disk.

        path (str / Path): Path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.

        DOCS: https://spacy.io/api/entitylinker#to_disk
        c                 >    j                   j                  |       S r   )r6   to_diskpr   rS   s    r+   r   z&EntityLinker.to_disk.<locals>.<lambda>  s    tzz'9'9!W'9'M r-   r6   c                 D    t        j                  | j                        S r/   )r   
write_jsonrY   r  rS   s    r+   r   z&EntityLinker.to_disk.<locals>.<lambda>  s    U%5%5a%B r-   rY   c                 :    j                   j                  |       S r/   )r[   r  r  s    r+   r   z&EntityLinker.to_disk.<locals>.<lambda>  s    DGGOOA$6 r-   r[   c                 :    j                   j                  |       S r/   )r#   r  r  s    r+   r   z&EntityLinker.to_disk.<locals>.<lambda>  s    tzz'9'9!'< r-   r#   N)r   r  )rS   r  r   r   s   ` ` r+   r  zEntityLinker.to_disk  sA     	M	'B	%6	$<	'T9g.r-   c                      fd}i } fd|d<    fd|d<    fd|d<   ||d<   t        j                  ||        S )	aN  Load the pipe from disk. Modifies the object in place and returns it.

        path (str / Path): Path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (EntityLinker): The modified EntityLinker object.

        DOCS: https://spacy.io/api/entitylinker#from_disk
        c                     	 | j                  d      5 }j                  j                  |j                                d d d        y # 1 sw Y   y xY w# t        $ r t        t        j                        d w xY w)Nrb)openr#   r  readr  rU   r   r  )r  infilerS   s     r+   r  z*EntityLinker.from_disk.<locals>.load_model(  sa    8VVD\ 9VJJ))&++-89 9 9! 8 -478s'   A *AA AA A $A7c                 L    j                   j                  t        |             S r/   )rY   r   r!   r  s    r+   r   z(EntityLinker.from_disk.<locals>.<lambda>0  s    txx7I!7L'M r-   rY   c                 >    j                   j                  |       S r   )r6   	from_diskr  s    r+   r   z(EntityLinker.from_disk.<locals>.<lambda>1  s    )=)=a)=)Q r-   r6   c                 :    j                   j                  |       S r/   )r[   r!  r  s    r+   r   z(EntityLinker.from_disk.<locals>.<lambda>2  s    dgg&7&7&: r-   r[   r#   )r   r!  )rS   r  r   r  r  s   ` `  r+   r!  zEntityLinker.from_disk  sL    	8 8:MEQG:D)Gt['2r-   )r   r   c                    t         r/   NotImplementedError)rS   r)   r   r   configs        r+   rehearsezEntityLinker.rehearse7      !!r-   c                     t         r/   r$  )rS   labels     r+   	add_labelzEntityLinker.add_label:  r(  r-   )entity_linker)rB   N).__name__
__module____qualname____doc__r(   BACKWARD_OVERWRITEr,   r   r   strr   intboolr   r   r   r   r
   r   r]   r   rN   rl   rt   r   r   r   r   r   r   r   r   r   r	   r   r   tupler   r  r   r   r   r  r!  r'  r+  r0   r-   r+   r'   r'   3   s   
 C $	^+  -%8 &*)^+^+ ^+ 	^+ !^+ ^+ ^+ ^+ "^+ !-!68K!KL^+ 'HTN+Xhy6I-JJ
^+ $UCL-$?@^+  !^+" "#^+$ %^+&  #'^+( E?)^+* 
+^+@Xg%6 8G;L (%-)? @ (A #'@D4r8G#4454 h	4
 HeWm%;<=4l$ #'-1-7#- 	-
 i - c5j)*- 
c5j	-^& '!2 &  & PxHSM xd3i xt1HSM 149 1 1* #(' 1" 16 4 CSBT/#t)$/2:3-/	/$ CSBT#t)$2:3-	6 )-T ""r-   r'   c                 x    | dk(  r!t        j                  d      }|j                  S t        dt         d|        )Nmake_entity_linkerzspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_moduler7  r  r-  )r7   modules     r+   __getattr__r;  ?  sA    ##(()CD(((
78*,>tfE
FFr-   )8r8  r   	itertoolsr   pathlibr   typingr   r   r   r   r	   r
   r   r   	thinc.apir   r   r   r   r   thinc.typesr    r   errorsr   r[   r   r   languager   r4   r   tokensr   r   trainingr   r   r   r   r6   r   rO   r!   trainable_piper"   r1  default_model_configfrom_strDEFAULT_NEL_MODELr,   r1   r'   r;  r0   r-   r+   <module>rJ     s        G G G  P P     )    H H #  $ )    H%%&:;GD VH"= H"XGr-   