
    iI                     H   d dl mZmZmZmZmZmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlZd dlZd dlmZmZmZ d dlmZ d dlZd dlmZmZ d d	lmZ d d
lmZmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z'm(Z(m)Z) d dl*m+Z+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2 dZ3d Z4 G d de!      Z5y)    )OptionalIterableCallableDictUnionListAny)Floats2d)Path)isliceN)CosineDistanceModel	Optimizer)set_dropout_rate)KnowledgeBase	Candidate)empty_kb)DocSpan)deserialize_config)TrainablePipe)Language)Vocab)Examplevalidate_examplesvalidate_get_examples)ErrorsWarnings)SimpleFrozenList)util)ScorerTc                 P    t        j                  | fdt        j                  gi|S )Nnegative_labels)r!   score_linksEntityLinker_v1NIL)exampleskwargss     v/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy_legacy/components/entity_linker.pyentity_linker_scorer*      s'    hX9L9L8MXQWXX    c                   "   e Zd ZdZdZ	 d0eeddedede	de
e	   ded	ed
ededeeege
e   f   dedee   ddfdZdeegef   fdZd1dZddddeg e
e   f   dee   deeegef      fdZddddde
e   dedee   deee	ef      dee	ef   f
dZde
e   d efd!Zd"e
e   dee	   fd#Z d"e
e   d$ee	   ddfd%Z! e"       d&d'Z# e"       d&d(Z$ e%       d&d)e&e	e'f   d*e
e	   ddfd+Z( e%       d&d)e&e	e'f   d*e
e	   dd fd,Z)ddd-d.Z*d/ Z+y)2r%   z^Pipeline component for named entity linking.

    DOCS: https://spacy.io/api/entitylinker
    r&   )	overwritescorervocabmodelnamelabels_discardn_sents
incl_priorincl_contextentity_vector_lengthget_candidatesr-   r.   returnNc                   || _         || _        || _        t        |      | _        || _        || _        || _        |	| _        d|
i| _	        t        d      | _         t        |      | j                         | _        || _        y)a  Initialize an entity linker.

        vocab (Vocab): The shared vocabulary.
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
        labels_discard (Iterable[str]): NER labels that will automatically get a "NIL" prediction.
        n_sents (int): The number of neighbouring sentences to take into account.
        incl_prior (bool): Whether or not to include prior probabilities from the KB in the model.
        incl_context (bool): Whether or not to include the local context in the model.
        entity_vector_length (int): Size of encoding vectors in the KB.
        get_candidates (Callable[[KnowledgeBase, Span], Iterable[Candidate]]): Function that
            produces a list of candidates, given a certain knowledge base and a textual mention.
        scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_links.
        DOCS: https://spacy.io/api/entitylinker#init
        r-   F)	normalizeN)r/   r0   r1   listr2   r3   r4   r5   r7   cfgr   distancer   kbr.   )selfr/   r0   r1   r2   r3   r4   r5   r6   r7   r-   r.   s               r)   __init__zEntityLinker_v1.__init__+   s}    > 

	">2$(,$/#;&7 1(/0<r+   	kb_loaderc                     t        |      s2t        t        j                  j	                  t        |                   || j                        | _        y)ziDefine the KB of this pipe by providing a function that will
        create it using this object's vocab.)arg_typeN)callable
ValueErrorr   E885formattyper/   r>   )r?   rA   s     r)   set_kbzEntityLinker_v1.set_kbY   s=     	"V[[//i/IJJDJJ'r+   c                    | j                   3t        t        j                  j	                  | j
                              t        | j                         dk(  r3t        t        j                  j	                  | j
                              y )Nr1   r   )r>   rE   r   E1018rG   r1   lenE139r?   s    r)   validate_kbzEntityLinker_v1.validate_kba   sb    77?V\\00dii0@AAtww<1V[[//TYY/?@@ r+   )nlprA   get_examplesrQ   c                   t        |d       || j                  |       | j                          | j                  j                  }g }g }t         |       d      D ]Q  }|j                  |j                         |j                  | j                  j                  j                  |             S t        |      dkD  s/J t        j                  j                  | j                               t        |      dkD  s/J t        j                  j                  | j                               | j                  j!                  || j                  j                  j#                  |d             y)	a  Initialize the pipe for training, using a representative set
        of data examples.

        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
        nlp (Language): The current nlp object the component is part of.
        kb_loader (Callable[[Vocab], KnowledgeBase]): A function that creates an InMemoryLookupKB from a Vocab instance.
            Note that providing this argument, will overwrite all data accumulated in the current KB.
            Use this only when loading a KB as-such from file.

        DOCS: https://spacy.io/api/entitylinker#initialize
        zEntityLinker_v1.initializeN
   r   rK   float32)dtype)XY)r   rI   rP   r>   r6   r   appendxr0   opsalloc1frM   r   E923rG   r1   
initializeasarray)r?   rR   rQ   rA   nO
doc_samplevector_sampleexamples           r)   r^   zEntityLinker_v1.initializeh   s"   & 	l,HI KK	"WW))
lnb1 	=Ggii(  !7!7!;<	= :"FFKK$6$6DII$6$FF"=!A%Iv{{'9'9tyy'9'II%

DJJNN22=	2R 	 	
r+           )dropsgdlossesr'   re   rf   rg   c                l   | j                          |i }|j                  | j                  d       |s|S t        |d       g }|D ]  }|j                  j
                  D cg c]  }| }}|j                  dd      }	|j                  j                  D ]  }
|	|
j                     }|s	 |j                  |
j                        }t        d|| j                   z
        }t#        t%        |      dz
  || j                   z         }||   j                  }||   j&                  }|j(                  || j+                         }|j-                  |         t/        | j0                  |       |s5t3        j4                  t6        j8                  j;                  d	
             |S | j0                  j=                  |      \  }}| j?                  ||      \  }} ||       || jA                  |       || j                  xx   |z  cc<   |S c c}w # t        $ r t        t        j                        dw xY w)a.  Learn from a batch of documents and gold-standard information,
        updating the pipe's model. Delegates to predict and get_loss.

        examples (Iterable[Example]): A batch of Example objects.
        drop (float): The dropout rate.
        sgd (thinc.api.Optimizer): The optimizer.
        losses (Dict[str, float]): Optional record of the loss during training.
            Updated using the component name as the key.
        RETURNS (Dict[str, float]): The updated losses dictionary.

        DOCS: https://spacy.io/api/entitylinker#update
        Nrd   zEntityLinker_v1.update	ENT_KB_IDT	as_stringr      zEntity LinkerrK   )sentence_encodingsr'   )!rP   
setdefaultr1   r   	referencesentsget_alignedentsstartindexsentAttributeErrorRuntimeErrorr   E030maxr3   minrM   end	predictedas_docrY   r   r0   warningswarnr   W093rG   begin_updateget_lossfinish_update)r?   r'   re   rf   rg   sentence_docsegs	sentenceskb_idsentkb_id
sent_indexstart_sentenceend_sentencestart_token	end_tokensent_docrm   
bp_contextlossd_scoress                         r)   updatezEntityLinker_v1.update   s   ( 	>F$))S)M($<= 	3B$&LL$6$67q7I7^^K4^@F||(( 3syy)B%.__SXX%>

 &)J,E%FN#&s9~'9:;T#UL"+N";"A"AK ), 7 ; ;I!||K	BIIKH!((2'3	3. 	T*MM(--..O.DEM)-)@)@)O&J1H ' 
h 	8?s#tyyT!E 8 * B*6;;7TABs   !	H
+H$H3rm   c                 ~   t        |d       g }|D ]n  }|j                  dd      }|j                  j                  D ]@  }||j                     }|s| j
                  j                  |      }|j                  |       B p | j                  j                  j                  |      }|j                  |j                  k7  r,t        j                  j                  dd      }	t        |	      | j                   j#                  ||      }
| j                   j%                  ||      }|t'        |      z  }t)        |      |
fS )NzEntityLinker_v1.get_lossri   Trj   r   zgold entities do not match upmethodmsg)r   rq   ro   rr   rs   r>   
get_vectorrY   r0   r[   	asarray2fshaper   E147rG   rw   r=   get_gradr   rM   float)r?   r'   rm   entity_encodingsr   r   r   r   entity_encodingerr	gradientsr   s               r)   r   zEntityLinker_v1.get_loss   s,   ($>? 	=B^^K4^@F||(( =syy)&*gg&8&8&?O$++O<	=	=  ::>>334DE##'7'='==++$$!'F % C s##MM**+=?OP	}}%%&8:JKc*++T{I%%r+   docsc                    | j                          d}g }|s|S t        |t              r|g}t        |      D ]  \  }}|j                  D cg c]  }| }}t        |      dkD  s/|j                  D ]E  }|j                  }	|j                  |	      }
|
dk\  sJ t        d|
| j                  z
        }t        t        |      dz
  |
| j                  z         }||   j                  }||   j                  }||| j                         }| j                  j                   j"                  }| j$                  rF| j                  j'                  |g      d   }|j(                  }|j*                  j-                  |      }|dz  }|j.                  | j0                  v r|j3                  | j4                         Ct7        | j9                  | j:                  |            }|s|j3                  | j4                         t        |      dk(  r |j3                  |d   j<                         t?        j@                  |       |jC                  |D cg c]  }|jD                   c}      }| jF                  s|jC                  |D cg c]  }d c}      }|}| j$                  r|jC                  |D cg c]  }|jH                   c}      }|j*                  j-                  |d      }t        |      t        |      k7  r*tK        tL        jN                  jQ                  dd            |jS                  |      |z  z  }|jT                  |jT                  k7  rtW        tL        jX                        ||z   ||z  z
  }|j[                         j]                         }||   }|j3                  |j<                         H  t        |      |k(  s,tL        jN                  jQ                  dd      }tK        |      |S c c}w c c}w c c}w c c}w )	ap  Apply the pipeline's model to a batch of docs, without modifying them.
        Returns the KB IDs for each entity in each doc, including NIL if there is
        no prediction.

        docs (Iterable[Doc]): The documents to predict.
        RETURNS (List[str]): The models prediction for each document.

        DOCS: https://spacy.io/api/entitylinker#predict
        r   rl   rd   )axispredictzvectors not of equal lengthr   z$result variables not of equal length)/rP   
isinstancer   	enumeraterp   rM   rr   ru   rt   ry   r3   rz   rs   r{   r}   r0   r[   xpr5   r   Tlinalgnormlabel_r2   rY   r&   r;   r7   r>   entity_randomshuffler_   
prior_probr4   entity_vectorrw   r   r   rG   dotr   rE   E161argmaxitem)r?   r   entity_countfinal_kb_idsidocr   r   r   ru   r   r   r   r   r   r   r   sentence_encodingsentence_encoding_tsentence_norm
candidatescprior_probs_scoresr   entity_normsims
best_indexbest_candidater   s                                  r)   r   zEntityLinker_v1.predict   s    	"$dC 6Do =	HFAs$'II.q.I.3x!|88 9HC88D!*!6J%?*?%(J,E%FN#&s9~'9:;T#UL"+N";"A"AK ), 7 ; ;I";y9@@BH**B((,0JJ,>,>z,J1,M).?.A.A+(*		7J(K A%LzzT%8%88$++DHH5%)$*=*=dggs*K%L
)(//9 _1(//
10E0EF"NN:6*,**J5Wqall5W*XK#'??.0jjz9R!#9R.S%0F#0035::>H$IQ__$I4" 0 /1iinn=MTUn.V#&'7#8C<L#L*6(.(:(:3<0M ); )*+& %& (*vv.>@S'T$1K$?(" $(::1B1B#B*4V[[*A$A)4t);{T?Q)R)/)=)=)?J-7
-CN(//0F0FGs9H	=	H| L!\1++$$ &L % C s##E /D 6X9R
 %Js   	O:O? 	P-P	r   c                    t        |D cg c]  }|j                  D ]  }|  c}}      }|t        |      k7  r3t        t        j                  j                  |t        |                  d}| j                  d   }|D ]=  }|j                  D ],  }||   }|dz  }|D ]  }	|	j                  dk(  s|s||	_         . ? yc c}}w )a  Modify a batch of documents, using pre-computed scores.

        docs (Iterable[Doc]): The documents to modify.
        kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict.

        DOCS: https://spacy.io/api/entitylinker#set_annotations
        )rr   idsr   r-   rl   N)	rM   rr   rE   r   E148rG   r<   	ent_kb_id
ent_kb_id_)
r?   r   r   r   r   
count_entsr   r-   r   tokens
             r)   set_annotationszEntityLinker_v1.set_annotations7  s     B#B##B#BC
V$V[[//ZS[/QRRHH[)	 	1Cxx 1q	Q  1E!+y+0(11	1 Cs   C
excludec                     j                          i }t         d      r j                   fd|d<    fd|d<    j                  j                  |d<    j
                  j                  |d<   t        j                  |      S )zSerialize the pipe to a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (bytes): The serialized object.

        DOCS: https://spacy.io/api/entitylinker#to_bytes
        r<   c                  B    t        j                   j                        S N)srsly
json_dumpsr<   rO   s   r)   <lambda>z*EntityLinker_v1.to_bytes.<locals>.<lambda>W  s    u'7'7'A r+   c                  <    j                   j                         S Nr   )r/   to_bytes)r   r?   s   r)   r   z*EntityLinker_v1.to_bytes.<locals>.<lambda>X  s    TZZ%8%8%8%I r+   r/   r>   r0   )_validate_serialization_attrshasattrr<   r>   r   r0   r    )r?   r   	serializes   `` r)   r   zEntityLinker_v1.to_bytesL  su     	**,	4DHH$8AIeI	'''**	$!ZZ00	'}}Y00r+   c                      j                           fd}i }t         d      r j                   fd|d<    fd|d<    fd|d<   ||d<   t        j                  ||        S )	zLoad the pipe from a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (TrainablePipe): The loaded object.

        DOCS: https://spacy.io/api/entitylinker#from_bytes
        c                     	 j                   j                  |        y # t        $ r t        t        j
                        d w xY wr   )r0   
from_bytesrv   rE   r   E149br?   s    r)   
load_modelz.EntityLinker_v1.from_bytes.<locals>.load_modelg  s:    8

%%a(! 8 -478s	    $Ar<   c                 `    j                   j                  t        j                  |             S r   )r<   r   r   
json_loadsr   s    r)   r   z,EntityLinker_v1.from_bytes.<locals>.<lambda>o  s    488??5;K;KA;N+O r+   c                 >    j                   j                  |       S r   )r/   r   )r   r   r?   s    r)   r   z,EntityLinker_v1.from_bytes.<locals>.<lambda>p  s    )>)>q')>)R r+   r/   c                 :    j                   j                  |       S r   )r>   r   r   s    r)   r   z,EntityLinker_v1.from_bytes.<locals>.<lambda>q  s    dgg&8&8&; r+   r>   r0   )r   r   r<   r    r   )r?   
bytes_datar   r   deserializes   ` `  r)   r   zEntityLinker_v1.from_bytes]  sm     	**,	8 4DHH$8!OKRG;D)G
K9r+   pathr   c                |     i } fd|d<    fd|d<    fd|d<    fd|d<   t        j                  ||       y	)
zSerialize the pipe to disk.

        path (str / Path): Path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.

        DOCS: https://spacy.io/api/entitylinker#to_disk
        c                 >    j                   j                  |       S r   )r/   to_diskpr   r?   s    r)   r   z)EntityLinker_v1.to_disk.<locals>.<lambda>  s    tzz'9'9!W'9'M r+   r/   c                 D    t        j                  | j                        S r   )r   
write_jsonr<   r   r?   s    r)   r   z)EntityLinker_v1.to_disk.<locals>.<lambda>  s    U%5%5a%B r+   r<   c                 :    j                   j                  |       S r   )r>   r   r   s    r)   r   z)EntityLinker_v1.to_disk.<locals>.<lambda>  s    DGGOOA$6 r+   r>   c                 :    j                   j                  |       S r   )r0   r   r   s    r)   r   z)EntityLinker_v1.to_disk.<locals>.<lambda>  s    tzz'9'9!'< r+   r0   N)r    r   )r?   r   r   r   s   ` ` r)   r   zEntityLinker_v1.to_diskv  sA     	M	'B	%6	$<	'T9g.r+   c                      fd}i } fd|d<    fd|d<    fd|d<   ||d<   t        j                  ||        S )	aN  Load the pipe from disk. Modifies the object in place and returns it.

        path (str / Path): Path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (EntityLinker): The modified EntityLinker object.

        DOCS: https://spacy.io/api/entitylinker#from_disk
        c                     	 | j                  d      5 }j                  j                  |j                                d d d        y # 1 sw Y   y xY w# t        $ r t        t        j                        d w xY w)Nrb)openr0   r   readrv   rE   r   r   )r   infiler?   s     r)   r   z-EntityLinker_v1.from_disk.<locals>.load_model  sa    8VVD\ 9VJJ))&++-89 9 9! 8 -478s'   A *AA AA A $A7c                 L    j                   j                  t        |             S r   )r<   r   r   r   s    r)   r   z+EntityLinker_v1.from_disk.<locals>.<lambda>  s    txx7I!7L'M r+   r<   c                 >    j                   j                  |       S r   )r/   	from_diskr   s    r)   r   z+EntityLinker_v1.from_disk.<locals>.<lambda>  s    )=)=a)=)Q r+   r/   c                 :    j                   j                  |       S r   )r>   r   r   s    r)   r   z+EntityLinker_v1.from_disk.<locals>.<lambda>  s    dgg&7&7&: r+   r>   r0   )r    r   )r?   r   r   r   r   s   ` `  r)   r   zEntityLinker_v1.from_disk  sL    	8 8:MEQG:D)Gt['2r+   )rf   rg   c                    t         r   NotImplementedError)r?   r'   rf   rg   configs        r)   rehearsezEntityLinker_v1.rehearse      !!r+   c                     t         r   r   )r?   labels     r)   	add_labelzEntityLinker_v1.add_label  r  r+   )entity_linker)r8   N),__name__
__module____qualname____doc__r&   BACKWARD_OVERWRITEr*   r   r   strr   intboolr   r   r   r   r   r@   rI   rP   r   r   r^   r   r   r   r   r
   r   r   r   r   r   tupler   r   r   r   r   r   r   r  r   r+   r)   r%   r%   #   sr   
 C $	, -%8,, , 	, !, , , , ", !-!68K!KL, , ", 
,\(%-)? @ (A #'@D!
r8G#445!
 h	!

 HeWm%;<=!
N #'-1?7#? 	?
 i ? c5j)*? 
c5j	?B&'!2 & &*THSM Td3i Tl1HSM 149 1 1* #(' 1" 16 4 CSBT/#t)$/2:3-/	/$ CSBT#t)$2:3-	6 )-T ""r+   r%   )6typingr   r   r   r   r   r   r	   thinc.typesr
   pathlibr   	itertoolsr   r   r   	thinc.apir   r   r   r   r~   spacy.kbr   r   spacy.mlr   spacy.tokensr   r   spacy.pipeline.piper   spacy.pipeline.trainable_piper   spacy.languager   spacy.vocabr   spacy.trainingr   r   r   spacy.errorsr   r   
spacy.utilr   spacyr    spacy.scorerr!   r  r*   r%   r  r+   r)   <module>r$     so    H G G       6 6 &  -  " 2 7 #  L L ) '    YC"m C"r+   