
    idI                     @   d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZ d dlZd dlmZmZmZmZ d dlmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& ddl'm(Z(m)Z)m*Z* ddlm+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 dZ2d Z3 G d de1      Z4y)    N)islice)Path)AnyCallableDictIterableListOptionalUnion)CosineDistanceModel	Optimizerset_dropout_rate)Floats2d   )util)ErrorsWarnings)	CandidateKnowledgeBase)Language)empty_kb)Scorer)DocSpan)Examplevalidate_examplesvalidate_get_examples)SimpleFrozenList)Vocab   )deserialize_config)TrainablePipeTc                 P    t        j                  | fdt        j                  gi|S )Nnegative_labels)r   score_linksEntityLinker_v1NIL)exampleskwargss     t/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/pipeline/legacy/entity_linker.pyentity_linker_scorer,      s'    hX9L9L8MXQWXX    c                   "   e Zd ZdZdZ	 d0eeddedede	de
e	   ded	ed
ededeeege
e   f   dedee   ddfdZdeegef   fdZd1dZddddeg e
e   f   dee   deeegef      fdZddddde
e   dedee   deee	ef      dee	ef   f
dZde
e   d efd!Zd"e
e   dee	   fd#Z d"e
e   d$ee	   ddfd%Z! e"       d&d'Z# e"       d&d(Z$ e%       d&d)e&e	e'f   d*e
e	   ddfd+Z( e%       d&d)e&e	e'f   d*e
e	   dd fd,Z)ddd-d.Z*d/ Z+y)2r'   z^Pipeline component for named entity linking.

    DOCS: https://spacy.io/api/entitylinker
    r(   )	overwritescorervocabmodelnamelabels_discardn_sents
incl_priorincl_contextentity_vector_lengthget_candidatesr/   r0   returnNc                   || _         || _        || _        t        |      | _        || _        || _        || _        |	| _        d|
i| _	        t        d      | _         t        |      | j                         | _        || _        y)a  Initialize an entity linker.

        vocab (Vocab): The shared vocabulary.
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
        labels_discard (Iterable[str]): NER labels that will automatically get a "NIL" prediction.
        n_sents (int): The number of neighbouring sentences to take into account.
        incl_prior (bool): Whether or not to include prior probabilities from the KB in the model.
        incl_context (bool): Whether or not to include the local context in the model.
        entity_vector_length (int): Size of encoding vectors in the KB.
        get_candidates (Callable[[KnowledgeBase, Span], Iterable[Candidate]]): Function that
            produces a list of candidates, given a certain knowledge base and a textual mention.
        scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_links.
        DOCS: https://spacy.io/api/entitylinker#init
        r/   F)	normalizeN)r1   r2   r3   listr4   r5   r6   r7   r9   cfgr   distancer   kbr0   )selfr1   r2   r3   r4   r5   r6   r7   r8   r9   r/   r0   s               r+   __init__zEntityLinker_v1.__init__+   s}    > 

	">2$(,$/#;&7 1(/0<r-   	kb_loaderc                     t        |      s2t        t        j                  j	                  t        |                   || j                        | _        y)ziDefine the KB of this pipe by providing a function that will
        create it using this object's vocab.)arg_typeN)callable
ValueErrorr   E885formattyper1   r@   )rA   rC   s     r+   set_kbzEntityLinker_v1.set_kbY   s=     	"V[[//i/IJJDJJ'r-   c                    | j                   3t        t        j                  j	                  | j
                              t        | j                         dk(  r3t        t        j                  j	                  | j
                              y )Nr3   r   )r@   rG   r   E1018rI   r3   lenE139rA   s    r+   validate_kbzEntityLinker_v1.validate_kba   sb    77?V\\00dii0@AAtww<1V[[//TYY/?@@ r-   )nlprC   get_examplesrS   c                   t        |d       || j                  |       | j                          | j                  j                  }g }g }t         |       d      D ]Q  }|j                  |j                         |j                  | j                  j                  j                  |             S t        |      dkD  s/J t        j                  j                  | j                               t        |      dkD  s/J t        j                  j                  | j                               | j                  j!                  || j                  j                  j#                  |d             y)	a  Initialize the pipe for training, using a representative set
        of data examples.

        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
        nlp (Language): The current nlp object the component is part of.
        kb_loader (Callable[[Vocab], KnowledgeBase]): A function that creates an InMemoryLookupKB from a Vocab instance.
            Note that providing this argument, will overwrite all data accumulated in the current KB.
            Use this only when loading a KB as-such from file.

        DOCS: https://spacy.io/api/entitylinker#initialize
        zEntityLinker_v1.initializeN
   r   rM   float32)dtype)XY)r   rK   rR   r@   r8   r   appendxr2   opsalloc1frO   r   E923rI   r3   
initializeasarray)rA   rT   rS   rC   nO
doc_samplevector_sampleexamples           r+   r`   zEntityLinker_v1.initializeh   s"   & 	l,HI KK	"WW))
lnb1 	=Ggii(  !7!7!;<	= :"FFKK$6$6DII$6$FF"=!A%Iv{{'9'9tyy'9'II%

DJJNN22=	2R 	 	
r-           )dropsgdlossesr)   rg   rh   ri   c                l   | j                          |i }|j                  | j                  d       |s|S t        |d       g }|D ]  }|j                  j
                  D cg c]  }| }}|j                  dd      }	|j                  j                  D ]  }
|	|
j                     }|s	 |j                  |
j                        }t        d|| j                   z
        }t#        t%        |      dz
  || j                   z         }||   j                  }||   j&                  }|j(                  || j+                         }|j-                  |         t/        | j0                  |       |s5t3        j4                  t6        j8                  j;                  d	
             |S | j0                  j=                  |      \  }}| j?                  ||      \  }} ||       || jA                  |       || j                  xx   |z  cc<   |S c c}w # t        $ r t        t        j                        dw xY w)a.  Learn from a batch of documents and gold-standard information,
        updating the pipe's model. Delegates to predict and get_loss.

        examples (Iterable[Example]): A batch of Example objects.
        drop (float): The dropout rate.
        sgd (thinc.api.Optimizer): The optimizer.
        losses (Dict[str, float]): Optional record of the loss during training.
            Updated using the component name as the key.
        RETURNS (Dict[str, float]): The updated losses dictionary.

        DOCS: https://spacy.io/api/entitylinker#update
        Nrf   zEntityLinker_v1.update	ENT_KB_IDT	as_stringr      zEntity LinkerrM   )sentence_encodingsr)   )!rR   
setdefaultr3   r   	referencesentsget_alignedentsstartindexsentAttributeErrorRuntimeErrorr   E030maxr5   minrO   end	predictedas_docr[   r   r2   warningswarnr   W093rI   begin_updateget_lossfinish_update)rA   r)   rg   rh   ri   sentence_docsegs	sentenceskb_idsentkb_id
sent_indexstart_sentenceend_sentencestart_token	end_tokensent_docro   
bp_contextlossd_scoress                         r+   updatezEntityLinker_v1.update   s   ( 	>F$))S)M($<= 	3B$&LL$6$67q7I7^^K4^@F||(( 3syy)B%.__SXX%>

 &)J,E%FN#&s9~'9:;T#UL"+N";"A"AK ), 7 ; ;I!||K	BIIKH!((2'3	3. 	T*MM(--..O.DEM)-)@)@)O&J1H ' 
h 	8?s#tyyT!E 8 * B*6;;7TABs   !	H
+H$H3ro   c                 ~   t        |d       g }|D ]n  }|j                  dd      }|j                  j                  D ]@  }||j                     }|s| j
                  j                  |      }|j                  |       B p | j                  j                  j                  |      }|j                  |j                  k7  r,t        j                  j                  dd      }	t        |	      | j                   j#                  ||      }
| j                   j%                  ||      }|t'        |      z  }t)        |      |
fS )NzEntityLinker_v1.get_lossrk   Trl   r   zgold entities do not match upmethodmsg)r   rs   rq   rt   ru   r@   
get_vectorr[   r2   r]   	asarray2fshaper   E147rI   ry   r?   get_gradr   rO   float)rA   r)   ro   entity_encodingsr   r   r   r   entity_encodingerr	gradientsr   s               r+   r   zEntityLinker_v1.get_loss   s,   ($>? 	=B^^K4^@F||(( =syy)&*gg&8&8&?O$++O<	=	=  ::>>334DE##'7'='==++$$!'F % C s##MM**+=?OP	}}%%&8:JKc*++T{I%%r-   docsc                    | j                          d}g }|s|S t        |t              r|g}t        |      D ]  \  }}|j                  D cg c]  }| }}t        |      dkD  s/|j                  D ]E  }|j                  }	|j                  |	      }
|
dk\  sJ t        d|
| j                  z
        }t        t        |      dz
  |
| j                  z         }||   j                  }||   j                  }||| j                         }| j                  j                   j"                  }| j$                  rF| j                  j'                  |g      d   }|j(                  }|j*                  j-                  |      }|dz  }|j.                  | j0                  v r|j3                  | j4                         Ct7        | j9                  | j:                  |            }|s|j3                  | j4                         t        |      dk(  r |j3                  |d   j<                         t?        j@                  |       |jC                  |D cg c]  }|jD                   c}      }| jF                  s|jC                  |D cg c]  }d c}      }|}| j$                  r|jC                  |D cg c]  }|jH                   c}      }|j*                  j-                  |d      }t        |      t        |      k7  r*tK        tL        jN                  jQ                  dd            |jS                  |      |z  z  }|jT                  |jT                  k7  rtW        tL        jX                        ||z   ||z  z
  }|j[                         j]                         }||   }|j3                  |j<                         H  t        |      |k(  s,tL        jN                  jQ                  dd      }tK        |      |S c c}w c c}w c c}w c c}w )	ap  Apply the pipeline's model to a batch of docs, without modifying them.
        Returns the KB IDs for each entity in each doc, including NIL if there is
        no prediction.

        docs (Iterable[Doc]): The documents to predict.
        RETURNS (List[str]): The models prediction for each document.

        DOCS: https://spacy.io/api/entitylinker#predict
        r   rn   rf   )axispredictzvectors not of equal lengthr   z$result variables not of equal length)/rR   
isinstancer   	enumeraterr   rO   rt   rw   rv   r{   r5   r|   ru   r}   r   r2   r]   xpr7   r   Tlinalgnormlabel_r4   r[   r(   r=   r9   r@   entity_randomshufflera   
prior_probr6   entity_vectorry   r   r   rI   dotr   rG   E161argmaxitem)rA   r   entity_countfinal_kb_idsidocr   r   r   rw   r   r   r   r   r   r   r   sentence_encodingsentence_encoding_tsentence_norm
candidatescprior_probs_scoresr   entity_normsims
best_indexbest_candidater   s                                  r+   r   zEntityLinker_v1.predict   s    	"$dC 6Do =	HFAs$'II.q.I.3x!|88 9HC88D!*!6J%?*?%(J,E%FN#&s9~'9:;T#UL"+N";"A"AK ), 7 ; ;I";y9@@BH**B((,0JJ,>,>z,J1,M).?.A.A+(*		7J(K A%LzzT%8%88$++DHH5%)$*=*=dggs*K%L
)(//9 _1(//
10E0EF"NN:6*,**J5Wqall5W*XK#'??.0jjz9R!#9R.S%0F#0035::>H$IQ__$I4" 0 /1iinn=MTUn.V#&'7#8C<L#L*6(.(:(:3<0M ); )*+& %& (*vv.>@S'T$1K$?(" $(::1B1B#B*4V[[*A$A)4t);{T?Q)R)/)=)=)?J-7
-CN(//0F0FGs9H	=	H| L!\1++$$ &L % C s##E /D 6X9R
 %Js   	O:O? 	P-P	r   c                    t        |D cg c]  }|j                  D ]  }|  c}}      }|t        |      k7  r3t        t        j                  j                  |t        |                  d}| j                  d   }|D ]=  }|j                  D ],  }||   }|dz  }|D ]  }	|	j                  dk(  s|s||	_         . ? yc c}}w )a  Modify a batch of documents, using pre-computed scores.

        docs (Iterable[Doc]): The documents to modify.
        kb_ids (List[str]): The IDs to set, produced by EntityLinker.predict.

        DOCS: https://spacy.io/api/entitylinker#set_annotations
        )rt   idsr   r/   rn   N)	rO   rt   rG   r   E148rI   r>   	ent_kb_id
ent_kb_id_)
rA   r   r   r   r   
count_entsr   r/   r   tokens
             r+   set_annotationszEntityLinker_v1.set_annotations7  s     B#B##B#BC
V$V[[//ZS[/QRRHH[)	 	1Cxx 1q	Q  1E!+y+0(11	1 Cs   C
excludec                     j                          i }t         d      r j                   fd|d<    fd|d<    j                  j                  |d<    j
                  j                  |d<   t        j                  |      S )zSerialize the pipe to a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (bytes): The serialized object.

        DOCS: https://spacy.io/api/entitylinker#to_bytes
        r>   c                  B    t        j                   j                        S N)srsly
json_dumpsr>   rQ   s   r+   <lambda>z*EntityLinker_v1.to_bytes.<locals>.<lambda>W  s    u'7'7'A r-   c                  <    j                   j                         S Nr   )r1   to_bytes)r   rA   s   r+   r   z*EntityLinker_v1.to_bytes.<locals>.<lambda>X  s    TZZ%8%8%8%I r-   r1   r@   r2   )_validate_serialization_attrshasattrr>   r@   r   r2   r   )rA   r   	serializes   `` r+   r   zEntityLinker_v1.to_bytesL  su     	**,	4DHH$8AIeI	'''**	$!ZZ00	'}}Y00r-   c                      j                           fd}i }t         d      r j                   fd|d<    fd|d<    fd|d<   ||d<   t        j                  ||        S )	zLoad the pipe from a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (TrainablePipe): The loaded object.

        DOCS: https://spacy.io/api/entitylinker#from_bytes
        c                     	 j                   j                  |        y # t        $ r t        t        j
                        d w xY wr   )r2   
from_bytesrx   rG   r   E149brA   s    r+   
load_modelz.EntityLinker_v1.from_bytes.<locals>.load_modelg  s:    8

%%a(! 8 -478s	    $Ar>   c                 `    j                   j                  t        j                  |             S r   )r>   r   r   
json_loadsr   s    r+   r   z,EntityLinker_v1.from_bytes.<locals>.<lambda>o  s    488??5;K;KA;N+O r-   c                 >    j                   j                  |       S r   )r1   r   )r   r   rA   s    r+   r   z,EntityLinker_v1.from_bytes.<locals>.<lambda>p  s    )>)>q')>)R r-   r1   c                 :    j                   j                  |       S r   )r@   r   r   s    r+   r   z,EntityLinker_v1.from_bytes.<locals>.<lambda>q  s    dgg&8&8&; r-   r@   r2   )r   r   r>   r   r   )rA   
bytes_datar   r   deserializes   ` `  r+   r   zEntityLinker_v1.from_bytes]  sm     	**,	8 4DHH$8!OKRG;D)G
K9r-   pathr   c                |     i } fd|d<    fd|d<    fd|d<    fd|d<   t        j                  ||       y	)
zSerialize the pipe to disk.

        path (str / Path): Path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.

        DOCS: https://spacy.io/api/entitylinker#to_disk
        c                 >    j                   j                  |       S r   )r1   to_diskpr   rA   s    r+   r   z)EntityLinker_v1.to_disk.<locals>.<lambda>  s    tzz'9'9!W'9'M r-   r1   c                 D    t        j                  | j                        S r   )r   
write_jsonr>   r   rA   s    r+   r   z)EntityLinker_v1.to_disk.<locals>.<lambda>  s    U%5%5a%B r-   r>   c                 :    j                   j                  |       S r   )r@   r   r   s    r+   r   z)EntityLinker_v1.to_disk.<locals>.<lambda>  s    DGGOOA$6 r-   r@   c                 :    j                   j                  |       S r   )r2   r   r   s    r+   r   z)EntityLinker_v1.to_disk.<locals>.<lambda>  s    tzz'9'9!'< r-   r2   N)r   r   )rA   r   r   r   s   ` ` r+   r   zEntityLinker_v1.to_diskv  sA     	M	'B	%6	$<	'T9g.r-   c                      fd}i } fd|d<    fd|d<    fd|d<   ||d<   t        j                  ||        S )	aN  Load the pipe from disk. Modifies the object in place and returns it.

        path (str / Path): Path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (EntityLinker): The modified EntityLinker object.

        DOCS: https://spacy.io/api/entitylinker#from_disk
        c                     	 | j                  d      5 }j                  j                  |j                                d d d        y # 1 sw Y   y xY w# t        $ r t        t        j                        d w xY w)Nrb)openr2   r   readrx   rG   r   r   )r   infilerA   s     r+   r   z-EntityLinker_v1.from_disk.<locals>.load_model  sa    8VVD\ 9VJJ))&++-89 9 9! 8 -478s'   A *AA AA A $A7c                 L    j                   j                  t        |             S r   )r>   r   r"   r   s    r+   r   z+EntityLinker_v1.from_disk.<locals>.<lambda>  s    txx7I!7L'M r-   r>   c                 >    j                   j                  |       S r   )r1   	from_diskr   s    r+   r   z+EntityLinker_v1.from_disk.<locals>.<lambda>  s    )=)=a)=)Q r-   r1   c                 :    j                   j                  |       S r   )r@   r   r   s    r+   r   z+EntityLinker_v1.from_disk.<locals>.<lambda>  s    dgg&7&7&: r-   r@   r2   )r   r   )rA   r   r   r   r   s   ` `  r+   r   zEntityLinker_v1.from_disk  sL    	8 8:MEQG:D)Gt['2r-   )rh   ri   c                    t         r   NotImplementedError)rA   r)   rh   ri   configs        r+   rehearsezEntityLinker_v1.rehearse      !!r-   c                     t         r   r  )rA   labels     r+   	add_labelzEntityLinker_v1.add_label  r  r-   )entity_linker)r:   N),__name__
__module____qualname____doc__r(   BACKWARD_OVERWRITEr,   r    r   strr   intboolr   r   r   r   r
   rB   rK   rR   r   r   r`   r   r   r   r   r   r   r   r	   r   r   tupler   r   r   r   r   r   r   r  r	   r-   r+   r'   r'   #   sr   
 C $	, -%8,, , 	, !, , , , ", !-!68K!KL, , ", 
,\(%-)? @ (A #'@D!
r8G#445!
 h	!

 HeWm%;<=!
N #'-1?7#? 	?
 i ? c5j)*? 
c5j	?B&'!2 & &*THSM Td3i Tl1HSM 149 1 1* #(' 1" 16 4 CSBT/#t)$/2:3-/	/$ CSBT#t)$2:3-	6 )-T ""r-   r'   )5r   r   	itertoolsr   pathlibr   typingr   r   r   r   r	   r
   r   r   	thinc.apir   r   r   r   thinc.typesr    r   errorsr   r   r@   r   r   languager   mlr   r0   r   tokensr   r   trainingr   r   r   r   r1   r    piper"   trainable_piper#   r  r,   r'   r  r-   r+   <module>r"     sk        G G G  H H    & *      I I $  % *  YC"m C"r-   