
    i.                        d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZmZ d dlmZ ddlmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% dee   dee&ef   fdZ'd Z( G d de%      Z)d Z*y)    N)Path)AnyCallableDictIterableListOptionalTupleUnion)Model   )util)ErrorsWarnings)Language)Lookupsload_lookups)Scorer)DocToken)Example)SimpleFrozenListlogger)Vocab   )Pipeexamplesreturnc                 0    t        j                  | dfi |S )Nlemma)r   score_token_attr)r   kwargss     j/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/pipeline/lemmatizer.pylemmatizer_scorer$      s    ""8W???    c                      t         S N)r$    r%   r#   make_lemmatizer_scorerr)      s    r%   c                      e Zd ZdZededeee   ee   f   fd       Z	 d%dde	dde
d	ee   d
edededee   ddfdZed        ZdedefdZ	 d&ddddeeg ee   f      dee   dee   fdZej4                  fdeddfdZdedee   fdZdedee   fdZdedefdZ e        dde!ee"f   dee   fd Z# e        dde!ee"f   dee   dd fd!Z$ e        ddee   de%fd"Z& e        dd#e%dee   dd fd$Z'y)'
Lemmatizerz
    The Lemmatizer supports simple part-of-speech-sensitive suffix rules and
    lookup tables.

    DOCS: https://spacy.io/api/lemmatizer
    moder   c                 6    |dk(  rdgg fS |dk(  rdgddgfS g g fS )a  Returns the lookups configuration settings for a given mode for use
        in Lemmatizer.load_lookups.

        mode (str): The lemmatizer mode.
        RETURNS (Tuple[List[str], List[str]]): The required and optional
            lookup tables for this mode.
        lookuplemma_lookuprulelemma_rules	lemma_exclemma_indexr(   )clsr,   s     r#   get_lookups_configzLemmatizer.get_lookups_config$   s<     8#$b))V^"Ok=%ABBBxr%   r.   F)r,   	overwritescorervocabmodelnamer6   r7   Nc                   || _         || _        || _        || _        t	               | _        || _        d| _        | j                  dk(  r| j                  | _
        nv| j                  dk(  r| j                  | _
        nU| j                   d}t        | |      s)t        t        j                  j!                  |            t#        | |      | _
        i | _        || _        y)a&  Initialize a Lemmatizer.

        vocab (Vocab): The vocab.
        model (Model): A model (not yet implemented).
        name (str): The component name. Defaults to "lemmatizer".
        mode (str): The lemmatizer mode: "lookup", "rule". Defaults to "lookup".
        overwrite (bool): Whether to overwrite existing lemmas. Defaults to
            `False`.
        scorer (Optional[Callable]): The scoring method. Defaults to
            Scorer.score_token_attr for the attribute "lemma".

        DOCS: https://spacy.io/api/lemmatizer#init
        Fr.   r0   
_lemmatize)r,   N)r8   r9   r:   _moder   lookupsr6   
_validatedr,   lookup_lemmatize	lemmatizerule_lemmatizehasattr
ValueErrorr   E1003formatgetattrcacher7   )selfr8   r9   r:   r,   r6   r7   	mode_attrs           r#   __init__zLemmatizer.__init__3   s    . 

	
y"99 !22DNYY& !00DN99+Z0I4+ !4!4$!4!?@@$T95DN
r%   c                     | j                   S r'   )r=   )rI   s    r#   r,   zLemmatizer.mode]   s    zzr%   docc                 N   | j                   s| j                  t        j                         | j	                         }	 |D ]7  }| j
                  s|j                  dk(  s| j                  |      d   |_        9 |S # t        $ r } || j                  | |g|       Y d}~yd}~ww xY w)zApply the lemmatizer to one document.

        doc (Doc): The Doc to process.
        RETURNS (Doc): The processed Doc.

        DOCS: https://spacy.io/api/lemmatizer#call
        r   N)r?   _validate_tablesr   E1004get_error_handlerr6   r    rA   lemma_	Exceptionr:   )rI   rM   error_handlertokenes        r#   __call__zLemmatizer.__call__a   s     !!&,,/..0	5 <>>U[[A%5#'>>%#8#;EL< J 	5$))TC5!44	5s    A; A; ;	B$BB$)nlpr>   get_examplesrX   r>   c                   | j                  | j                        \  }}|t        j                  d       t	        | j
                  j                  |      }t	        | j
                  j                  |d      }|j                  D ]#  }|j                  ||j                  |             % || _
        | j                  t        j                         y)a  Initialize the lemmatizer and load in data.

        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
        nlp (Language): The current nlp object the component is part of.
        lookups (Lookups): The lookups object containing the (optional) tables
            such as "lemma_rules", "lemma_index", "lemma_exc" and
            "lemma_lookup". Defaults to None.
        Nz2Lemmatizer: loading tables from spacy-lookups-data)langtablesF)r[   r\   strict)r5   r,   r   debugr   r8   r[   r\   	set_table	get_tabler>   rO   r   rP   )rI   rY   rX   r>   required_tablesoptional_tablesoptional_lookupstables           r#   
initializezLemmatizer.initializet   s      ,0+B+B499+M(?LLMN"

PG+ZZ___U  *00 L!!%)9)C)CE)JKLfll+r%   error_messagec                     | j                  | j                        \  }}|D ]K  }|| j                  vst        |j	                  | j                  || j                  j
                               d| _        y)z8Check that the lookups are correct for the current mode.)r,   r\   foundTN)r5   r,   r>   rD   rF   r\   r?   )rI   rf   ra   rb   rd   s        r#   rO   zLemmatizer._validate_tables   sv    +/+B+B499+M($ 	EDLL( !((!YY."ll11 )  	 r%   rU   c                     | j                   j                  di       }|j                  |j                  |j                        }t	        |t
              r|g}|S )zLemmatize using a lookup-based approach.

        token (Token): The token to lemmatize.
        RETURNS (list): The available lemmas for the string.

        DOCS: https://spacy.io/api/lemmatizer#lookup_lemmatize
        r/   )r>   r`   gettext
isinstancestr)rI   rU   lookup_tableresults       r#   r@   zLemmatizer.lookup_lemmatize   sJ     ||--nbA!!%**ejj9fc"XFr%   c                    |j                   |j                  |j                  j                  f}|| j                  v r| j                  |   S |j
                  }|j                  j                         }|dv r9|dk(  r#t        j                  t        j                         |j                         gS | j                  |      r|j                         gS | j                  j                  di       }| j                  j                  di       }| j                  j                  di       }t        |j!                  |      |j!                  |      |j!                  |      f      s|dk(  r|gS |j                         gS |j!                  |i       }|j!                  |i       }	|j!                  |i       }
|}|j                         }g }g }|
D ]  \  }}|j#                  |      s|dt%        |      t%        |      z
   |z   }|s8||v s|j'                         s)||v r|j)                  d|       c|j+                  |       u|j+                  |        t-        t.        j1                  |            }|	j!                  |g       D ]  }||vs|j)                  d|        |s|j3                  |       |s|j+                  |       || j                  |<   |S )	zLemmatize using a rule-based approach.

        token (Token): The token to lemmatize.
        RETURNS (list): The available lemmas for the string.

        DOCS: https://spacy.io/api/lemmatizer#rule_lemmatize
        ) eolspacerq   r3   r2   r1   propnNr   )orthposmorphkeyrH   rk   pos_lowerwarningswarnr   W108is_base_formr>   r`   anyrj   endswithlenisalphainsertappendlistdictfromkeysextend)rI   rU   	cache_keystringuniv_posindex_table	exc_tablerules_tableindex
exceptionsrulesorigforms	oov_formsoldnewforms                    r#   rB   zLemmatizer.rule_lemmatize   s{    ZZEKKOO<	

"::i((::##%++2~hmm,LLN##U#LLN##ll,,]B?LL**;;	ll,,]B?)h')
 7"x''"-]]8R0
"-	 	+HCs#6Fc#h 67#=U]$,,.u}Q-T*$$T*	+ T]]5)*
 NN62. 	&D5 Q%	& LL#LL %

9r%   c                      y)a  Check whether the token is a base form that does not need further
        analysis for lemmatization.

        token (Token): The token.
        RETURNS (bool): Whether the token is a base form.

        DOCS: https://spacy.io/api/lemmatizer#is_base_form
        Fr(   )rI   rU   s     r#   r~   zLemmatizer.is_base_form   s     r%   excludepathr   c                \     i } fd|d<    fd|d<   t        j                  ||       y)zSerialize the pipe to disk.

        path (str / Path): Path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.

        DOCS: https://spacy.io/api/lemmatizer#to_disk
        c                 >    j                   j                  |       S Nr   )r8   to_diskpr   rI   s    r#   <lambda>z$Lemmatizer.to_disk.<locals>.<lambda>  s    tzz'9'9!W'9'M r%   r8   c                 :    j                   j                  |       S r'   )r>   r   r   rI   s    r#   r   z$Lemmatizer.to_disk.<locals>.<lambda>	  s    )=)=a)@ r%   r>   N)r   r   )rI   r   r   	serializes   ` ` r#   r   zLemmatizer.to_disk   s.     	M	'@	)T9g.r%   c                ~     i } fd|d<    fd|d<   t        j                  ||        j                           S )aH  Load the pipe from disk. Modifies the object in place and returns it.

        path (str / Path): Path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (Lemmatizer): The modified Lemmatizer object.

        DOCS: https://spacy.io/api/lemmatizer#from_disk
        c                 >    j                   j                  |       S r   )r8   	from_diskr   s    r#   r   z&Lemmatizer.from_disk.<locals>.<lambda>  s    )=)=a)=)Q r%   r8   c                 :    j                   j                  |       S r'   )r>   r   r   s    r#   r   z&Lemmatizer.from_disk.<locals>.<lambda>  s    4<<+A+A!+D r%   r>   )r   r   rO   )rI   r   r   deserializes   ` ` r#   r   zLemmatizer.from_disk  s?     8:QG!DIt['2r%   c                z     i } fd|d<    j                   j                  |d<   t        j                  |      S )zSerialize the pipe to a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (bytes): The serialized object.

        DOCS: https://spacy.io/api/lemmatizer#to_bytes
        c                  <    j                   j                         S r   )r8   to_bytes)r   rI   s   r#   r   z%Lemmatizer.to_bytes.<locals>.<lambda>'  s    TZZ%8%8%8%I r%   r8   r>   )r>   r   r   )rI   r   r   s   `` r#   r   zLemmatizer.to_bytes  s9     	I	'#||44	)}}Y00r%   
bytes_datac                ~     i } fd|d<    fd|d<   t        j                  ||        j                           S )a  Load the pipe from a bytestring.

        bytes_data (bytes): The serialized pipe.
        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (Lemmatizer): The loaded Lemmatizer.

        DOCS: https://spacy.io/api/lemmatizer#from_bytes
        c                 >    j                   j                  |       S r   )r8   
from_bytes)br   rI   s    r#   r   z'Lemmatizer.from_bytes.<locals>.<lambda>7  s    )>)>q')>)R r%   r8   c                 :    j                   j                  |       S r'   )r>   r   )r   rI   s    r#   r   z'Lemmatizer.from_bytes.<locals>.<lambda>8  s    4<<+B+B1+E r%   r>   )r   r   rO   )rI   r   r   r   s   ` ` r#   r   zLemmatizer.from_bytes+  s?     8:RG!EI
K9r%   )
lemmatizerr'   )(__name__
__module____qualname____doc__classmethodrm   r
   r   r5   r$   r   r	   r   boolr   rK   propertyr,   r   rW   r   r   r   r   re   r   E912rO   r   r@   rB   r~   r   r   r   r   r   bytesr   r   r(   r%   r#   r+   r+      s(    c eDItCy4H.I  $ !	( %5(( ( 	( ( ( "( 
(T  5C 5C 5* CG, #'%),xHW,=(=>?, h	,
 '",8 5;KK c D e S	 DE Dd3i DL	% 	D 	 CSBT/#t)$/2:3-/  CSBT#t)$2:3-	$ 4D3E 18C= 1% 1 >N=O-5c]	r%   r+   c                 x    | dk(  r!t        j                  d      }|j                  S t        dt         d|        )Nmake_lemmatizerzspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_moduler   AttributeErrorr   )r:   modules     r#   __getattr__r   ?  sA      (()CD%%%
78*,>tfE
FFr%   )+r   r{   pathlibr   typingr   r   r   r   r   r	   r
   r   	thinc.apir   rq   r   errorsr   r   languager   r>   r   r   r7   r   tokensr   r   trainingr   r   r   r8   r   piper   rm   r$   r)   r+   r   r(   r%   r#   <module>r      su       N N N   %  +    +  @x0 @tCH~ @_ _F	Gr%   