
    i>                     N    d dl Z d dlmZmZmZ ddlmZ ddlmZ  G d de      Z	y)    N)ListOptionalTuple   )
Lemmatizer)Tokenc                       e Zd ZdZededeee   ee   f   f fd       Zde	dee   fdZ
dedee   dee   fd	Zd
edee   dedee   dee   f
dZd
edee   dedee   dee   f
dZd
edee   dedee   dee   f
dZd
edee   dedee   dee   f
dZd
edee   dedee   dee   f
dZd
edee   dee   dee   dee   f
dZd
edee   dee   dee   dee   f
dZd
edee   dee   dee   dee   f
dZ xZS )SpanishLemmatizerzH
    Spanish rule-based lemmatizer with morph-based rule selection.
    modereturnc                 <    |dk(  rg d}|g fS t         |   |      S )Nrule)lemma_ruleslemma_rules_groupslemma_index	lemma_exc)superget_lookups_config)clsr   required	__class__s      i/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/lang/es/lemmatizer.pyr   z$SpanishLemmatizer.get_lookups_config   s)    6>XHb>!7-d33    tokenc                 d   |j                   |j                  t        |j                        f}|| j                  v r| j                  |   S |j
                  }|j                  j                         }t        |j                        }|dv r|j                         gS |dv r%|j                  r|dk7  r|j                         gS |gS |j                         }| j                  j                  d      j                  |i       j                  |      }|t        |      }n|dk(  rd}n|}| j                  |t        |            }	| j                  j                  d      j                  |g       }
 t        | d|z         |||	|
      }t        t         j#                  |            }|| j                  |<   |S )	N) eolspace)	adpcconjintjpartpropnpunctsconjsymxr#   r   auxverbr   
lemmatize_)orthposstrmorphcachetextpos_lowersetis_sent_startlookups	get_tablegetlistselect_rulegetattrdictfromkeys)selfr   	cache_keystringr,   featuresexclemmasrule_posr   indexs              r   rule_lemmatizez SpanishLemmatizer.rule_lemmatize   s   ZZC,<=	

"::i((jj u{{#&&LLN## 

 

 ""sg~''xll$$[155c2>BB6J?#YFe|!##Hd8n=DLL**=9==hKE;WT<(#:;$F $--/0F &

9r   r,   r@   c                     | j                   j                  d      }||v r.||   D ]&  }t        |d         j                  |      s!|d   c S  y )Nr      r   )r5   r6   r3   issubset)r=   r,   r@   groupsgroups        r   r9   zSpanishLemmatizer.select_ruleB   sU    ''(<=&= $uQx=))(3 8O$ r   wordr   rD   c           	         g }g }| j                   j                  d      j                  |g       D ]6  \  }}t        j                  |dz   ||      }	|	|k7  s&|j                  |	       8 g }
d|v r|D ]~  }	|	j                  d      s|	j                  d      s&| j                   j                  d      j                  dg       D ]+  \  }}|
j                  t        j                  |||	             -  |j                  |
       |D ]  }||v s|j                  |        t        |      dkD  r|S t        |      dkD  r|S |gS )a"  
        Lemmatize an adjective.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        r   $Number=Plurnsaccentsr   	r5   r6   r7   resubappendendswithextendlenr=   rK   r@   r   rD   possible_lemmasselected_lemmasoldnewpossible_lemmaadditional_lemmaslemmas               r   lemmatize_adjzSpanishLemmatizer.lemmatize_adjJ   \     ..}=AA$K 	7HCVVC#IsD9N%&&~6	7 H$"1 S!**3/>3J3J33O$(LL$:$:=$I$M$M!2% SS *00S.1QRSS 	01$ 	.E~&&u-	.
 !#""!A%""6Mr   c                     | j                   j                  d      j                  dg       D ]  \  }}||k(  s|gc S  |gS )a  
        Lemmatize an adverb.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        r   adverbs)r5   r6   r7   )r=   rK   r@   r   rD   r\   r]   s          r   lemmatize_advzSpanishLemmatizer.lemmatize_advz   sJ     ..}=AA)RP 	HCs{u	
 vr   c                 z   g }g }| j                   j                  d      j                  dg       D ]  \  }}||k(  s|gc S  | j                   j                  d      j                  dg       D ]  \  }}||k(  s|gc S  | j                   j                  d      j                  dg       D ]0  \  }}t        j                  |dz   ||      }	|j                  |	       2 |j                  |       t        |      dk(  r|S t        |      dkD  r/|D ]  }
|
|v s|j                  |
        t        |      dk\  r|S |S g S )a"  
        Lemmatize a determiner.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        r   detdet_and_pron_fixeddet_and_pron_generalrM   rG   r5   r6   r7   rS   rT   rU   rX   r=   rK   r@   r   rD   rZ   r[   r\   r]   r^   r`   s              r   lemmatize_detzSpanishLemmatizer.lemmatize_det   s_     ..}=AA%L 	HCs{u	 ..}=AA "
 	HC s{u		 ..}=AA"B
 	3HC  VVC#IsD9N"">2		3
 	t$1$""!A%( 2E>#**512 ?#q(&&&&Ir   c           	         g }g }| j                   j                  d      j                  |g       D ]6  \  }}t        j                  |dz   ||      }	|	|k7  s&|j                  |	       8 g }
d|v r|D ]~  }	|	j                  d      s|	j                  d      s&| j                   j                  d      j                  dg       D ]+  \  }}|
j                  t        j                  |||	             -  |j                  |
       |D ]  }||v s|j                  |        t        |      dkD  r|S t        |      dkD  r|S |gS )a  
        Lemmatize a noun.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        r   rM   rN   rO   rP   rQ   r   rR   rY   s               r   lemmatize_nounz SpanishLemmatizer.lemmatize_noun   rb   r   c                 2   | j                   j                  d      j                  dg       D ]  \  }}||k(  s|gc S  |j                  d      }t	        j
                  d|d         rt	        j                  dd|      }t	        j                  dd|      }|gS )	a  
        Lemmatize a numeral.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        r   num,z(\.)([0-9]{3})$r   z\.r   .)r5   r6   r7   splitrS   searchrT   )r=   rK   r@   r   rD   r\   r]   splitted_words           r   lemmatize_numzSpanishLemmatizer.lemmatize_num   s      ..}=AA%L 	HCs{u	
 

399'q)9:66%d+DvvdD$'vr   c                    g }g }| j                   j                  d      j                  dg       D ]  \  }}||k(  s|gc S  | j                   j                  d      j                  dg       D ]  \  }}||k(  s|gc S  | j                   j                  d      j                  dg       D ]6  \  }}t        j                  |dz   ||      }	|	|k7  s&|j                  |	       8 |j                  |       t        |      dk(  r|S t        |      dkD  r/|D ]  }
|
|v s|j                  |
        t        |      dk\  r|S |S g S )a  
        Lemmatize a pronoun.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        r   pronrh   ri   rM   rG   rj   rk   s              r   lemmatize_pronz SpanishLemmatizer.lemmatize_pron  sg     ..}=AA&"M 	HCs{u	 ..}=AA "
 	HC s{u		 ..}=AA"B
 	7HC  VVC#IsD9N%&&~6	7 	t$1$""!A%( 2E>#**512 ?#q(&&&&Ir   c           	      b   d|v r| j                  ||||      S g }g }t        |xs d      }| j                  j                  d      j	                  |g       D ]6  \  }}t        j                  |dz   ||      }	|	|k7  s&|j                  |	       8 |D ]  }
|
|v s|j                  |
        t        |      dk(  r|D ]  }
| j                  j                  d      j	                  dg       D ]J  \  }}||
v st        |
      D ]2  \  }}||k(  s|
d| |z   |
|dz   d z   }||v s"|j                  |       4 L | j                  j                  d      j	                  d	g       D ]3  \  }}||
v s|
j                  ||d      }||v s#|j                  |       5  g }|D ][  }	| j                  j                  d      j	                  d
g       D ]+  \  }}|j                  t        j                  |||	             - ] |j                  |       t        |      dkD  r|S t        |      dkD  r|S |gS )a  
        Lemmatize a verb.

        word (str): The word to lemmatize.
        features (List[str]): The morphological features as a list of Feat=Val
            pairs.
        index (List[str]): The POS-specific lookup list.

        RETURNS (List[str]): The list of lemmas.
        PronType=Prsr   r   rM   r   	voc_alt_1NrG   	voc_alt_2rQ   )lemmatize_verb_pronr-   r5   r6   r7   rS   rT   rU   rX   	enumeratereplacerW   )r=   rK   r@   r   rD   rZ   r[   r\   r]   r^   r`   icharvoc_alt_lemmar_   s                  r   lemmatize_verbz SpanishLemmatizer.lemmatize_verbG  s    X%++D(D%HH  4:2..}=AA$K 	7HCVVC#IsD9N%&&~6	7
 % 	.E~&&u-	. 1$ ) B $ 6 6} E I I! JHC e|'0'7 JGAt#s{05bq	C%A.0P#0E#9$3$:$:=$I	J	J !% 6 6} E I I! BHC e|(-c3(B(E1+22=ABB& - 	KN LL22=AEEiQST KS!((S.)IJK	K 	01 !#""!A%""6Mr   c           	         d}g }|}t        j                  ||      }|st        |      dk  ret        j                  |j	                  d      dz   d|      }|j	                  d      g|z   }t        j                  ||      }|t        |      dk  re| j
                  j                  d      j                  dg       D ]  \  }	}
t        j                  |	|
|      } | j
                  j                  d      j                  d	i       j                  |      }||d
   }n-| j                  d	|      }| j                  ||dhz
  ||      d
   }g }|D ]  }| j
                  j                  d      j                  di       j                  |      }||j                  |d
          T| j                  d|      }|j                  | j                  ||||      d
           |dz   dj                  |      z   gS )Nz^(.*?)([mts]e|l[aeo]s?|n?os)$r      rM   r   r   rQ   r   r)   r   r{   rx    )rS   rt   rX   rT   rJ   r5   r6   r7   r9   r   rU   ry   join)r=   rK   r@   r   rD   	pron_pattpronsr)   mr\   r]   rA   
verb_lemmapron_lemmasrx   s                  r   r~   z%SpanishLemmatizer.lemmatize_verb_pron  s    4	IIi&mE
a66!''!*s*B5DWWQZL5(E		)T*A mE
a
 ..}=AA)RP 	*HC66#sD)D	* ll$$[155fbAEEdK?QJ##FH5D,,N++	
 J  	XD,,((599&"EII$OC""3q6*''9""4#6#6tXtU#STU#VW	X S 388K#8899r   )__name__
__module____qualname____doc__classmethodr-   r   r   r   r   rE   r   r9   ra   re   rl   rn   rv   ry   r   r~   __classcell__)r   s   @r   r
   r
      sU    4c 4eDItCy4H.I 4 4+E +d3i +Zs d3i HSM ..#'9.47.@DS	.	c.`#'947@DS		c,33#'93473@DS	3	c3j..#'9.47.@DS	.	c.`#'947@DS		c655#'954<SM5JNs)5	c5nCC#'9C4<SMCJNs)C	cCJ#:#:#'9#:4<SM#:JNs)#:	c#:r   r
   )
rS   typingr   r   r   pipeliner   tokensr   r
    r   r   <module>r      s"    	 ( ( " g:
 g:r   