
    i'                         d dl mZmZmZmZmZ d dlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ d	d	d
Z G d de	      Zdedeeeeef   f   fdZy)    )CallableDictListOptionalTuple)Model   )
Lemmatizer)lemmatizer_score)POS)Token)Vocab")   «   »c                        e Zd Z	 dddeddedee   dededed	ee	   d
df fdZ
ded
ee   fdZded
ee   fdZded
ee   fdZded
ee   fdZded
ee   fdZded
ee   fdZ xZS )RussianLemmatizer	pymorphy3Fmode	overwritescorervocabmodelnamer   r   r   returnNc                .   |dv r#	 ddl m} t        | dd       5 |d      | _        n&|dv r"	 ddlm} t        | dd        |d      | _        t        |   ||||||
       y # t        $ r t        d      d w xY w# t        $ r t        d	      d w xY w)N>   	pymorphy2pymorphy2_lookupr   )MorphAnalyzerzThe lemmatizer mode 'pymorphy2' requires the pymorphy2 library and dictionaries. Install them with: pip install pymorphy2# for Ukrainian dictionaries:pip install pymorphy2-dicts-uk_morphru)lang>   r   pymorphy3_lookupzThe lemmatizer mode 'pymorphy3' requires the pymorphy3 library and dictionaries. Install them with: pip install pymorphy3# for Ukrainian dictionaries:pip install pymorphy3-dicts-ukr   )r   r    ImportErrorgetattrr!   r   super__init__)	selfr   r   r   r   r   r   r    	__class__s	           i/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/lang/ru/lemmatizer.pyr(   zRussianLemmatizer.__init__   s     44	3 tXt,4+666	3 tXt,4+65$TYv 	 	
/  !5   !5 s   A% A> %A;>Btokenc                 |   |j                   }|j                  }|j                  j                         }|dk(  rt        j                  ||      gS |dvr| j                  |      S | j                  j                  |      }g }|D ]Z  }|j                  st        t        |j                              \  }}	||k(  s|dv r|dv s|dk(  sD|dk(  sJ|j                  |       \ t        |      s|j                         gS |t        |      dk(  r>t         |v r6t#        t$        j'                  |D cg c]  }|j(                   c}            S |dv rg d}
n|d	k(  rd
dg}
n|dk(  rg d}
ng d}
|g }}|D ]n  }t        t        |j                              \  }	}|
D ]5  }||v s||v s||   j                         ||   j                         k7  s5 ] |j                  |       p t        |      s|j                         gS t#        t$        j'                  |D cg c]  }|j(                   c}            S c c}w c c}w )NPUNCT)ADJDETNOUNNUMPRONPROPNVERB)r1   r4   r3   r0      )r/   r0   r1   r4   )CaseNumberGenderr2   r7   r9   )r7   r8   r9   Person)Aspectr9   Moodr8   TenseVerbFormVoice)textpos_morphto_dictPUNCT_RULESget_pymorphy_lookup_lemmatizer!   parseis_knownoc2udstrtagappendlenlowerr   listdictfromkeysnormal_form)r)   r,   stringuniv_pos
morphologyanalysesfiltered_analysesanalysisanalysis_pos_features_to_compareanalysis_morphfeatures                r+   _pymorphy_lemmatizez%RussianLemmatizer._pymorphy_lemmatize7   sQ   ::[[((*
wOOFF344QQ22599;;$$V,  
	3H$$#C$56OL!( $55(FW:W!V+(e2C!((2
	3 $%LLN###j/Q"63*;LDUVx33VW  66">#)8"4"H# '8#  
	3H %c(,,&7 8A~. 3z)>1"7+113~g7N7T7T7VV3 "((2
	3 $%LLN##MM@QRH8//RS
 	
A WB Ss   ,H4H9c                     |j                   }| j                  j                  |      }t        |D cg c]  }|j                   c}      }t        |      dk(  rt        t        |            gS |gS c c}w )Nr6   )r@   r!   rG   setrR   rM   nextiter)r)   r,   rS   rV   annormal_formss         r+   rF   z,RussianLemmatizer._pymorphy_lookup_lemmatizet   sf    ;;$$V, X>rBNN>?|!l+,--x ?s   A1c                 $    | j                  |      S Nr^   r)   r,   s     r+   pymorphy2_lemmatizez%RussianLemmatizer.pymorphy2_lemmatize~       ''..    c                 $    | j                  |      S rf   rF   rh   s     r+   pymorphy2_lookup_lemmatizez,RussianLemmatizer.pymorphy2_lookup_lemmatize       ..u55rk   c                 $    | j                  |      S rf   rg   rh   s     r+   pymorphy3_lemmatizez%RussianLemmatizer.pymorphy3_lemmatize   rj   rk   c                 $    | j                  |      S rf   rm   rh   s     r+   pymorphy3_lookup_lemmatizez,RussianLemmatizer.pymorphy3_lookup_lemmatize   ro   rk   )
lemmatizer)__name__
__module____qualname__r   r   r   r   rJ   boolr   r(   r   r   r^   rF   ri   rn   rq   rs   __classcell__)r*   s   @r+   r   r      s    
 !	&
  %5&
&
 &
 	&
 &
 &
 "&
 
&
P;
 ;
49 ;
z $s) / /49 /6 6$s) 6/ /49 /6 6$s) 6rk   r   oc_tagr   c                    i ddddddddddd	d
ddddddddddddddddddddddddddddd d!d"d#d$d%d&d&d&d'd'd(d)d*
d+d,d-d.d/d0d1d d2d3d4d5d6dd7id8d9d:d9d8d;d<d=d>d?d@d@dAdBdCdDdDdEdFdGdHdIdJdKidL}dM}t               }t               }| j                  dNdO      j                  dO      }|D ]S  }dP}t	        |j                               D ]  \  }}	||	v sdQ}|dRk(  r|	|   }|	|   ||<   ! |rC|j                  |       U t        |      dSkD  r8|j                         }|dTv rdU}n|dVk(  rdW}n
|dXk(  rdY|dZ<   t        |      dSkD  r8||fS )[NADJFr/   ADJSADVBADVApror0   COMPCONJCCONJGRNDr5   INFNINTJr1   NPROr3   NUMRr2   NUMBPNCTr.   PRCLPARTPREPADPPRTF)PRTSr5   AnimInan)animinanImpPerf)impfperfInsAccDatGenLocNomVoc)
abltaccsdatvgen1gen2gentloc2loctnomnvoctCmpSup)r   SuprFemMascNeut)femnmascneutInd)imprindcPlurSing)plursingDigit123)1per2per3perexclinclFutPastPres)futrpastpresBrev)r}   r   ConvInfPartFin)r   r   r   r   r5   ActPass)actvpssvAbbrYes)_POSAnimacyr;   r7   Degreer9   r<   r8   NumFormr:   r=   Variantr>   r?   r   X ,FTr   r   )NamePatrSurnGeoxOrgnr4   AuxtAUXPltmPtanr8   )	rP   r`   replacesplitsorteditemsaddrM   pop)
rz   gram_mapposrU   	unmatchedgramsgrammatchcateggmaps
             r+   rI   rI      s   
E
E
 E
 E	

 E
 G
 F
 F
 F
 F
 F
 E
 E
 G
 F
  E!
" F#
$ '
* #F3 &1
 !%0 &&A.!62G$S#sS@"F3
  0i5Hl CJINN3$**3/E 
 !(.."23 	3KE4t|F?t*C(,T
Ju%	3 MM$
  i.1
}};;CV^CV^#)Jx  i.1
 
?rk   N)typingr   r   r   r   r   	thinc.apir   pipeliner
   pipeline.lemmatizerr   symbolsr   tokensr   r   r   rD   r   rJ   rI    rk   r+   <module>r      s^    8 8  " 3   $z6
 z6zN# N%T#s(^ 34 Nrk   