
    iA                        d dl Z d dlZd dlZd dlmZ d dlZd dlmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlm Z m!Z! ddl"m#Z# eegZ$ejJ                  d        Z&ejJ                  d        Z'ejJ                  d        Z(ejR                  jU                  d      d        Z+ejR                  jU                  d      d        Z,ejR                  jU                  d      d        Z-ejR                  jU                  d      d        Z.ejR                  jU                  d      d        Z/ejR                  jU                  d      d        Z0ejR                  jU                  d      d        Z1ejR                  jU                  d      d        Z2ejR                  jg                  de$      d        Z4ejR                  jg                  de$      d         Z5ejR                  jg                  de$      d!        Z6d" Z7d# Z8d$ Z9d% Z:ejR                  jU                  d&      d'        Z;ejR                  jg                  de$      d(        Z<d) Z=d* Z>d+ Z?d, Z@y)-    N)Linear)Vocabloadregistry)English)Language)DependencyParserEntityRecognizerEntityRulerSentenceRecognizerTaggerTextCategorizerTrainablePipe)DEFAULT_PARSER_MODEL)DEFAULT_SENTER_MODEL)DEFAULT_TAGGER_MODEL)DEFAULT_SINGLE_TEXTCAT_MODEL)Span)ensure_path
load_model   )make_tempdirc                     ddddddd}dt         i}t        j                  |d	
      d   }t        | |fi |}|j	                  d       |S )NF   d            ?        learn_tokensmin_action_frequpdate_with_oracle_cut_size
beam_widthbeam_update_probbeam_densitymodelTvalidatensubj)r   r   resolver	   	add_labelen_vocabconfigcfgr&   parsers        ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/serialize/test_serialize_pipeline.pyr0   r0       se     '*F (
)CS409Eh88F
WM    c                 v    ddddddd}dt         i}t        j                  |d	
      d   }t        | |fi |}|S )NFr   r   r   r   r   r   r&   Tr'   )r   r   r*   r	   r,   s        r1   blank_parserr4   1   sW     '*F (
)CS409Eh88FMr2   c                 ~    dt         i}t        j                  |d      d   }t        | |      }t        | |      }||fS Nr&   Tr'   )r   r   r*   r   )r-   r/   r&   tagger1tagger2s        r1   taggersr9   A   sG    (
)CS409EXu%GXu%GGr2   i  c                      t               } | j                  d      }|j                  d       | j                          t	        | j                  ddg             y )NtaggerAhi )r   add_piper+   
initializelistpipe)nlpr;   s     r1   test_issue3456rD   J   sF     )C\\(#F
SNN4*	r2   i  c                 ,   ddddddiddigddddigddd	d
dgdddddg}t        |       }t        ||d      }|j                         }t        |      t        |      k(  sJ t        |j                        dk(  sJ |j
                  sJ t        |      }|j                  |      }t        |      t        |      k(  sJ t        |j                        dk(  sJ |j
                  |j
                  k(  sJ |j                  |j                  k(  sJ y )NHELLOhello worldlabelpatternBYELOWERbyeORTHCOMPLEXfoo*rN   OPTECH_ORGApplea1rI   rJ   idvocabTpatternsoverwrite_ents   )r   r   to_byteslenlabels	overwrite
from_bytes
ent_id_sep)r-   r\   rC   rulerruler_bytes	new_rulers         r1   test_issue_3526_1rh   T   s.    m4gu%57G$HI'8&9:%s)C(DE=H 
"ChtDE.."Ku:X&&&u||!!!???C I$$[1Iy>SZ'''y A%%%%//1115#3#3333r2   c                    ddddddiddigddddigddd	d
dgdddddg}t        |       }t        ||d      }t        j                  |j                        }t        |      }|j                  |      }t        |      t        |      k(  sJ |j                  D ]  }||j                  v rJ  |j                  |j                  usJ y )NrF   rG   rH   rK   rL   rM   rN   rO   rP   rQ   rR   rT   rU   rV   rW   rY   Tr[   )r   r   srslymsgpack_dumpsr\   rc   r`   rb   )r-   r\   rC   re   bytes_old_stylerg   rJ   s          r1   test_issue_3526_2rm   k   s     m4gu%57G$HI'8&9:%s)C(DE=H 
"ChtDE))%..9OC I$$_5Iy>SZ'''>> -),,,,,-eoo555r2   c                    ddddddiddigddddigddd	d
dgdddddg}t        |       }t        ||d      }t               5 }|dz  }t        j                  |j                  d      |j                         t        |      j                  |      }|j                  D ]  }||j                  v rJ  t        |      t        |      k(  sJ |j                  |j                  usJ 	 d d d        y # 1 sw Y   y xY w)NrF   rG   rH   rK   rL   rM   rN   rO   rP   rQ   rR   rT   rU   rV   rW   rY   Tr[   entity_rulerz.jsonl)
r   r   r   rj   write_jsonlwith_suffixr\   	from_diskr`   rb   )r-   r\   rC   re   tmpdirout_filerg   rJ   s           r1   test_issue_3526_3ru      s    m4gu%57G$HI'8&9:%s)C(DE=H 
"ChtDE	 :6N*(..x8%..I$..x8	~~ 	1Gi00000	19~U+++""%//999: : :s   	A,C666C66C?c                    t        |       }dddg}ddi}|j                  d|      }|j                  |       t               5 }|j	                  |       |j                  d      }|j                  dddgk(  sJ |j                  du sJ t        |      }|j                  d      }|j                  dddgk(  sJ |j                  du sJ 	 d d d        y # 1 sw Y   y xY w)	NrY   ORGrU   rH   r]   Tro   r.   )	r   r?   add_patternsr   to_diskget_piper\   rb   r   )r-   rC   r\   r.   re   rs   nlp2rg   s           r1   test_issue_3526_4r}      s    

"CG45H%FLLL7E	x 	 +6F^,~~Eg"F!GGGG$&&&F|MM.1	!!'&J%KKKK""d***+ + +s   B	CC!i  c                  J   t               } | j                  d      }|j                  d       | j                          ddddddidd	igdg}| j                  d
d      }|j	                  |        | d      }|j
                  d   j                  dk(  sJ t               5 }t        |      }|j                         s|j                          | j                  |       t        |      } |d      }|j
                  d   j                  dk(  sJ 	 ddd       y# 1 sw Y   yxY w)z@Test that serialization of an EntityRuler before NER works fine.ner
SOME_LABELMY_ORGrU   rH   MY_GPElowersan	franciscoro   )beforeWhat do you think about Apple ?r   N)r   r?   r+   r@   ry   entslabel_r   r   existsmkdirrz   r   )	rC   r   r\   re   doc1d
output_dirr|   doc2s	            r1   test_issue4042r      s    )C
,,u
CMM,NN w/%(87K:P'QRH
 LLL6E	x 01D99Q<(***	 /1 ^
  "J*%56yy|""h.../ / /s   !A.DD"c                     t               } | j                  d      }|j                  d       | j                           | d      }t	        |j
                        dk(  sJ d|j
                  v sJ t        |ddd      }t        |j                        |gz   |_        |j                  d        ||       t	        |j
                        d	k(  sJ d|j
                  v sJ d|j
                  v sJ t               5 }t        |      }|j                         s|j                          |j                  |       i }| j                  d|
      }|j                  |       t	        |j
                        d	k(  sJ 	 ddd       y# 1 sw Y   yxY w)z
    Test that serialization of an NER works fine when new labels were added.
    This is the second bug of two bugs underlying the issue 4042.
    r   r   r   r         r   )rI   r   rx   N)r   r?   r+   r@   r`   ra   r   rA   r   r   r   r   r   rz   create_piperr   )nlp1ner1r   	apple_entr   r   r.   ner2s           r1   test_issue4042_bug2r      s]    9D==DNN< OO12Dt{{q   4;;&&&T1ax0ITYY9+-DINN8Jt{{q   4;;&&&t{{"""	 	%1 ^
  "Z f5z"4;;1$$$	% 	% 	%s   9A=F  F	iu  c                     t        d      } t        |       }ddi}|j                  d|      }t               5 }|dz  j	                  d	      5 }t        j                  ||       |j                  d   dk(  sJ 	 d
d
d
       |dz  j	                  d      5 }t        j                  |      }|j                  d   dk(  sJ 	 d
d
d
       d
d
d
       y
# 1 sw Y   YxY w# 1 sw Y   xY w# 1 sw Y   y
xY w)z(Ensure the pickling of the NER goes welltest_vocab_add_vector)vectors_namerY   r"   o   r   rx   zner.pklwbNrb)	r   r   r   r   openpickledumpr/   r   )rZ   rC   r.   r   tmp_pathfile_r   s          r1   test_issue4725_1r      s	    67E

C%sF //%/
/C	 B8"((. 	A%KKU#7789S@@@	A "((. 	B%;;u%D889:cAAA	BB B	A 	A	B 	BB Bs;   C++C;C+*CC+C	C+C(	$C++C4Parserc                 <   dt         i}t        j                  |d      d   } || |      } || |      }|j                  |j	                  dg            }|j	                  dg      }|j	                  dg      }t        |      t        |      k(  sJ ||k(  sJ y )Nr&   Tr'   rZ   exclude)r   r   r*   rc   r_   r`   )r-   r   r/   r&   r0   
new_parserbytes_2bytes_3s           r1   %test_serialize_parser_roundtrip_bytesr      s    (
)CS409EHe$F%(J&&vy'IJJ!!7)!4Goowio0Gw<3w<'''gr2   c                    t               }d}||j                  vsJ dt        i}t        j                  |d      d   } | ||      }|j                  |       ||j                  j                  v sJ t               }||j                  vsJ  | ||      }|j                  |j                  dg            }||j                  j                  v sJ y )N
FunnyLabelr&   Tr'   rZ   r   )	r   stringsr   r   r*   r+   rZ   rc   r_   )r   vocab1rI   r/   r&   parser1vocab2parser2s           r1   test_serialize_parser_stringsr     s    WFE&&&(
)CS409EVU#GeGMM)))))WF&&&VU#G  !1!17)!1!DEGGMM)))))r2   c                    dt         i}t        j                  |d      d   } || |      }t               5 }|dz  }|j	                  |        || |      }|j                  |      }|j                  ddg      }|j                  ddg      }	t        |      t        |	      k(  sJ ||	k(  sJ 	 d d d        y # 1 sw Y   y xY w)Nr&   Tr'   r0   rZ   r   )r   r   r*   r   rz   rr   r_   r`   )
r-   r   r/   r&   r0   r   	file_pathparser_dparser_bytesparser_d_bytess
             r1   $test_serialize_parser_roundtrip_diskr     s    (
)CS409EHe$F	 .1L	y!(E*%%i0/AB!**GW3E*F< C$7777~---. . .s   A9B99Cc                    | j                   dusJ |j                   dusJ |j                  j                  | j                  j                  k7  sJ | j                  dg      } |j                   j                  d   |j                   | j                  j                         |j                  |       |j                   dusJ |j                  j                  | j                  j                  k(  sJ y )NTrZ   r   resize_output)r&   movesn_movesr_   attrsrc   )r0   r4   
bytes_datas      r1   test_to_from_bytesr   '  s    <<t###T)))%%)=)===='3J-L_-l.@.@&,,BVBVWJ'T)))%%)=)====r2   c                 T   |d   }|j                         }|j                  |      }|j                         |k(  sJ dt        i}t        j                  |d      d   }t        | |      j                  |      }|j                         }t        |      t        |      k(  sJ ||k(  sJ y )Nr   r&   Tr'   )r_   rc   r   r   r*   r   r`   )r-   r9   r7   	tagger1_br/   r&   new_tagger1new_tagger1_bs           r1   %test_serialize_tagger_roundtrip_bytesr   3  s    ajG  "I  +G***(
)CS409E5)44Y?K((*M}Y///I%%%r2   c                    |\  }}t               5 }|dz  }|dz  }|j                  |       |j                  |       dt        i}t        j                  |d      d   }t        | |      j                  |      }	t        | |      j                  |      }
|	j                         |
j                         k(  sJ 	 d d d        y # 1 sw Y   y xY w)Nr7   r8   r&   Tr'   )r   rz   r   r   r*   r   rr   r_   )r-   r9   r7   r8   r   
file_path1
file_path2r/   r&   	tagger1_d	tagger2_ds              r1   $test_serialize_tagger_roundtrip_diskr   @  s    GW	 	<1]
]

#
#,-  t4W=8U+55jA	8U+55jA	!!#y'9'9';;;;	< 	< 	<s   B(CCc                    d}|| j                   vsJ ||j                   vsJ |d   }||j                  j                   vsJ t               5 }|j                  |       ||j                  j                   v sJ |dz  }|j	                  |       dt
        i}t        j                  |d      d   }t        ||      j                  |      }	||	j                  j                   v sJ 	 d d d        y # 1 sw Y   y xY w)NSomeWeirdLabelr   r7   r&   Tr'   )
r   rZ   r   r+   rz   r   r   r*   r   rr   )
r-   de_vocabr9   rI   r;   r   r   r/   r&   r8   s
             r1   test_serialize_tagger_stringsr   N  s    E((((((((((QZF,,,,,	 
.1,,,,,	M	y!,-  t4W=5)33I>-----
. 
. 
.s   BC//C8iQ  c                     dt         i}t        j                  |d      d   }t        | |d      }|j	                  dg       y )Nr&   Tr'   g      ?)	thresholdrZ   r   )r   r   r*   r   r_   )r-   r/   r&   textcats       r1   test_serialize_textcat_emptyr   a  sG     0
1CS409Eh=GgY'r2   c                     dt         i}t        j                  |d      d    fd}        }d|j                  d<    |       j	                  |j                  dg            }d|j                  v sJ  |       j	                  |j                  dg      d	g      }d|j                  vsJ  |       j	                  |j                  d	g      dg      }d|j                  vsJ y )
Nr&   Tr'   c                             } | S N )r   r   r-   r&   s    r1   get_new_parserz3test_serialize_pipe_exclude.<locals>.get_new_parsero  s    He,
r2   barrP   rZ   r   r/   )r   r   r*   r/   rc   r_   )r-   r   r/   r   r0   r   r&   s   ``    @r1   test_serialize_pipe_excluder   j  s    (
)CS409E He$FFJJu!,,V__gY_-OPJJNN"""!,,	*UG - J 
&&&!,,(7) - J 
&&&r2   c                     dt         i}t        j                  |d      d   }t        | |      }|j	                         }t        | |      j                  |      }|j	                         |j	                         k(  sJ y r6   )r   r   r*   r   r_   rc   )r-   r/   r&   srsr_bsr_ds         r1   !test_serialize_sentencerecognizerr     sl    (
)CS409E	He	,B;;=Dh.99$?D;;=DMMO+++r2   c                  x   t               } | j                  d       | j                  d       | j                  d       | j                  d   d   dgk(  sJ | j                  j	                         }t        j
                  |      }|j                  dgk(  sJ |j                  ddgk(  sJ |j                  dgk(  sJ |j                  d   d   dgk(  sJ t               5 }|j                  |       t        j                  |      }d d d        j                  dgk(  sJ |j                  ddgk(  sJ t               5 }|j                  |       t        j                  |dg      }d d d        j                  g k(  sJ |j                  ddgk(  sJ |j                  ddgk(  sJ t               5 }| j                  |       t        j                  |dg      }d d d        j                  dgk(  sJ |j                  dgk(  sJ |j                  g k(  sJ y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   XxY w)Nr   r;   rC   disabled)disabler   )r   r?   disable_piper.   copyfrom_config
pipe_namescomponent_namesr   r   rz   spacyr   )rC   r.   r|   r   nlp3nlp4nlp5s          r1   &test_serialize_pipeline_disable_enabler     s#   
)CLLLLX::eZ(XJ666ZZ__Fv&D??ug%%%E8#4444==XJ&&&;;uj)hZ777	 1Qzz!} ??ug%%%E8#4444	 .1Qzz!eW-. ??b   E8#4444==UH----	 11Azz!hZ01 ??ug%%%E7***==B! 
. .1 1s$   'H<*H$/*H0H!$H-0H9c                      G d dt               }  G d dt               } G d dt               } | t                     }t        j                  t              5  |j                          d d d        t               5 }t        j                  t              5  |j                  |       d d d        d d d         |t                     }t        j                  t              5  |j                          d d d        t               5 }t        j                  t              5  |j                  |       d d d        d d d         |t               t                     }|j                         } |t               t                     j                  |      }|j                         |k(  sJ t               5 }|j                  |        |t               t                     j                  |      }d d d        |j                         |k(  sJ y # 1 sw Y   xY w# 1 sw Y   oxY w# 1 sw Y   txY w# 1 sw Y   ?xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   mxY w)Nc                       e Zd Zd Zy)<test_serialize_custom_trainable_pipe.<locals>.BadCustomPipe1c                      y r   r   selfrZ   s     r1   __init__zEtest_serialize_custom_trainable_pipe.<locals>.BadCustomPipe1.__init__  s    r2   N__name__
__module____qualname__r   r   r2   r1   BadCustomPipe1r     s    	r2   r   c                       e Zd Zd Zy)<test_serialize_custom_trainable_pipe.<locals>.BadCustomPipe2c                      || _         d | _        y r   rZ   r&   r   s     r1   r   zEtest_serialize_custom_trainable_pipe.<locals>.BadCustomPipe2.__init__  s    DJDJr2   Nr   r   r2   r1   BadCustomPipe2r     s    	r2   r   c                       e Zd Zd Zy)8test_serialize_custom_trainable_pipe.<locals>.CustomPipec                      || _         || _        y r   r   )r   rZ   r&   s      r1   r   zAtest_serialize_custom_trainable_pipe.<locals>.CustomPipe.__init__  s    DJDJr2   Nr   r   r2   r1   
CustomPiper    s    	r2   r  )r   r   pytestraises
ValueErrorr_   r   rz   r   rc   rr   )r   r   r  rB   r   
pipe_bytesnew_pipes          r1   $test_serialize_custom_trainable_piper	    s     
] 
 %'"D	z	" 	 1]]:& 	LLO	 %'"D	z	" 	 1]]:& 	LLO	 egvx(DJ%'68,77
CH*,,,	 >1Qegvx0::1=> *,,,% 	 	  	 	 > >sl   H:H H&H H-IH:.I.:IHH	H  H*-H7:I	?IIIc                     t        j                  d      } t        | j                  j                        }d}| j                  j                  j                  |       t        | j                  j                        |dz   k(  sJ t               5 }| j                  |       t        |      }t        | j                  j                        t        |j                  j                        k(  sJ ||j                  j                  v sJ t        |dg      }|t        |j                  j                        k(  sJ ||j                  j                  vsJ 	 d d d        y # 1 sw Y   y xY w)Nen  unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_unlikely_word_r   r   r   )	r   blankr`   rZ   r   addr   rz   r   )rC   orig_strings_lengthwordr   reloaded_nlps        r1   test_load_without_stringsr    s$   
++d
Ccii//0 DII$syy  !%81%<<<<	 	61AAw399$$%\-?-?-G-G)HHHH|))11111A	{3"c,*<*<*D*D&EEEE<--55555	6 	6 	6s   CEE$)Ar   r  rj   	thinc.apir   r   r   r   r   spacy.lang.enr   spacy.languager   spacy.pipeliner	   r
   r   r   r   r   r   spacy.pipeline.dep_parserr   spacy.pipeline.senterr   spacy.pipeline.taggerr   spacy.pipeline.textcatr   spacy.tokensr   
spacy.utilr   r   utilr   test_parsersfixturer0   r4   r9   markissuerD   rh   rm   ru   r}   r   r   r   parametrizer   r   r   r   r   r   r   r   r   r   r   r	  r  r   r2   r1   <module>r#     s        ' ' ! #   ; 6 6 ?  .  "23        4  44 4, 46 6& 4: :( 4+ +" 4/ /8 4% %D 4B B$ <0	 1	 <0* 1*  <0. 1.	>
&<.& 4( ( <0' 1',,>"-J6r2   