
    iF                     b   d dl Z d dlmZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZmZmZmZ d dlmZmZ d d	lmZ d d
lmZmZmZmZ ddlmZ dZ dZ!dZ"dZ# ejH                  d      d        Z%e jL                  jO                  d      d        Z(d Z)d Z*d Z+d Z,d Z-e jL                  j]                  de"e#g      d        Z/d Z0d Z1d Z2d Z3d  Z4e jL                  jk                  d!      d"        Z6d# Z7d$ Z8d% Z9e jL                  j]                  de"e#g      d&        Z:d' Z;d( Z<y))    N)RegistryError)ConfigConfigValidationError)German)English)DEFAULT_CONFIGDEFAULT_CONFIG_PRETRAIN_PATHLanguage)MaxoutWindowEncoderMultiHashEmbedbuild_tb_parser_modelbuild_Tok2Vec_model)ConfigSchemaConfigSchemaPretrain)Example)load_configload_config_from_strload_model_from_configregistry   )make_tempdira  
[paths]
train = null
dev = null

[corpora]

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}

[training]

[training.batcher]
@batchers = "spacy.batch_by_words.v1"
size = 666

[nlp]
lang = "en"
pipeline = ["tok2vec", "tagger"]

[components]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 342
depth = 4
window_size = 1
embed_size = 2000
maxout_pieces = 3
subword_features = true

[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v2"

[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.width}
a  
[paths]
train = null
dev = null

[corpora]

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}

[training]

[training.batcher]
@batchers = "spacy.batch_by_words.v1"
size = 666

[nlp]
lang = "en"
pipeline = ["tok2vec", "tagger"]

[components]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 342
depth = 4
window_size = 1
embed_size = 2000
maxout_pieces = 3
subword_features = true

[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v2"

[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.width}

[pretraining]
aX  
[model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "parser"
extra_state_tokens = false
hidden_width = 66
maxout_pieces = 2
use_upper = true

[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 333
depth = 4
embed_size = 5555
window_size = 1
maxout_pieces = 7
subword_features = false
aY  
[model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "parser"
extra_state_tokens = false
hidden_width = 66
maxout_pieces = 2
use_upper = false

[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 333
depth = 4
embed_size = 5555
window_size = 1
maxout_pieces = 7
subword_features = false
my_test_parserc            	      |    t        t        dddgddgd      t        dddd	
            } t        | ddddd      }|S )NiA  LOWERSHAPEi8  F)widthattrsrowsinclude_static_vectors      r   )r   window_sizemaxout_piecesdepthparserTA      )tok2vec
state_typeextra_state_tokenshidden_widthr#   	use_upper)r   r   r   r   )r(   r%   s     |/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/serialize/test_serialize_config.py	my_parserr.      s^    !G$#(		
 	#1AQOG #F M    i  c                     ddiddid} t        j                  |       }t               5 }|dz  }|j                  |       t	        j
                  |dddii	      }|j                  d   d   dk(  sJ 	 d
d
d
       y
# 1 sw Y   y
xY w)z?Test that config overrides are not lost after load is complete.langenkeyvalue)nlpcustom
test_modelr6   updated_valueconfigN)r   from_configr   to_diskspacyloadr:   )
source_cfg
source_nlpdir_pathsource_pathr5   s        r-   test_issue8190rC      s    
 D
 '"	J $$Z0J	 >8-;'jjh8P-QRzz(#E*o===> > >s   A	A<<Bc                     t               j                  t              } t        j                  t
              5  t        | d       d d d        t        | d      }|j                  d   d   d   dk(  sJ t        |j                  d         dkD  sJ |j                  d	d
gk(  sJ t        |j                  d         dk(  sJ t        |j                  d   d         dk(  sJ |j                  d
       t        |j                  d         dk(  sJ t        |j                  d   d         dk(  sJ t        j                  t              5  di i}t        t        |      d       d d d        t        j                  t              5  dddii}t        t        |      d       d d d        y # 1 sw Y   gxY w# 1 sw Y   UxY w# 1 sw Y   y xY w)NF	auto_fillTtrainingbatchersizei     r(   tagger
componentsr   r5   pipelineyolofoobar)r   from_strnlp_config_stringpytestraisesr   r   r:   len
pipe_namesremove_pipe
ValueError)r:   r5   bad_cfgs      r-   test_create_nlp_from_configrZ      s   X01F	,	- 8v78
 4
8C::j!),V4;;;szz*%&***>>i2222szz,'(A---szz% ,-222OOHszz,'(A---szz% ,-222	z	" @2,vg$?@ 
z	" @u~.vg$?@ @8 8@ @@ @s#   F*F7G*F47G Gc                      t               j                  t              } t        t              }| j                  |      }t        j                  |d   t               y)z;Test that the default pretraining config validates properlypretraining)schemaN)	r   rQ   pretrain_config_stringr   r	   merger   resolver   )r:   pretrain_configfilleds      r-   'test_create_nlp_from_pretraining_configrc      sD    X56F!">?O\\/*FVM*3GHr/   c                  h   t               j                  t              } | d   d   | d   d   | d   d   d| d<   t        | d   j	                               | d   d<   t        | d      }|j                  g dk(  sJ |j                  d	      j                  dk(  sJ |j                  d
      j                  dk(  sJ |j                  d      j                  dk(  sJ |j                  d   }t        |      dk(  sJ t        |j	                               g dk(  sJ |j                  d   d   g dk(  sJ y)zmTest that the nlp object is created correctly for a config with multiple
    instances of the same component.rL   r(   rK   )t2vtagger1tagger2r5   rM   TrE   re   rf   rg   r    N)r   rQ   rR   listkeysr   rV   get_pipe_metafactoryr:   rU   )r:   r5   pipeline_configs      r-   .test_create_nlp_from_config_multiple_instancesrm      sD    X01Fl#I.,'1,'1F<
 !%VL%9%>%>%@ AF5M*
 4
8C>>::::U#++y888Y'//8;;;Y'//8;;;jj.O1$$$$$&'+HHHH::eZ(,IIIIr/   c                     t               j                  t              } t        | d      }|j	                  d      j                  d       |j                          d|j                  v sJ d|j                  v sJ d|j                  vsJ |j	                  d      j                  j                  d      j                  d      dk(  sJ t               5 }|j                  |       t        j                  |      }d|j                  v sJ d|j                  v sJ d|j                  vsJ |j	                  d      j                  j                  d      j                  d      dk(  sJ 	 d	d	d	       y	# 1 sw Y   y	xY w)
zNCreate a custom nlp pipeline from config and ensure it serializes it correctlyTrE   rK   Ar(   r%   nOV  N)r   rQ   rR   r   get_pipe	add_label
initializerV   modelget_refget_dimr   r<   r=   r>   )
nlp_configr5   dnlp2s       r-   test_serialize_nlpr{     sM   ""#45J
 t
<CLL$$S)NN&&&s~~%%%3>>)))<<!''//	:BB4HCOOO	 U1Azz!}DOO+++4??***t...}}X&,,44Y?GGMQTTTTU U Us   BE22E;c                     t               } t               }ddi|d<   | j                  d|       | j                          t	               5 }| j                  |       t        j                  |      }|j                  d      j                  }|j                  d       |j                  d      j                  d      d	k(  sJ |j                  d
      j                  d      d	k(  sJ 	 ddd       y# 1 sw Y   yxY w)zBCreate a custom nlp pipeline and ensure it serializes it correctlyz@architecturesr   ru   r%   r9   r(   uppernIr&   lowerN)r   dictadd_pipert   r   r<   r=   r>   rr   ru   rv   rw   )r5   
parser_cfgry   rz   ru   s        r-   test_serialize_custom_nlpr     s    
)CJ+-=>JwLL*L-NN	 :1Azz!}h'--i }}W%--d3r999}}W%--d3r999: : :s   	BC00C9parser_config_stringc                 H   t               }t               j                  |       }|j                  d|      }|j	                  d       |j                          t               5 }|j                  |       t        j                  |      }|j                  d      j                  }|j                  d       |j                  d   r%|j                  d      j                  d      dk(  sJ |j                  d	      j                  d      dk(  sJ 	 d
d
d
       y
# 1 sw Y   y
xY w)zGCreate a non-default parser config to check nlp serializes it correctlyr%   r9   nsubjr(   	has_upperr}   r~   B   r   N)r   r   rQ   r   rs   rt   r   r<   r=   r>   rr   ru   rv   r   rw   )r   r5   model_configr%   ry   rz   ru   s          r-   test_serialize_parserr   .  s    
 )C8$$%9:L\\(<\8F
WNN	 :1Azz!}h'--i ;;{#==)11$72===}}W%--d3r999: : :s   "B,DD!c                     t               } | j                  d       | j                  d       t        | j                  d      }|j                  | j                  k(  sJ |j                  | j                  k(  sJ |j
                  | j
                  k(  sJ |j                  | j                  k(  sJ |j                  | j                  k(  sJ y)zTTest that a config produced by the nlp object passes training config
    validation.entity_rulernerFrE   N)r   r   r   r:   rV   _pipe_configs
_pipe_meta_factory_meta)r5   new_nlps     r-   test_config_nlp_roundtripr   D  s     )CLL LL$SZZ5AG>>SZZ'''///  C$5$5555///  C$5$5555r/   c                  t   t               } | j                         }t               j                  |      }|j                  | j                  k(  sJ t               } t	               5 }| j                  |       t        j                  |      }ddd       |j                  | j                  k(  sJ y# 1 sw Y   %xY w)zQTest that the config is serialized correctly and not interpolated
    by mistake.N)r   to_bytes
from_bytesr:   r   r<   r=   r>   )r5   	nlp_bytesr   ry   s       r-   $test_config_nlp_roundtrip_bytes_diskr   R  s     )CIi""9-G>>SZZ'''
)C	  1A**Q-  >>SZZ'''   s   #'B..B7c                     d} t        j                  | ddi      dt        dt        dt        fd       }t               }|j                  |       rJ t               }|j                  |       sJ |j                  | ddid	
       |j                  d   d	   }|d   dk(  sJ |d   | k(  sJ t               5 }|j                  |       t        j                  |      }ddd       j                  |       sJ |j                  d	gk(  sJ |j                  d	      j                  | k(  sJ |j                  d   d	   }|d   dk(  sJ |d   | k(  sJ t               j                  |j                  j!                               }d|d   d<   t#        j$                  t&              5  t)        |       ddd       y# 1 sw Y   xY w# 1 sw Y   yxY w)zVTest that config serialization works as expected with language-specific
    factories.'test_serialize_config_language_specificrO      )default_configr5   namec                     d S )Nc                     | S N )docs    r-   <lambda>zQtest_serialize_config_language_specific.<locals>.custom_factory.<locals>.<lambda>g  s    3 r/   r   )r5   r   rO   s      r-   custom_factoryz?test_serialize_config_language_specific.<locals>.custom_factorye  s    r/   d   rP   )r:   r   rL   rk   Nder1   )r   rk   r
   strinthas_factoryr   r:   r   r<   r=   r>   rV   rj   r   rQ   to_strrS   rT   rX   r   )r   r   r5   pipe_configry   rz   r:   s          r-   r   r   `  s    5D__T5"+6H C c  7 *Ct$$$
)C??4   LLuclL7**\*51Ku$$$y!T)))	 1Azz!} D!!!??ug%%%e$,,444++l+E2Ku$$$y!T)))Xt{{1134F F5M&	z	" 'v&' ' ' 's   5'F6!G6F?Gc                     t               j                  t              } | d   j                  d       d| d   d   v sJ d| d   vsJ t	        j
                  t              5  t        | d       d d d        y # 1 sw Y   y xY w)NrL   r(   r5   rM   TrE   )r   rQ   rR   poprS   rT   rX   r   r9   s    r-   #test_serialize_config_missing_pipesr     s}    X01F
<Y'uj1111F<0000	z	" 7v67 7 7s    A77B c                     dddgdi} ddgd}t               j                  t        |      }t        |d      }t	        |t
              sJ |j                  dgk(  sJ t               j                  t              }t        |d      }t	        |t              sJ |j                  d	dgk(  sJ t               5 }|j                  |       t        j                  || 
      }d d d        t	        |t
              sJ |j                  dgk(  sJ t               5 }|j                  |       t        j                  ||
      }d d d        t	        |t
              sJ |j                  dgk(  sJ t               5 }|j                  |       t        j                  |      }d d d        t	        |t              sJ |j                  d	dgk(  sJ y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   GxY w)Nr5   r   rK   )r1   rM   )znlp.langznlp.pipeline)	overridesTrE   r(   r9   )r   rQ   rR   r   
isinstancer   rV   r   r   r<   r=   r>   )overrides_nestedoverrides_dotr:   r5   base_configbase_nlpry   s          r-   test_config_overridesr     s   8*EF!%zBMX0MJF
 4
8Cc6""">>hZ'''(##$56K%kTBHh(((9h"7777	 51jj#345 c6""">>hZ'''	 21jj=12 c6""">>hZ'''	 1jjm c7###>>i22225 5
2 2
 s$   8)G)G6'G#GG #G,zignore:\[W036c            
      \   t        j                  d      } | j                  d       t               5 }| j	                  |       t        j
                  |dddddiiii      }|j                  d   d   d   d   dk(  sJ t        j                  d      d	        }t        j
                  |dddd
ddidiii      }|j                  d   d   d   d   ddik(  sJ t        j                  |j                  d      i       }|j                  |g      }d|v sJ 	 d d d        y # 1 sw Y   y xY w)Nr2   attribute_rulerrL   scorer@scorerszspacy.tagger_scorer.v1r9   test_some_other_keyc                       y)Nsome_other_keyr   r   r/   r-   misc_some_other_keyzGtest_config_overrides_registered_functions.<locals>.misc_some_other_key  s    #r/   z)spacy.overlapping_labeled_spans_scorer.v1@misc)r   	spans_keyr   za b cspans_some_other_key_f)r=   blankr   r   r<   r>   r:   r   miscr   	from_dictmake_docevaluate)r5   ry   nlp_re1r   nlp_re2examplescoress          r-   *test_config_overrides_registered_functionsr     sx   
++d
CLL"#	 *21A**% :/G"H(	
 NN<():;HEjQ'(	
( 
,	-	$ 
.	$ **% (S*13H)I#(	
 ~~l+,=>xH
,-. 	. . ##G$4$4W$=rB!!7),'6111U*2 *2 *2s   C'D""D+c                  |   t               j                  t        d      } | d   d   d   dk(  sJ | j                         }|d   d   d   J t	        j
                  |       }|j                  d   d   d   dk(  sJ d}| d   d	   d
   d   d   |k(  sJ |j                  d   d	   d
   d   d   |k(  sJ |j                  j                         }|d   d   d   J |d   d	   d
   d   d   dk(  sJ t	        j
                  |      }|j                  d   d   d   J |j                  d   d	   d
   d   d   dk(  sJ y )NF)interpolatecorporatrainpathz${paths.train}z!${components.tok2vec.model.width}rL   rK   ru   r(   r   rq   )r   rQ   rR   r   r   r;   r:   )r:   interpolatedr5   r   interpolated2rz   s         r-   test_config_interpolationr     s   X0eDF)W%f-1AAAA%%'L	"7+F3;;;


f
%C::i )&15EEEE/E,)'29=gF%OOO::l#H-g6yA'JeSSSJJ**,M#G,V4<<<&x09)DWMQTTTT|,D;;y!'*62:::;;|$X.w7	B7KsRRRr/   c                     t               j                  t              } t        j                  |       } d| vsJ t        j                  | t        d      }t               j                  |j                               }|d   i k(  sJ y )Nr\   F)r]   validate)	r   rQ   rR   r   r_   r   fillr   r   )r:   rb   
new_configs      r-   test_config_optional_sectionsr     sr    X01F!!&)F&&&]]6,GF
 ""6==?3Jm$***r/   c                      t        ddii d      } t        | d      sJ t        ddiddid      } t        | dd	      }d|j                  d
   vsJ t        |j                         y )Nr1   r2   )r5   rG   TrE   extrahelloF)rF   r   rG   )r   r   r:   )r:   r5   s     r-   "test_config_auto_fill_extra_fieldsr     sn    VTN;<F!&D999VTN'8JKLF
 4%
HC#**Z00003::&r/   c                    t               }t               j                  |       }d|d   d<   t        j                  t
              5  |j                  d|       d d d        d|d   d<   |j                  d|       y # 1 sw Y   %xY w)Nnonsenseru   r)   r%   r9   r   )r   r   rQ   rS   rT   r   r   )r   r5   r:   s      r-   test_config_validate_literalr     s|     )CX34F$.F7OL!	,	- .Xf-.$)F7OL!LL&L). .s   A==Bc                  2   t               } | j                  }ddi|d   d<   ddi|d   d<   t        |d      } t        j                  t
              5  | j                          d	d	d	       d	| j                  d   d<   | j                          y	# 1 sw Y   ,xY w)
zTest that only the relevant blocks are resolved in the different methods
    and that invalid blocks are ignored if needed. For instance, the [initialize]
    shouldn't be resolved at runtime.
    r   nonexistentrG   before_to_diskrt   lookupsTrE   N)r   r:   r   rS   rT   r   rt   )r5   r:   s     r-   (test_config_only_resolve_relevant_blocksr     s    
 )CZZF,3]+CF:'('.&>F<#
 4
8C	}	% *.CJJ|Y'NN s   BBc                      d} t         j                  j                  d       G d dt                     }t	        j
                  t        |             }|j                  d      j                  ddgk(  sJ y )Nz
    [nlp]
    lang = "en"
    pipeline = ["my_punctual_component"]

    [components]

    [components.my_punctual_component]
    factory = "my_punctual_component"
    punctuation = ["?","-"]
    my_punctual_componentc                       e Zd ZdZd Zy)2test_hyphen_in_config.<locals>.MyPunctualComponentr   c                     || _         y r   )punctuation)selfr5   r   r   s       r-   __init__z;test_hyphen_in_config.<locals>.MyPunctualComponent.__init__5  s      +Dr/   N)__name__
__module____qualname__r   r   r   r/   r-   MyPunctualComponentr   1  s    &	+r/   r   ?-)	r=   r
   rk   objectr   r;   r   rr   r   )hyphen_config_strr   r5   s      r-   test_hyphen_in_configr   $  sp    
 ^^34	+f 	+ 5	+ 

23DE
FC<</0<<c
JJJr/   )=rS   	cataloguer   	thinc.apir   r   r=   spacy.lang.der   spacy.lang.enr   spacy.languager   r	   r
   spacy.ml.modelsr   r   r   r   spacy.schemasr   r   spacy.trainingr   
spacy.utilr   r   r   r   utilr   rR   r^   parser_config_string_upperparser_config_string_no_upperarchitecturesr.   markissuerC   rZ   rc   rm   r{   r   parametrizer   r   r   r   r   r   filterwarningsr   r   r   r   r   r   r   r   r/   r-   <module>r     sz    # 3    ! Q Q  = "   1 f3 l *! * () ** 4> >$@*IJ*U(:$ 79VW::&6( 'F73< ,--2 .-2`S&
+' 79VW**$Kr/   