
    i /                        d dl mZ d dlZd dlZd dlZd dlmZmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ ddl
mZ dZ dZ!dZ"i ddidddddgZ#ddddddddddgZ$d Z%ejL                  jO                  d e#      ejL                  jO                  d!d"      d#               Z(ejL                  jO                  d e$      d$        Z)ejL                  jO                  d e$      d%        Z*ejL                  jO                  d&e!e g      d'        Z+d( Z,d) Z-d* Z.d+ Z/d, Z0d- Z1y).    )PathN)Configget_current_ops)util)English)DEFAULT_CONFIG_PATHDEFAULT_CONFIG_PRETRAIN_PATH)create_pretrain_vectors)DocDocBin)init_nlp)train)pretrain)Vectors)Vocab   )make_tempdiraE  
[nlp]
lang = "en"
pipeline = ["tok2vec", "tagger"]

[components]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 342
depth = 4
window_size = 1
embed_size = 2000
maxout_pieces = 3
subword_features = true

[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v2"

[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.width}

[pretraining]
max_epochs = 5

[training]
max_epochs = 5
a  
[nlp]
lang = "en"
pipeline = ["tagger"]

[components]

[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v2"

[components.tagger.model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 342
depth = 4
window_size = 1
embed_size = 2000
maxout_pieces = 3
subword_features = true

[pretraining]
max_epochs = 5

[training]
max_epochs = 5
a  
[nlp]
lang = "en"
pipeline = ["tok2vec", "tagger"]

[components]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 342
depth = 4
window_size = 1
embed_size = 2000
maxout_pieces = 3
subword_features = true

[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v2"

[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.width}

[pretraining]
max_epochs = 5

[pretraining.objective]
@architectures = spacy.PretrainVectors.v1
maxout_pieces = 3
hidden_size = 300
loss = cosine

[training]
max_epochs = 5
@architectureszspacy.PretrainCharacters.v1   *   )r   maxout_pieceshidden_sizen_characterszspacy.PretrainVectors.v1   ,  cosine)r   r   r   loss   L2c                      t               j                  t              } t        j                  | dd      }|j
                  }t        j                  t              }|j                  |      }d|d   d   d   v sJ y)	z7Test that pretraining defaults to a character objectiveTF	auto_fillvalidatePretrainCharacterspretraining	objectiver   N)	r   from_strpretrain_string_internalr   load_model_from_configconfigload_configr	   merge)r*   nlpfilledpretrain_configs       v/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/training/test_pretraining.pytest_pretraining_defaultr1      sq    X78F

%
%fu
MCZZF&&'CDO""6*F6-#8#EFV#WWWW    r&   	skip_last)TFc                    t               j                  t              }| |d   d<   t        j                  |dd      }|j
                  }t        j                  t              }|j                  |      }t               5 }t        |      }||d   d<   |j                         }|d   d   d	k(  sJ t        |||
       t        |dz        j                         sJ t        |dz        j                         sJ t        |dz        j                         rJ |rt        |dz        j                         r J t        |dz        j                         sJ ddd       y# 1 sw Y   yxY w)z8Test that pretraining works with the character objectiver%   r&   TFr!   pathsraw_text	componenttok2vec)r3   
model0.bin
model4.bin
model5.binmodel-last.binNr   r'   pretrain_string_listenerr   r)   r*   r+   r	   r,   r   write_sample_jsonlinterpolater   r   exists)r&   r3   r*   r-   r.   r/   tmp_dir	file_paths           r0   #test_pretraining_tok2vec_charactersrD      s[    X78F)2F=+&

%
%fu
MCZZF&&'CDO""6*F	 =7&w/	&/w
###%m$[1Y>>>I6Gl*+22444Gl*+22444,./66888G&667>>@@@"223::<<<= = =s   >CEE'c                    t               j                  t              }| |d   d<   t        j                  |dd      }|j
                  }t        j                  t              }|j                  |      }t               5 }t        |      }||d   d<   |j                         }|d   d	   J t        j                  t              5  t        ||       d
d
d
       d
d
d
       y
# 1 sw Y   xY w# 1 sw Y   y
xY w)z]Test that pretraining doesn't works with the vectors objective if there are no static vectorsr%   r&   TFr!   r5   r6   
initializevectorsN)r   r'   r>   r   r)   r*   r+   r	   r,   r   r?   r@   pytestraises
ValueErrorr   )r&   r*   r-   r.   r/   rB   rC   s          r0   %test_pretraining_tok2vec_vectors_failrK      s     X78F)2F=+&

%
%fu
MCZZF&&'CDO""6*F	 &7&w/	&/w
###%l#I.666]]:& 	&VW%	&& &
	& 	&& &s%   >AC/C#C/#C,	(C//C8c                    t               j                  t              }| |d   d<   t        j                  |dd      }|j
                  }t        j                  t              }|j                  |      }t               5 }t        |      }||d   d<   t        |      }||d   d	<   |j                         }t        ||       d
d
d
       y
# 1 sw Y   y
xY w)zQTest that pretraining works with the vectors objective and static vectors definedr%   r&   TFr!   r5   r6   rF   rG   N)r   r'   r>   r   r)   r*   r+   r	   r,   r   r?   write_vectors_modelr@   r   )r&   r*   r-   r.   r/   rB   rC   nlp_paths           r0    test_pretraining_tok2vec_vectorsrO      s     X78F)2F=+&

%
%fu
MCZZF&&'CDO""6*F	 "7&w/	&/w
#&w/*2|Y'##%!" " "s   >AC

Cr*   c                    t               j                  t              } t        j                  | dd      }|j
                  }t        j                  t              }|j                  |      }t               5 }t        |      }||d   d<   d|d   d<   d	|d   d
<   |j                         }t        ||       t        |dz        j                         sJ t        |dz        j                         sJ t        |dz        j                         sJ t        |dz        j                         rJ 	 ddd       y# 1 sw Y   yxY w)z?Test pretraining of the tagger's tok2vec layer (via a listener)TFr!   r5   r6   taggerr%   r7   r8   layerr9   r:   r<   r;   Nr=   r*   r-   r.   r/   rB   rC   s         r0   test_pretraining_tagger_tok2vecrT      s3    X78F

%
%fu
MCZZF&&'CDO""6*F	 
97&w/	&/w
#-5}k*)2}g&##%!Gl*+22444Gl*+22444G../66888,./668888
9 
9 
9s   6B8D88Ec                     t               j                  t              } t        j                  | dd      }|j
                  }t        j                  t              }|j                  |      }t               5 }t        |      }||d   d<   d|d   d<   |j                         }t        j                  t              5  t        ||       d	d	d	       d	d	d	       y	# 1 sw Y   xY w# 1 sw Y   y	xY w)
z\Test pretraining of the tagger itself will throw an error (not an appropriate tok2vec layer)TFr!   r5   r6   rQ   r%   r7   N)r   r'   r(   r   r)   r*   r+   r	   r,   r   r?   r@   rH   rI   rJ   r   rS   s         r0   test_pretraining_taggerrV      s    X78F

%
%fu
MCZZF&&'CDO""6*F	 &7&w/	&/w
#-5}k*##%]]:& 	&VW%	&& &
	& 	&& &s%   6AC%;CC%C"	C%%C.c            	      \   t               j                  t              } t        j                  | dd      }|j
                  }t        j                  t              }|j                  |      }t        j                  t              }|j                  |      }t               5 }|dz  }|j                          t        |      }||d   d<   d|d   d	<   d
|d   d<   |dz  }|j                          t        |      \  }	}
|	|d   d<   |
|d   d<   |j                         }|d   }t        |      }|j!                  |d	         j"                  j%                  |d         j%                  d      }d}|j'                         D ]  }|j(                  dk(  s|} t+        ||       t-        |dz        }|j/                         sJ t1        |      |d   d<   t        |      }|j!                  |d	         j"                  j%                  |d         j%                  d      }d}|j'                         D ]  }|j(                  dk(  s|} t3        j4                  t3        j6                  |j9                  d      |j9                  d                  sJ t;        ||       ddd       y# 1 sw Y   yxY w)z5Test that training can use a pretrained Tok2Vec modelTFr!   r   r5   r6   rQ   r%   r7   r8   rR   r   devembedN	hashembedz
model3.binrF   init_tok2vecE)r   r'   r(   r   r)   r*   r+   r	   r,   r   r   mkdirr?   write_sample_trainingr@   r   get_pipemodelget_refwalknamer   r   rA   strnpany	not_equal	get_paramr   )r*   r-   r.   r/   train_configrB   pretrain_dirrC   	train_dir
train_pathdev_pathPnlp_base
model_base
embed_basenodepretrained_modelr`   rY   s                      r0   test_pretraining_trainingrt     s   X78F

%
%fu
MCZZF&&'CDO""6*F##$78L'F	 "7+&|4	&/w
#-5}k*)2}g&g%	4Y?
H#-w !)w##%=!F#an-33;;AgJGOOPWX 	 
OO% 	"DyyK'!
	" 	&| ;<&&(((/23C/D|^,vQ{^,22::1W:FNNwWJJL 	DyyK'	 vvbll5??3#79M9Mc9RSTTTc9E" " "s    C/J"B/J" AJ""J+c                 h    ddidddddddid	ddddg}|  d
}t        j                  ||       |S )Nid1z$This is the best TV you'll ever buy!   r   )posneg)metatextcats2zI wouldn't buy this again.z/text.jsonl)srslywrite_jsonl)rB   datarC   s      r0   r?   r?   :  s_     3K:a(	
 3K0a(	
D );'I	i&r2   c                     g d}g d}t        t               j                  ||      }t               }|j	                  |       |  d}|  d}|j                  |       |j                  |       ||fS )N)Theplayersstart.)DTNNVBZr   )wordstagsz/train.spacyz
/dev.spacy)r   r   vocabr   addto_disk)rB   r   r   docdoc_binrl   rm   s          r0   r^   r^   L  sn    ,E#D
giooU
6ChGKK9L)J*%HOOJOOHxr2   c                 x   dd l }t               }|j                  j                  ddd      |j                  j                  ddd      |j                  j                  ddd      d}|j	                         D ]  \  }}|j                  ||        | dz  }t        |      }|j                  |       t        |      S )Nr   rx   )r   )dogcatorangevectors_model)	numpyr   randomuniformitems
set_vectorr   r   rd   )rB   r   r   vector_datawordvectorrN   r-   s           r0   rM   rM   Y  s    GE||##B62||##B62,,&&r1f5K
 $))+ 'fv&'(H
%.CKKx=r2   c                     t               } | j                  d       | j                          t        d      | j                  _         t        ddd      | j                  | j                  d      j                         t        t               j                  j                  d      dd      | j                  _         t        ddd      | j                  | j                  d      j                         t        j                  t        d	      5  t               | j                  _         t        ddd      | j                  | j                  d      j                         d d d        y # 1 sw Y   y xY w)
Nr8   )
   r   )shaperx   r   floret)r   mode
hash_countE875)match)r   add_piperF   r   r   rG   r
   r_   r`   r   xpzerosrH   rI   rJ   )r-   s    r0   test_pretrain_default_vectorsr   j  s   
)CLLNN  h/CII+Aq(+CIIs||I7N7T7TU  !!''1QCII ,Aq(+CIIs||I7N7T7TU 
z	0 
#I		/1h/IIs||I.44	

 
 
s   AE((E1)2pathlibr   r   re   rH   r   	thinc.apir   r   spacyr   spacy.lang.enr   spacy.languager   r	   spacy.ml.models.multi_taskr
   spacy.tokensr   r   spacy.training.initializer   spacy.training.loopr   spacy.training.pretrainr   spacy.vectorsr   spacy.vocabr   r   r>   r(   pretrain_string_vectorsCHAR_OBJECTIVESVECTOR_OBJECTIVESr1   markparametrizerD   rK   rO   rT   rV   rt   r?   r^   rM   r    r2   r0   <module>r      s       -  ! L > $ . % , !  # J >) X 457		 5	 5	  X o6m4= 5 7=. &78& 9&" &78" 9"" $<>V#WX9 Y9(&"+\$
 "
r2   