
    i4                        d dl Z d dlmZ d dlmZ d dlZd dlZd dlmZ ddl	m
Z
 ddlmZ ddlmZmZ d	d
lmZmZmZmZmZmZmZ  ej0                  d       edd       eddd       edd       edddd       ed ddd       edddd       edddd        ed!d"d#d$d%       edd&d'd(d)       ed*d+d,d-      f
d.ed/ed0ed1ed2ed3ed4ee   d5ed6ee   d7efd8       Zd9ed6ed:dfd;Z ej0                  d9ddd<d=       edd>dd?       edd@       eddAdBdC       ed!d"d#d$d%       eddDdEdF      fdGej<                  dHedIedJee   d5edKefdL       Z ej0                  dMddd<N       edd>dd?       eddO       eddAdBdC       ed!d"d#d$d%       eddDdEdF      fdGej<                  dHedIedJee   d5edKefdP       Z dQ Z!y)R    N)Path)Optional)msg   )util)Language)convert_vectorsinit_nlp   )ArgOptimport_codeinit_cliparse_config_overrides	setup_gpushow_validation_errorvectors.z(The language of the nlp object to create)helpzVectors file in Word2Vec formatT)r   existszPipeline output directoryz--prunez-pz&Optional number of vectors to prune toz
--truncatez-tzFOptional number of vectors to truncate to when reading in vectors filedefaultz--modez-mzVectors mode: default or floretz--namez-nz?Optional name for the word vectors, e.g. en_core_web_lg.vectorsFz	--verbosez-Vz-VVz/Display more information for debugging purposesz--lexemes-jsonlz-jz+Location of JSONL-formatted attributes file)r   hiddenORTHz--attrz-az?Optional token attribute to use for vectors, e.g. LOWER or NORMlangvectors_loc
output_dirprunetruncatemodenameverbose	jsonl_locattrc
           	         |r-t         j                  j                  t        j                         t        j                  d|  d        t        j                  |              }
|t        |
|       t        |
||||||	       t        j                  dt        |
j                  j                         d       |
j                  |       t        j                  d|j                                y)zConvert word vectors for use with spaCy. Will export an nlp object that
    you can use in the [initialize] block of your config to initialize
    a model with vectors.
    z(Creating blank nlp object for language ''N)r   r   r    r   r#   zSuccessfully converted z vectorszSaved nlp object with vectors to output directory. You can now use the path to it in your config as the 'vectors' setting in [initialize].)r   loggersetLevelloggingDEBUGr   infoget_lang_classupdate_lexemesr	   goodlenvocabr   to_diskresolve)r   r   r   r   r   r   r    r!   r"   r#   nlps              h/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/cli/init_pipeline.pyinit_vectors_clir4      s    ` W]]+HH7vQ?@
#$

d
#
%CsI& HH&s399+<+<'=&>hGHKK
HH	N    r2   returnc                     t        j                  |      }|D ]+  }d|v r| j                  |d      } |j                  di | - y )Nsettingsorth )srsly
read_jsonlr/   	set_attrs)r2   r"   	lex_attrsattrslexemes        r3   r,   r,   _   sQ      +I "5=)!5!	"r5   )allow_extra_argsignore_unknown_options)context_settingsr   zPath to config file)r   r   
allow_dashz&Output directory for the prepared dataz--codez-czNPath to Python file with additional code (registered functions) to be importedz--gpu-idz-gzGPU ID or -1 for CPUctxconfig_pathoutput_path	code_pathuse_gpuc                    |r-t         j                  j                  t        j                         t        | j                        }t        |       t        |       t        |      5  t        j                  ||      }d d d        t        d      5  t        |      }d d d        j                  |       t        j                  d|        y # 1 sw Y   UxY w# 1 sw Y   ?xY w)N	overridesF	hint_fillrI   zSaved initialized pipeline to )r   r&   r'   r(   r)   r   argsr   r   r   load_configr
   r0   r   r-   	rE   rF   rG   rH   r!   rI   rL   configr2   s	            r3   init_pipeline_clirT   i   s    8 W]]+&sxx0I	g	{	+ D!!+CD		/ 0vw/0KKHH-k];<D D0 0s   &CCCC'labels)rC   zOutput directory for the labelsc                    |r-t         j                  j                  t        j                         |j                         s|j                  d       t        | j                        }t        |       t        |       t        |      5  t        j                  ||      }ddd       t        d      5  t        |      }ddd       t        |       y# 1 sw Y   8xY w# 1 sw Y   "xY w)zGenerate JSON files for the labels in the data. This helps speed up the
    training process, since spaCy won't have to preprocess the data to
    extract the labels.T)parentsrK   NFrM   rO   )r   r&   r'   r(   r)   r   mkdirr   rP   r   r   r   rQ   r
   _init_labelsrR   s	            r3   init_labels_clirZ      s    < W]]+$'&sxx0I	g	{	+ D!!+CD		/ 0vw/0k"	D D0 0s   C4C#C #C,c                     | j                   D ]o  \  }}t        |dd       D|| dz  }t        j                  ||j                         t        j                  d| d|        Wt        j                  d| d       q y )N
label_dataz.jsonz!Saving label data for component 'z' to z#No label data found for component 'r%   )pipelinegetattrr;   
write_jsonr\   r   r-   r*   )r2   rG   r    	componentoutput_files        r3   rY   rY      s    << Di9lD1=%4&6K[)*>*>?HH8eK=QRHH:4&BCDr5   )"r(   pathlibr   typingr   r;   typerwasabir    r   languager   training.initializer	   r
   _utilr   r   r   r   r   r   r   commandstrintboolr4   r,   ContextrT   rZ   rY   r:   r5   r3   <module>ro      sD           ;   ) CHIC&GPTU3%@A
It"J 	U	 Ix4UVN	 > !$:! N	GD
D D 	D
 D D D 3-D* +D8 ~9DF GD DN" "T "d " 	*.$O ' C&NO #]	! > r:t2HI)!=	!= != != ~!= !=( )!=
!=H *.$O ' C&GH #]	! > r:t2HI)%#	%# %# %# ~%# %#( )%#	%#PDr5   