
    i%                        U d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlmZ ddlmZmZ ddlmZ dd	lmZmZmZmZ d
dlmZmZmZmZ eeeeeedZ ee!e	de
e   f   f   e"d<   dZ#dZ$ G d de!e      Z% ejL                  d       eddd       edddd       edddd       ed
dd d!       ed"d#d$d%       edd&d'd(d)       ed"d*d+d,       ed"d-d.d/       ee#d0d1d2 e'e jQ                                       edd3d4d5d       edd6d7d8       edd9d:d;      fd<e!d=ed>e%d?e)d@e*dAee!   dBe*dCe*dDe!dEee   dFee!   dGe*fdH       Z+dId
d"dd"d"ddd"dddJd<ed=ee!ef   d>e!d?e)d@e*dAee!   dBe*dCe*dDe!dEee   dFee!   dGe*dKe*dLee   dMdfdNZ,dOedPe!dMdfdQZ-dOedRedPe!dMdfdSZ.dTe!dMee!   fdUZ/dLed<ed=ee!ef   d>e!dDe!dEee   fdVZ0d<efdWZ1y)X    N)Enum)Path)AnyCallableIterableMappingOptionalUnion)Printer   )DocDocBin)docs_to_json)conll_ner_to_docsconllu_to_docsiob_to_docsjson_to_docs   )ArgOptappwalk_directory)	conllubioconlluconllneriobjson.
CONVERTERSauto)r   c                       e Zd ZdZdZy)	FileTypesr   spacyN)__name__
__module____qualname__r   r#        b/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/cli/convert.pyr"   r"   *   s    DEr(   r"   convertzInput file or directoryT)helpexists-z!Output directory. '-' for stdout.)r+   
allow_dashr,   r#   z--file-typez-tzType of data to produce)r+   z	--n-sentsz-nz*Number of sentences per doc (0 to disable)Fz--seg-sentsz-szSegment sentences (for -c ner)z--modelz--basez-bzQTrained spaCy pipeline for sentence segmentation to use as base (for --seg-sents)z--morphologyz-mz#Enable appending morphology to tagsz--merge-subtokensz-TzMerge CoNLL-U subtokensz--converterz-czConverter: z	--ner-mapz-nmz6NER tag mapping (as JSON-encoded dict of entity types)z--langz-lz Language (if tokenizer required)z--concatenatez-Cz#Concatenate output to a single file
input_path
output_dir	file_typen_sents	seg_sentsmodel
morphologymerge_subtokens	converterner_maplangconcatenatec                     t        |       } |t        d      k(  rdn|}|dk(  }t        |      }t        |||       }t        || ||j                  ||	       t        | ||j                  |||||||	|
|||       y)a  
    Convert files into json or DocBin format for training. The resulting .spacy
    file can be used with the train command and other experiment management
    functions.

    If no output_dir is specified and the output format is JSON, the data
    is written to stdout, so you can pipe them forward to a JSON file:
    $ spacy convert some_file.conllu --file-type json > some_file.json

    DOCS: https://spacy.io/api/cli#convert
    r-   no_print)r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   silentmsgN)r   r   _get_converterverify_cli_argsvaluer*   )r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r>   r?   s                 r)   convert_clirC   /   s    t j!J*4S	*A3zJ3F
6
"CsIz:ICZ)WU//'r(   r   )r1   r2   r3   r4   r5   r6   r8   r9   r:   r>   r?   r>   r?   returnc                   t        |       } |st        |      }|	t        j                  |	      nd }	g }t	        | |      D ][  }|j                  dd      5 }|j                         }d d d        t        |   } ||||||
|||		      }|j                  ||f       ] |r7t        j                  j                  |D cg c]  \  }}|	 c}}      }| |fg}|D ]  \  }}|dk(  rt        |      g}t        |      }n(t        |d      }t        |      }|j                         }|d	k(  rt!        ||       ]|| k7  r2|j#                  |       }t        |      |j%                  d
|       z  }n/t        |      |j&                  d   z  }|j%                  d
|       }t)        |||       |j+                  d| d|         y # 1 sw Y   ^xY wc c}}w )Nr<   rzutf-8encoding)r2   r3   append_morphologyr6   r9   r4   r=   r8   r   T)docsstore_user_datar-   .zGenerated output file (z documents): )r   r   srsly	read_jsonr   openreadr   append	itertoolschainfrom_iterabler   lenr   to_bytes_print_docs_to_stdoutrelative_towith_suffixparts_write_docs_to_filegood)r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r>   r?   	doc_files	input_locinfile
input_datafuncrJ   _all_docsdatalen_docsdbsubpathoutput_files                              r)   r*   r*      s   " j!Jv&*1*=eoog&4GI#J	: ,	^^C'^2 	'fJ	' )$(+

 	)T*+!," ??00i1P71d$1PQ (+,	$ U	4 &'D4yHT48B2wH;;=D!$	2J&#//
;":.1D1Dq_1UU":.1DD)55)oFk9=HH.xjk]ST%U'	' 	'" 2Qs   G?G
G	re   output_typec                     |dk(  rt        j                  d|        y t        j                  j                  j                  |        y )Nr   r-   )rN   
write_jsonsysstdoutbufferwrite)re   rj   s     r)   rX   rX      s2    fd#

%r(   ri   c                    |j                   j                         s|j                   j                  d       |dk(  rt        j                  ||        y |j                  d      5 }|j                  |        d d d        y # 1 sw Y   y xY w)NT)parentsr   wb)parentr,   mkdirrN   rl   rP   rp   )re   ri   rj   file_s       r)   r\   r\      st    $$&   .fd+d# 	uKK	 	 	s   $A??Bra   c                 v   | j                  d      d d }ddd}t        j                  d      }t        j                  d      }|D ]O  }|j                         }|j	                  |      r|dxx   dz  cc<   |j	                  |      sC|d	xx   dz  cc<   Q |d   dk(  r	|d	   dkD  ry	|d	   dk(  r	|d   dkD  ryy )
N
   r   )r   r   z\S+\|(O|[IB]-\S+)z\S+\s+(O|[IB]-\S+)$r   r   r   )splitrecompilestripsearch)ra   linesformat_guessesiob_rener_relines         r)   autodetect_ner_formatr      s    T"3B'Eq)NZZ,-FZZ./F 'zz|==5!Q&!==5!Q&!' e!nU&;a&?e!nU&;a&?r(   c                    |t         vr|dk(  r| j                  d| dd       |j                         s| j                  d|d       |dk7  r-t        |      j                         s| j                  d|d       |-t        |      j                         s| j                  d|d       |j	                         r.t        ||      }t        |      d	k(  r| j                  d
|d       |t        vr| j                  d| d       y y )Nr-   zCan't write .z4 data to stdout. Please specify an output directory.r   exitszInput file not foundzOutput directory not foundzNER map not foundr   zNo input files in directoryzCan't find converter for )FILE_TYPES_STDOUTfailr,   r   is_dirr   rV   r   )r?   r/   r0   r1   r7   r8   
input_locss          r)   rA   rA      s    ))jC.?I;&Z[ 	 	
 '1=Sj!1!8!8!:-zC4=#7#7#9$gQ7#J	:
z?aHH2JaHH
",YK8B #r(   c           
         |j                         r|t        k(  rut        |d       }t        t	        |D cg c]  }|j
                  dd   c}            }t        |      dk\  r%dj                  |      }| j                  d|d       |d   }nt        ||      d   }|t        k(  r|j
                  dd  }|dk(  s|d	k(  r{|j                  d
      5 }|j                         }d d d        t              }	|	dk(  r| j                  d       |	}|S |	d	k(  r| j                  d       |	}|S | j                  d       |S c c}w # 1 sw Y   `xY w)N)suffixr   r   ,z!All input files must be same typer   r   r   r   utf8rG   z'Auto-detected token-per-line NER formatz*Auto-detected sentence-per-line NER formatzgCan't automatically detect NER format. Conversion may not succeed. See https://spacy.io/api/cli#convert)r   AUTOr   listsetr   rV   joinr   rP   rQ   r   infowarn)
r?   r7   r/   r   loc
file_typesfile_types_strrv   ra   converter_autodetects
             r)   r@   r@      sY   '
4@JcZ"Hc3::ab>"HIJJ:!#!$*!5<nTUV#AJ'
9EaHJD%%ab)	EY%/__f_- 	&J	&4Z@5(HH>?,I  "U*HHAB,I  HH7
 3 #I	& 	&s   D>EE)2rS   r{   rm   enumr   pathlibr   typingr   r   r   r   r	   r
   rN   wasabir   tokensr   r   trainingr   training.convertersr   r   r   r   _utilr   r   r   r   r   str__annotations__r   r   r"   commandtuplekeysintboolrC   r*   rX   r\   r   rA   r@   r'   r(   r)   <module>r      s    	 
   D D     #  1 0  :
GC#x}"4556    T 
 Y #$=dK5$t +D 	;#O }d)I ` ~t*O  "D/H mT+eJOO<M6N5O(P "E h#E ot*OSNN N N N N C=N, -N2 3N8 9N> d^?NL 3-MNR SN Nj !"!<U<Uc4i <U 	<U
 <U <U C=<U <U <U <U d^<U 3-<U <U <U 
'	<U  
!<U~& &# &$ &c  3 4 c hsm &C	CC c4i C 	C
 C d^C6t r(   