
    i4	                     n    d dl mZ ddlmZ ddlmZmZ ddlmZm	Z	 ddl
mZ ddlmZ dd	lmZ dd
Zd Zy)    )Printer   )Errors)DocSpan)iob_to_biluotags_to_entities)	minibatch)Vocab   )n_sents_infoc              /      K   t               }t        |      }|dkD  rt        ||       t        | j	                  d      ||      E d{    y7 w)a   
    Convert IOB files with one sentence per line and tags separated with '|'
    into Doc objects so they can be saved. IOB and IOB2 are accepted.

    Sample formats:

    I|O like|O London|I-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|O
    I|O like|O London|B-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|O
    I|PRP|O like|VBP|O London|NNP|I-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|O
    I|PRP|O like|VBP|O London|NNP|B-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|O
    )no_printr   
N)r   r   r   read_iobsplit)
input_datan_sentsr   argskwargsvocabmsgs          v/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/training/converters/iob_to_docs.pyiob_to_docsr      sG      GE
8
$C{S'"
((.w???s   AA	A
Ac              #     K   t        | |      D ]  }g }g }g }g }g }|D ]  }	|	j                         s|	j                         D 
cg c]  }
|
j                  d       }}
t        |d         dk(  rt	        | \  }}}nEt        |d         dk(  rt	        | \  }}dgt        |      z  }nt        t        j                        |j                  |       |j                  |       |j                  |       |j                  |       |j                  d       |j                  |dd  D cg c]  }d	 c}        t        ||
      }t        |      D ]  \  }}|||   _         t        |      D ]  \  }}|||   _         t        |      }t        |      }|D cg c]  \  }}}t!        |||dz   |       c}}}|_        |  y c c}
w c c}w c c}}}w w)N)size|r   r      -Tr   F)words)startendlabel)r
   stripr   lenzip
ValueErrorr   E902extendappendr   	enumeratetag_is_sent_startr   r	   r   ents)	raw_sentsr   r   grouptokensr    tagsiobsent_startslinetsent_tokens
sent_words	sent_tagssent_iob_docitag
sent_startbiluoentitiesLses                            r   r   r      s    973  	@D::<15>A1773<>K>;q>"a'25{2C/
Ix[^$)'*K'8$
H EC
O3	 --LL$KK	"JJx MM+&t$z!"~>!>?!	@" %u%o 	FAsCFK	&{3 	.MAz#-CF 	.S!#E*LTUUy1aDA1q5:U	? ?  ? Vs,   AG!GCG!+	G4A-G!!G
>#G!N)
   F)wasabir   errorsr   r1   r   r   trainingr   r	   utilr
   r   r   conll_ner_to_docsr   r   r        r   <module>rM      s%       6   +@& rL   