
    i                     b   d dl Z d dlZd dlmZmZ d dlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ  ej                   d	g d
d      dedefd       Z ej                   dg dd      defd       Z ej                   ddgd      ddededefd       Z G d d      Z G d d      Zd Zy)    N)AnyDict   )util)Warnings)Language)Matcher)Docmerge_noun_chunks)	token.depz	token.tagz	token.posT)requiresretokenizesdocreturnc                    | j                  d      s| S | j                         5 }| j                  D ]B  }|j                  j                  |j                  j
                  d}|j                  ||       D 	 ddd       | S # 1 sw Y   | S xY w)zMerge noun chunks into a single token.

    doc (Doc): The Doc object.
    RETURNS (Doc): The Doc object with merged noun chunks.

    DOCS: https://spacy.io/api/pipeline-functions#merge_noun_chunks
    DEP)tagdepattrsN)has_annotation
retokenizenoun_chunksrootr   r   merge)r   retokenizernpr   s       i/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/pipeline/functions.pyr   r      s     e$
		 /[// 	/BGGKK<Eb.	// J	/ Js   ABBmerge_entities)zdoc.entsztoken.ent_iobztoken.ent_typec                    | j                         5 }| j                  D ]M  }|j                  j                  |j                  j                  |j
                  d}|j                  ||       O 	 ddd       | S # 1 sw Y   | S xY w)zMerge entities into a single token.

    doc (Doc): The Doc object.
    RETURNS (Doc): The Doc object with merged entities.

    DOCS: https://spacy.io/api/pipeline-functions#merge_entities
    )r   r   ent_typer   N)r   entsr   r   r   labelr   )r   r   entr   s       r   r   r   $   st     
	 0[88 	0CHHLL399UEc/	00 J	0 Js   AA99Bmerge_subtokensr   r#   c                 \   t        | j                        }|j                  d|ddgg        ||       }t        j                  |D cg c]  \  }}}| ||dz     c}}}      }| j                         5 }|D ]  }	|j                  |	        	 ddd       | S c c}}}w # 1 sw Y   | S xY w)zMerge subtokens into a single token.

    doc (Doc): The Doc object.
    label (str): The subtoken dependency label.
    RETURNS (Doc): The Doc object with merged subtokens.

    DOCS: https://spacy.io/api/pipeline-functions#merge_subtokens
    SUBTOK+)r   op   N)r	   vocabaddr   filter_spansr   r   )
r   r#   mergermatches_startendspansr   spans
             r   r%   r%   8   s     SYYF
JJx54567SkGQQ5#s5373QRE		 $[ 	$Dd#	$$ J	 R$ Js   
B6B!!B+c                   z    e Zd ZddedefdZdedefdZdeee	f   fdZ
i fdeee	f   dd	fd
Zd Zd Zd Zd Zy	)TokenSplitter
min_lengthsplit_lengthc                      || _         || _        y Nr7   r8   )selfr7   r8   s      r   __init__zTokenSplitter.__init__N   s    $(    r   r   c           	         | j                   dkD  r| j                  dkD  r|j                         5 }|D ]  }t        |j                        | j                   k\  s&g }g }i }t        dt        |j                        | j                        D ]M  }|j                  |j                  ||| j                  z           |j                  ||| j                  z  f       O |j                  ||||        	 d d d        |S |S # 1 sw Y   |S xY w)Nr   )r7   r8   r   lentextrangeappendsplit)r<   r   r   torthsheadsr   is           r   __call__zTokenSplitter.__call__R   s    ??Q4#4#4q#8! 	B[ BA166{doo5 " " "!&q#aff+t7H7H!I EA!LLA8I8I4I)JK!LL!Q1B1B-B)CDE $))!UE5AB	B 
s
	B 
s   (C;BC;;Dc                 4    | j                   | j                  dS )Nr;   r;   r<   s    r   _get_configzTokenSplitter._get_config`   s    // --
 	
r>   configNc                 `    |j                  dd      | _        |j                  dd      | _        y )Nr7   r   r8   )getr7   r8   )r<   rM   s     r   _set_configzTokenSplitter._set_configf   s'     **\15"JJ~q9r>   c                 >     d fdi}t        j                  |g       S )Ncfgc                  J    t        j                   j                               S r:   )srsly
json_dumpsrL   rK   s   r   <lambda>z(TokenSplitter.to_bytes.<locals>.<lambda>l   s    5++D,<,<,>? r>   r   to_bytesr<   kwargsserializerss   `  r   rX   zTokenSplitter.to_bytesj   s#    ?
 }}["--r>   c                 D     d fdi}t        j                  ||g         S )NrR   c                 L    j                  t        j                  |             S r:   )rP   rT   
json_loadsbr<   s    r   rV   z*TokenSplitter.from_bytes.<locals>.<lambda>r   s    T--e.>.>q.AB r>   r   
from_bytesr<   datarZ   deserializerss   `   r   rb   zTokenSplitter.from_bytesp   s'    B
 	mR0r>   c                 j     t        j                  |      }d fdi}t        j                  ||g       S )NrR   c                 L    t        j                  | j                               S r:   )rT   
write_jsonrL   pr<   s    r   rV   z'TokenSplitter.to_disk.<locals>.<lambda>z   s    U--a1A1A1CD r>   r   ensure_pathto_diskr<   pathrZ   r[   s   `   r   rm   zTokenSplitter.to_diskw   s5    %D
 ||D+r22r>   c                 l     t        j                  |      }d fdi}t        j                  ||g        y )NrR   c                 L    j                  t        j                  |             S r:   )rP   rT   	read_jsonri   s    r   rV   z)TokenSplitter.from_disk.<locals>.<lambda>   s    T--eooa.@A r>   r   rl   	from_diskrn   s   `   r   rt   zTokenSplitter.from_disk~   s2    %A
 	t["-r>   )r   r   )__name__
__module____qualname__intr=   r
   rI   r   strr   rL   rP   rX   rb   rm   rt    r>   r   r6   r6   M   sp    )3 )# )C C 
T#s(^ 
 46 :$sCx. :$ :.3.r>   r6   c                   T    e Zd Zdddeeef   defdZdedefdZ	d	 Z
d
 Zd Zd Zy)
DocCleanerT)silentr   r}   c                *    t        |      |d| _        y )N)r   r}   )dictrR   )r<   r   r}   s      r   r=   zDocCleaner.__init__   s    -1%[F#Kr>   r   r   c                    | j                   d   }| j                   d   }|j                         D ]  \  }}|}|j                  d      }d}|d d D ]S  }	t        ||	      rt	        ||	      }d}|r!t        j                  t        j                  j                  |             U |ryt        ||d         rt        ||d   |       |rt        j                  t        j                  j                  |              |S )Nr   r}   .FT)attr)rR   itemsrD   hasattrgetattrwarningswarnr   W116formatsetattr)
r<   r   r   r}   r   valueobjpartsskipparts
             r   rI   zDocCleaner.__call__   s    hhw'xx) ;;= 	GKD%CJJsOEDcr
 G3%!#t,CD! hmm&:&:&:&EFG 3b	*CrE2! hmm&:&:&:&EF!	G" 
r>   c                 >     d fdi}t        j                  |g       S )NrR   c                  B    t        j                   j                        S r:   )rT   rU   rR   rK   s   r   rV   z%DocCleaner.to_bytes.<locals>.<lambda>   s    5++DHH5 r>   rW   rY   s   `  r   rX   zDocCleaner.to_bytes   s#    5
 }}["--r>   c                 D     d fdi}t        j                  ||g         S )NrR   c                 `    j                   j                  t        j                  |             S r:   )rR   updaterT   r^   r_   s    r   rV   z'DocCleaner.from_bytes.<locals>.<lambda>   s    TXX__U-=-=a-@A r>   ra   rc   s   `   r   rb   zDocCleaner.from_bytes   s'    A
 	mR0r>   c                 j     t        j                  |      }d fdi}t        j                  ||g       S )NrR   c                 D    t        j                  | j                        S r:   )rT   rh   rR   ri   s    r   rV   z$DocCleaner.to_disk.<locals>.<lambda>   s    U--a: r>   rk   rn   s   `   r   rm   zDocCleaner.to_disk   s5    %:
 ||D+r22r>   c                 l     t        j                  |      }d fdi}t        j                  ||g        y )NrR   c                 `    j                   j                  t        j                  |             S r:   )rR   r   rT   rr   ri   s    r   rV   z&DocCleaner.from_disk.<locals>.<lambda>   s    TXX__U__Q-?@ r>   rs   rn   s   `   r   rt   zDocCleaner.from_disk   s2    %@
 	t["-r>   N)ru   rv   rw   r   ry   r   boolr=   r
   rI   rX   rb   rm   rt   rz   r>   r   r|   r|      sJ    @D Ld38n L LC C ,.3.r>   r|   c                     | dk(  r!t        j                  d      }|j                  S | dk(  r!t        j                  d      }|j                  S t	        dt
         d|        )Nmake_doc_cleanerzspacy.pipeline.factoriesmake_token_splitterzmodule z has no attribute )	importlibimport_moduler   r   AttributeErrorru   )namemodules     r   __getattr__r      sg    !!(()CD&&&	&	&(()CD)))
78*,>tfE
FFr>   )subtok)r   r   typingr   r   rT    r   errorsr   languager   matcherr	   tokensr
   	componentr   r   ry   r%   r6   r|   r   rz   r>   r   <module>r      s             4
3 3 
" <
 
 %4P S   Q(6. 6.r3. 3.nGr>   