
    i!                     @   d dl mZ d dlmZ d dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZ ej                  d        Zej                  d        Zej                  d	        Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zej4                  j7                  d      defd       Zd Zy)    )Random)ListN)Matcher)DocSpan	SpanGroup)filter_spansc                     | d      }t        | j                  d      }|j                  di i i i gg       |j                  di i gg       |j                  di gg        ||      }g }|D ]?  }|j                  t	        ||d   |d   | j                  j
                  |d	                   A t        d
      j                  |       t        |dddi|      |j                  d<   |S Nz0 1 2 3 4 5 6T)validate421      r   *   SPANSkeyvaluenameattrsspans
r   vocabaddappendr   stringsr   shuffler   r   en_tokenizerdocmatchermatchesr   matchs         p/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/doc/test_span_group.pyr"   r"      s    

'Cl((48G KKr2r2&'(KKr2j\"KKrfXclGE 
eAha,*<*<*D*DU1X*NO	

 2Ju"'%!1CIIg J    c                     | d      }t        | j                  d      }|j                  di i i i gg       |j                  di i gg       |j                  di gg        ||      }g }|D ]?  }|j                  t	        ||d   |d   | j                  j
                  |d	                   A t        d
      j                  |       t        |dddi|      |j                  d<   |S r   r   r    s         r&   	other_docr)   "   s    

'Cl((48G KKr2r2&'(KKr2j\"KKrfX clGE 
eAha,*<*<*D*DU1X*NO	

 2Ju"'%!1CIIg Jr'   c                     | d      }t        | j                  d      }|j                  di i i i gg       |j                  di i gg       |j                  di gg        ||      }g }|D ]?  }|j                  t	        ||d   |d   | j                  j
                  |d	                   A t        d
      j                  |       t        |dddi|      |j                  d<   y r   r   r    s         r&   
span_groupr+   :   s    

'Cl((48G KKr2r2&'(KKr2j\"KKrfX clGE 
eAha,*<*<*D*DU1X*NO	

 2Ju"'%!1CIIgr'   c                    | j                   d   }|j                         }||k7  sJ |j                  |j                  k(  sJ |j                  |j                  k(  sJ t	        |      t	        |      k(  sJ t        |      t        |      k(  sJ d|_        d|j                  d<   |j                  t        | ddd             |j                  |j                  k7  sJ |j                  |j                  k7  sJ |j                  d   dk(  sJ t        |      t        |      k7  sJ t        | j                  | D cg c]  }|j                  d	z    c}
      }t        j                  t              5  |j                  |       d d d        | j                         }|j                         5 }|j                  |dd        |j                  |dd        d d d        t!        | | dd | dd g      }t#        ||j                  |            D ];  \  }}|j$                  |j$                  k(  sJ |j&                  |j&                  k(  r;J  y c c}w # 1 sw Y   xY w# 1 sw Y   xY w)Nr   new_name	new_valuer   r      LABELr   x)words)r"   r      r   )r   copyr   r   lenlistr   r   r   r   textpytestraises
ValueError
retokenizemerger   zip
start_charend_char)	r"   r+   clonetdoc2doc3retokenizerspan1span2s	            r&   test_span_group_copyrH   Q   s2   7#JOOEJ::(((;;******u:Z(((
tE{***EJ$EKK	LLc1a)*::(((;;******E"g---
tE{*** syys ;!# ;<D	z	" "D!" 88:D		 %k$q)$$q)$% 3s1QxQq&:;JJ
D(AB 0u5#3#3333~~///0 !<" "
% %s   9I
0I+)III&c                 L   | j                   d   }d}||   }d|_        | j                  j                  d   |_        ||   j
                  |j
                  k7  sJ ||   j                  |j                  k7  sJ |||<   ||   j                  |j                  k(  sJ ||   j                  |j                  k(  sJ ||   j
                  |j
                  k(  sJ ||   j                  |j                  k(  sJ ||   |k(  sJ t        j                  t              5  ||d<   d d d        t        j                  t              5  ||d<   d d d        t        |dd      }t        j                  t              5  |||<   d d d        y # 1 sw Y   exY w# 1 sw Y   JxY w# 1 sw Y   y xY w)	Nr      z	NEW LABELKB_IDd   r   r   )r   label_r   r   kb_idlabelstartendr9   r:   
IndexErrorr   r;   )r"   r)   r+   indexspans        r&   test_span_group_set_itemrV   q   s   7#JEeDDK""7+DJe""djj000e""djj000Jue""djj000e  DHH,,,e""djj000e""djj000e$$$	z	"  
4 	z	" 
3 	1a D	z	" ! 
5! !    ! !s$   F?F3FFFF#c                 >    | j                   d   }|j                  sJ y )Nr   )r   has_overlap)r"   r+   s     r&   test_span_group_has_overlaprY      s    7#J!!!!r'   c                    | j                   d   }| dd | dd g}t        | dddd|      }|j                  |      }|j                  |j                  k(  sJ |j                  d	ddk(  sJ t        |      t        |      z   }t        |      t        |      k(  sJ t        |      t        |      z   }|j                  |d
      }||k(  sJ |j                  |j                  k(  sJ |j                  d	ddk(  sJ t        |      t        |      k(  sJ |j                   d   }t        j                  t              5  |j                  |       d d d        y # 1 sw Y   y xY w)Nr   r   rJ   r/   
MORE_SPANSr.   r   new_keyr   r   T)inplace)	r   r   _concatr   r   r7   r9   r:   r;   )r"   r)   span_group_1r   span_group_2span_group_3span_list_expecteds          r&   test_span_group_concatrd      sw   99W%L1Xs1Qx E!k:	L  ''5L 1 1111[!IIIIl+d<.@@&8!9999 l+d<.@@''d'CL<''' 1 1111[!IIII&8!9999??7+L	z	" +\*+ + +s   =EE!c                 n   | j                   d   }t        |      }d}||   }||dz      }||= t        |      |dz
  k(  sJ ||   |k7  sJ ||   |k(  sJ t        j                  t              5  |d= d d d        t        j                  t              5  |d= d d d        y # 1 sw Y   /xY w# 1 sw Y   y xY w)Nr   rJ   r   rL   rM   )r   r6   r9   r:   rS   )r"   r+   lengthrT   rU   	next_spans         r&   test_span_doc_delitemrh      s    7#J_FEeD519%I5z?fqj(((e$$$e	)))	z	" t	z	" sO   s   -BB+B(+B4c                    | j                   d   }| dd | dd g}t        | dddd|      }|j                  |      }||z   }t        |      t        |      k(  sJ |j                  d	ddk(  sJ t        |      t        |      k(  sJ y )
Nr   r   rJ   r/   r[   r.   r\   r   r   )r   r   r_   r6   r   r7   )r"   r`   r   ra   span_group_3_expectedrb   s         r&   test_span_group_addrk      s    99W%L1Xs1Qx E!k:	L )00>,.L|$9 ::::[!IIII&;!<<<<r'   c                    | j                   d   j                         }| dd | dd g}t        | dddd|      }|j                  |      }||z  }t	        |      t	        |      k(  sJ |j
                  d	ddk(  sJ t        |      t        |      k(  sJ | j                   d   j                         }||z  }t	        |      t	        |      k(  sJ |j
                  d
d	ik(  sJ t        |      t        |      k(  sJ y Nr   r   rJ   r/   r[   r.   r\   r   r   r   )r   r5   r   r_   r6   r   r7   r"   r`   r   ra   span_group_1_expecteds        r&   test_span_group_iaddrp      s)   99W%**,L1Xs1Qx E!k:	L )00>L L|$9 ::::[!IIII&;!<<<<99W%**,LEL|$9 ::::w"    &;!<<<<r'   c                    | j                   d   j                         }| dd | dd g}t        | dddd|      }|j                  |      }|j	                  |       t        |      t        |      k(  sJ |j                  d	ddk(  sJ t        |      t        |      k(  sJ | j                   d   }|j	                  |       t        |      t        |      k(  sJ |j                  d
d	ik(  sJ t        |      t        |      k(  sJ y rm   )r   r5   r   r_   extendr6   r   r7   rn   s        r&   test_span_group_extendrs      s   99W%**,L1Xs1Qx E!k:	L )00>%|$9 ::::[!IIII&;!<<<<99W%L|$9 ::::%!1111&;!<<<<r'   c                     t        j                  t              5  t        | j                         d d d        y # 1 sw Y   y xY w)N)r9   r:   AttributeErrorprintr"   )r+   s    r&   test_span_group_deallocrw     s/    	~	& jnn  s	   9Ai.  r"   c                     | j                   d   }t        |      }t        |      D ]  \  }}|||   cxk(  r
||   k(  rJ  J  t        |       y)zXTests whether typing of `SpanGroup` as `Iterable[Span]`-like object is accepted by mypy.r   N)r   r7   	enumerater	   )r"   r+   r   irU   s        r&   test_span_group_typingr{     s]      IIg.JZ(EZ( 14z!}0a000001r'   c                      | d      } | d      }t        ||dd |dd g      }t        j                  t              5  t        ||dd |dd g      }ddd       y# 1 sw Y   yxY w)z5Test that all spans must come from the specified doc.za b cr   r   r   r4   N)r   r9   r:   r;   )r!   doc1rC   r+   s       r&   test_span_group_init_docr~     su     D D4Qq	4!9'=>J	z	" CtD1ItAay+AB
C C Cs   AA')randomr   typingr   r9   spacy.matcherr   spacy.tokensr   r   r   
spacy.utilr	   fixturer"   r)   r+   rH   rV   rY   rd   rh   rk   rp   rs   rw   markissuer{   r~    r'   r&   <module>r      s       ! - - #  ,  .  ,0@!8"
+8"=$=4=0
 5  Cr'   