
    i|                         d dl Z d dlZd dlmZmZmZmZmZ d dlm	Z	 ej                  j                  d      d        Zd Zd Zd Zd	 Zd
 Zd Zej                  j'                  dddgdg      d        Zd Zd Zy)    N)DEPMORPHORTHPOSSHAPE)Doci  c                    g d}g d}g d}|D cg c]  }| j                   j                  |       }}|D cg c]  }| j                   j                  |       }}t        | |      }|j                  dt	        j
                  |d             |j                  dt	        j
                  |d             |D 	cg c]  }	|	j                   c}	|k(  sJ |D 	cg c]  }	|	j                   c}	|k(  sJ |j                  ddg      }
t        |j                  |      j                  ddg|
      }|D 	cg c]  }	|	j                   c}	|k(  sJ |D 	cg c]  }	|	j                   c}	|k(  sJ y	c c}w c c}w c c}	w c c}	w c c}	w c c}	w )
z5Test that lemmas are set correctly in doc.from_array.)Iz'llsurvive)PRPMDVB)z-PRON-willr   wordsTAGuint64)dtypeLEMMAN)
stringsaddr   
from_arraynumpyarraytag_lemma_to_arrayvocab)en_vocabr   tagslemmastagtag_idslemma	lemma_idsdoct	doc_arraynew_docs               k/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/doc/test_array.pytest_issue2203r+      sZ    $ED*F489Sx##C(9G9:@A!!%%e,AIA
he
$CNN5%++gX>?NN7EKK	BC qAFF D(((!"AHH"f,,,eW-.I#))5)44eW5EyQG#$qAFF$,,,%&AHH&&000 :A
 !" %&s"   "E1"E6<E;F 6FF
c                     t        | g d      }|j                  d   }|j                  |j                  k7  sJ |j	                  t
        t        f      }|d   d   |d   d   k7  sJ |d   d   |d   d   k7  sJ y )NAnexamplesentencer   r/   r      r   r   orthshaper   r   r   r   r&   r/   feats_arrays       r*   test_doc_array_attr_of_tokenr7      s    
h;
<Cii	"G<<7==(((,,e}-Kq>!Aq 1111q>!Aq 1111    c                    t        | g d      }|j                  d   }|j                  |j                  k7  sJ |j	                  t
        t        f      }|j	                  d      }|d   d   |d   d   k(  sJ |d   d   |d   d   k(  sJ y )Nr-   r   r/   )r   r   r   r1   r2   )r   r&   r/   r6   feats_array_stringys        r*   $test_doc_stringy_array_attr_of_tokenr;   &   s    
h;
<Cii	"G<<7==(((,,e}-K,,'89q!!$Aq(9999q!!$Aq(9999r8   c                     t        | g d      }|j                  d   }|j                  |j                  k7  sJ |j	                  t
              }|j                  dk(  sJ y )Nr-   r   r/   )   )r   r   r3   r4   r   r   r5   s       r*   test_doc_scalar_attr_of_tokenr>   0   sU    
h;
<Cii	"G<<7==(((,,t$K$$$r8   c                    g d}g d}t        | ||      }|d   j                  |d   j                  cxk7  r&|d   j                  cxk7  r|d   j                  k7  sJ  J |j                  t        t        f      }|d   d   |d   j                  k(  sJ |d   d   |d   j                  k(  sJ |d   d   |d   j                  k(  sJ |d   d   |d   j                  k(  sJ y )NAnicer0   .)DETADJNOUNPUNCT)r   posr   r1      r=   )r   rH   r   r   r   )r   r   rH   r&   r6   s        r*   test_doc_array_tagrJ   8   s    *E
)C
he
-Cq6::Q?s1vzz?SVZZ?????,,c{+Kq>!A

***q>!A

***q>!A

***q>!A

***r8   c                    g d}g d}t        | ||      }|d   t        |d   j                        k(  sJ |d   t        |d   j                        k(  sJ |d   t        |d   j                        k(  sJ |j                  t        t
        f      }|d   d   |d   j                  j                  k(  sJ |d   d   |d   j                  j                  k(  sJ |d   d   |d   j                  j                  k(  sJ y )N)Eatblueham)zFeat=VzFeat=JzFeat=N)r   morphsr   r1   rI   )r   strmorphr   r   r   key)r   r   rQ   r&   r6   s        r*   test_doc_array_morphrS   D   s    "E*E
heE
2C8s3q6<<((((8s3q6<<((((8s3q6<<((((,,e}-Kq>!A 0 0000q>!A 0 0000q>!A 0 0000r8   c                 6   g d}g d}t        | ||      }|j                  t        t        f      }|d   d   |d   j                  k(  sJ |d   d   |d   j                  k(  sJ |d   d   |d   j                  k(  sJ |d   d   |d   j                  k(  sJ y )Nr@   )detamodROOTpunct)r   depsr   r1   rI   r=   )r   r   r   r   dep)r   r   rY   r&   r6   s        r*   test_doc_array_depr[   R   s    *E+D
he$
/C,,c{+Kq>!A

***q>!A

***q>!A

***q>!A

***r8   attrsr   r   IS_ALPHAc                 ~    g d}t        | |      }t        | |      j                  ||j                  |             y)z|Test that both Doc.to_array and Doc.from_array accept string attrs,
    as well as single attrs and sequences of attrs.
    r-   r   N)r   r   r   )r   r\   r   r&   s       r*   #test_doc_array_to_from_string_attrsr_   ]   s5    
 *E
he
$C))%e1DEr8   c                     g d}t        | |      j                  d      }|d   dk(  sJ |d   dk(  sJ |d   dk(  sJ y	)
z7Test that Doc.to_array can retrieve token start indicesr-   r   IDXr   r1   r=   rI      N)r   r   )r   r   offsetss      r*   test_doc_array_idxrd   g   sP    )E(%(11%8G1:??1:??1:r8   c                     g d}t        | |      }|D ]  }|d   |_         |j                  dg      }t        | |      }|j                  dg|       |j                  dg      }t	        j
                  d      j                  t        j                        |d<   t        | |      }t        j                  t              5  |j                  dg|       ddd       |j                  dg      }t	        j
                  d      j                  t        j                        |d<   t        | |      }t        j                  t              5  |j                  dg|       ddd       y# 1 sw Y   xY w# 1 sw Y   yxY w)zBTest that Doc.from_array doesn't set heads that are out of bounds.)Thisisar0   rC   r   r   HEADN   )r   headr   r   r   int32astyper   pytestraises
ValueError)r   r   r&   tokenarrdoc_from_arrays         r*   #test_doc_from_array_heads_in_boundsru   p   sD   0E
he
$C V
 ,,x
 C/Nvh, ,,x
 C[[_##ELL1CF/N	z	" 1!!6(C01 ,,x
 C[[^""5<<0CF/N	z	" 1!!6(C01 11 11 1s   E(E4(E14E=)r   ro   spacy.attrsr   r   r   r   r   spacy.tokensr   markissuer+   r7   r;   r>   rJ   rS   r[   parametrizer_   rd   ru    r8   r*   <module>r|      s      4 4  41 1(2:%	+1+ FG#4j"ABF CF1r8   