
    i                        d dl Z d dlZd dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZ d	d
lmZ ej,                  j/                  d      d        Zej,                  j/                  d      d        Zej,                  j/                  d      d        Zej,                  j/                  d      d        Zej,                  j/                  d      d        Zej,                  j/                  d      d        Zej,                  j/                  d      d        Zej,                  j/                  d      d        Zej,                  j/                  d      d        Z d Z!d Z"d Z#d  Z$d! Z%d" Z&y)#    N)DEPHEAD)English)Language)MatcherPhraseMatcher)Doc)Vectors)Vocab   )make_tempdiri  c                     t        t                     } t        j                  dd      }t	        |g d      }| j                  d      }|j                  d       |j                  j                  dd	      d	k(  sJ ||j                  _
        t               5 }|j                  |       | j                  d      j                  |      }|j                  j                  dd	      d	k(  sJ 	 d
d
d
       y
# 1 sw Y   y
xY w)zfTest that models with no pretrained vectors can be deserialized
    correctly after vectors are added.)   i,  fdtype)IamMatt)datakeystaggerPRPpretrained_dimsr   N)r   r   numpyonesr
   create_pipe	add_labelcfggetvocabvectorsr   to_disk	from_disk)nlpr   r"   r   paths        y/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/serialize/test_serialize_doc.pytest_issue1727r(      s     57
C::hc*D4&9:G__X&F
U::>>+Q/1444"FLL	 94t*44T:zz~~/3q8889 9 9s   AC55C>i  c            
      j   t        j                  ddgddgddgddgdd	gd
dgddggd      } t        t               dj	                               }|j
                  j                  j                  d       |j                  t        t        g|       }t        t        |j                              dk(  sJ y)z[Test sentence boundaries are deserialized correctly, even for
    non-projective sentences.   i     i  r   i  i  r   l   LP^& l    i  l    i  uint64r   zJust what I was looking for .wordsROOTN)r   asarrayr	   r   splitr!   stringsadd
from_arrayr   r   lenlistsents)
heads_depsdocs     r'   test_issue1799r:   #   s     HHHH#$!3'!3'	
 J eg<BBD
ECII&!
..$j
1CtCII1$$$    i*  c            
      V   g d} t        t               |       }d|d   _        t        |j                        j	                  |j                               }|d   j                  sJ |j                  d      rJ |j                  d      rJ t        t               | dgt        |       z  g ddgt        |       z  	      }t        |j                        j	                  |j                               }|d   j                  sJ |j                  d      sJ |j                  d      sJ y
)zVTest that sentence boundaries & parse/tag flags are not lost
    during serialization.)	Thisisafirstsentence.Andanotheroner-   T   r   TAG)	r   r   r   r   r   r   rF   rF   rF   dep)r.   tagsheadsdepsN)	r	   r   is_sent_startr!   
from_bytesto_bytes
sent_starthas_annotationr5   )r.   r9   new_docs      r'   test_issue1834rR   9   s	    SE
egU
#CCF#))n''7G1:    %%e,,,%%e,,,
Ws5z!)Ws5z!C #))n''7G1:    !!%(((!!%(((r;   i[  c                  8   t        t                     } | j                  dddigg       t        | j                  dg      }t         | |            dk(  sJ t        j                  |       }t        |j                  dg      }t         ||            dk(  sJ y )Npat1orthhellor-   r*   )r   r   r3   r	   r!   r5   copydeepcopy)matcherr9   new_matcherrQ   s       r'   test_issue1883r[   Q   s    egGKK67+,-.
gmmG9
-Cws|!!!--(K+##G95G{7#$)))r;   i
  c                     t               } | j                  d      }|j                  d       | j                           | d      }|j	                  d      sJ | j                  ddg      }t        |      }|j	                  d      sJ y)zQTest the tagger sets has_annotation("TAG") correctly when used via Language.pipe.r   Azhello worldrG   rV   worldN)r   add_piper   
initializerP   pipenext)r%   r   r9   docs	piped_docs        r'   test_issue2564re   \   s     *C\\(#F
SNN
m
Ce$$$88Wg&'DT
I##E***r;   i  c                  H   t               } t        | j                        }|j                  d | d       | d       | d      g       |j                  d | d      g       t	        j
                  |      }t	        j                  |      }t        |      t        |      k(  sJ y)z5Test that the PhraseMatcher can be pickled correctly.TEST1r?   bcTEST2dN)r   r   r!   r3   pickledumpsloadsr5   )r%   rY   r   rZ   s       r'   test_issue3248_2ro   j   s     )CCII&GKK#c(CHc#h78KK#c($<< D,,t$K{s7|+++r;   i  c                      t               } | j                  d       | j                         }t               }|j                  d       |j                  |       y)zeTest that Language.to_bytes handles serializing a pipeline component
    with an uninitialized model.textcatN)r   r_   rN   rM   )r%   
bytes_datanew_nlps      r'   test_issue3289rt   v   sE     )CLLJiGYz"r;   i  c                     t               } | j                  d        | d      }|d   j                  sJ |j                  d      sJ t	        t        |j                              dk(  sJ |j                         }t        | j                        j                  |      }|d   j                  sJ |j                  d      sJ t	        t        |j                              dk(  sJ y)z|Test that sentence boundaries are set correctly so Doc.has_annotation("SENT_START") can
    be restored after serialization.sentencizerzHello worldr   
SENT_STARTr*   N)r   r_   rL   rP   r5   r6   r7   rN   r	   r!   rM   )r%   r9   	doc_bytesrQ   s       r'   test_issue3468ry      s     )CLL
m
Cq6l+++tCII1$$$I#))n''	2G1:####!!,///tGMM"#q(((r;   iw  c                  R   t               }  | d      }|d   j                  dk(  sJ d|d   _        |d   j                  dk(  sJ t               5 }|dz  }|j                  |        | d      }|j	                  |       |d   j                  dk(  sJ 	 ddd       y# 1 sw Y   yxY w)z=Ensure that a modified pos attribute is serialized correctly.zSdisplaCy uses JavaScript, SVG and CSS to show you how computers understand languager    NOUNmy_docN)r   pos_r   r#   r$   )r%   r9   tmp_dir	file_pathdoc2s        r'   test_issue3959r      s     )C
]C q6;;"CFKq6;;&   	 &7h&	I2wy!Aw||v%%%& & &s   ABB&c                    t        |       }|j                         }t        |       }|j                  |       t        |      t        |      k(  sJ t	        ||      D ]   \  }}|j
                  |j
                  k(  r J  y )N)r	   rN   rM   r5   ziptext)en_vocabr9   r   r   token1token2s         r'   test_serialize_empty_docr      sp    
h-C<<>Dx=DOODs8s4y   c4. *{{fkk)))*r;   c                     t        | ddg      }ddi|_        |j                         }t        |       j                  |      }|j                         |k(  sJ y )NrV   r^   r-   r]   g      ?)r	   catsrN   rM   )r   r9   doc_brQ   s       r'   "test_serialize_doc_roundtrip_bytesr      sT    
hw0
1CSzCHLLNE(m&&u-G&&&r;   c                    t        | ddg      }t               5 }|dz  }|j                  |       t        |       j                  |      }|j	                         |j	                         k(  sJ 	 d d d        y # 1 sw Y   y xY wNrV   r^   r-   r9   )r	   r   r#   r$   rN   r   r9   rk   r   doc_ds        r'   !test_serialize_doc_roundtrip_diskr      ss    
hw0
1C	 21I	IH''	2||~!1111	2 2 2s   AA88Bc                    t        | ddg      }t               5 }|dz  }t        |      }|j                  |       t        |       j	                  |      }|j                         |j                         k(  sJ 	 d d d        y # 1 sw Y   y xY wr   )r	   r   strr#   r$   rN   r   s        r'   *test_serialize_doc_roundtrip_disk_str_pathr      s|    
hw0
1C	 21I		N	IH''	2||~!11112 2 2s   ABBc                    t        | ddg      }d|j                  d<   t        |       j                  |j                               }|j                  d   dk(  sJ t        |       j                  |j                         dg      }|j                  rJ t        |       j                  |j                  dg            }|j                  rJ y )NrV   r^   r-   barfoo	user_data)exclude)r	   r   rM   rN   )r   r9   rQ   s      r'   test_serialize_doc_excluder      s    
hw0
1C CMM%(m&&s||~6GU#u,,,(m&&s||~}&MG    (m&&s||[M|'JKG     r;   c                    t        | g d      }|dd }d|_        d|_        d|_        |g|j                  d<   t        |       j                  |j                               }t        |j                  d         d	k(  sJ |j                  d   d   j                  dk(  sJ |j                  d   d   j                  dk(  sJ |j                  d   d   j                  dk(  sJ y )
N)rV   r^   !r-   r   r   $test_serialize_doc_span_groups_label!test_serialize_doc_span_groups_id$test_serialize_doc_span_groups_kb_idcontentr*   )r	   label_id_kb_id_spansrM   rN   r5   )r   r9   spanrQ   s       r'   test_serialize_doc_span_groupsr      s    
h5
6Cq8D8DK2DH8DK 6CIIi(m&&s||~6Gw}}Y'(A---==#A&--1WWWW==#A&**.QQQQ==#A&--1WWWWr;   )'rW   rl   r   pytestspacy.attrsr   r   spacy.lang.enr   spacy.languager   spacy.matcherr   r   spacy.tokensr	   spacy.vectorsr
   spacy.vocabr   utilr   markissuer(   r:   rR   r[   re   ro   rt   ry   r   r   r   r   r   r   r    r;   r'   <module>r      s       ! ! # 0  !   49 9  4% %* 4) ). 4* * 4
+ 
+ 4, , 4# # 4) )  4& &$*'22!Xr;   