
    i                     V   d dl Z d dlZd dlmZ d dlmZmZ d dlmZ e j                  j                  d      d        Ze j                  j                  d      d        Ze j                  j                  d	      d
        Zd Zd Ze j                  j!                  dg d      d        Zy)    N)English)DocDocBin)
Underscorei  c                  N    t                t        dg       t        g d       y)zTest that docbin init goes wellLEMMAattrs)r   ENT_IOBENT_TYPEN)r        |/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/serialize/test_serialize_docbin.pytest_issue4367r   	   s     H
'
12r   i  c                 x   t        | ddg      }d|j                  d<   d|j                  d<   t        d      }|j                  |       |j	                         }t        d      j                  |      }t        |j                  |             d	   }|j                  d   dk(  sJ |j                  d   dk(  sJ y
)z6Test that user_data is correctly serialized in DocBin.helloworldwordsbarfoo)z._.r   NNTstore_user_datar   N)r   	user_datar   addto_bytes
from_byteslistget_docs)en_vocabdocdoc_bindoc_bin_bytesnew_doc_binnew_docs         r   test_issue4528r&      s     hw0
1C CMM%05CMM,-T*GKK$$&M.99-HK;''1215GU#u,,,78EAAAr   i  c                     t        ddg      }t        |j                  |             g k(  sJ |j                         }t               j	                  |      }t        |j                  |             g k(  sJ y)z6Ensure an empty DocBin does not crash on serializationDEPHEADr	   N)r   r   r   r   r   )r    r"   r#   	doc_bin_2s       r   test_issue5141r+   !   sn     E6?+G  *+r111$$&M##M2I	""8,-333r   c                     t        g dd      } g d}ddi}t               }|j                  |      D ]X  }||_        |dd }d	|_        d
|_        d|_        |g|j                  d<   d|d   _        d|d   _	        | j                  |       Z | j                         }t        j                  d      }t               j                  |      } t        | j!                  |j"                              }t%        |      D ]  \  }}|j&                  ||   k(  sJ |j                  |k(  sJ t)        |j                        dk(  sJ |j                  d   d   j                  d	k(  sJ |j                  d   d   j
                  d
k(  sJ |j                  d   d   j                  dk(  sJ |d   j                  dk(  sJ |d   j                  dk(  rJ  y )N)r   r   r   NORMENT_IDT)r
   r   )z	Some textzLots of texts...z...Ag      ?r      UNUSUAL_SPAN_LABELUNUSUAL_SPAN_IDUNUSUAL_SPAN_KB_IDstartUNUSUAL_TOKEN_NORMUNUSUAL_TOKEN_ENT_IDen   )r   r   pipecatslabel_id_kb_id_spansnorm_ent_id_r   r   spacyblankr   r   r   vocab	enumeratetextlen)	r"   textsr:   nlpr!   span
bytes_datareloaded_docsis	            r   test_serialize_doc_binrM   +   s   @RVG 5E:D
)Cxx 	1Qx*$*"V		'+A/AC	 !!#J ++d
Ch!!*-G))#))45MM* 83xx58###xx4399~"""yy!!$++/CCCCyy!!$((,====yy!!$++/CCCC1v||33331v~~!77778r   c                    t        | ddg      }|j                  sJ |j                  dk(  sJ t        | ddgddg      }|j                  rJ |j                  dk(  sJ t               j	                  t        ||g      j                               }|j                  |       \  }}|j                  sJ |j                  dk(  sJ |j                  rJ |j                  dk(  sJ y )	Nthatz'sr   zthat 's F)r   spaceszthat's)docs)r   has_unknown_spacesrE   r   r   r   r   )r    doc1doc2r"   re_doc1re_doc2s         r   %test_serialize_doc_bin_unknown_spacesrW   M   s    x~.D""""99
"""x~uenED&&&&99   h!!&tTl";"D"D"FGG''1GW%%%%<<:%%%))))<<8###r   z$writer_flag,reader_flag,reader_value))TTr   )TFr   )FTnothing)FFrX   c                    t        j                  dd       t        | ddg      }d|j                  _        t	        |      }|j                  |       |j                         }t	        |      j                  |      }t        |j                  |             d	   }|j                  j                  |k(  sJ i t        _        y
)z?Test that custom extensions are correctly serialized in DocBin.r   rX   )defaultr   r   r   r   r   r   N)r   set_extension_r   r   r   r   r   r   r   r   doc_extensions)	r    writer_flagreader_flagreader_valuer!   	doc_bin_1r#   r*   doc_2s	            r   test_serialize_custom_extensionrc   ]   s     eY/
hw0
1CCEEI{3IMM#&&(M{3>>}MI##H-.q1E77;;,&&& "Jr   )pytestrA   spacy.lang.enr   spacy.tokensr   r   spacy.tokens.underscorer   markissuer   r&   r+   rM   rW   parametrizerc   r   r   r   <module>rk      s      ! $ . 43 3 4B B 44 48D$  *##r   