
    i
                     .   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ dd	lmZ ej"                  d
        Zej&                  j)                  d      d        ZdZej&                  j)                  d      d        Zd Zd Zd Zy)    N)English)Italian)Language)	Tokenizer)Example)load_config_from_str   )make_tempdirc                  "    ddddddddddd d	d
S )Nname-in-fixturezversion-in-fixturezdescription-in-fixturezauthor-in-fixturezemail-in-fixturezurl-in-fixturezlicense-in-fixturer   )widthvectorskeysname)r   versiondescriptionauthoremailurllicenser    r       ~/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/tests/serialize/test_serialize_language.py	meta_datar      s/     "'/%#'1aF	 	r   i	  c                      t               } | j                  d       | j                         }t               j                  |       y)zBTest we can serialize and deserialize a blank NER or parser model.nerN)r   add_pipeto_bytes
from_bytes)nlpbs     r   test_issue2482r"      s2     )CLLAIr   a<  
[nlp]
lang = "en"
pipeline = ["tok2vec", "tagger"]

[components]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v1"

[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v1"
width = ${components.tok2vec.model.encode:width}
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
rows = [5000,2500,2500,2500]
include_static_vectors = false

[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v1"
width = 96
depth = 4
window_size = 1
maxout_pieces = 3

[components.ner]
factory = "ner"

[components.tagger]
factory = "tagger"

[components.tagger.model]
@architectures = "spacy.Tagger.v2"
nO = null

[components.tagger.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode:width}
upstream = "*"
i&  c                       t        j                  t        t                      j	                   fd       t        j                            d       t        j                          y)zwTest that the nlp object with initialized tok2vec with listeners pickles
    correctly (and doesn't have lambdas).
    c                  V    t        j                   j                  d      ddgi      gS )NhellotagsV)r   	from_dictmake_docr    s   r   <lambda>z test_issue6950.<locals>.<lambda>Y   s&    G--cll7.Cfse_UV r   r%   N)r   from_configr   CONFIG_ISSUE_6950
initializepickledumpsr*   s   @r   test_issue6950r1   S   sH    
 

23DE
FCNNVW
LLL
LLr   c                     t        |       }t               5 }|j                  |       t               j                  |      }d d d        j                  |j                  k(  sJ y # 1 sw Y   %xY w)Nmeta)r   r
   to_disk	from_diskr4   )r   languagednew_languages       r   !test_serialize_language_meta_diskr:   _   sb    Y'H	 /1z++A./ ---/ /s   +A&&A/c                  $   t        j                  d      t        j                  d      t        j                  d      fd} t               } | |      |_        t	               5 }|j                  |       ddd       y# 1 sw Y   yxY w)zTest that serialization with custom tokenizer works without token_match.
    See: https://support.prodi.gy/t/how-to-save-a-custom-tokenizer/661/2
    z$1/|2/|:[0-9][0-9][A-K]:|:[0-9][0-9]: z[~]c                 t    t        | j                  i j                  j                  j                        S )N)prefix_searchsuffix_searchinfix_finditer)r   vocabsearchfinditer)r    infix_re	prefix_re	suffix_res    r   custom_tokenizerz>test_serialize_with_custom_tokenizer.<locals>.custom_tokenizero   s5    II#**#**#,,
 	
r   N)recompiler   	tokenizerr
   r5   )rG   r    r8   rD   rE   rF   s      @@@r   $test_serialize_with_custom_tokenizerrK   g   sq     

FGI

7#Izz*%H
 *C$S)CM	 1A  s   +BBc                    d}t        |       }|j                  d   |k(  sJ t               j                  |j                               }|j                  d   |k(  sJ t               j                  |j                         dg      }|j                  d   |k(  rJ t               j                  |j                  dg            }|j                  d   |k(  rJ y )Nr   r3   r   r4   )exclude)r   r4   r   r   )r   r   r    new_nlps       r   test_serialize_language_excluderO   ~   s    D
	
"C88Ft###j##CLLN3G<<4'''j##CLLNVH#EG||F#t+++j##CLL&L$BCG||F#t++++r   )r/   rH   pytestspacy.lang.enr   spacy.lang.itr   spacy.languager   spacy.tokenizerr   spacy.trainingr   
spacy.utilr   utilr
   fixturer   markissuer"   r-   r1   r:   rK   rO   r   r   r   <module>r[      s     	  ! ! # % " +  
 
 4 ) X 4 ..	,r   