
    i,                         d dl mZmZ d dlmZ d dlmZmZmZ d dl	m
Z
 ddlmZ ddlmZ dZd	 Z G d
 de      Z G d de      Z G d de      ZdgZy)   )BaseDefaultsLanguage)Doc)DummyTokenizerload_config_from_strregistry)Vocab   )	LEX_ATTRS)
STOP_WORDSz?
[nlp]

[nlp.tokenizer]
@tokenizers = "spacy.th.ThaiTokenizer"
c                      d } | S )Nc                 ,    t        | j                        S )N)ThaiTokenizervocab)nlps    g/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/spacy/lang/th/__init__.pythai_tokenizer_factoryz5create_thai_tokenizer.<locals>.thai_tokenizer_factory   s    SYY''     )r   s    r   create_thai_tokenizerr      s    ( "!r   c                   ,    e Zd ZdeddfdZdedefdZy)r   r   returnNc                 `    	 ddl m} || _        || _        y # t        $ r t        d      d w xY w)N    )word_tokenizezYThe Thai tokenizer requires the PyThaiNLP library: https://github.com/PyThaiNLP/pythainlp)pythainlp.tokenizer   ImportErrorr   )selfr   r   s      r   __init__zThaiTokenizer.__init__   sC    	8 +
  	9 	s    -textc                     t        | j                  |            }dgt        |      z  }t        | j                  ||      S )NF)wordsspaces)listr   lenr   r   )r   r    r"   r#   s       r   __call__zThaiTokenizer.__call__#   s:    T''-.3u:%4::U6::r   )__name__
__module____qualname__r	   r   strr   r&   r   r   r   r   r      s(    	e 	 	;S ;S ;r   r   c                   $    e Zd Z ee      ZeZeZ	y)ThaiDefaultsN)
r'   r(   r)   r   DEFAULT_CONFIGconfigr   lex_attr_gettersr   
stop_wordsr   r   r   r,   r,   )   s    !.1F Jr   r,   c                       e Zd ZdZeZy)ThaithN)r'   r(   r)   langr,   Defaultsr   r   r   r2   r2   /   s    DHr   r2   N)languager   r   tokensr   utilr   r   r   r   r	   	lex_attrsr   r0   r   r-   r   r   r,   r2   __all__r   r   r   <module>r;      sS    .  B B    "";N ;$< 8 
 (r   