
    io                     <   U d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddl m!Z!m"Z"m#Z#m$Z$m%Z%  e       rddl&m'Z' ndZ' e       rddl(m)Z) ndZ) ejT                  e+      Z,i Z-e.e/e0e   f   e1d<   i Z2e.e/e0e   f   e1d<    ee/e/dz  f   g d e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd  e       rdndfd! e       rd"ndfd# e       rd$ndfd%d& e       rdndfd' e       rd(ndfd)d*d+ e       rd,ndfd- e       rd.ndfd/d0 e       rd1ndfd2d3 e       rdndfd4 e       rd5ndfd6d7 e       rdndfd8d9 e       rd:ndfd;d< e       rdndfd=d> e       rdndfd? e       rdndfd@dA e       rdBndfdC e       rd5ndfdD e       rdndfdE e       rdndfdF e       rdndfdG e       rdndfdH e       rdIndfdJdKdLdMdN e       rd5ndfdO e       rdPndfdQ e       rdRndfdSdT e       rdndfdU e       rdVndfdW e       rdndfdX e       rd5ndfdY e       rdndfdZd[ e       rd\ndfd] e       rd^ndfd_d` e       rdndfda e       rd5ndfdb e       rdcndfdd e       rdendfdfdg e       rdhndfdi e       rdjndfdk e       rdjndfdl e       rdjndfdm e       rdjndfdn e       rdjndfdo e       rdjndfdp e       rdndfdq e       rdrndfds e       rdrndfdt e       rdrndfdu e       rdrndfdv e       rdrndfdw e       rdrndfdx e       rdrndfdy e       rdrndfdz e       rdrndfd{ e       rd|ndfd} e       rd5ndfd~ e       rd5ndfd e       rd5ndfd e       rd\ndfdd e       rd5ndfddddd e       rdndfd e       rdndfd e       rdndfddd e       rdndfd e       rdndfd e       rd5ndfd e       rd5ndfd e       rdndfd e       rd5ndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rd"ndfd e       rd"ndfdd e       rdndfd e       rdndfd e       rd\ndfd e       rd\ndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfdd e       rdndfd e       rdndfdd e
       rdn
 e       rdrndfd e
       rdn
 e       rdrndfd e
       rdn
 e       rdrndfd e
       rdn
 e       rdrndfd e
       rdn
 e       rdrndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rd\ndfdȑd e       rdndfd e       rdndfd e       rdndfd e       rdndfdϑd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rd\ndfd e       rd\ndfd e       rdrndfd e       rdrndfd e       rd\ndfd e       rdndfd e       rdndfd e       rdndfd e       rd5ndfd e       rdndfd e       rdndfd e       rdndfd e       rd.ndfd e       rd.ndfdd e       rd5ndfdd e       rdndfd e
       rdn
 e       rdrndfd e       rdndfdd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfdd  e       rdndfd e       rdjndfd e       rdndfd e       rdndfd e       rdndfddd	d
 e       rdndfd e       rd\ndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfd e       rdjndfd e       rdndfd e       rdndfd e       rdndfd e       rdndfdd e       rdndfd e       rd\ndfd e       rd5ndfd e       rdndfd  e       rdndfd! e       rdjndfd"d# e       rdndfd$ e       rdndfd% e       rd&ndfd' e       rdndfd(d)d* e       rdndfd+ e       rdndfd,d- e
       rdn
 e       rdrndfd. e
       rdn
 e       rdrndfd/d0d1d2d3 e       rd4ndfd5 e       rdndfd6 e       rd7ndfd8d9 e       rdndfd: e       rdndfd; e       rd<ndfd= e       rd\ndfd> e       rdndfd? e       rdndf      Z3h d@Z4e5e/   e1dA<   e4D ]  Z6e6e3vs e       rdrnde3e6<     ee!e3      Z7 e!jp                         D  ci c]  \  } }|| 
 c}} Z9dB Z:dC Z;dDe/dEe0e   dz  fdFZ<	 	 	 	 	 	 	 dSdGe/ejz                  e/   z  dHe/ejz                  e/   z  dz  dIe>dJe.e/e/f   dz  dKe>e/z  dz  dLe/dz  dMe>dNe/dEe.e/ef   fdOZ? G dP dQ      Z@dRdQgZAyc c}} w (T  zAuto Tokenizer class.    N)OrderedDict)Any)is_mistral_common_available   )PreTrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)load_gguf_checkpoint)TOKENIZER_CONFIG_FILE)extract_commit_hashis_g2p_en_availableis_sentencepiece_availableis_tokenizers_availablelogging)cached_file   )EncoderDecoderConfig   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigconfig_class_to_model_typemodel_type_to_module_name!replace_list_option_in_docstrings)TokenizersBackend)SentencePieceBackendREGISTERED_TOKENIZER_CLASSESREGISTERED_FAST_ALIASESaimv2CLIPTokenizeralbertAlbertTokenizeralignBertTokenizeraudioflamingo3Qwen2Tokenizer
aya_visionCohereTokenizerbarkbartRobertaTokenizerbarthezBarthezTokenizer)bartphoBartphoTokenizerbertzbert-generationBertGenerationTokenizer)zbert-japaneseBertJapaneseTokenizer)bertweetBertweetTokenizerbig_birdBigBirdTokenizerbigbird_pegasusPegasusTokenizer)biogptBioGptTokenizer
blenderbotBlenderbotTokenizer)zblenderbot-smallBlenderbotSmallTokenizerblipzblip-2GPT2Tokenizer)bridgetowerr+   bros)byt5ByT5Tokenizer	camembertCamembertTokenizer)canineCanineTokenizerchinese_clip)clapr+   clipclipseg)clvpClvpTokenizer
code_llamaCodeLlamaTokenizercodegencoherecohere2colqwen2convbertcpmCpmTokenizer)cpmantCpmAntTokenizer)ctrlCTRLTokenizer)zdata2vec-audioWav2Vec2CTCTokenizer)zdata2vec-textr+   dbrxdebertaDebertaTokenizerz
deberta-v2DebertaV2Tokenizer)diaDiaTokenizer
distilbertdprDPRQuestionEncoderTokenizerelectraemu3ernie)esmEsmTokenizerfalcon_mambaGPTNeoXTokenizerfastspeech2_conformerFastSpeech2ConformerTokenizer)flaubertFlaubertTokenizerflava	flex_olmo	florence2BartTokenizerfnetFNetTokenizer)fsmtFSMTTokenizerfunnelFunnelTokenizergemmaGemmaTokenizergemma2gemma3gemma3_textgemma3ngemma3n_textgitglmr   glm4glm4_moeglm4_moe_liteglm4v	glm4v_moe	glm_imageglmasrgot_ocr2zgpt-sw3GPTSw3Tokenizergpt2gpt_bigcodegpt_neogpt_neox)gpt_neox_japaneseGPTNeoXJapaneseTokenizergptj)graniter?   )
granitemoer?   )granitemoehybridr?   )granitemoesharedr?   zgrounding-dinogroupvitherbertHerbertTokenizer)hubertr[   )ibertr+   ideficsLlamaTokenizeridefics2instructblipinstructblipvideointernvljais2jina_embeddings_v3XLMRobertaTokenizerzkosmos-2lasr_ctcLasrTokenizerlasr_encoderlayoutlm
layoutlmv2LayoutLMv2Tokenizer
layoutlmv3LayoutLMv3Tokenizer	layoutxlmLayoutXLMTokenizerledLEDTokenizerlighton_ocrQwen2TokenizerFastlilt
longformer)lukeLukeTokenizerlxmertLxmertTokenizerm2m_100M2M100Tokenizermambamamba2marianMarianTokenizermarkuplmMarkupLMTokenizermbartMBartTokenizermbart50MBart50Tokenizer)megar+   zmegatron-bert
metaclip_2)zmgp-strMgpstrTokenizer	ministralMistralCommonBackend
ministral3mistralmistral3mixtralmlukeMLukeTokenizerzmm-grounding-dino
mobilebertMobileBertTokenizermpnetMPNetTokenizermpt)mrar+   mt5T5Tokenizermusicgenmusicgen_melodymvpMvpTokenizer)myt5MyT5TokenizernezhanllbNllbTokenizerznllb-moe
nomic_bertnougatNougatTokenizernystromformerolmoolmo2olmo3olmo_hybridolmoezomdet-turbo	oneformerz
openai-gptOpenAIGPTTokenizeroptovis2owlv2owlvitpegasus	pegasus_x)	perceiverPerceiverTokenizerphi)phobertPhobertTokenizer
pix2structpixtralplbartPLBartTokenizer)
prophetnetProphetNetTokenizerqdqbertqwen2qwen2_5_omni
qwen2_5_vlqwen2_audio	qwen2_moeqwen2_vlqwen3qwen3_5Qwen3_5Tokenizerqwen3_5_moe	qwen3_moe
qwen3_nextqwen3_omni_moeqwen3_vlqwen3_vl_moe)ragRagTokenizerrealmrecurrent_gemmareformerReformerTokenizerrembertRemBertTokenizer	retribert)robertar+   )zroberta-prelayernormr+   )roc_bertRoCBertTokenizerroformerRoFormerTokenizerrwkvsam3
sam3_videoseamless_m4tSeamlessM4TTokenizerseamless_m4t_v2shieldgemma2siglipSiglipTokenizersiglip2Siglip2Tokenizerspeech_to_textSpeech2TextTokenizerspeecht5SpeechT5Tokenizer)splinterSplinterTokenizersqueezebertstablelm
starcoder2switch_transformerst5t5gemma)tapasTapasTokenizertrocrtvpudopUdopTokenizerumt5)	unispeechr[   )zunispeech-satr[   viltvisual_bert)vitsVitsTokenizervoxtralvoxtral_realtime)wav2vec2r[   )zwav2vec2-bertr[   )zwav2vec2-conformerr[   )wav2vec2_phonemeWav2Vec2PhonemeCTCTokenizerwhisperWhisperTokenizerxclipxglmXGLMTokenizer)xlmXLMTokenizerzxlm-robertazxlm-roberta-xlxlnetXLNetTokenizerxlstmxmodyoso>    fuyuphi3jambajanusllavamolmonvfp4arcticchatlmmolmo2phi3_vphimoeopencuaopenvlastep3p5minicpmvnemotronvipllava	chameleon	internlm2
cohere_asr
h2ovl_chat
llava_next
minimax_m2
modernbertdeepseek_v2deepseek_v3deepseek_vlinternvl_chatdeepseek_vl_v2hyperclovax_vlmdeepseek_vl_hybrid)MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASSc                 t    t        | dd      5 }t        j                  |      cddd       S # 1 sw Y   yxY w)z*Loads a vocabulary file into a dictionary.rutf-8encodingN)openjsonload)
vocab_filereaders     {/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/auto/tokenization_auto.py
load_vocabr{    s1    	j#	0 !Fyy ! ! !s   .7c                     g }t        | dd      5 }|D ]O  }|j                         }|s|j                  d      r(|j                  t	        |j                                      Q 	 ddd       |S # 1 sw Y   |S xY w)z Loads a merges file into a list.rq  rr  rs  #N)ru  strip
startswithappendtuplesplit)merges_filemergesry  lines       rz  load_mergesr    sr    F	k3	1 3V 	3D::<DDOOC0eDJJL12	33
 M3
 Ms   A1A1*A11A;
class_namereturnc                 f   | dv rt         S | t        v r	t        |    S | t        v r	t        |    S | dk(  rt         S t        j	                         D ]  \  }}|| k(  st        |      }|dv r| dk(  rt        j                  dd      }nt        j                  d| d      }	 t        ||       }t        |d	d       x}rb|t        j                  v rPt        j                  |   }t        ||j                  d
z   |       t        j                  j                  |dz   |       |c S  t        j                   j#                         D ]  }t        |dd       | k(  s|c S  t        j                  d      }t%        ||       rt        ||       S | j'                  d
      rt)        | d d       S y # t        $ r Y cw xY w)N>   BloomTokenizerBloomTokenizerFastr   )r   r   r   r   r   r   r>  r   z.tokenization_mistral_commontransformers.ztransformers.models
__module__Fast_fast__name__)r   r   r   TOKENIZER_MAPPING_NAMESitemsr   	importlibimport_modulegetattrsysmodulessetattrr  
setdefaultAttributeErrorTOKENIZER_MAPPING_extra_contentvalueshasattrendswithtokenizer_class_from_name)	r  module_nametokenizer_classmoduleresultsubmodbase_mod	tokenizermain_modules	            rz  r  r    s   ==  ,,&z2211+J77((   )@(E(E(G $_j(3K@Krr"88"001OQ_`"001[M1BDYZ	 4%flDAAFAvQTQ\Q\G\"{{62HHfoo&>GKK**6G+;XF#* '55<<> 	9j$/:= )).9K{J'{J// 6"(CR99% " s   A>F##	F0/F0pretrained_model_name_or_path	cache_dirforce_downloadproxiestokenrevisionlocal_files_only	subfolderc                 "   |j                  d      }	t        | t        |||||||ddd|	      }
|
t        j	                  d       i S t        |
|	      }	t        |
d      5 }t        j                  |      }ddd       |	d<   |S # 1 sw Y   xY w)aY  
    Loads the tokenizer configuration from a pretrained model tokenizer configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        proxies (`dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `hf auth login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the tokenizer configuration from local files.
        subfolder (`str`, *optional*, defaults to `""`):
            In case the tokenizer config is located inside a subfolder of the model repo on huggingface.co, you can
            specify the folder name here.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `dict`: The configuration of the tokenizer.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    tokenizer_config = get_tokenizer_config("google-bert/bert-base-uncased")
    # This model does not have a tokenizer config so the result will be an empty dict.
    tokenizer_config = get_tokenizer_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained tokenizer locally and you can reload its config
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
    tokenizer.save_pretrained("tokenizer-test")
    tokenizer_config = get_tokenizer_config("tokenizer-test")
    ```_commit_hashF)r  r  r  r  r  r  r   _raise_exceptions_for_gated_repo%_raise_exceptions_for_missing_entries'_raise_exceptions_for_connection_errorsr  Nz\Could not locate the tokenizer configuration file, will try to use the model config instead.rr  rs  )	getr   r   loggerinfor   ru  rv  rw  )r  r  r  r  r  r  r  r  kwargscommit_hashresolved_config_filery  r  s                rz  get_tokenizer_configr    s    J **^,K&%%))..305  #rs	%&:KHK	"W	5 #6"#(F>M# #s    BBc                   \    e Zd ZdZd Ze ee      dee	z  fd              Z
e	 dd       Zy)AutoTokenizera  
    This is a generic tokenizer class that will be instantiated as one of the tokenizer classes of the library when
    created with the [`AutoTokenizer.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                     t        d      )Nz}AutoTokenizer is designed to be instantiated using the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)` method.)OSError)selfs    rz  __init__zAutoTokenizer.__init__4  s    _
 	
    r  c           	         |j                  dd      }d|d<   |j                  dd      }|j                  dd      }|j                  dd      }|j                  d      }|vt        j                  |d      }	|	,t        d	| d
dj	                  d t        D               d      t        |	      }
|
t        d|	 d       |
j                  |g|i |S |r3t        ||fi |}t        |d      d   }t        j                  d)i |}n|	 t        j                  |fd|i|}|j                  }t        |fi |}|j                  dd      }d}d|v r4t        |d   t         t"        f      r|d   }n|d   j                  dd      }||||dk7  rt        j                  |      ut        j                  |      j%                  d      |j%                  d      k7  r?t&        	 t'        j                  |g|i |S  t        |      j                  |g|i |S d|v r|d   |d<   |r|j/                  d      r|dd }|du}t1        |      t2        v xs% |duxr t        |      duxs t        |dz         du}|xrN t1        |      t2        vxr; |duxr5 t        |      xs t        |dz         j4                  j7                  d       }|r|t8        v rd}d}|r:|d   |d   }n|d   }d|v r|j;                  d      d   }nd}t=        |||||      }|rg|re|sc|rt        |j%                  d             t?        |fi |}
|j                  dd      }|
jA                           |
j                  |g|d|i|S |c|}t        |      }
|
|j/                  d      st        |dz         }
|
|
jB                  dk(  rt&        }
|
t&        }
 |
j                  |g|i |S tE        |dd      rG|jF                  }d |vr|j/                  d      r|dd }t        |      }
 |
j                  |g|i |S t        |tH              rzt1        |jJ                        t1        |jL                        urDt*        jO                  d!|jL                  jP                   d"|jJ                  jP                   d#       |jL                  }tS        t1        |      jB                        xs tE        |d$d      }|;t2        j                  t1        |      t&              }
|
 |
j                  |g|i |S |j                  dd      }|||d%k7  r|j/                  d      r|dd }t        |      }
|
|j/                  d      st        |dz         }
|
|
jB                  dk(  rt&        }
|
t&        }
 |
j                  |g|i |S t        d&|jP                   d'dj	                  d( t2        D               d      # t        t        f$ r t        j                  |fi |}Y w xY w# t(        $ r#}t*        j-                  d|        Y d}~Jd}~ww xY w)*a  
        Instantiate one of the tokenizer classes of the library from a pretrained model vocabulary.

        The tokenizer class to instantiate is selected based on the `model_type` property of the config object (either
        passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's missing, by
        falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                    - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
                      using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
                    - a path to a single saved vocabulary file if and only if the tokenizer only requires a
                      single vocabulary file (like Bert or XLNet), e.g.: `./my_model_directory/vocab.txt`. (Not
                      applicable to all derived classes)
            inputs (additional positional arguments, *optional*):
                Will be passed along to the Tokenizer `__init__()` method.
            config ([`PreTrainedConfig`], *optional*)
                The configuration object used to determine the tokenizer class to instantiate.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model configuration should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download the model weights and configuration files and override the
                cached versions if they exist.
            proxies (`dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            subfolder (`str`, *optional*):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co (e.g. for
                facebook/rag-token-base), specify it here.
            tokenizer_type (`str`, *optional*):
                Tokenizer type to be loaded.
            backend (`str`, *optional*, defaults to `"tokenizers"`):
                Backend to use for tokenization. Valid options are:
                - `"tokenizers"`: Use the HuggingFace tokenizers library backend (default)
                - `"sentencepiece"`: Use the SentencePiece backend
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (additional keyword arguments, *optional*):
                Will be passed to the Tokenizer `__init__()` method. Can be used to set special tokens like
                `bos_token`, `eos_token`, `unk_token`, `sep_token`, `pad_token`, `cls_token`, `mask_token`,
                `additional_special_tokens`. See parameters in the `__init__()` for more details.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer

        >>> # Download vocabulary from huggingface.co and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

        >>> # Download vocabulary from huggingface.co (user-uploaded) and cache.
        >>> tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")

        >>> # If vocabulary files are in a directory (e.g. tokenizer was saved using *save_pretrained('./test/saved_model/')*)
        >>> # tokenizer = AutoTokenizer.from_pretrained("./test/bert_saved_model/")

        >>> # Download vocabulary from huggingface.co and define model-specific arguments
        >>> tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base", add_prefix_space=True)

        >>> # Explicitly use the tokenizers backend
        >>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="tokenizers")

        >>> # Explicitly use the sentencepiece backend
        >>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", backend="sentencepiece")
        ```configNT
_from_autouse_fasttokenizer_typetrust_remote_code	gguf_filezPassed `tokenizer_type` z3 does not exist. `tokenizer_type` should be one of z, c              3       K   | ]  }|  y wN .0cs     rz  	<genexpr>z0AutoTokenizer.from_pretrained.<locals>.<genexpr>  s      Dq Ds   r  zTokenizer class z is not currently imported.F)return_tensorsr  auto_mapr   r  z!Failed to use TokenizersBackend: r  r  ztransformers.r   r   z--code_revisionPythonBackendPreTrainedTokenizerFastz The encoder model config class: z3 is different from the decoder model config class: z. It is not recommended to use the `AutoTokenizer.from_pretrained()` method in this case. Please use the encoder and decoder specific tokenizer classes.
model_typer   z!Unrecognized configuration class z8 to build an AutoTokenizer.
Model type should be one of c              3   4   K   | ]  }|j                     y wr  )r  r  s     rz  r  z0AutoTokenizer.from_pretrained.<locals>.<genexpr>B  s     4[AQZZ4[s   r  )*popr  r  
ValueErrorjoinr  from_pretrainedr   r
   r   	for_modelr  r   r  r  
isinstancer  listremovesuffixr   	Exceptionr  debugr  typer  r  r  ro  r  r	   r   register_for_auto_classr  r  r  r   decoderencoderwarning	__class__r   )clsr  inputsr  r  _r  r  r  tokenizer_class_namer  	gguf_pathconfig_dictconfig_model_typetokenizer_configtokenizer_config_classtokenizer_auto_mapehas_remote_codehas_local_codeexplicit_local_code	class_refupstream_repotokenizer_class_candidate_classr  s                             rz  r  zAutoTokenizer.from_pretrained:  s   d Hd+#| JJz4($4d;"JJ':DAJJ{+	 %#:#>#>~t#T #+ .~.>>qyy D,C DDEQH 
 88LMO& #34H3IId!eff2?223PdSYd]cdd#$A9WPVWI.yOPXYK))8K8F^c#331EVZ` #-- 00MXQWX!1!5!56G!N "))*:6F%5j%A"%5j%A%E%EoW[%\"
 &&2!-!R''++,=>J(,,->?LLVT&33F;= !,J,<<=Zn]cngmnn U,-CDTT-06:@  --%5n%EF>"!&<&E&Ef&M%;CR%@",D8f):: 
"$. )*@AM Z,-Cf-LMUYY	 	  V$55 'd2 9-.DE R01G&1PQ*ZZ89 	 04]]#O!%!!$0.q1	.q1	y  ) 5a 8 $ 9!#@.Racp! 09L%)*@*M*Mf*UV;IGdohnoO

?D1A3352?22-06J[_e  $/(>%78QRO&/H/Q/QRX/Y";<UX^<^"_*/G/G?/Z"3&"32?223PdSYd]cddV.5++F(66??6;R7?O2?223PdSYd]cdd f23FNN#4+??6v~~7O7O6P Q%%+^^%=%=$> ?22 ^^F/V0E0EFm'RXZfhlJm
!/33DLBSTO*6667ThW]haghh "2!5!56G!N!-%)<<AWA`A`agAh)?)D&78NOO&/E/N/Nv/V";<RU[<["\*/G/G?/Z"3&"32?223PdSYd]cdd/0@0@/A B++/994[IZ4[+[*\\]_
 	
c ( c)99:Wb[abcB ! JLL#DQC!HIIJs*   W8 (X$ 8%X! X!$	Y-YYNc                     |||}n||}nt        d      |||fD ]  }||t        |j                  <    |||t        |j                  <   t        j                  | ||       y)a  
        Register a new tokenizer in this mapping.

        Args:
            config_class ([`PreTrainedConfig`]):
                The configuration corresponding to the model to register.
            tokenizer_class: The tokenizer class to register (V5 - preferred parameter).
            slow_tokenizer_class: (Deprecated) The slow tokenizer to register.
            fast_tokenizer_class: (Deprecated) The fast tokenizer to register.
        Nz$You need to pass a `tokenizer_class`)exist_ok)r  r   r  r   r  register)config_classr  slow_tokenizer_classfast_tokenizer_classr  	candidates         rz  r   zAutoTokenizer.registerE  s     "#/"6%1"6 !GHH.0DoV 	MI$CL,Y-?-?@	M  +0D0PEY#$8$A$AB""<8"Tr  )NNNF)r  r  __qualname____doc__r  classmethodr   r  r   r   r  staticmethodr   r  r  rz  r  r  ,  sZ    
 &'>?G
	1	1G
 @ G
R kpU Ur  r  r  )NFNNNFr  )Br  r  rv  osr  collectionsr   typingr   transformers.utils.import_utilsr   configuration_utilsr   dynamic_module_utilsr   r	   modeling_gguf_pytorch_utilsr
   tokenization_utils_baser   utilsr   r   r   r   r   	utils.hubr   encoder_decoderr   auto_factoryr   configuration_autor   r   r   r   r   tokenization_utils_tokenizersr    tokenization_utils_sentencepiecer   
get_loggerr  r  r   dictstrr  __annotations__r   r  ro  setr  r  r  CONFIG_TO_TYPEr{  r  r  PathLikeboolr  r  __all__)kvs   00rz  <module>r#     s]      	 
 #  G 3 \ ? <  % 2 *  BH			H	% 68 d3S	>2 702 c49n- 26+c3:o6S	%<%>/DIS	(?(A$tLS 
%<%>/DIS 
/F/H+dS	S
 
,C,E(4PS 
$;$=4HS 
'>'@#dKS 
*A*C&NS 	(S 
$;$=4HS 
9S9U5[_`S 	3S 	*S 
+B+D'$OS 
2I2K.QUVS  	&!S" 
0G0I,tT#S$ 	9%S& 
$;$=4H'S( 
&=&??TJ)S* 	,+S, 
$;$=4H-S. 	"/S0 
.E.G*TR1S2 	&3S4 
,C,E4P5S6 	%7S8 
$;$=4H9S: 
'>'@OdK;S< 	"=S> 
/F/H+dS?S@ 
'>'@OdKASB 
(?(A$tLCSD 
)@)B%MESF 
)@)B%MGSH 
(?(A_tLISJ 
"9";FKSL 	&MSN 	"OSP 	3QSR 	.SST 
$;$=4HUSV 
*A*C&NWSX 
/F/H+dSYSZ 	 [S\ 
*A*CN]S^ 
1H1J-PTU_S` 
'>'@OdKaSb 
$;$=4HcSd 
%<%>/DIeSf 	 gSh 
/F/H+dSiSj 
!EXEZ"A`dekSl 	*mSn 
%<%>/DIoSp 
)@)BoMqSr 
)@)BoMsSt 
$;$=4HuSv 	"wSx 
(?(A$tLySz 
&=&?"TJ{S| 
'>'@#dK}S~ 
'>'@#dKS@ 
,C,E(4PASB 
(?(A$tLCSD 
-D-F)DQESF 
#:#<$GGSH 
'>'@#dKISJ 
(?(A$tLKSL 
,C,E(4PMSN 
1H1J-PTUOSP 
)@)B%MQSR 
-D-F)DQSST 
-D-F)DQUSV 
*A*C&NWSX 
,C,E(4PYSZ 
)C)E%4P[S\ 
$;$=4H]S^ 
+B+D$O_S` 
'>'@OdKaSb 
+B+D'$OcSd 	:eSf 
$;$=4HgSh 	%iSj 	(kSl 	.mSn 	.oSp 
.E.G?TRqSr 
(?(A_tLsSt 
*A*C&NuSv 	+wSx 	&ySz 
(?(A$tL{S| 
)@)B%M}S~ 
,C,E4PS@ 
1H1JoPTUASB 
)@)B%MCSD 
%<%>/DIESF 
8O8Q4W[\GSH 
.E.G*TRISJ 
(?(A_tLKSL 
,C,E4PMSN 
(?(A_tLOSP 
0G0I,tTQSR 
0G0I,tTSST 
.E.G*TRUSV 
"9";FWSX 
0G0I,tTYSZ 
'>'@#dK[S\ 
-D-F)DQ]S^ 	"_S` 
(?(A$tLaSb 
)C)E%4PcSd 
(?(A$tLeSf 
)@)B%MgSh 
(B(D$$OiSj 
,C,E(4PkSl 
&=&?"TJmSn 
*A*C&NoSp 	%qSr 
-D-F/DQsSt 
0G0I,tTuSv 	'wSz *, #)@)B%		
ySF *, #)@)B%		
ESR *, #)@)B%		
QS^ *, #)@)B%		
]Sj *, #)@)B%		
iSt 
&@&B"MuSv 
1H1JoPTUwSx 
0G0I,tTySz 
&=&?"TJ{S| 
&=&?"TJ}S~ 	$S@ 
!8!:EASB 
&=&?]TJCSD 
-D-FMDQESF 
"9";FGSH 	"ISJ 
%<%>/DIKSL 
$;$=4HMSN 
(?(A_tLOSP 
*A*CNQSR 
(?(A$tLSST 
/F/H+dSUSV 
'>'@#dKWSX 
(?(A$tLYSZ 
)@)B%M[S\ 
/F/H+dS]S^ 
(?(A$tL_S` 
+B+D$OaSb 
)@)BoMcSd 
/F/H+dSeSf 
#:#<$GgSh 
&=&?"TJiSj 
%<%>/DIkSl 
&=&??TJmSn 
*A*C&NoSp 
,C,E(4PqSr 	,sSt 
#:#<$GuSv 	(wSx 
(?(A}tLyS| *, #)@)B%		
{SF 
(?(A$tLGSH 	.ISJ 
'>'@OdKKSL 
&=&?"TJMSN 
-D-F)DQOSP 
+B+D'$OQSR 
,C,E(4PSST 
*A*C&NUSV 
)@)B%MWSX 
&=&?"TJYSZ 
*A*C&N[S\ 
.E.G*TR]S^ 
*A*C&N_S` 
+B+D'$OaSb 
/F/H+dScSd 
)@)B%MeSf 
-D-F)DQgSh 	 iSj 
%<%>/DIkSl 
0G0I,tTmSn 
,C,E(4PoSp 
*A*C&NqSr 
)@)BoMsSt 	(uSv 	5wSx 	)ySz 
,C,E(4P{S| 
'>'@#dK}S~ 
$;$=4HS@ 
*A*CNASB 
3J3L/RVWCSD 
6M6O2UYZESF 
-D-F)DQGSH 
(B(D$$OISJ 
*A*C&NKSL 
5O5Q1W[\MSN 
,F,H(dSOSP 	*QSR 
+B+D$OSST 
+B+D'$OUSV 
*A*CNWSX 
1H1JPTUYSZ 
 7 9}tD[S\ 
(?(A$tL]S^ 	$_S` 
+B+D'$OaSb 
#:#<$GcSd 
$;$=4HeSf 
"9";FgSh 	.iSj 	2kSl 
$;$=4HmSn 
+B+D$OoSp 	"qSt *, #)@)B%		
sS@ *, #)@)B%		
SJ 	-KSL 	2MSN 	7OSP 	<QSR 
*A*C&NSST 
%<%>/DIUSV 
$;$=4HWSX 	 YSZ 
1H1J-PTU[S\ 
4K4M0SWX]S^ 
&=&?"TJ_S` 
(?(A$tLaSb 
*A*C&NcSd 
&=&?"TJeSU r!7 )3s8 !F < iJ00E\E^.Adh
+i %%9;RS #=#7#=#=#?@41a!Q$@!3# 3$s)d2B 3p 04 %)#"]#&S)9#9]R[[%%,] ] #s(^d"	]
 #:] Dj] ] ] 
#s(^]@wU wUt	 
0I As   9p