
    iK                       d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZmZ dd	lmZ erdd
lmZ ddlmZ  G d dee      Z ej(                  e      Z eg d      Z eg d      Z eg d      Z eg d      Z eg d      Z edg      Z eg d      Z edg      Z eg d      Z edg      Z  eg d      Z! eg d      Z" eddg      Z# eg d      Z$ eg  e%e$jM                               dddd d!d"d#d$d%d&      Z' eg d'      Z( eg d(      Z) eg d)      Z* eg d*      Z+ ed+d,g      Z, ed-g      Z- eg d.      Z. eg d/      Z/ eg d0      Z0 eg d1      Z1 ed2g      Z2 eg d3      Z3 eg d4      Z4 eg d5      Z5 eg d6      Z6 eg d7      Z7 eg d8      Z8 eg d9      Z9 eg d:      Z: eg d;      Z; ed<d=g      Z< eg d>      Z= eg d?      Z> eg d@      Z? eg dA      Z@ edBg      ZA eg dC      ZB eg dD      ZC edEdFg      ZD edGdHg      ZE edIdJg      ZF edKg      ZG eg dL      ZH eee      ZI eee      ZJ eee      ZK eee      ZL eee      ZM eee>      ZN eee      ZO eee      ZP eee       ZQ eee!      ZR eee"      ZS eee$      ZT eee'      ZU eee#      ZV eee3      ZW eee4      ZX eee(      ZY eee      ZZ eee      Z[ eee)      Z\ eee*      Z] eee+      Z^ eee,      Z_ eee-      Z` eee.      Za eee0      Zb eee1      Zc eee2      Zd eee5      Ze eee6      Zf eee7      Zg eee8      Zh eee9      Zi eee/      Zj eee:      Zk eee;      Zl eee<      Zm eee=      Zn eee?      Zo eee@      Zp eeeA      Zq eeeB      Zr eeeC      Zs eeeD      Zt eeeE      Zu eeeF      Zv eeeG      Zw eeeH      Zx G dM dNe
      Zy G dO dPe
      Zz G dQ dRe
      Z{ G dS dTe
      Z| G dU dVe
      Z} G dW dXe
      Z~ ee~      Z~ G dY dZe
      Z eed[\      Z G d] d^e
      Z eed_\      Z G d` dae
      Z eedb\      Z G dc dde
      Z eededfg      Z G dh die
      Z eedj\      Z G dk dle
      Z eedm\      Z G dn doe
      Z eedpdqg      Z G dr dse
      Z eedtdug      Z G dv dwe
      Z eedxdyg      Z G dz d{e
      Z eed|\      Z G d} d~e
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z eed\      Z G d de
      Z G d de
      Z G d de
      Z G d de	      Z eedĬ\      Z G dń de
      Z eedǬ\      Z G dȄ de
      Z eedʬ\      Zg dˢZy)zAuto Model class.    N)OrderedDict)TYPE_CHECKING   )logging   )_BaseAutoBackboneClass_BaseAutoModelClass_LazyAutoMappingauto_class_update)CONFIG_MAPPING_NAMES)GenerationMixin)PreTrainedModelc                       e Zd Zy)_BaseModelWithGenerateN)__name__
__module____qualname__     w/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/auto/modeling_auto.pyr   r   #   s    r   r   (  )afmoe
AfmoeModel)aimv2
Aimv2Modelaimv2_vision_modelAimv2VisionModelalbertAlbertModelalign
AlignModelaltclipAltCLIPModel)apertusApertusModel)arcee
ArceeModel)aria	AriaModel)	aria_textAriaTextModel)audio-spectrogram-transformerASTModelaudioflamingo3&AudioFlamingo3ForConditionalGeneration)audioflamingo3_encoderAudioFlamingo3Encoder)
autoformerAutoformerModel)
aya_visionAyaVisionModel)bamba
BambaModelbark	BarkModel)bart	BartModelbeit	BeitModelbert	BertModel)bert-generationBertGenerationEncoderbig_birdBigBirdModel)bigbird_pegasusBigBirdPegasusModel)biogptBioGptModelbitBitModel)bitnetBitNetModel)
blenderbotBlenderbotModel)blenderbot-smallBlenderbotSmallModelblip	BlipModel)blip-2
Blip2Model)blip_2_qformerBlip2QFormerModel)bloom
BloomModel)bltBltModel)bridgetowerBridgeTowerModel)bros	BrosModel)	camembertCamembertModel)canineCanineModel)	chameleonChameleonModelchinese_clipChineseCLIPModel)chinese_clip_vision_modelChineseCLIPVisionModel)clap	ClapModelclip	CLIPModelclip_text_modelCLIPTextModel)clip_vision_modelCLIPVisionModelclipsegCLIPSegModel)clvp!ClvpModelForConditionalGeneration)
code_llama
LlamaModel)codegenCodeGenModel)cohereCohereModel)cohere2Cohere2Modelcohere2_visionCohere2VisionModel)
cohere_asrCohereAsrModelconditional_detrConditionalDetrModel)convbertConvBertModelconvnextConvNextModel
convnextv2ConvNextV2Model)cpmantCpmAntModelcsmCsmForConditionalGeneration)ctrl	CTRLModel)cvtCvtModel)cwmCwmModel)d_fine
DFineModeldab-detrDabDetrModeldacDacModel)data2vec-audioData2VecAudioModeldata2vec-textData2VecTextModeldata2vec-visionData2VecVisionModel)dbrx	DbrxModeldebertaDebertaModel
deberta-v2DebertaV2Model)decision_transformerDecisionTransformerModel)deepseek_v2DeepseekV2Model)deepseek_v3DeepseekV3Model)deepseek_vlDeepseekVLModel)deepseek_vl_hybridDeepseekVLHybridModeldeformable_detrDeformableDetrModeldeit	DeiTModel	depth_proDepthProModeldetr	DetrModel)diaDiaModel)	diffllamaDiffLlamaModeldinat
DinatModeldinov2Dinov2Modeldinov2_with_registersDinov2WithRegistersModeldinov3_convnextDINOv3ConvNextModel
dinov3_vitDINOv3ViTModel
distilbertDistilBertModel)doge	DogeModel)
donut-swinDonutSwinModel)dots1
Dots1Model)dprDPRQuestionEncoderdptDPTModeledgetamEdgeTamModel)edgetam_videoEdgeTamVideoModel)edgetam_vision_modelEdgeTamVisionModel)efficientloftrEfficientLoFTRModelefficientnetEfficientNetModelelectraElectraModel)emu3	Emu3Model)encodecEncodecModel)ernie
ErnieModel)ernie4_5Ernie4_5Model)ernie4_5_moeErnie4_5_MoeModel)ernie4_5_vl_moeErnie4_5_VLMoeModel)esmEsmModel)eurobertEuroBertModel)evollaEvollaModel)exaone4Exaone4Model)
exaone_moeExaoneMoeModel)falconFalconModel)	falcon_h1FalconH1Model)falcon_mambaFalconMambaModel)fast_vlmFastVlmModelfastspeech2_conformerFastSpeech2ConformerModel"fastspeech2_conformer_with_hifiganFastSpeech2ConformerWithHifiGanflaubertFlaubertModel)flava
FlavaModel)	flex_olmoFlexOlmoModel)	florence2Florence2Model)fnet	FNetModelfocalnetFocalNetModel)fsmt	FSMTModel)funnel)FunnelModelFunnelBaseModel)fuyu	FuyuModel)gemma
GemmaModel)gemma2Gemma2Model)gemma3Gemma3Model)gemma3_textGemma3TextModel)gemma3nGemma3nModel)gemma3n_audioGemma3nAudioEncoder)gemma3n_textGemma3nTextModel)gemma3n_visionTimmWrapperModel)gemma4Gemma4Model)gemma4_audioGemma4AudioModel)gemma4_textGemma4TextModel)gemma4_visionGemma4VisionModel)gitGitModel)glmGlmModel)glm4	Glm4Model)glm46vGlm46VModel)glm4_moeGlm4MoeModel)glm4_moe_liteGlm4MoeLiteModel)glm4v
Glm4vModel)	glm4v_moeGlm4vMoeModel)glm4v_moe_textGlm4vMoeTextModel)glm4v_moe_visionGlm4vMoeVisionModel)
glm4v_textGlm4vTextModel)glm4v_visionGlm4vVisionModel)	glm_imageGlmImageModel)glm_image_textGlmImageTextModel)glm_image_visionGlmImageVisionModel)glm_image_vqmodelGlmImageVQVAE)glm_moe_dsaGlmMoeDsaModel)glm_ocrGlmOcrModel)glm_ocr_textGlmOcrTextModel)glm_ocr_visionGlmOcrVisionModelglmasrGlmAsrForConditionalGeneration)glmasr_encoderGlmAsrEncoderglpn	GLPNModel)got_ocr2GotOcr2Model)gpt-sw3	GPT2Model)gpt2r  )gpt_bigcodeGPTBigCodeModel)gpt_neoGPTNeoModel)gpt_neoxGPTNeoXModel)gpt_neox_japaneseGPTNeoXJapaneseModel)gpt_ossGptOssModel)gptj	GPTJModel)graniteGraniteModel)
granitemoeGraniteMoeModel)granitemoehybridGraniteMoeHybridModel)granitemoesharedGraniteMoeSharedModel)grounding-dinoGroundingDinoModel)groupvitGroupViTModel)heliumHeliumModelhgnet_v2HGNetV2Backbonehiera
HieraModelhiggs_audio_v2$HiggsAudioV2ForConditionalGenerationhiggs_audio_v2_tokenizerHiggsAudioV2TokenizerModel)hubertHubertModel)hunyuan_v1_denseHunYuanDenseV1Model)hunyuan_v1_moeHunYuanMoEV1Modelibert
IBertModel)ideficsIdeficsModel)idefics2Idefics2Model)idefics3Idefics3Model)idefics3_visionIdefics3VisionTransformerijepa
IJepaModelimagegptImageGPTModel)informerInformerModel)instructblipInstructBlipModel)instructblipvideoInstructBlipVideoModel)internvlInternVLModel)internvl_visionInternVLVisionModel)jais2
Jais2Model)jamba
JambaModel)janus
JanusModel)jetmoeJetMoeModel)jina_embeddings_v3JinaEmbeddingsV3Model)kosmos-2Kosmos2Model)
kosmos-2.5Kosmos2_5Model)kyutai_speech_to_textKyutaiSpeechToTextModellasr_ctc
LasrForCTC)lasr_encoderLasrEncoder)layoutlmLayoutLMModel)
layoutlmv2LayoutLMv2Model)
layoutlmv3LayoutLMv3Model)ledLEDModellevit
LevitModel)lfm2	Lfm2Model)lfm2_moeLfm2MoeModel)lfm2_vlLfm2VlModel	lightglueLightGlueForKeypointMatching)lighton_ocrLightOnOcrModel)lilt	LiltModel)llamar   llama4Llama4ForConditionalGeneration)llama4_textLlama4TextModel)llava
LlavaModel)
llava_nextLlavaNextModel)llava_next_videoLlavaNextVideoModel)llava_onevisionLlavaOnevisionModel)longcat_flashLongcatFlashModel
longformerLongformerModel)longt5LongT5Model)luke	LukeModel)lw_detrLwDetrModel)lxmertLxmertModel)m2m_100M2M100Model)mamba
MambaModel)mamba2Mamba2Model)marianMarianModel)markuplmMarkupLMModel)mask2formerMask2FormerModel)
maskformerMaskFormerModel)maskformer-swinMaskFormerSwinModel)mbart
MBartModel)megatron-bertMegatronBertModel
metaclip_2MetaClip2Model)zmgp-strMgpstrForSceneTextRecognition)mimi	MimiModel)minimaxMiniMaxModel)
minimax_m2MiniMaxM2Model)	ministralMinistralModel)
ministral3Ministral3Model)mistralMistralModel)mistral3Mistral3Model)mistral4Mistral4Model)mixtralMixtralModelmlcdMLCDVisionModelmlcd_vision_modelrD  )mllamaMllamaModel)mm-grounding-dinoMMGroundingDinoModel
mobilebertMobileBertModelmobilenet_v1MobileNetV1Modelmobilenet_v2MobileNetV2Model	mobilevitMobileViTModelmobilevitv2MobileViTV2Model)
modernbertModernBertModel)modernbert-decoderModernBertDecoderModel)modernvbertModernVBertModel)	moonshineMoonshineModel)moonshine_streamingMoonshineStreamingModel)moshi
MoshiModel)mpnet
MPNetModel)mptMptModel)mraMraModel)mt5MT5Modelmusicflamingo%MusicFlamingoForConditionalGeneration)musicflamingo_encoderr5   )musicgenMusicgenModel)musicgen_melodyMusicgenMelodyModel)mvpMvpModel)nanochatNanoChatModel)nemotronNemotronModel)
nemotron_hNemotronHModel)nllb-moeNllbMoeModel)
nomic_bertNomicBertModelnystromformerNystromformerModel)olmo	OlmoModel)olmo2
Olmo2Model)olmo3
Olmo3Model)olmo_hybridOlmoHybridModel)olmoe
OlmoeModelzomdet-turboOmDetTurboForObjectDetection)	oneformerOneFormerModel)
openai-gptOpenAIGPTModel)optOPTModel)ovis2
Ovis2Model)owlv2
Owlv2Model)owlvitOwlViTModel)	paligemmaPaliGemmaModelparakeet_ctcParakeetForCTC)parakeet_encoderParakeetEncoder)patchtsmixerPatchTSMixerModel)patchtstPatchTSTModel)pe_audioPeAudioModel)pe_audio_encoderPeAudioEncoder)pe_audio_videoPeAudioVideoModel)pe_audio_video_encoderPeAudioVideoEncoder)pe_videoPeVideoModel)pe_video_encoderPeVideoEncoder)pegasusPegasusModel)	pegasus_xPegasusXModel)	perceiverPerceiverModel)perception_lmPerceptionLMModel)	persimmonPersimmonModel)phiPhiModel)phi3	Phi3Model)phi4_multimodalPhi4MultimodalModel)phimoePhimoeModel)pi0PI0Modelpixio
PixioModel)pixtralPixtralVisionModel)plbartPLBartModel
poolformerPoolFormerModel)pp_doclayout_v3PPDocLayoutV3Model)pp_ocrv5_mobile_recPPOCRV5MobileRecModel)pp_ocrv5_server_recPPOCRV5ServerRecModel)
prophetnetProphetNetModelpvtPvtModel)pvt_v2
PvtV2Model)qwen2
Qwen2Model)
qwen2_5_vlQwen2_5_VLModel)qwen2_5_vl_textQwen2_5_VLTextModel)qwen2_audio_encoderQwen2AudioEncoder)	qwen2_moeQwen2MoeModel)qwen2_vlQwen2VLModel)qwen2_vl_textQwen2VLTextModel)qwen3
Qwen3Model)qwen3_5Qwen3_5Model)qwen3_5_moeQwen3_5MoeModel)qwen3_5_moe_textQwen3_5MoeTextModel)qwen3_5_textQwen3_5TextModel)	qwen3_moeQwen3MoeModel)
qwen3_nextQwen3NextModel)qwen3_vlQwen3VLModel)qwen3_vl_moeQwen3VLMoeModel)qwen3_vl_moe_textQwen3VLMoeTextModel)qwen3_vl_textQwen3VLTextModel)recurrent_gemmaRecurrentGemmaModelreformerReformerModelregnetRegNetModelrembertRemBertModelresnetResNetModelrobertaRobertaModelroberta-prelayernormRobertaPreLayerNormModelroc_bertRoCBertModelroformerRoFormerModel)rt_detrRTDetrModel)
rt_detr_v2RTDetrV2Model)rwkv	RwkvModelsamSamModelsam2	Sam2Model)sam2_hiera_det_modelSam2HieraDetModel)
sam2_videoSam2VideoModel)sam2_vision_modelSam2VisionModel)sam3	Sam3Modelsam3_trackerSam3TrackerModelr1  )sam3_tracker_videoSam3TrackerVideoModel)
sam3_videoSam3VideoModel)sam3_vision_modelSam3VisionModel)sam3_vit_modelSam3ViTModelsam_hq
SamHQModel)sam_hq_vision_modelSamHQVisionModel)sam_vision_modelSamVisionModel)seamless_m4tSeamlessM4TModel)seamless_m4t_v2SeamlessM4Tv2Model)seed_ossSeedOssModel	segformerSegformerModel)seggptSegGptModel)sewSEWModel)sew-d	SEWDModelsiglipSiglipModelsiglip2Siglip2Model)siglip2_vision_modelSiglip2VisionModelsiglip_vision_modelSiglipVisionModel)smollm3SmolLM3Model)smolvlmSmolVLMModel)smolvlm_visionSmolVLMVisionTransformer)
solar_openSolarOpenModel)speech_to_textSpeech2TextModel)speecht5SpeechT5Model)splinterSplinterModelsqueezebertSqueezeBertModel)stablelmStableLmModel)
starcoder2Starcoder2ModelswiftformerSwiftFormerModelswin	SwinModelswin2srSwin2SRModelswinv2Swinv2Model)switch_transformersSwitchTransformersModel)t5T5Model)t5gemmaT5GemmaModel)t5gemma2T5Gemma2Model)t5gemma2_encoderT5Gemma2Encodertable-transformerTableTransformerModel)tapas
TapasModel)textnetTextNetModel)time_series_transformerTimeSeriesTransformerModel)timesfmTimesFmModel)
timesfm2_5TimesFm2_5ModeltimesformerTimesformerModeltimm_backboneTimmBackbonetimm_wrapperrK  )tvpTvpModel)udop	UdopModel)umt5	UMT5Model)	unispeechUniSpeechModel)unispeech-satUniSpeechSatModel)univnetUnivNetModel)uvdoc
UVDocModel)
vaultgemmaVaultGemmaModelvibevoice_acoustic_tokenizerVibeVoiceAcousticTokenizerModel)$vibevoice_acoustic_tokenizer_decoder&VibeVoiceAcousticTokenizerDecoderModel)$vibevoice_acoustic_tokenizer_encoder&VibeVoiceAcousticTokenizerEncoderModelvibevoice_asr$VibeVoiceAsrForConditionalGeneration)video_llama_3VideoLlama3Model)video_llama_3_visionVideoLlama3VisionModel)video_llavaVideoLlavaModelvideomaeVideoMAEModel)vilt	ViltModel)vipllavaVipLlavaModel)zvision-text-dual-encoderVisionTextDualEncoderModel)visual_bertVisualBertModelvitViTModelvit_maeViTMAEModelvit_msnViTMSNModelvitdetVitDetModelvits	VitsModelvivit
VivitModel)vjepa2VJEPA2ModelvoxtralVoxtralForConditionalGeneration)voxtral_encoderVoxtralEncodervoxtral_realtime'VoxtralRealtimeForConditionalGeneration)voxtral_realtime_encoderVoxtralRealtimeEncoder)voxtral_realtime_textVoxtralRealtimeTextModel)wav2vec2Wav2Vec2Model)wav2vec2-bertWav2Vec2BertModel)wav2vec2-conformerWav2Vec2ConformerModel)wavlm
WavLMModel)whisperWhisperModel)xclip
XCLIPModel)xcodecXcodecModel)xglm	XGLMModelxlmXLMModelxlm-robertaXLMRobertaModelxlm-roberta-xlXLMRobertaXLModel)xlnet
XLNetModel)xlstm
xLSTMModel)xmod	XmodModelyolos
YolosModel)yoso	YosoModel)youtu
YoutuModel)zamba
ZambaModel)zamba2Zamba2Model)])r   AlbertForPreTrainingr1   r?   BartForConditionalGeneration)rE   BertForPreTraining)rJ   BigBirdForPreTrainingr`   BloomForCausalLMrh   CamembertForMaskedLMcolmodernvbertColModernVBertForRetrievalcolpaliColPaliForRetrieval)colqwen2ColQwen2ForRetrievalr   CTRLLMHeadModelr   Data2VecTextForMaskedLMr   DebertaForMaskedLMr   DebertaV2ForMaskedLMr   DistilBertForMaskedLM)r  ElectraForPreTraining)r  ErnieForPreTrainingr  EvollaForProteinText2Textr  Exaone4ForCausalLMr  ExaoneMoeForCausalLMr  FalconMambaForCausalLMr(  FlaubertWithLMHeadModel)r*  FlavaForPreTrainingr.  !Florence2ForConditionalGeneration)r0  FNetForPreTrainingr5  FSMTForConditionalGeneration)r7  FunnelForPreTrainingr@  Gemma3ForConditionalGenerationrL  Gemma4ForConditionalGenerationr|  r  GPT2LMHeadModelr  rD  r  GPTBigCodeForCausalLM)r  HieraForPreTrainingr  IBertForMaskedLMr  IdeficsForVisionText2Textr   Idefics2ForConditionalGenerationr   Idefics3ForConditionalGenerationr  JanusForConditionalGenerationr  LayoutLMForMaskedLMr  LlavaForConditionalGenerationr  !LlavaNextForConditionalGenerationr  &LlavaNextVideoForConditionalGenerationr	  &LlavaOnevisionForConditionalGenerationr  LongformerForMaskedLMr  LukeForMaskedLM)r  LxmertForPreTrainingr  MambaForCausalLMr  Mamba2ForCausalLM)r*  MegatronBertForPreTrainingr<   Mistral3ForConditionalGenerationr>  Mistral4ForCausalLMrG  MllamaForConditionalGeneration)rL  MobileBertForPreTrainingrf  MPNetForMaskedLMrh  MptForCausalLMrj  MraForMaskedLMrn  rv  MvpForConditionalGenerationrx  NanoChatForCausalLMr~  NllbMoeForConditionalGenerationr  OpenAIGPTLMHeadModelr  !PaliGemmaForConditionalGenerationqwen2_audio"Qwen2AudioForConditionalGenerationr  RobertaForMaskedLMr  RobertaPreLayerNormForMaskedLM)r  RoCBertForPreTrainingr!  RwkvForCausalLM)ri  SplinterForPreTrainingrl  SqueezeBertForMaskedLMr~  *SwitchTransformersForConditionalGenerationr  T5ForConditionalGenerationr  T5GemmaForConditionalGenerationr   T5Gemma2ForConditionalGenerationr  TapasForMaskedLM)r  UniSpeechForPreTraining)r  UniSpeechSatForPreTrainingr  r  "VideoLlavaForConditionalGeneration)r  VideoMAEForPreTrainingr   VipLlavaForConditionalGeneration)r  VisualBertForPreTraining)r  ViTMAEForPreTrainingr  r  )r  Wav2Vec2ForPreTraining)r  Wav2Vec2ConformerForPreTrainingr  XLMWithLMHeadModelr  XLMRobertaForMaskedLMr  XLMRobertaXLForMaskedLMr   XLNetLMHeadModelr  xLSTMForCausalLMr  XmodForMaskedLM))r   AfmoeForCausalLM)r'   ApertusForCausalLM)r)   ArceeForCausalLM)r-   AriaTextForCausalLM)r:   BambaForCausalLM)r?   BartForCausalLM)rE   BertLMHeadModel)rG   BertGenerationDecoder)rJ   BigBirdForCausalLM)rL   BigBirdPegasusForCausalLM)rN   BioGptForCausalLM)rS   BitNetForCausalLM)rU   BlenderbotForCausalLM)rW   BlenderbotSmallForCausalLMr  )rb   BltForCausalLM)rh   CamembertForCausalLM)r   LlamaForCausalLM)r   CodeGenForCausalLM)r   CohereForCausalLM)r   Cohere2ForCausalLM)r   CpmAntForCausalLMr"  )r   CwmForCausalLM)r   Data2VecTextForCausalLM)r   DbrxForCausalLM)r   DeepseekV2ForCausalLM)r   DeepseekV3ForCausalLM)r   DiffLlamaForCausalLM)r   DogeForCausalLM)r   Dots1ForCausalLM)r  ElectraForCausalLM)r  Emu3ForCausalLM)r  ErnieForCausalLM)r	  Ernie4_5ForCausalLM)r  Ernie4_5_MoeForCausalLMr0  r2  )r  FalconForCausalLM)r  FalconH1ForCausalLMr4  )r,  FlexOlmoForCausalLMr:  FuyuForCausalLM)r<  GemmaForCausalLM)r>  Gemma2ForCausalLMr?  )rB  Gemma3ForCausalLMrD  Gemma3nForConditionalGeneration)rH  Gemma3nForCausalLMrA  )rP  Gemma4ForCausalLMrT  GitForCausalLM)rV  GlmForCausalLM)rX  Glm4ForCausalLM)r\  Glm4MoeForCausalLM)r^  Glm4MoeLiteForCausalLM)rt  GlmMoeDsaForCausalLMr  GotOcr2ForConditionalGenerationrC  rE  rF  )r  GPTNeoForCausalLM)r  GPTNeoXForCausalLM)r  GPTNeoXJapaneseForCausalLM)r  GptOssForCausalLM)r  GPTJForCausalLM)r  GraniteForCausalLM)r  GraniteMoeForCausalLM)r  GraniteMoeHybridForCausalLM)r  GraniteMoeSharedForCausalLM)r  HeliumForCausalLM)r  HunYuanDenseV1ForCausalLM)r  HunYuanMoEV1ForCausalLM)r  Jais2ForCausalLM)r  JambaForCausalLM)r  JetMoeForCausalLM)r  Lfm2ForCausalLM)r  Lfm2MoeForCausalLM)r  r  )r  Llama4ForCausalLM)r  r  )r  LongcatFlashForCausalLMrb  rd  )r  MarianForCausalLM)r(  MBartForCausalLM)r*  MegatronBertForCausalLM)r2  MiniMaxForCausalLM)r4  MiniMaxM2ForCausalLM)r6  MinistralForCausalLM)r8  Ministral3ForCausalLM)r:  MistralForCausalLM)r@  MixtralForCausalLM)rG  MllamaForCausalLM)r\  ModernBertDecoderForCausalLM)rd  MoshiForCausalLMrp  )rr  MusicgenForCausalLM)rt  MusicgenMelodyForCausalLM)rv  MvpForCausalLMrv  )rz  NemotronForCausalLM)r|  NemotronHForCausalLM)r  OlmoForCausalLM)r  Olmo2ForCausalLM)r  Olmo3ForCausalLM)r  OlmoHybridForCausalLM)r  OlmoeForCausalLMrz  )r  OPTForCausalLM)r  PegasusForCausalLM)r  PersimmonForCausalLM)r  PhiForCausalLM)r  Phi3ForCausalLMr  Phi4MultimodalForCausalLM)r  PhimoeForCausalLM)r  PLBartForCausalLM)r  ProphetNetForCausalLM)r  Qwen2ForCausalLM)r  Qwen2MoeForCausalLM)r  Qwen3ForCausalLM)r  Qwen3_5ForCausalLM)r  Qwen3_5MoeForCausalLM)r  r  )r  r  )r  Qwen3MoeForCausalLM)r  Qwen3NextForCausalLM)r  RecurrentGemmaForCausalLM)r  ReformerModelWithLMHead)r  RemBertForCausalLM)r  RobertaForCausalLM)r  RobertaPreLayerNormForCausalLM)r  RoCBertForCausalLM)r  RoFormerForCausalLMr  )rG  SeedOssForCausalLM)r]  SmolLM3ForCausalLM)rc  SolarOpenForCausalLM)rn  StableLmForCausalLM)rp  Starcoder2ForCausalLM)trocrTrOCRForCausalLM)r  VaultGemmaForCausalLM)r  WhisperForCausalLM)r  XGLMForCausalLMr  )r  XLMRobertaForCausalLM)r  XLMRobertaXLForCausalLMr  r  )r  XmodForCausalLM)r  YoutuForCausalLM)r  ZambaForCausalLM)r  Zamba2ForCausalLM)8r   rA   rP   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r2  r  r  r  r  r  )r  Llama4VisionModelrB  rE  )rG  MllamaVisionModelrN  rQ  rT  rW  r  r  r  r  r  rI  rZ  rr  ru  rx  r{  r  r  r  r  r  r  r  r  r  r  r  ))r   DeiTForMaskedImageModeling)r3  FocalNetForMaskedImageModeling)rv  SwinForMaskedImageModeling)r|  Swinv2ForMaskedImageModeling)r  ViTForMaskedImageModeling)r  ImageGPTForCausalImageModeling)*)rB   BeitForImageClassification)rQ   BitForImageClassification)rv   CLIPForImageClassification)r   ConvNextForImageClassification)r    ConvNextV2ForImageClassification)r   CvtForImageClassification)r   $Data2VecVisionForImageClassification)r   )DeiTForImageClassification%DeiTForImageClassificationWithTeacher)r   DinatForImageClassification)r   Dinov2ForImageClassification)r   )Dinov2WithRegistersForImageClassification)r   DonutSwinForImageClassification)r   "EfficientNetForImageClassification)r3  FocalNetForImageClassification)r  HGNetV2ForImageClassification)r  HieraForImageClassification)r  IJepaForImageClassification)r  ImageGPTForImageClassification)r  )LevitForImageClassification&LevitForImageClassificationWithTeacher)r-  MetaClip2ForImageClassification)rO  !MobileNetV1ForImageClassification)rR  !MobileNetV2ForImageClassification)rU  MobileViTForImageClassification)rX  !MobileViTV2ForImageClassification)r  )&PerceiverForImageClassificationLearned&PerceiverForImageClassificationFourier-PerceiverForImageClassificationConvProcessing)r   PoolFormerForImageClassification)pp_lcnetPPLCNetForImageClassification)r  PvtForImageClassification)r  PvtV2ForImageClassification)r	  RegNetForImageClassification)r  ResNetForImageClassification)rJ  SegformerForImageClassification)shieldgemma2"ShieldGemma2ForImageClassification)rS  SiglipForImageClassification)rV  Siglip2ForImageClassification)rs  !SwiftFormerForImageClassification)rv  SwinForImageClassification)r|  Swinv2ForImageClassification)r  TextNetForImageClassification)r  !TimmWrapperForImageClassification)r  ViTForImageClassification)r  ViTMSNForImageClassificationr   DetrForSegmentation))rB   BeitForSemanticSegmentation)r   %Data2VecVisionForSemanticSegmentation)r   DPTForSemanticSegmentation)rR  "MobileNetV2ForSemanticSegmentation)rU   MobileViTForSemanticSegmentation)rX  "MobileViTV2ForSemanticSegmentation)rJ   SegformerForSemanticSegmentation)upernetUperNetForSemanticSegmentationr$  !MaskFormerForInstanceSegmentation)rl  )eomtEomtForUniversalSegmentation)eomt_dinov3"EomtDinov3ForUniversalSegmentation)r"  #Mask2FormerForUniversalSegmentationrw  )r  !OneFormerForUniversalSegmentation)videomtVideomtForUniversalSegmentation))r  !TimesformerForVideoClassification)r  VideoMAEForVideoClassification)r  VivitForVideoClassification)r  VJEPA2ForVideoClassificationr  r  )@)r+   AriaForConditionalGeneration)r8   !AyaVisionForConditionalGeneration)rZ   BlipForConditionalGenerationr\   Blip2ForConditionalGeneration)rl   !ChameleonForConditionalGeneration)r   %Cohere2VisionForConditionalGeneration)r   "DeepseekVLForConditionalGeneration)r   (DeepseekVLHybridForConditionalGeneration)r  Emu3ForConditionalGeneration)r  &Ernie4_5_VLMoeForConditionalGenerationr.  )r  FastVlmForConditionalGenerationr9  r  r?  r  rA  r  )rZ  Glm46VForConditionalGeneration)r`  Glm4vForConditionalGeneration)rb   Glm4vMoeForConditionalGeneration)rv  GlmOcrForConditionalGenerationr  rK  rM  rO  )r  $InstructBlipForConditionalGeneration)r  )InstructBlipVideoForConditionalGeneration)r   InternVLForConditionalGenerationrQ  )r  Kosmos2ForConditionalGeneration)r  !Kosmos2_5ForConditionalGeneration)r  Lfm2VlForConditionalGeneration)r  "LightOnOcrForConditionalGenerationr  rU  rW  rY  r[  rg  ri  rk  )r  Ovis2ForConditionalGeneration)paddleocr_vl#PaddleOCRVLForConditionalGenerationr|  )r  $PerceptionLMForConditionalGeneration)r  PI0ForConditionalGeneration)
pix2struct"Pix2StructForConditionalGeneration)r  rV  )pp_chart2tabler  )r  "Qwen2_5_VLForConditionalGeneration)r  Qwen2VLForConditionalGeneration)r  Qwen3_5ForConditionalGeneration)r  "Qwen3_5MoeForConditionalGeneration)r  Qwen3VLForConditionalGeneration)r  "Qwen3VLMoeForConditionalGeneration)ra  r@  )r_  SmolVLMForConditionalGenerationr  )r  UdopForConditionalGeneration)r  #VideoLlama3ForConditionalGenerationr  r  )zvision-encoder-decoderVisionEncoderDecoderModelr|  granite_speech%GraniteSpeechForConditionalGenerationr  *KyutaiSpeechToTextForConditionalGenerationr  qwen2_5_omni#Qwen2_5OmniForConditionalGenerationr~  qwen3_omni_moe$Qwen3OmniMoeForConditionalGenerationr  r  r  ).)r   AlbertForMaskedLMr  )rE   BertForMaskedLM)rJ   BigBirdForMaskedLMr  )r   ConvBertForMaskedLMr$  r&  r(  r*  )r  ElectraForMaskedLM)r  ErnieForMaskedLM)r  EsmForMaskedLM)r  EuroBertForMaskedLMr6  )r0  FNetForMaskedLM)r7  FunnelForMaskedLMrI  )r  JinaEmbeddingsV3ForMaskedLMrS  r]  r_  r(  MBartForConditionalGeneration)r*  MegatronBertForMaskedLM)rL  MobileBertForMaskedLM)rZ  ModernBertForMaskedLM)r^  ModernVBertForMaskedLMrn  rr  rt  )r  NomicBertForMaskedLM)r  NystromformerForMaskedLM)r  PerceiverForMaskedLM)r  ReformerForMaskedLM)r  RemBertForMaskedLMr  r  )r  RoCBertForMaskedLM)r  RoFormerForMaskedLMr  r  r  r  r  r  )r	  YosoForMaskedLM))r   !ConditionalDetrForObjectDetection)r   DFineForObjectDetection)r   DabDetrForObjectDetection)r    DeformableDetrForObjectDetection)r   DetrForObjectDetection)r  LwDetrForObjectDetection)pp_doclayout_v2PPDocLayoutV2ForObjectDetection)r  PPDocLayoutV3ForObjectDetection)pp_ocrv5_mobile_det"PPOCRV5MobileDetForObjectDetection)pp_ocrv5_server_det"PPOCRV5ServerDetForObjectDetection)r  RTDetrForObjectDetection)r  RTDetrV2ForObjectDetection)r  "TableTransformerForObjectDetection)r  YolosForObjectDetection))r  GroundingDinoForObjectDetection)rI  !MMGroundingDinoForObjectDetectionr  )r  Owlv2ForObjectDetection)r  OwlViTForObjectDetection))chmv2CHMv2ForDepthEstimation)depth_anythingDepthAnythingForDepthEstimation)r   DepthProForDepthEstimation)r   DPTForDepthEstimation)r  GLPNForDepthEstimation)prompt_depth_anything%PromptDepthAnythingForDepthEstimation)zoedepthZoeDepthForDepthEstimation)r  "PPOCRV5MobileRecForTextRecognition)r  "PPOCRV5ServerRecForTextRecognition)slanextSLANeXtForTableRecognition)!r1   r  )rL   &BigBirdPegasusForConditionalGeneration)rU   "BlenderbotForConditionalGeneration)rW   'BlenderbotSmallForConditionalGeneration)zencoder-decoderEncoderDecoderModelr<  r|  r  )r  LEDForConditionalGeneration)r  LongT5ForConditionalGeneration)r  M2M100ForConditionalGeneration)r  MarianMTModelr  )rl  MT5ForConditionalGenerationrn  rt  rx  )r  PegasusForConditionalGeneration)r   PegasusXForConditionalGeneration)r  PLBartForConditionalGeneration)r  "ProphetNetForConditionalGenerationr~  )rC  SeamlessM4TForTextToText)rE  SeamlessM4Tv2ForTextToTextr  r  r  r  )r  UMT5ForConditionalGenerationr  r  r  ))r   !CohereAsrForConditionalGeneration)r   DiaForConditionalGenerationr  r  )r`  !MoonshineForConditionalGeneration)rb  *MoonshineStreamingForConditionalGeneration)	pop2piano!Pop2PianoForConditionalGeneration)rC  SeamlessM4TForSpeechToText)rE  SeamlessM4Tv2ForSpeechToText)zspeech-encoder-decoderSpeechEncoderDecoderModel)re  #Speech2TextForConditionalGeneration)rg  SpeechT5ForSpeechToTextr  r  r  )r  WhisperForConditionalGeneration)u)r   AlbertForSequenceClassification)r)   ArceeForSequenceClassification)r?   BartForSequenceClassification)rE   BertForSequenceClassification)rJ    BigBirdForSequenceClassification)rL   'BigBirdPegasusForSequenceClassification)rN   BioGptForSequenceClassification)r`   BloomForSequenceClassification)rh   "CamembertForSequenceClassification)rj   CanineForSequenceClassification)r   LlamaForSequenceClassification)r   !ConvBertForSequenceClassification)r   CTRLForSequenceClassification)r   %Data2VecTextForSequenceClassification)r    DebertaForSequenceClassification)r   "DebertaV2ForSequenceClassification)r   #DeepseekV2ForSequenceClassification)r   #DeepseekV3ForSequenceClassification)r   "DiffLlamaForSequenceClassification)r   #DistilBertForSequenceClassification)r   DogeForSequenceClassification)r   ElectraForSequenceClassification)r  ErnieForSequenceClassification)r  EsmForSequenceClassification)r  !EuroBertForSequenceClassification)r   Exaone4ForSequenceClassification)r  FalconForSequenceClassification)r(  !FlaubertForSequenceClassification)r0  FNetForSequenceClassification)r7  FunnelForSequenceClassification)r<  GemmaForSequenceClassification)r>  Gemma2ForSequenceClassification)r@  Gemma3ForSequenceClassification)rB  #Gemma3TextForSequenceClassification)rV  GlmForSequenceClassification)rX  Glm4ForSequenceClassification)r  GPT2ForSequenceClassification)r  r6  )r  #GPTBigCodeForSequenceClassification)r  GPTNeoForSequenceClassification)r   GPTNeoXForSequenceClassification)r  GptOssForSequenceClassification)r  GPTJForSequenceClassification)r  HeliumForSequenceClassification)r  'HunYuanDenseV1ForSequenceClassification)r  %HunYuanMoEV1ForSequenceClassification)r  IBertForSequenceClassification)r  JambaForSequenceClassification)r  JetMoeForSequenceClassification)r  )JinaEmbeddingsV3ForSequenceClassification)r  !LayoutLMForSequenceClassification)r  #LayoutLMv2ForSequenceClassification)r  #LayoutLMv3ForSequenceClassification)r  LiltForSequenceClassification)r  r  )r  #LongformerForSequenceClassification)r  LukeForSequenceClassification)r   !MarkupLMForSequenceClassification)r(  MBartForSequenceClassification)r*  %MegatronBertForSequenceClassification)r2   MiniMaxForSequenceClassification)r6  "MinistralForSequenceClassification)r8  #Ministral3ForSequenceClassification)r:   MistralForSequenceClassification)r>  !Mistral4ForSequenceClassification)r@   MixtralForSequenceClassification)rL  #MobileBertForSequenceClassification)rZ  #ModernBertForSequenceClassification)r\  *ModernBertDecoderForSequenceClassification)r^  $ModernVBertForSequenceClassification)rf  MPNetForSequenceClassification)rh  MptForSequenceClassification)rj  MraForSequenceClassification)rl  MT5ForSequenceClassification)rv  MvpForSequenceClassification)rz  !NemotronForSequenceClassification)r  "NomicBertForSequenceClassification)r  &NystromformerForSequenceClassification)r  "OpenAIGPTForSequenceClassification)r  OPTForSequenceClassification)r  "PerceiverForSequenceClassification)r  "PersimmonForSequenceClassification)r  PhiForSequenceClassification)r  Phi3ForSequenceClassification)r  PhimoeForSequenceClassification)r  PLBartForSequenceClassification)r  Qwen2ForSequenceClassification)r  !Qwen2MoeForSequenceClassification)r  Qwen3ForSequenceClassification)r   Qwen3_5ForSequenceClassification)r  ri  )r  !Qwen3MoeForSequenceClassification)r  "Qwen3NextForSequenceClassification)r  !ReformerForSequenceClassification)r   RemBertForSequenceClassification)r   RobertaForSequenceClassification)r  ,RobertaPreLayerNormForSequenceClassification)r   RoCBertForSequenceClassification)r  !RoFormerForSequenceClassification)rG   SeedOssForSequenceClassification)r]   SmolLM3ForSequenceClassification)rl  $SqueezeBertForSequenceClassification)rn  !StableLmForSequenceClassification)rp  #Starcoder2ForSequenceClassification)r  T5ForSequenceClassification)r   T5GemmaForSequenceClassification)r  !T5Gemma2ForSequenceClassification)r  TapasForSequenceClassification)r  UMT5ForSequenceClassification)r  XLMForSequenceClassification)r  #XLMRobertaForSequenceClassification)r  %XLMRobertaXLForSequenceClassification)r   XLNetForSequenceClassification)r  XmodForSequenceClassification)r	  YosoForSequenceClassification)r  ZambaForSequenceClassification)r  Zamba2ForSequenceClassification)M)r   AlbertForQuestionAnswering)r)   ArceeForQuestionAnswering)r?   BartForQuestionAnswering)rE   BertForQuestionAnswering)rJ   BigBirdForQuestionAnswering)rL   "BigBirdPegasusForQuestionAnswering)r`   BloomForQuestionAnswering)rh   CamembertForQuestionAnswering)rj   CanineForQuestionAnswering)r   ConvBertForQuestionAnswering)r    Data2VecTextForQuestionAnswering)r   DebertaForQuestionAnswering)r   DebertaV2ForQuestionAnswering)r   DiffLlamaForQuestionAnswering)r   DistilBertForQuestionAnswering)r  ElectraForQuestionAnswering)r  ErnieForQuestionAnswering)r  Exaone4ForQuestionAnswering)r  FalconForQuestionAnswering)r(  "FlaubertForQuestionAnsweringSimple)r0  FNetForQuestionAnswering)r7  FunnelForQuestionAnswering)r  GPT2ForQuestionAnswering)r  GPTNeoForQuestionAnswering)r  GPTNeoXForQuestionAnswering)r  GPTJForQuestionAnswering)r  IBertForQuestionAnswering)r  $JinaEmbeddingsV3ForQuestionAnsweringr  LayoutLMv2ForQuestionAnsweringr  LayoutLMv3ForQuestionAnswering)r  LEDForQuestionAnswering)r  LiltForQuestionAnswering)r  LlamaForQuestionAnswering)r  LongformerForQuestionAnswering)r  LukeForQuestionAnswering)r  LxmertForQuestionAnswering)r   MarkupLMForQuestionAnswering)r(  MBartForQuestionAnswering)r*   MegatronBertForQuestionAnswering)r2  MiniMaxForQuestionAnswering)r6  MinistralForQuestionAnswering)r8  Ministral3ForQuestionAnswering)r:  MistralForQuestionAnswering)r@  MixtralForQuestionAnswering)rL  MobileBertForQuestionAnswering)rZ  ModernBertForQuestionAnswering)rf  MPNetForQuestionAnswering)rh  MptForQuestionAnswering)rj  MraForQuestionAnswering)rl  MT5ForQuestionAnswering)rv  MvpForQuestionAnswering)rz  NemotronForQuestionAnswering)r  !NystromformerForQuestionAnswering)r  OPTForQuestionAnswering)r  Qwen2ForQuestionAnswering)r  Qwen2MoeForQuestionAnswering)r  Qwen3ForQuestionAnswering)r  Qwen3MoeForQuestionAnswering)r  Qwen3NextForQuestionAnswering)r  ReformerForQuestionAnswering)r  RemBertForQuestionAnswering)r  RobertaForQuestionAnswering)r  'RobertaPreLayerNormForQuestionAnswering)r  RoCBertForQuestionAnswering)r  RoFormerForQuestionAnswering)rG  SeedOssForQuestionAnswering)r]  SmolLM3ForQuestionAnswering)ri  SplinterForQuestionAnswering)rl  SqueezeBertForQuestionAnswering)r  T5ForQuestionAnswering)r  UMT5ForQuestionAnswering)r  XLMForQuestionAnsweringSimple)r  XLMRobertaForQuestionAnswering)r   XLMRobertaXLForQuestionAnswering)r   XLNetForQuestionAnsweringSimple)r  XmodForQuestionAnswering)r	  YosoForQuestionAnswering)r  TapasForQuestionAnswering))rZ   BlipForQuestionAnsweringr  )r  ViltForQuestionAnswering))r  LayoutLMForQuestionAnsweringr  r  )\)r   AlbertForTokenClassification)r'   ApertusForTokenClassification)r)   ArceeForTokenClassification)rE   BertForTokenClassification)rJ   BigBirdForTokenClassification)rN   BioGptForTokenClassification)r`   BloomForTokenClassification)rf   BrosForTokenClassification)rh   CamembertForTokenClassification)rj   CanineForTokenClassification)r   ConvBertForTokenClassification)r   "Data2VecTextForTokenClassification)r   DebertaForTokenClassification)r   DebertaV2ForTokenClassification)r    DeepseekV3ForTokenClassification)r   DiffLlamaForTokenClassification)r    DistilBertForTokenClassification)r  ElectraForTokenClassification)r  ErnieForTokenClassification)r  EsmForTokenClassification)r  EuroBertForTokenClassification)r  Exaone4ForTokenClassification)r  FalconForTokenClassification)r(  FlaubertForTokenClassification)r0  FNetForTokenClassification)r7  FunnelForTokenClassification)r<  GemmaForTokenClassification)r>  Gemma2ForTokenClassification)rV  GlmForTokenClassification)rX  Glm4ForTokenClassification)r  GPT2ForTokenClassification)r  r  )r   GPTBigCodeForTokenClassification)r  GPTNeoForTokenClassification)r  GPTNeoXForTokenClassification)r  GptOssForTokenClassification)r  HeliumForTokenClassification)r  IBertForTokenClassification)r  &JinaEmbeddingsV3ForTokenClassification)r  LayoutLMForTokenClassification)r   LayoutLMv2ForTokenClassification)r   LayoutLMv3ForTokenClassification)r  LiltForTokenClassification)r  LlamaForTokenClassification)r   LongformerForTokenClassification)r  LukeForTokenClassification)r   MarkupLMForTokenClassification)r*  "MegatronBertForTokenClassification)r2  MiniMaxForTokenClassification)r6  MinistralForTokenClassification)r8   Ministral3ForTokenClassification)r:  MistralForTokenClassification)r>  Mistral4ForTokenClassification)r@  MixtralForTokenClassification)rL   MobileBertForTokenClassification)rZ   ModernBertForTokenClassification)r^  !ModernVBertForTokenClassification)rf  MPNetForTokenClassification)rh  MptForTokenClassification)rj  MraForTokenClassification)rl  MT5ForTokenClassification)rz  NemotronForTokenClassification)r  NomicBertForTokenClassification)r  #NystromformerForTokenClassification)r  PersimmonForTokenClassification)r  PhiForTokenClassification)r  Phi3ForTokenClassification)r  Qwen2ForTokenClassification)r  Qwen2MoeForTokenClassification)r  Qwen3ForTokenClassification)r  Qwen3MoeForTokenClassification)r  Qwen3NextForTokenClassification)r  RemBertForTokenClassification)r  RobertaForTokenClassification)r  )RobertaPreLayerNormForTokenClassification)r  RoCBertForTokenClassification)r  RoFormerForTokenClassification)rG  SeedOssForTokenClassification)r]  SmolLM3ForTokenClassification)rl  !SqueezeBertForTokenClassification)rn  StableLmForTokenClassification)rp   Starcoder2ForTokenClassification)r  T5ForTokenClassification)r  T5GemmaForTokenClassification)r  T5Gemma2ForTokenClassification)r  UMT5ForTokenClassification)r  XLMForTokenClassification)r   XLMRobertaForTokenClassification)r  "XLMRobertaXLForTokenClassification)r   XLNetForTokenClassification)r  XmodForTokenClassification)r	  YosoForTokenClassification)#)r   AlbertForMultipleChoice)rE   BertForMultipleChoice)rJ   BigBirdForMultipleChoice)rh   CamembertForMultipleChoice)rj   CanineForMultipleChoice)r   ConvBertForMultipleChoice)r   Data2VecTextForMultipleChoice)r   DebertaV2ForMultipleChoice)r   DistilBertForMultipleChoice)r  ElectraForMultipleChoice)r  ErnieForMultipleChoice)r(  FlaubertForMultipleChoice)r0  FNetForMultipleChoice)r7  FunnelForMultipleChoice)r  IBertForMultipleChoice)r  LongformerForMultipleChoice)r  LukeForMultipleChoice)r*  MegatronBertForMultipleChoice)rL  MobileBertForMultipleChoice)rZ  ModernBertForMultipleChoice)rf  MPNetForMultipleChoice)rj  MraForMultipleChoice)r  NystromformerForMultipleChoice)r  RemBertForMultipleChoice)r  RobertaForMultipleChoice)r  $RobertaPreLayerNormForMultipleChoice)r  RoCBertForMultipleChoice)r  RoFormerForMultipleChoice)rl  SqueezeBertForMultipleChoice)r  XLMForMultipleChoice)r  XLMRobertaForMultipleChoice)r  XLMRobertaXLForMultipleChoice)r   XLNetForMultipleChoice)r  XmodForMultipleChoice)r	  YosoForMultipleChoice))rE   BertForNextSentencePrediction)r  ErnieForNextSentencePrediction)r0  FNetForNextSentencePrediction)r*  %MegatronBertForNextSentencePrediction)rL  #MobileBertForNextSentencePrediction))r/   ASTForAudioClassification)r   &Data2VecAudioForSequenceClassification)r  HubertForSequenceClassification)rN  SEWForSequenceClassification)rP  SEWDForSequenceClassification)r  "UniSpeechForSequenceClassification)r  %UniSpeechSatForSequenceClassification)r  !Wav2Vec2ForSequenceClassification)r  %Wav2Vec2BertForSequenceClassification)r  *Wav2Vec2ConformerForSequenceClassification)r  WavLMForSequenceClassification)r  WhisperForAudioClassification))r   Data2VecAudioForCTC)r  HubertForCTCr  r  )rN  	SEWForCTC)rP  
SEWDForCTC)r  UniSpeechForCTC)r  UniSpeechSatForCTC)r  Wav2Vec2ForCTC)r  Wav2Vec2BertForCTC)r  Wav2Vec2ConformerForCTC)r  WavLMForCTC))r   (Data2VecAudioForAudioFrameClassification)r  'UniSpeechSatForAudioFrameClassification)r  #Wav2Vec2ForAudioFrameClassification)r  'Wav2Vec2BertForAudioFrameClassification)r  ,Wav2Vec2ConformerForAudioFrameClassification)r   WavLMForAudioFrameClassification))r   Data2VecAudioForXVector)r  UniSpeechSatForXVector)r  Wav2Vec2ForXVector)r  Wav2Vec2BertForXVector)r  Wav2Vec2ConformerForXVector)r  WavLMForXVectorr!  )rg  SpeechT5ForTextToSpeech)r<   r   )r"  r&  r$  r  )rr   MusicgenForConditionalGeneration)rt  &MusicgenMelodyForConditionalGenerationr  r  )rC  SeamlessM4TForTextToSpeech)rE  SeamlessM4Tv2ForTextToSpeechr  )
r!   r$   rY   )r\   Blip2ForImageTextRetrievalrn   ru   r}   r,  rR  rU  ))rB   BeitBackbone)rQ   BitBackbone)r   ConvNextBackbone)r   ConvNextV2Backbone)r   DinatBackbone)r   Dinov2Backbone)r   Dinov2WithRegistersBackbone)r   DINOv3ConvNextBackbone)r   DINOv3ViTBackbone)r3  FocalNetBackboner  )r  HieraBackbone)lw_detr_vitLwDetrViTBackbone)r&  MaskFormerSwinBackbone)r  PixioBackbone)rZ  PPLCNetBackbone)pp_lcnet_v3PPLCNetV3Backbone)r  PvtV2Backbone)r  ResNetBackbone)rt_detr_resnetRTDetrResNetBackbone)rv  SwinBackbone)r|  Swinv2Backbone)r  TextNetBackboner  )uvdoc_backboneUVDocBackbone)r  VitDetBackbone)vitpose_backboneVitPoseBackbone)r   )r   r   r#  r&  )r+  r(  r1  )r6  r3  r<  )
superpointSuperPointForKeypointDetection))r   !EfficientLoFTRForKeypointMatchingr  )	superglueSuperGlueForKeypointMatching)r   rD   rI   rx   r   r   r   r   r   )r  Emu3TextModelr'  r  )r  r  r  )rG  MllamaTextModelrK  )rl  MT5EncoderModelr  r  r  r  r  r  r  rk  )r  T5EncoderModel)r  T5GemmaEncoderModel)r  UMT5EncoderModelr  r  r  )r  'PatchTSMixerForTimeSeriesClassification)r  PatchTSTForClassification)r  PatchTSMixerForRegression)r  PatchTSTForRegression)r  TimesFmModelForPrediction)r  TimesFm2_5ModelForPrediction)ry  Swin2SRForImageSuperResolution)r   r  r  c                       e Zd ZeZy)AutoModelForMaskGenerationN)r   r   r   !MODEL_FOR_MASK_GENERATION_MAPPING_model_mappingr   r   r   r  r        6Nr   r  c                       e Zd ZeZy)AutoModelForKeypointDetectionN)r   r   r   $MODEL_FOR_KEYPOINT_DETECTION_MAPPINGr  r   r   r   r  r        9Nr   r  c                       e Zd ZeZy)AutoModelForKeypointMatchingN)r   r   r   #MODEL_FOR_KEYPOINT_MATCHING_MAPPINGr  r   r   r   r  r        8Nr   r  c                       e Zd ZeZy)AutoModelForTextEncodingN)r   r   r   MODEL_FOR_TEXT_ENCODING_MAPPINGr  r   r   r   r  r        4Nr   r  c                       e Zd ZeZy)AutoModelForImageToImageN)r   r   r    MODEL_FOR_IMAGE_TO_IMAGE_MAPPINGr  r   r   r   r  r    s    5Nr   r  c                       e Zd ZeZy)	AutoModelN)r   r   r   MODEL_MAPPINGr  r   r   r   r  r    s    "Nr   r  c                       e Zd ZeZy)AutoModelForPreTrainingN)r   r   r   MODEL_FOR_PRETRAINING_MAPPINGr  r   r   r   r  r    s    2Nr   r  pretraining)head_docc                   `     e Zd ZeZeded    deej                  e   z  ddf fd       Z
 xZS )AutoModelForCausalLMclspretrained_model_name_or_pathreturnr   c                 *    t        |   |g|i |S Nsuperfrom_pretrainedr  r  
model_argskwargs	__class__s       r   r  z$AutoModelForCausalLM.from_pretrained  !     w&'D\z\U[\\r   )r   r   r   MODEL_FOR_CAUSAL_LM_MAPPINGr  classmethodtypestrosPathLiker  __classcell__r  s   @r   r  r    sO    0N ]()]'*R[[-='=]
 
"] ]r   r  zcausal language modelingc                       e Zd ZeZy)AutoModelForMaskedLMN)r   r   r   MODEL_FOR_MASKED_LM_MAPPINGr  r   r   r   r  r    s    0Nr   r  zmasked language modelingc                       e Zd ZeZy)AutoModelForSeq2SeqLMN)r   r   r   &MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPINGr  r   r   r   r  r        ;Nr   r  z&sequence-to-sequence language modelingzgoogle-t5/t5-base)r  checkpoint_for_examplec                       e Zd ZeZy)"AutoModelForSequenceClassificationN)r   r   r   )MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPINGr  r   r   r   r  r    s    >Nr   r  zsequence classificationc                       e Zd ZeZy)AutoModelForQuestionAnsweringN)r   r   r   $MODEL_FOR_QUESTION_ANSWERING_MAPPINGr  r   r   r   r  r    r  r   r  zquestion answeringc                       e Zd ZeZy)"AutoModelForTableQuestionAnsweringN)r   r   r   *MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPINGr  r   r   r   r  r        ?Nr   r  ztable question answeringzgoogle/tapas-base-finetuned-wtqc                       e Zd ZeZy)#AutoModelForVisualQuestionAnsweringN)r   r   r   +MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPINGr  r   r   r   r  r    s    @Nr   r  zvisual question answeringzdandelin/vilt-b32-finetuned-vqac                       e Zd ZeZy)%AutoModelForDocumentQuestionAnsweringN)r   r   r   -MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPINGr  r   r   r   r  r    s    BNr   r  zdocument question answeringz/impira/layoutlm-document-qa", revision="52e01b3c                       e Zd ZeZy)AutoModelForTokenClassificationN)r   r   r   &MODEL_FOR_TOKEN_CLASSIFICATION_MAPPINGr  r   r   r   r  r    r  r   r  ztoken classificationc                       e Zd ZeZy)AutoModelForMultipleChoiceN)r   r   r   !MODEL_FOR_MULTIPLE_CHOICE_MAPPINGr  r   r   r   r  r    r  r   r  zmultiple choicec                       e Zd ZeZy)"AutoModelForNextSentencePredictionN)r   r   r   *MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPINGr  r   r   r   r  r  !  r  r   r  znext sentence predictionc                       e Zd ZeZy)AutoModelForImageClassificationN)r   r   r   &MODEL_FOR_IMAGE_CLASSIFICATION_MAPPINGr  r   r   r   r  r  *  r  r   r  zimage classificationc                       e Zd ZeZy)'AutoModelForZeroShotImageClassificationN)r   r   r   0MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPINGr  r   r   r   r
  r
  1  s    ENr   r
  zzero-shot image classificationc                       e Zd ZeZy)AutoModelForImageSegmentationN)r   r   r   $MODEL_FOR_IMAGE_SEGMENTATION_MAPPINGr  r   r   r   r  r  :  r  r   r  zimage segmentationc                       e Zd ZeZy) AutoModelForSemanticSegmentationN)r   r   r   'MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPINGr  r   r   r   r  r  A      <Nr   r  zsemantic segmentationc                       e Zd ZeZy) AutoModelForTimeSeriesPredictionN)r   r   r   (MODEL_FOR_TIME_SERIES_PREDICTION_MAPPINGr  r   r   r   r  r  J      =Nr   r  ztime-series predictionc                       e Zd ZeZy)!AutoModelForUniversalSegmentationN)r   r   r   (MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPINGr  r   r   r   r  r  S  r  r   r  zuniversal image segmentationc                       e Zd ZeZy) AutoModelForInstanceSegmentationN)r   r   r   'MODEL_FOR_INSTANCE_SEGMENTATION_MAPPINGr  r   r   r   r  r  \  r  r   r  zinstance segmentationc                       e Zd ZeZy)AutoModelForObjectDetectionN)r   r   r   "MODEL_FOR_OBJECT_DETECTION_MAPPINGr  r   r   r   r  r  e      7Nr   r  zobject detectionc                       e Zd ZeZy)#AutoModelForZeroShotObjectDetectionN)r   r   r   ,MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPINGr  r   r   r   r"  r"  l      ANr   r"  zzero-shot object detectionc                       e Zd ZeZy)AutoModelForDepthEstimationN)r   r   r   "MODEL_FOR_DEPTH_ESTIMATION_MAPPINGr  r   r   r   r&  r&  u  r   r   r&  zdepth estimationc                       e Zd ZeZy)AutoModelForTextRecognitionN)r   r   r   "MODEL_FOR_TEXT_RECOGNITION_MAPPINGr  r   r   r   r)  r)  |  r   r   r)  ztext recognitionc                       e Zd ZeZy)AutoModelForTableRecognitionN)r   r   r   #MODEL_FOR_TABLE_RECOGNITION_MAPPINGr  r   r   r   r,  r,    r  r   r,  ztable recognitionc                       e Zd ZeZy)AutoModelForVideoClassificationN)r   r   r   &MODEL_FOR_VIDEO_CLASSIFICATION_MAPPINGr  r   r   r   r/  r/    r  r   r/  zvideo classificationc                   `     e Zd ZeZeded    deej                  e   z  ddf fd       Z
 xZS )AutoModelForImageTextToTextr  r  r  r   c                 *    t        |   |g|i |S r  r  r  s       r   r  z+AutoModelForImageTextToText.from_pretrained  r  r   )r   r   r   $MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPINGr  r  r  r  r  r  r  r  r  s   @r   r2  r2    sO    9N ]/0]'*R[[-='=]
 
"] ]r   r2  zimage-text-to-text modelingc                       e Zd ZeZy)AutoModelForMultimodalLMN)r   r   r   MODEL_FOR_MULTIMODAL_LM_MAPPINGr  r   r   r   r6  r6    r  r   r6  zmultimodal generationc                       e Zd ZeZy)AutoModelForAudioClassificationN)r   r   r   &MODEL_FOR_AUDIO_CLASSIFICATION_MAPPINGr  r   r   r   r9  r9    r  r   r9  zaudio classificationc                       e Zd ZeZy)AutoModelForCTCN)r   r   r   MODEL_FOR_CTC_MAPPINGr  r   r   r   r<  r<    s    *Nr   r<  z%connectionist temporal classificationc                       e Zd ZeZy)AutoModelForSpeechSeq2SeqN)r   r   r   "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPINGr  r   r   r   r?  r?    r   r   r?  z,sequence-to-sequence speech-to-text modelingc                       e Zd ZeZy)$AutoModelForAudioFrameClassificationN)r   r   r   ,MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPINGr  r   r   r   rB  rB    r$  r   rB  z"audio frame (token) classificationc                       e Zd ZeZy)AutoModelForAudioXVectorN)r   r   r   MODEL_FOR_AUDIO_XVECTOR_MAPPINGr  r   r   r   rE  rE    r  r   rE  c                       e Zd ZeZy)AutoModelForTextToSpectrogramN)r   r   r   %MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPINGr  r   r   r   rH  rH    s    :Nr   rH  c                       e Zd ZeZy)AutoModelForTextToWaveformN)r   r   r   "MODEL_FOR_TEXT_TO_WAVEFORM_MAPPINGr  r   r   r   rK  rK    r   r   rK  c                       e Zd ZeZy)AutoBackboneN)r   r   r   MODEL_FOR_BACKBONE_MAPPINGr  r   r   r   rN  rN    s    /Nr   rN  zaudio retrieval via x-vectorc                       e Zd ZeZy)AutoModelForMaskedImageModelingN)r   r   r   'MODEL_FOR_MASKED_IMAGE_MODELING_MAPPINGr  r   r   r   rQ  rQ    r  r   rQ  zmasked image modelingc                       e Zd ZeZy)AutoModelForAudioTokenizationN)r   r   r   $MODEL_FOR_AUDIO_TOKENIZATION_MAPPINGr  r   r   r   rT  rT    r  r   rT  z$audio tokenization through codebooks)[r:  rC  rU  rF  rO  'MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPINGr  r=  r  r'  r*  r-  r  MODEL_FOR_IMAGE_MAPPINGr  r  r  r  r  rR  r  r  r  r  r  r  r  r  r  r  r@  r  r  rL  rI  r  r  r  r0  MODEL_FOR_RETRIEVAL_MAPPINGr4  r7  r  r  r  r#  ,MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING(MODEL_FOR_TIME_SERIES_REGRESSION_MAPPINGr  rN  r9  rB  rT  rE  r  r<  r&  r)  r,  r  r  r  r  r  r  r  r  rQ  r  r  r6  r  r  r  r  r  r  r  r?  r  rH  rK  r  r  r  r/  r  r  r
  r"  r2  )__doc__r  collectionsr   typingr   utilsr   auto_factoryr   r	   r
   r   configuration_autor   
generationr   modeling_utilsr   r   
get_loggerr   loggerMODEL_MAPPING_NAMES#MODEL_FOR_PRETRAINING_MAPPING_NAMES!MODEL_FOR_CAUSAL_LM_MAPPING_NAMESMODEL_FOR_IMAGE_MAPPING_NAMES-MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES-MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES,MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES*MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES-MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES-MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES.MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES,MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES!MODEL_FOR_RETRIEVAL_MAPPING_NAMES*MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMESlistitems%MODEL_FOR_MULTIMODAL_LM_MAPPING_NAMES!MODEL_FOR_MASKED_LM_MAPPING_NAMES(MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES2MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES(MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES(MODEL_FOR_TEXT_RECOGNITION_MAPPING_NAMES)MODEL_FOR_TABLE_RECOGNITION_MAPPING_NAMES,MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES/MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES*MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES0MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES1MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES3MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES,MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES'MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES0MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING_NAMES,MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMESMODEL_FOR_CTC_MAPPING_NAMES2MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES%MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES+MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES(MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES6MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES MODEL_FOR_BACKBONE_MAPPING_NAMES'MODEL_FOR_MASK_GENERATION_MAPPING_NAMES*MODEL_FOR_KEYPOINT_DETECTION_MAPPING_NAMES)MODEL_FOR_KEYPOINT_MATCHING_MAPPING_NAMES%MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES2MODEL_FOR_TIME_SERIES_CLASSIFICATION_MAPPING_NAMES.MODEL_FOR_TIME_SERIES_REGRESSION_MAPPING_NAMES.MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING_NAMES&MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES"MODEL_FOR_AUDIO_TOKENIZATION_NAMESr  r  r  rV  r  r  r  r  r  r  r0  r4  r7  rX  r  r  r  rW  rR  r  r#  r'  r*  r-  r  r  r  r  r  r  r  r:  r=  r@  rC  rF  rI  rL  rO  r  r  r  r  rY  rZ  r  r  rU  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r  r  r  r"  r&  r)  r,  r/  r2  r6  r9  r<  r?  rB  rE  rH  rK  rN  rQ  rT  __all__r   r   r   <module>r     s    	 #     5 -1/  
		H	%!KM ^ '2_a' #F %0[]% !~ !,:<! | 1<1 - 1< 	71 - 0;9;0 ,z .9 	(. * 1<
1 - 1< 	<1 - 2=	2 . 0;0 , %08*% ! .9AC. *N )4	8>>@	A4 	D 	P	
 	9 	@ 	> 	C 	B 	7 	H) %" %002% !h ,7, (( 6A	6 2 ,7	, ( ,7EE, ( -81- ) 0;#%0 ,P ,7, (* 3>wy3 /v .9OQ. *f 4? 	/4 0 5@5 1 7B7 3 0;^`0 ,D +6%'+ 'R 4?4 0 0;0 ,$ * $ 6A
6 2 )4
) % /: 	?// + ,7, ($ :E: 6  $/$  B +6	+ ' .98. * -8- ) )4 ") %H 6AC16 2 2=5-2 . 2=062 . *55* & &1& " !!57JK 01EGj k ./CEfg *:G+ ' *:F* & 4DP4 0 (8D( $ +;G+ ' +;G+ ' ,<H, ( *:F* & (8D( $ #33GIn"o ./CEfg .>K/ + 1AM1 - //CEfg *+?A^_ *:G+ ' &66JLt%u "/?L0 , &66JLt%u "%56JLt%u "&67KMv&w #)9F* & -=I- ) (8D( $ .>J. * *:F* & %55IKr$s !-=J. * *:F* & ))=?Z[ %56JLt%u "/?L0 , #33GIn"o (8E) % &66JLt%u "-.BDde $45IKr$s !'7D( $ '77KMv&w #"23GIn"o /?L0 , ,<H, ( ,<H, ( $44HJp#q  '78LNp'q $7!4 7:$7 :9#6 952 562 6## # i(	31 3 ,,Cm\ ]. ] ))=Hbc 1. 1 ))=Hbc </ < *5. ?)< ? &7&1J& "
:$7 : !22OZn o @)< @ &7&'<& "A*= A '8'(<' #C,? C ):)*L) %<&9 < #44S^t"u 7!4 7 //ITef @)< @ &7&1K& "
<&9 < #44S^t"u F.A F +<+6V+ '
:$7 : !22OZn o =': = $5$/F$  
>': > $5$/G$  
>(; > %6%0N% !
=': = $5$/F$  
8"5 8 00KVhi B*= B '8'2N' #
8"5 8 00KVhi 8"5 8 00KVhi 9#6 9  11MXkl <&9 < #44S^t"u ]"5 ] 00KVst 52 5 --EPgh <&9 < #44S^t"u +) + $O>ef8 3 8 .(V 
B+> B (9(3W( $
52 5;$7 ;8!4 80) 0 --EPno =&9 = #44S^u"v :$7 : !2!,R! 
\r   