
    i                     n   d dl Zd dlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZmZmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ  ed      e G d de                    Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z  G d de      Z!g d Z"y)!    N)strict   )PreTrainedConfig)auto_docstring)VideoMetadata   )CONFIG_MAPPING
AutoConfig	AutoModel)Glm4vImageProcessor)Glm4vImageProcessorPil)Glm4vForConditionalGeneration
Glm4vModelGlm4vPreTrainedModel)Glm4vProcessor)Glm4vVideoProcessorzzai-org/GLM-4.1V-9B-Thinking)
checkpointc                        e Zd ZU dZdZeedZdgZdZe	e
z  dz  ed<   dZe	e
z  dz  ed<   dZeed	<   d
Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<    fdZ xZS )Glm46VConfiga  
    image_start_token_id (`int`, *optional*, defaults to 151339):
        The image start token index to encode the start of image.
    image_end_token_id (`int`, *optional*, defaults to 151340):
        The image end token index to encode the end of image.
    video_start_token_id (`int`, *optional*, defaults to 151361):
        The video start token index to encode the start of video.
    video_end_token_id (`int`, *optional*, defaults to 151362):
        The video end token index to encode the end of video.

    ```python
    >>> from transformers import Glm46VForConditionalGeneration, Glm46VConfig

    >>> # Initializing a GLM-4.6V style configuration
    >>> configuration = Glm46VConfig()

    >>> # Initializing a model from the GLM-4.6V style configuration
    >>> model = Glm4vForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```glm46v)text_configvision_configpast_key_valuesNr   r   i/O image_token_idi0O video_token_idi+O image_start_token_idi,O image_end_token_idiAO video_start_token_idiBO video_end_token_idFtie_word_embeddingsc                 T   t        | j                  t              rT| j                  j                  dd      | j                  d<   t	        | j                  d      di | j                  | _        n| j                  t	        d          | _        t        | j
                  t              rT| j
                  j                  dd      | j
                  d<   t	        | j
                  d      di | j
                  | _        n| j
                  t	        d          | _        t        |   di | y )N
model_typeglm4v_vision
glm4v_text )
isinstancer   dictgetr	   r   super__post_init__)selfkwargs	__class__s     z/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/glm46v/modular_glm46v.pyr*   zGlm46VConfig.__post_init__F   s   d(($//3/A/A/E/ElTb/cD|,!/0B0B<0P!Q!gTXTfTf!gD'!/!?!ADd&&--1-=-=-A-A,P\-]D\*-d.>.>|.LMaPTP`P`aD%-l;=D''    )__name__
__module____qualname____doc__r"   r
   sub_configskeys_to_ignore_at_inferencer   r'   r   __annotations__r   r   intr   r   r   r   r   r    boolr*   __classcell__r-   s   @r.   r   r      s    . J",zJK#4"526K((4/648M4**T18 NC  NC  &#&$$ &#&$$ %%( (r/   r   c                       e Zd ZdZdZd Zy)Glm46VPreTrainedModelNc                     t        d      )Nz
Not needed)AttributeError)r+   modules     r.   _init_weightsz#Glm46VPreTrainedModel._init_weightsZ   s    \**r/   )r0   r1   r2   _can_record_outputs_no_split_modulesr@   r%   r/   r.   r<   r<   V   s    +r/   r<   c                   "     e Zd ZdZ fdZ xZS )Glm46VModelNc                     t         |   |       t        j                  |j                        | _        t        j                  |j                        | _        y N)r)   __init__r   from_configr   visualr   language_model)r+   configr-   s     r.   rG   zGlm46VModel.__init__a   sA     ++F,@,@A'33F4F4FGr/   )r0   r1   r2   rB   rG   r9   r:   s   @r.   rD   rD   ^   s    H Hr/   rD   c                       e Zd Zy)Glm46VForConditionalGenerationNr0   r1   r2   r%   r/   r.   rM   rM   g       r/   rM   c                       e Zd Zd Zy)Glm46VProcessorc                 *    d| j                    d|ddS )Nz<|begin_of_image|>z<|end_of_image|>z.1fz seconds)image_token)r+   timestamp_secs     r.   replace_frame_token_idz&Glm46VProcessor.replace_frame_token_idl   s$    #D$4$4#55EmTWEXX`aar/   N)r0   r1   r2   rU   r%   r/   r.   rQ   rQ   k   s    br/   rQ   c                       e Zd Zy)Glm46VImageProcessorPilNrN   r%   r/   r.   rW   rW   p   rO   r/   rW   c                       e Zd Zy)Glm46VImageProcessorNrN   r%   r/   r.   rY   rY   t   rO   r/   rY   c                   ,    e Zd Z	 ddedeez  dz  fdZy)Glm46VVideoProcessorNmetadatafpsc                    |t        |dd       t        d      |j                  }|dz
  }|j                  xs t	        ||j
                  z        dz   }dddd}d}d}	t        ||	      }
|
d	k  r|d	   }n|
d
k  r|d
   }n|d   }t        |
|z  | j                  z        }t        ||      }d|j
                  z  }t        |      D cg c]  }||z  	 }}t        |      }||k  r/t        j                  d|dz
  |t              j                         }nLg }d}d| j                  |z  z  }t        |      D ](  }||   |k\  s||z  }|j                  |       ||k\  s( n t        |      |k  rVt        |      dk(  rdt        |dz
  d      }}n
|d   |d   }}t        j                  |||t              j                         }n<t        |      |kD  r.t        j                  d|dz
  |t              j                         }t!               g }}|D ])  }||vs|j#                  |       |j                  |       + t        |      dz  r|j                  |d          t        j$                  |      S c c}w )Nr]   zAsked to sample frames per second but no video metadata was provided which is required when sampling in Glm46V. Please pass in `VideoMetadata` object or set `do_sample_frames=False`   r   g      ?)   ,  `	  i  rb   r`   ra   r   )dtype)getattr
ValueErrortotal_num_framesdurationroundr]   minr7   temporal_patch_sizerangenplinspacetolistappendlenmaxsetaddarray)r+   r\   r]   r,   total_framesmax_frame_idxrh   DYNAMIC_FPS_THRESMAX_FRAME_COUNT_DYNAMICMAX_DURATIONeffective_duration
target_fps	extract_tduration_per_framei
timestamps
max_secondframe_indicescurrent_secondinv_fpsframe_indexstartendseenuniqidxs                             r.   sample_framesz"Glm46VVideoProcessor.sample_framesy   s    wx=EX 
  00$q($$Omhll.J(Ka(O!"#6"% <8#*2.J3&*3/J*40J*Z7$:R:RRS		#:;	-6;L6IJa,,J
J]
)#KK<!+;YcRYY[MMN433j@AG$\2 k*n<"g-N!((5%3 }	)=!Q&L1$4a 8s*1-}R/@sKKsISIPPRM)+KK<!+;YcRYY[MUBd  	!C$C 	!
 t9q=KKR!xx~E Ks   	I3rF   )r0   r1   r2   r   r7   floatr   r%   r/   r.   r[   r[   x   s*     #'@@ 5[4@r/   r[   )r   rD   r<   rM   rQ   rY   rW   r[   )#numpyrm   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   video_utilsr   autor	   r
   r   glm4v.image_processing_glm4vr    glm4v.image_processing_pil_glm4vr   glm4v.modeling_glm4vr   r   r   glm4v.processing_glm4vr   glm4v.video_processing_glm4vr   r   r<   rD   rM   rQ   rW   rY   r[   __all__r%   r/   r.   <module>r      s      . 3 # ( 8 8 > E b b 3 > 9:3(# 3(  ;3(l+0 +H* H	%B 	bn b
	4 		. 	A. AH	r/   