
    iO                        d dl Z d dlmZ d dlZd dlmZ d dlmZmZm	Z	m
Z
mZ d dlmZ d dlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$m%Z%m&Z& ddl'm(Z(m)Z) ddl*m+Z+m,Z,m-Z-m.Z. ddl/m0Z0 ddl1m2Z2  G d de2e      Z3 G d de0      Z4 ed      e G d de)                    Z5 ed      e G d de(                    Z6 G d  d!ejn                        Z8 G d" d#e$      Z9 G d$ d%e#      Z: G d& d'e,      Z; G d( d)e+      Z< G d* d+e.      Z= ed,-       G d. d/e=             Z> G d0 d1e-      Z?g d2Z@y)3    N)Callable)strict)	Tokenizerdecodersnormalizerspre_tokenizers
processors)Unigram)nn   )create_bidirectional_mask)BaseModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TokenizersBackend)TransformersKwargsauto_docstringcan_return_tuple)merge_with_config_defaults)capture_outputs   )LlamaAttentionLlamaRotaryEmbeddingapply_rotary_pos_embeager_attention_forward)ParakeetCTCConfigParakeetEncoderConfig)ParakeetEncoderBlock ParakeetEncoderConvolutionModuleParakeetForCTCParakeetPreTrainedModel)ParakeetProcessor)T5Tokenizerc                   Z    e Zd Z	 	 	 	 	 	 	 	 d	dZ	 	 	 d
deee   z  dededz  dedef
dZy)LasrTokenizerNc	           	      R   || _         |q|D 
cg c]  }
dt        |
      v s|
 }}
t        |      dk  r!|t        |      D cg c]  }d| d
 c}z  }nC|dkD  r>|t        |      k7  r0t	        d| d| d      t        |      D cg c]  }d| d
 }}|}||| _        nbt        |      dft        |      dft        |      dfd	g| _        t        |dz
  d
d
      D ]#  }| j
                  j                  d| ddf       % t        t        | j
                  dd            | _	        |$t        j                  |      | j                  _        t        j                  t        j                         t        j                   ddd      g      | j                  _        t%        j                   ddd      | j                  _        t)        j*                  d|||||d|	 t-        j.                  ddgg dd| j0                  fg      | j                  _        y c c}
w c c}w c c}w )Nz
<extra_id_   >r   zBoth extra_ids (z!) and additional_special_tokens (zm) are provided to LasrTokenizer. In this case the additional_special_tokens must include the extra_ids tokens        )   ▁g       r   F)unk_idbyte_fallbackr+   alwaysT)replacementprepend_schemesplit)	eos_token	unk_token	pad_token	extra_idsadditional_special_tokens$A</s>)r8   r9   z$Br9   )singlepairspecial_tokens )
_extra_idsstrlenrange
ValueError_vocab_scoresappendr   r
   
_tokenizerr   Precompiled
normalizerr   SequenceWhitespaceSplit	Metaspacepre_tokenizerr   decoderr   __init__r	   TemplateProcessingeos_token_idpost_processor)selfr3   r4   r5   _spm_precompiled_charsmapr6   r7   vocab
vocab_filekwargsxextra_tokensis                v/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/lasr/modular_lasr.pyrM   zLasrTokenizer.__init__-   s]    $ %0'@[!LTWXYTZDZA[L[< 1$)yIY-ZA
1#Q.?-ZZ)Q9L0A#A &yk1RSlRm n   8=Y7GH!j1-HLH(4% !&D Y%Y%Y%	"D 9q="b1 D""))Zs!+<c*BCD#""#
 %0)4)@)@AZ)[DOO&(6(?(?..0((U8[_`)
% #+"4"4W_gk"l"" 	
&?	
 	
 *4)F)F&>-**+*
&k \-Z Is   HHHH$	token_idsskip_special_tokensclean_up_tokenization_spacesgroup_tokensreturnc                     t        |t              r|g}|r%t        j                  |      D cg c]  }|d   	 }}|D cg c]  }|| j                  k7  s| }}t        j                  | f|||d|S c c}w c c}w )Nr   )rZ   r[   r\   )
isinstanceint	itertoolsgroupbypad_token_idr   _decode)rQ   rZ   r[   r\   r]   rU   token_grouptokens           rY   re   zLasrTokenizer._decodez   s     i%"I;D;L;LY;WXKQXIX )2PuUd>O>O5OUP	P ((
 3)E	

 
 	
 Y Qs   A4A9A9)r9   z<unk>z<pad>Nd   NNN)FNT)	__name__
__module____qualname__rM   ra   listboolr?   re   r=       rY   r&   r&   ,   so     "&"&K
` %*48!
c?
 "
 '+Tk	

 
 

rn   r&   c                       e Zd Zy)LasrProcessorNri   rj   rk   r=   rn   rY   rp   rp      s    rn   rp   zgoogle/medasr)
checkpointc                   $   e Zd ZU dZdZeed<   dZeed<   dZeed<   dZ	e
ed	<   dZe
ed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZee   eedf   z  ed<   dZee   eedf   z  ed<   dZeed<   dZedz  ed<    e       Z e       Zy)LasrEncoderConfiga8  
    convolution_bias (`bool`, *optional*, defaults to `False`):
        Whether to use bias in convolutions of the conformer's convolution module.
    conv_kernel_size (`int`, *optional*, defaults to 32):
        The kernel size of the convolution layers in the Conformer block.
    subsampling_conv_channels (`int`, *optional*, defaults to 256):
        The number of channels in the subsampling convolution layers.
    subsampling_conv_kernel_size (`int`, *optional*, defaults to 5):
        The kernel size of the subsampling convolution layers.
    subsampling_conv_stride (`int`, *optional*, defaults to 2):
        The stride of the subsampling convolution layers.
    dropout_positions (`float`, *optional*, defaults to 0.0):
        The dropout ratio for the positions in the input sequence.
    feed_forward_residual_weights (`tuple[float, float]`, *optional*, defaults to `[1.5, 0.5]`):
        The residual weights for the feed forward layers.
    conv_residual_weights (`tuple[float, float]`, *optional*, defaults to `[2.0, 1.0]`):
        The residual weights for the convolution layers.
    batch_norm_momentum (`float`, *optional*, defaults to 0.01):
        The momentum for the batch normalization layers

    Example:
        ```python
        >>> from transformers import LasrEncoderModel, LasrEncoderConfig

        >>> # Initializing a `LasrEncoder` configuration
        >>> configuration = LasrEncoderConfig()

        >>> # Initializing a model from the configuration
        >>> model = LasrEncoderModel(configuration)

        >>> # Accessing the model configuration
        >>> configuration = model.config
        ```

    This configuration class is based on the LasrEncoder architecture from Google Health AI. You can find more details
    and pre-trained models at [TODO/TODO](https://huggingface.co/TODO/TODO).
       hidden_size   num_hidden_layersi   intermediate_sizeFattention_biasconvolution_bias    conv_kernel_size   subsampling_conv_kernel_size   num_mel_binsi'  max_position_embeddingsgư>layer_norm_eps)g      ?g      ?.feed_forward_residual_weights)g       @g      ?conv_residual_weightsg{Gz?batch_norm_momentumNrope_parameters)ri   rj   rk   __doc__rv   ra   __annotations__rx   ry   rz   rm   r{   r}   r   r   r   r   floatr   rl   tupler   r   r   dictAttributeErrorsubsampling_factorscale_inputr=   rn   rY   rt   rt      s    $L Ks!s! ND "d"c() #)L##(S( NE EO!4;ucz1B#BO=G4;ucz)::G!%%#'OTD['') "Krn   rt   c                   >    e Zd ZU dZdZeed<   dZeed<   ed        Z	y)LasrCTCConfigaa  
    ctc_loss_reduction (`str`, *optional*, defaults to `"mean"`):
        Specifies the reduction to apply to the output of `torch.nn.CTCLoss`. Only relevant when training an
        instance of [`LasrForCTC`].
    ctc_zero_infinity (`bool`, *optional*, defaults to `True`):
        Whether to zero infinite losses and the associated gradients of `torch.nn.CTCLoss`. Infinite losses mainly
        occur when the inputs are too short to be aligned to the targets. Only relevant when training an instance
        of [`LasrForCTC`].

    Example:
        ```python
        >>> from transformers import LasrForCTC, LasrCTCConfig
        >>> # Initializing a Lasr configuration
        >>> configuration = LasrCTCConfig()
        >>> # Initializing a model from the configuration
        >>> model = LasrForCTC(configuration)
        >>> # Accessing the model configuration
        >>> configuration = model.config
        ```
    This configuration class is based on the Lasr CTC architecture from Google Health AI. You can find more details
    and pre-trained models at [TODO/TODO](https://huggingface.co/TODO/TODO).
    ru   
vocab_sizer   rd   c                 4    | j                   j                  dz  S )Nr   )encoder_configsubsampling_conv_stride)rQ   s    rY   inputs_to_logits_ratioz$LasrCTCConfig.inputs_to_logits_ratio   s    ""::A==rn   N)
ri   rj   rk   r   r   ra   r   rd   propertyr   r=   rn   rY   r   r      s/    . JL#> >rn   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )LasrEncoderSubsamplingconfigc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j
                  |j                  |j                        | _
        t        j                  |j
                  |j                  |j                  |j                        | _        t        j                  |j                  |j
                        | _        t        j                         | _        y )N)kernel_sizestride)superrM   r   Linearr   rv   dense_0Conv1dr   r   conv_0subsampling_conv_channelsconv_1dense_1ReLUact_fn)rQ   r   	__class__s     rY   rM   zLasrEncoderSubsampling.__init__   s    yy!4!4f6H6HIii;;11	
 ii,,;;11	
 yy!A!A6CUCUVggirn   input_featuresr^   c                 ,   | j                  | j                  |            }|j                  dd      }| j                  | j                  |            }| j                  | j	                  |            }|j                  dd      }| j                  |      S )Nr(   r   )r   r   	transposer   r   r   )rQ   r   hidden_statess      rY   forwardzLasrEncoderSubsampling.forward  sz    DLL$@A%//15DKK$>?DKK$>?%//15||M**rn   )	ri   rj   rk   rt   rM   torchTensorr   __classcell__r   s   @rY   r   r      s+     0  $+ell +u|| +rn   r   c                       e Zd Zy)LasrEncoderRotaryEmbeddingNrq   r=   rn   rY   r   r     s    rn   r   c                        e Zd Zdedef fdZ	 	 ddej                  deej                  ej                  f   dz  dej                  dz  de	e
   d	eej                  ej                  f   f
d
Z xZS )LasrEncoderAttentionr   	layer_idxc                 4    t         |   ||       d| _        y )NF)r   rM   	is_causalrQ   r   r   r   s      rY   rM   zLasrEncoderAttention.__init__  s    +rn   Nr   position_embeddingsattention_maskrU   r^   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }	|\  }
}t        |||
|      \  }}t        j                  | j                  j                  t              } || |||	|f| j                  sdn| j                  | j                  d|\  }} |j                   g |d j#                         }| j%                  |      }||fS )Nr,   r(   r   r*   )dropoutscaling)shapehead_dimq_projviewr   k_projv_projr   r   get_interfacer   _attn_implementationr   trainingattention_dropoutr   reshape
contiguouso_proj)rQ   r   r   r   rU   input_shapehidden_shapequery_states
key_statesvalue_statescossinattention_interfaceattn_outputattn_weightss                  rY   r   zLasrEncoderAttention.forward  sk    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFHkk+.L((rn   NN)ri   rj   rk   rt   ra   rM   r   r   r   r   r   r   r   r   s   @rY   r   r     s    0 S  IM.2	")||") #5<<#=>E") t+	")
 +,") 
u||U\\)	*")rn   r   c                   &     e Zd Zddef fdZ xZS )LasrEncoderConvolutionModuler   c                     t         |   ||       d| _        t        j                  |j
                  |j                        | _        y )Nsame)momentum)r   rM   paddingr   BatchNorm1drv   r   norm)rQ   r   module_configr   s      rY   rM   z%LasrEncoderConvolutionModule.__init__?  s7    /NN6#5#5@Z@Z[	rn   N)ri   rj   rk   rt   rM   r   r   s   @rY   r   r   >  s    \0 \ \rn   r   c                        e Zd Zdedef fdZ	 	 ddej                  dej                  dz  dej                  dz  dee	   d	ej                  f
d
Z
 xZS )LasrEncoderBlockr   r   c                 T   t         |   ||       |j                  | _        |j                  | _        t	        j
                  |j                  |j                  d      | _        t	        j
                  |j                  |j                  d      | _	        t	        j
                  |j                  |j                  d      | _
        t	        j
                  |j                  |j                  d      | _        t	        j
                  |j                  |j                  d      | _        y )NF)bias)r   rM   r   r   r   	LayerNormrv   r   norm_feed_forward1norm_self_att	norm_convnorm_feed_forward2norm_outr   s      rY   rM   zLasrEncoderBlock.__init__F  s    +-3-Q-Q*%+%A%A""$,,v/A/A6CXCX_d"e\\&*<*<f>S>SZ_`f&8&8&:O:OV[\"$,,v/A/A6CXCX_d"eV%7%79N9NUZ[rn   Nr   r   r   rU   r^   c                 0   |}| j                  | j                  |            }| j                  d   |z  | j                  d   |z  z   }| j                  |      } | j                  d|||d|\  }}||z   }| j                  | j                  |      |      }	| j                  d   |z  | j                  d   |	z  z   }|}| j                  | j                  |            }| j                  d   |z  | j                  d   |z  z   }| j                  |      }|S )Nr   r(   )r   r   r   )r   r=   )feed_forward1r   r   r   	self_attnconvr   r   feed_forward2r   r   )
rQ   r   r   r   rU   residualnormalized_hidden_statesr   _conv_outputs
             rY   r   zLasrEncoderBlock.forwardR  sJ    !**4+B+B=+QR..q1H<t?a?abc?dgt?tt 	 $(#5#5m#D ' 
2) 3
 	
Q &3ii} =ni]2215EHbHbcdHehsHss **4+B+B=+QR..q1H<t?a?abc?dgt?tt 	 m4rn   r   )ri   rj   rk   rt   ra   rM   r   r   r   r   r   r   r   s   @rY   r   r   E  sw    
\0 
\S 
\ /337	!||! t+! #\\D0	!
 +,! 
!rn   r   c                   6    e Zd ZdZd Zdej                  fdZy)LasrPreTrainedModelFc                 .    t        j                  |       y r   )r   _init_weights)rQ   modules     rY   r   z!LasrPreTrainedModel._init_weightsz  s    %%f-rn   input_lengthsc                     t        | j                  t              r| j                  j                  n| j                  }|j                  }|j
                  }d}t        |      D ]  }||z
  |z  dz   } |S )Nr   r(   )r`   r   r   r   r   r   rA   )rQ   r   r   r   r   
num_layersr   s          rY   _get_subsampling_output_lengthz2LasrPreTrainedModel._get_subsampling_output_length}  st    7A$++}7]33cgcncn$AA77
z" 	HA*[8VCaGM	H rn   N)ri   rj   rk   _supports_flex_attnr   r   r   r   r=   rn   rY   r   r   v  s    .	ELL 	rn   r   zh
    The LasrEncoder model, based on the Conformer architecture](https://arxiv.org/abs/2005.08100).
    )custom_introc                        e Zd ZU eed<   dZdef fdZeee	e
	 d
dej                  dej                  dz  dee   defd	                            Z xZS )LasrEncoderr   encoderc           	         t         |   |       d| _        |j                  | _        |j                  | _        |j
                  | _        t        |      | _        t        |      | _	        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        t        j                   |j"                  |j$                  d      | _        | j)                          y c c}w )NF)epsr   )r   rM   gradient_checkpointingr   dropout_positions	layerdropr   
subsamplerr   
rotary_embr   
ModuleListrA   rx   r   layersr   rv   r   out_norm	post_initr   s      rY   rM   zLasrEncoder.__init__  s     &+#~~!'!9!9))084V<mmBGH`H`BabYfi0b
 V%7%7V=R=RY^_	 cs   C2Nr   r   rU   r^   c                 b   | j                  |      }| j                  |t        j                  |j                  d   |j
                        j                  d            \  }}t        j                  j                  || j                  | j                        }t        j                  j                  || j                  | j                        }t        j                  j                  || j                  | j                        }| | j                  ||j                  d         }t        | j                  ||      }| j                  D ]G  }d}| j                  r&t        j                   g       }	|	| j"                  k  rd}|r: ||f|||fd	|}I | j%                  |      }t'        |
      S )a  
        Example:

        ```python
        >>> from transformers import AutoProcessor, LasrEncoder
        >>> from datasets import load_dataset, Audio

        >>> model_id = TODO
        >>> processor = AutoProcessor.from_pretrained(model_id)
        >>> encoder = ParakeetEncoder.from_pretrained(model_id)

        >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        >>> ds = ds.cast_column("audio", Audio(sampling_rate=processor.feature_extractor.sampling_rate))

        >>> inputs = processor(ds[0]["audio"]["array"])
        >>> encoder_outputs = encoder(**inputs)

        >>> print(encoder_outputs.last_hidden_state.shape)
        ```
        r(   )devicer   )pr   )target_length)r   inputs_embedsr   FT)r   r   )last_hidden_state)r  r  r   aranger   r  	unsqueezer   
functionalr   r   r  _get_output_attention_maskr   r   r
  randr  r  r   )
rQ   r   r   rU   r   r   r   encoder_layerto_dropdropout_probabilitys
             rY   r   zLasrEncoder.forward  s   > 7??5<<(;(;A(>}G[G[\ffghi
S --mt||VZVcVc-dmm##C4+A+ADMM#Zmm##C4+A+ADMM#Z%!<<^[h[n[nop[q<rN2;;')
 "[[ 	MG}}&+jjn#&7"G -!!#1),c
! 	!	  m4??rn   r   )ri   rj   rk   rt   r   base_model_prefixrM   r   r   r   r   r   r   r   r   r   r   r   r   s   @rY   r   r     s     !0 "  /3?@?@ t+?@ +,	?@
 
?@     ?@rn   r   c                        e Zd Z fdZ xZS )
LasrForCTCc                  8     t               j                  di | S )a  
        Example:

        ```python
        >>> from transformers import AutoProcessor, LasrForCTC
        >>> from datasets import load_dataset, Audio

        >>> model_id = TODO
        >>> processor = AutoProcessor.from_pretrained(model_id)
        >>> model = LasrForCTC.from_pretrained(model_id)

        >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        >>> ds = ds.cast_column("audio", Audio(sampling_rate=processor.feature_extractor.sampling_rate))

        >>> inputs = processor(ds[0]["audio"]["array"], text=ds[0]["text"])
        >>> predicted_ids = model.generate(**inputs)
        >>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

        >>> print(transcription)
        ```
        r=   )r   generate)super_kwargsr   s    rY   r  zLasrForCTC.generate  s    ,  uw/,//rn   )ri   rj   rk   r  r   r   s   @rY   r  r    s    0 0rn   r  )r  r   r   rp   rt   r   r&   )Arb   collections.abcr   r   huggingface_hub.dataclassesr   
tokenizersr   r   r   r   r	   tokenizers.modelsr
   r   masking_utilsr   modeling_outputsr   modeling_utilsr   r   processing_utilsr   tokenization_utils_tokenizersr   utilsr   r   r   utils.genericr   utils.output_capturingr   llama.modeling_llamar   r   r   r   parakeet.configuration_parakeetr   r   parakeet.modeling_parakeetr   r    r!   r"   parakeet.processing_parakeetr#   t5.tokenization_t5r$   r&   rp   rt   r   Moduler   r   r   r   r   r   r   r  __all__r=   rn   rY   <module>r4     sZ    $  . S S %  6 / F & > I I 7 5 v v V  = ,d
K!2 d
N	% 	 ?+7#- 7#  ,7#t ?+>% >  ,>@+RYY +8 <!5 ;')> ')T\#C \.+ .b1 & 
X@% X@
X@v0 04rn   