
    iM                     `   d dl Z d dlmZmZ d dlmZ d dlZd dlmZ d dl	mc m
Z ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$  ejJ                  e&      Z'ee G d de                    Z(ee G d de                    Z) G d dejT                        Z+ G d dejT                        Z, ed       G d dejT                               Z- G d dejT                        Z.	 d1dejT                  dej^                  dej^                  d ej^                  d!ej^                  dz  d"e0d#e0e1z  d$ee   fd%Z2 G d& d'ejT                        Z3 G d( d)ejT                        Z4e G d* d+e             Z5e G d, d-e5             Z6 G d. d/e5      Z7g d0Z8y)2    N)CallableSequence)	dataclass   )initialization)use_kernel_forward_from_hub)FlashAttentionKwargs)BaseModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuplelogging)merge_with_config_defaults)capture_outputs   )TimesFmConfigc                   b    e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   y)TimesFmOutputz
    loc (`torch.Tensor` of shape `(batch_size, )`):
        The mean of the time series inputs.
    scale (`torch.Tensor` of shape `(batch_size,)`):
        The scale of the time series inputs.
    Nlocscale)	__name__
__module____qualname____doc__r   torchTensor__annotations__r        }/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/timesfm/modeling_timesfm.pyr   r   ,   s/      $C	#!%E5<<$%r"   r   c                       e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	ej                  e
z  dz  ed<   y)TimesFmOutputForPredictiona  
    mean_predictions (`torch.Tensor` of shape `(batch_size, sequence_length)`):
        The mean predictions of the time series.
    full_predictions (`torch.Tensor` of shape `(batch_size, sequence_length)`):
        The full predictions of the time series including the mean and the quantiles.
    loss (`torch.Tensor` of shape `(1,)`, *optional*, returned when `future_values` is provided):
        The loss of the TimesFM model.
    Nmean_predictionsfull_predictionsloss)r   r   r   r   r&   r   r   r    r'   r(   floatr!   r"   r#   r%   r%   :   sI     -1ellT)0,0ellT)0(,D%,,

%,r"   r%   c                   0     e Zd ZdZdef fdZddZ xZS )
TimesFmMLPzPax MLP in pytorch.configc                     t         |           |j                  }|j                  }t	        j
                  ||      | _        t	        j
                  ||      | _        t	        j                  |d      | _	        y )Nư>)normalized_shapeeps)
super__init__hidden_sizeintermediate_sizennLinear	gate_proj	down_proj	LayerNorm
layer_norm)selfr,   r3   r4   	__class__s       r#   r2   zTimesFmMLP.__init__N   s]    (("44;0AB#4kB,,Nr"   c                     | j                  |      }| j                  |      }t        j                  |      }| j	                  |      }||d|d d d d d f   z
  z  }||z   S )N      ?)r:   r7   Frelur8   )r;   xpaddingsgate_inpgateoutputss         r#   forwardzTimesFmMLP.forwardW   sc    ??1%~~h'vvd|..&x1d
';!;<G{r"   Nr   r   r   r   r   r2   rF   __classcell__r<   s   @r#   r+   r+   K   s    O} Or"   r+   c                   (     e Zd ZdZ fdZd Z xZS )TimesFmResidualBlockzTimesFM residual block.c                     t         |           || _        || _        || _        t        j                  ||      | _        t        j                         | _	        t        j                  ||      | _
        t        j                  ||      | _        y rG   )r1   r2   
input_dimshidden_dimsoutput_dimsr5   r6   input_layerSiLU
activationoutput_layerresidual_layer)r;   rN   rO   rP   r<   s       r#   r2   zTimesFmResidualBlock.__init__d   sk    $&&99Z='')IIk;? ii
K@r"   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }||z   S rG   )rQ   rS   rT   rU   )r;   rA   hiddenoutputresiduals        r#   rF   zTimesFmResidualBlock.forwardo   sK    !!!$(""6*&&q)  r"   )r   r   r   r   r2   rF   rI   rJ   s   @r#   rL   rL   a   s    !	A!r"   rL   RMSNormc                   h     e Zd Zddeddf fdZdej                  dej                  fdZd Z xZ	S )	TimesFmRMSNormr0   returnNc                     t         |           t        j                  t	        j
                  |            | _        || _        y)z=
        TimesFmRMSNorm is equivalent to T5LayerNorm
        N)r1   r2   r5   	Parameterr   onesweightvariance_epsilon)r;   r3   r0   r<   s      r#   r2   zTimesFmRMSNorm.__init__y   s1     	ll5::k#:; #r"   hidden_statesc                 "   |j                   }|j                  t        j                        }|j	                  d      j                  dd      }|t        j                  || j                  z         z  }| j                  |j                  |      z  S )N   T)keepdim)	dtypetor   float32powmeanrsqrtrb   ra   )r;   rc   input_dtypevariances       r#   rF   zTimesFmRMSNorm.forward   sy    #))%((7 $$Q',,R,>%Ht?T?T4T(UU{{]--k:::r"   c                 ^    t        | j                  j                         d| j                   S )Nz, eps=)tuplera   shaperb   )r;   s    r#   
extra_reprzTimesFmRMSNorm.extra_repr   s*    ))*+6$2G2G1HIIr"   )r.   )
r   r   r   r)   r2   r   r   rF   rs   rI   rJ   s   @r#   r\   r\   w   s7    $ $$ $;U\\ ;ell ;Jr"   r\   c                   0     e Zd ZdZdef fdZddZ xZS )TimesFmPositionalEmbeddingz6Generates position embedding for a given 1-d sequence.r,   c           
         t         |           |j                  }|j                  }||c| _        | _        |j                  | _        | j
                  dz  }t        j                  t        |      t        |      z        t        |dz
  d      z  }| j                  d|t        j                  t        j                  |t        j                        | z        z         y )Nre   r   inv_timescalesrh   )r1   r2   min_timescalemax_timescaler3   embedding_dimsmathlogr)   maxregister_bufferr   exparangerj   )r;   r,   ry   rz   num_timescaleslog_timescale_incrementr<   s         r#   r2   z#TimesFmPositionalEmbedding.__init__   s    ,,,,1>.D.$00,,1"&((5+?%BV+V"WZ]^lop^prsZt"tEIIell>&W[rZr&rss	
r"   c                 N   ||t        d      |Jt        j                  |t        j                  | j                  j
                        j                  d      }n'|j                  dk7  rt        d|j                          |j                  g |j                  d | j                  j                  ddd      z  }t        j                  t        j                  |      t        j                  |      gd      }t        j                  |ddd| j                  dz  f      }|S )	a  Generates a Tensor of sinusoids with different frequencies.

        Args:
            seq_length: an optional Python int defining the output sequence length.
              if the `position` argument is specified.
            position: [B, seq_length], optional position for each token in the
              sequence, only required when the sequence is packed.

        Returns:
            [B, seqlen, D] if `position` is specified, else [1, seqlen, D]
        z.Either position or seq_length must be providedrh   devicer   re   z*position must be 2-dimensional, got shape r   rf   dim)
ValueErrorr   r   rj   rw   r   	unsqueezendimrr   viewcatsincosr?   padr{   )r;   
seq_lengthpositionscaled_timesignals        r#   rF   z"TimesFmPositionalEmbedding.forward   s     
 2MNN||JemmDL_L_LfLfgqqrstH]]aI(..IYZ[[#hmm7X^^7Q7$:M:M:R:RSTVWY[:\\EIIk2EIIk4JKQRS v1a)<)<q)@ABr"   NNrH   rJ   s   @r#   ru   ru      s    @
} 
r"   ru   modulequery_states
key_statesvalue_statesattention_maskscalingdropoutkwargsc                    t        j                  ||j                  dd            |z  }|||z   }t        j                  j                  |dt         j                        j                  |j                        }t        j                  j                  ||| j                        }t        j                  ||      }	|	j                  dd      j                         }	|	|fS )Nre   r   rf   )r   rh   )ptrainingr   )r   matmul	transposer5   
functionalsoftmaxrj   ri   rh   r   r   
contiguous)
r   r   r   r   r   r   r   r   attn_weightsattn_outputs
             r#   simple_eager_attention_forwardr      s     <<j.B.B1a.HIGSL!#n4==((2U]](SVVWcWiWijL==((6??([L,,|\:K''1-88:K$$r"   c                        e Zd ZdZdedef fdZdej                  dej                  fdZ		 dd	ej                  d
ej                  dz  de
e   deej                  ej                  dz  f   fdZ xZS )TimesFmAttentionzlImplements the attention used in TimesFM. One key difference is that there is _per_dim_scaling of the query.r,   	layer_idxc                    t         |           || _        d| _        |j                  | _        || _        |j                  | _        |j                  | _        |j                  | _	        | j                  | j                  z  | _
        | j                  | j                  z  | _        t        j                  t        j                  | j                  f            | _        t        j"                  | j                  | j                  | j                  z        | _        t        j"                  | j                  | j                  | j                  z        | _        t        j"                  | j                  | j                  | j                  z        | _        t        j"                  | j                  | j                  z  | j                        | _        y )NT)r1   r2   r,   	is_causalattention_dropoutr   num_attention_heads	num_headsr3   head_dimq_sizekv_sizer5   r_   r   emptyr   r6   q_projk_projv_projo_projr;   r,   r   r<   s      r#   r2   zTimesFmAttention.__init__   s3   !'!9!9"33!--nnt}}4~~5||EKK0@$ABii 0 0$..4==2PQii 0 0$..4==2PQii 0 0$..4==2PQii >@P@PQr"   queryr]   c                     t        j                  | j                        j                  dt	        j
                  | j                        z        }||d d d d d f   z  S )Ng^$3eG?)r?   softplusr   mulr|   sqrtr   )r;   r   r   s      r#   _scale_queryzTimesFmAttention._scale_query   sJ    

4<<(,,[499T]];S-STuT4q0111r"   Nrc   r   r   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }| j                  |      }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }t        j                  | j                  j                  t              }	 |	| ||||f| j                  sdn| j                  dd|\  }
} |
j                  g |d j!                         }
| j#                  |
      }
|
|fS )Nrf   r   re           r>   )r   r   )rr   r   r   r   r   r   r   r   r   get_interfacer,   _attn_implementationr   r   r   reshaper   r   )r;   rc   r   r   input_shapehidden_shaper   r   r   attention_interfacer   r   s               r#   rF   zTimesFmAttention.forward   sW    $))#2.88b8$--8{{=166|DNNqRST((6[[/44\BLLQPQR
{{=166|DNNqRST(?(M(MKK,,.L)
 %8	%
  $}}C$2H2H	%
 	%
!\ *k));;;;FFHkk+.L((r"   rG   )r   r   r   r   r   intr2   r   r   r   r   r	   rq   rF   rI   rJ   s   @r#   r   r      s    vR} R R(2%,, 25<< 2 /3)||) t+) -.	)
 
u||U\\D00	1)r"   r   c                        e Zd ZdZdedef fdZdej                  dej                  dej                  dej                  fd	Z	 xZ
S )
TimesFmDecoderLayerzTransformer layer.r,   r   c                     t         |           t        ||      | _        t	        |      | _        t        |j                  |j                        | _	        y )N)r   )r0   )
r1   r2   r   	self_attnr+   mlpr\   r3   rms_norm_epsinput_layernormr   s      r#   r2   zTimesFmDecoderLayer.__init__  sC    )&IFf%-f.@.@fFYFYZr"   rc   r   rB   r]   c                     |}| j                  |      }| j                  ||      \  }}||z   }| j                  ||      }|S )N)rc   r   )rB   )r   r   r   )r;   rc   r   rB   r   rY   _s          r#   rF   zTimesFmDecoderLayer.forward  s]     !,,];>>') * 
q !=0 Br"   )r   r   r   r   r   r   r2   r   r   rF   rI   rJ   s   @r#   r   r     sW    [} [ [||  ,,	 
r"   r   c                   r     e Zd ZU eed<   dZdgZdZdZdZ	e
edZ ej                          fd       Z xZS )	TimesFmPreTrainedModelr,   timesfmr   past_values)timeT)rc   
attentionsc           
      "   t         |   |       t        |t              r t	        j
                  |j                         y t        |t              r|j                  dz  }|j                  |j                  }}t        j                  t        |      t        |      z        t        |dz
  d      z  }t	        j                  |j                   |t#        j$                  t#        j&                  |t"        j(                        | z        z         y y )Nre   r   rx   )r1   _init_weights
isinstancer   initones_r   ru   r{   rz   ry   r|   r}   r)   r~   copy_rw   r   r   r   rj   )r;   r   r   rz   ry   r   r<   s         r#   r   z$TimesFmPreTrainedModel._init_weights9  s    f%f./JJv~~& :;#22a7N+1+?+?AUAU=M&*hhu]/CeMFZ/Z&[^a"A_ '# JJ%%))ELLu}}MQhPhhij <r"   )r   r   r   r   r    base_model_prefix_no_split_modulesmain_input_nameinput_modalities_supports_sdpar   r   _can_record_outputsr   no_gradr   rI   rJ   s   @r#   r   r   ,  sR    !./#O N,&
 U]]_ r"   r   c                       e Zd Zdef fdZdej                  dej                  deej                  eej                  ej                  f   f   fdZe	e
edej                  dej                  d	ej                  d
ee   def
d                     Ze	 ddej                  dz  dedej&                  dej(                  dedej                  dz  fd       Zedej                  dej                  deej                  ej                  f   fd       Zedej                  dej                  dej                  fd       Z xZS )TimesFmModelr,   c           	         t         |   |       || _        t        d|j                  z  |j
                  |j                        | _        t        j                  |j                  |j
                        | _        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        | j                  j"                  rt%        |      | _        | j)                          y c c}w )Nre   rN   rP   rO   )num_embeddingsembedding_dim)r,   )r1   r2   r,   rL   patch_lengthr3   r4   input_ff_layerr5   	Embedding	freq_sizefreq_emb
ModuleListrangenum_hidden_layersr   layersuse_positional_embeddingru   position_emb	post_initr   s      r#   r2   zTimesFmModel.__init__N  s     26...**00

 F4D4DTZTfTfgmmEJ6KcKcEde	 3e
 ;;// :& ID 	 fs   "C9inputspatched_padsr]   c                    | j                  ||      \  }}t        j                  || j                  j                        }||ddddf   z
  |ddddf   z  }t        j
                  t        j                  || j                  j                  z
        | j                  j                  k  t        j                  | j                  j                  |j                  |j                        |      }|||ffS )zInput is of shape [B, N, P].minNr   )_timesfm_masked_mean_stdr   clampr,   	tolerancewhereabspad_valtensorrh   r   )r;   r   r   musigmarE   s         r#   _forward_transformzTimesFmModel._forward_transforma  s     11&,G	EEt{{'<'<= Bq$}--q$}1EE++IIft{{2223dkk6K6KKLL,,GMM'..Y

 U##r"   r   past_values_paddingfreqr   c                 r   |j                   d   }|j                  |d| j                  j                        }|j                  |d| j                  j                        }t	        j
                  t	        j                  |dz
        | j                  j                  k  t	        j                  d|j                  |j                        |      }t	        j
                  t	        j                  || j                  j                  z
        | j                  j                  k  t	        j                  d|j                  |j                        |      }| j                  ||      \  }}|d|z
  z  }t	        j                  ||gd      }	| j                  |	      }
t	        j                  |d      d   }| j                  j                   r]| j#                  |
j                   d         }t	        j$                  |g|
j                   d   z  d      }| j'                  ||      }|
|z  }
| j)                  |      }|
|z  }
|
}| j+                  ||j                   d   |j                  |j                  d	      }| j,                  d
| j                  j.                   D ]  } ||f||d|} t1        ||d   |d         S )a  
        past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
            Past values of the time series that serves as input to the model.
        past_values_padding (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            The padding indicator of the time series.
        freq (`torch.LongTensor` of shape `(batch_size,)`):
            Frequency indices for the time series data.
        r   rf   r>   r   r   r   r   T)r   sequence_lengthrh   r   r   N)r   rB   )last_hidden_stater   r   )rr   r   r,   r   r   r   r   r   r  rh   r   r  r  r   r   r   r   r   concat_timesfm_shift_padded_seqr   _prepare_4d_attention_maskr   r   r   )r;   r   r  r  r   bsizepatched_inputsr   statsconcat_inputsmodel_inputpatched_paddingpos_embf_embrc   r   layers                    r#   rF   zTimesFmModel.forwardq  s   & !!!$$))%T[[5M5MN*//r4;;;S;STIIlS()DKK,A,AALLN$8$8AVAVW

 {{IInt{{':'::;dkk>S>SSLLL$6$6|?R?RS

 !% 7 7 U (3+=>		><"@bI))-8  ))Lb9!<;;//''(9(9!(<=GllG9{/@/@/C#CKG44_gNG7"Kd#u $88*)//2%% '' 9 
 [[!@4;;#@#@A 	E!-( 	M	 +a(
 	
r"   r   Nr	  rh   r   r   c                    |j                   rt        j                  |      j                  nt        j                  |      j                  }| &| j                  | j                  d   ddd      } | |z  } |rbt        j                  t        j                  ||f||      |z  d      }|j                  dd||      }| t        j                  | |      } | S |} | S )a  
        Creates 4D attention mask and combines causal and padding masks if needed.

        Args:
            attention_mask: Optional tensor of shape (batch_size, seq_length) containing padding mask
            sequence_length: Length of the sequence
            dtype: Data type of the mask
            device: Device of the mask
            is_causal: Whether to apply causal masking

        Returns:
            4D attention mask of shape (batch_size, 1, seq_length, seq_length)
        r   r   rf   r   )diagonal)
is_floating_pointr   finfor   iinfor   rr   triur`   minimum)r   r	  rh   r   r   	min_valuecausal_masks          r#   r  z'TimesFmModel._prepare_4d_attention_mask  s    , /4.E.EEKK&**5;;W\K]KaKa	 %+001E1Ea1H!QPRSN+i7N **

O_=USYZ]ffK &**1a/RK )!&~{!K  "-r"   paddingc                 F   dt         j                  fd}t        j                  d|z
  d      } ||      }t        j                  | j                  d         }| ||ddf   }|||ddf   }d|z
  }t        j                  |d      }	t        j
                  |	d	      }	t        j                  ||z  d      }
|
|	z  }||j                  d
      z
  |z  }t        j                  |dz  d      |	z  }t        j
                  |d	      }t        j                  |      }||fS )a  Calculates mean and standard deviation of `inputs` across axis 1.

        It excludes values where `padding` is 1.

        Args:
            inputs: A PyTorch tensor of shape [b, n, p].
            padding: A PyTorch tensor of shape [b, n, p] with values 0 or 1.

        Returns:
            A tuple containing the mean and standard deviation.
            We return the statistics of the first patch with more than three non-padded values.
        arrc                 (   t        j                  | dk\  j                  t         j                        d      }| dk\  j                  t         j                        j	                  d      }t        j
                  |dk(  | j                  d   dz
  |      S )Nr   r   r   r   )r   argmaxri   int32sumr   rr   )r"  indicesrow_sums      r#   _get_patch_indexz?TimesFmModel._timesfm_masked_mean_std.<locals>._get_patch_index  sk    llC1H==#=1EGaxmmEKK0444;G;;w!|SYYq\A-=wGGr"   r   re   r   r   Nr>   r   rf   r   )r   r   r&  r   rr   r   r   r   )r   r   r)  pad_sumpatch_indicesbidxsr"  r   masknum_valid_elements
masked_summasked_meanmasked_centered_arr
masked_var
masked_stds                  r#   r   z%TimesFmModel._timesfm_masked_mean_std  s    	H%,, 	H
 ))AKQ/(1V\\!_-UM1,-e]A-. 3w #YYt3"[[);E YYsTzq1
 #55  #[%:%:2%>>$FYY2A51=@RR
[[5
ZZ
+
J&&r"   r-  seqc                    |j                   \  }}}| dk(  }|j                  t        j                        j	                  d      }d||j                  d       <   t        j                  ||j                        j                  ddd      j                  |d|      }||ddddf   z
  |z  }|j                  d|      }	|	S )zShifts rows of seq based on the first 0 in each row of the mask.

        Args:
            mask: mask tensor of shape [B, N]
            seq: seq tensor of shape [B, N, P]

        Returns:
            The shifted sequence.
        r   r   r   rf   )r   N)rr   ri   r   r%  r$  anyr   r   r   expandgather)
r-  r4  
batch_sizenum_seqfeature_dimnew_maskr'  	idx_rangeshifted_idxshifted_seqs
             r#   r  z&TimesFmModel._timesfm_shift_padded_seq  s     ,/99(
G[%)QY ++ekk*11a18 )+!$$% LL<AA!RKRRS]_acno	 !71dD=#99WD jjK0r"   )T)r   r   r   r   r2   r   r   rq   r  r   r   r   
LongTensorr   r   r   rF   staticmethodr   rh   r   boolr  r   r  rI   rJ   s   @r#   r   r   L  s   } &$ll$27,,$	u||U5<<#=>>	?$   F
\\F
 #--F
 ll	F

 +,F
 
F
    F
P  +t+++ {{+ 	+
 + 
	+ +Z ,' ,' ,'QVW\WcWcejeqeqWqQr ,' ,'\  5<< ELL  r"   r   c                   R    e Zd ZdZdef fdZ	 ddeej                     dee	   dz  de	dz  de
ej                  d	f   fd
Zdej                  de
ej                  ej                  f   dej                  fdZdej                  dej                  dej                  fdZee	 	 	 	 	 	 ddeej                     deej                  e	z     dz  de	dz  dej                  dz  de	dz  dededee   defd              Zedej                  de	deej                     fd       Z xZS )TimesFmModelForPredictionz/TimesFM model for quantile and mean prediction.r,   c                 J   t         |   |       || _        |j                  | _        |j
                  | _        t        |      | _        t        |j                  |j
                  dt        |j                        z   z  |j                        | _        | j                          y )Nr   r   )r1   r2   r,   context_lengthcontext_lenhorizon_lengthhorizon_lenr   decoderrL   r3   len	quantilesr4   horizon_ff_layerr   )r;   r,   r<   s     r#   r2   z"TimesFmModelForPrediction.__init__=  s     !00!00#F+ !5))--S9I9I5J1JK00!
 	r"   Nr   r  rG  r]   .c                 X   || j                   }g g }}|D ]  }|j                  d   }t        j                  || j                  z   |j
                  |j                        }||k  r||z
  }	t        j                  t        j                  |	|j
                  |j                        |gd      }t        j                  t        j                  |	|j
                  |j                        |gd      }n||kD  r|| d }||| j                  z    d }|j                  |       |j                  |        t        j                  |d      t        j                  |d      f}
|E|
t        j                  |dt        |       t        j                        j                  dd      fz   }
|
S )a  Pad/truncate input time series to `context_len` and build a padding mask.

        Args:
            inputs: A list of 1d Tensors. Each Tensor is the context time series of a single forecast task.
            freq: Optional list of frequencies (returned as a tensor when provided).
            context_len: Optional context length override (defaults to `self.context_len`).

        Returns:
            Tuple of (padded_inputs, padding_mask) and optionally a freq tensor.
        Nr   r   r   rx   rf   r   )rG  rr   r   zerosrI  rh   r   r   r`   appendstackr  rK  r%  r   )r;   r   r  rG  input_tsinput_paddingts	input_lenr   num_front_padresults              r#   _preprocessz%TimesFmModelForPrediction._preprocessP  s    **K"$b- 	*BIkk)d.>.>">bhhWYW`W`aG;& +i 7YYMRTR[R[ \^`aghi))UZZRXXV]VdVd%egn$ouvw[(&!K$2B2B$B"C"EFOOB  )	* ++hA.Mq0QRu||D3v;,?u{{S[[\^`abddFr"   model_outputr  c                    | j                  |      }|j                  \  }}}|j                  ||| j                  j                  t        | j                  j                        dz         }|\  }}||dddddf   z  |dddddf   z   S )z*Postprocess output of stacked transformer.r   N)rM  rr   r   r,   rH  rK  rL  )	r;   rY  r  	output_tsbnr   r  r  s	            r#   _postprocess_outputz-TimesFmModelForPrediction._postprocess_outputu  s     )),7	 //1aNN1a)C)CSI^I^E_bcEcd		E5D$!4551dD$;N8OOOr"   predictionstargetsc                 *   g }t        | j                  j                        D ]M  \  }}||d|f   z
  }t        j                  |dz
  |z  ||z        }|j                  |j                                O t        j                  |      j                         S )N.r   )	enumerater,   rL  r   r~   rP  rl   rQ  )r;   r_  r`  lossesiqerrorsr(   s           r#   _quantile_lossz(TimesFmModelForPrediction._quantile_loss  s    dkk334 	'DAq{3622F99a!ev-q6z:DMM$))+&	' {{6"''))r"   r   window_sizefuture_valuesforecast_context_lenreturn_forecast_on_contexttruncate_negativer   c           
         || j                   }	n|}	|d   j                  }
|D cg c]  }||	 d 
 }}t        j                  t        j                  |D cg c]  }t        j                  |       c}            }|Yg }g }t        |      D ]A  \  }}|j                  | j                  ||             |*|j                  ||   gdz         C |}||}|$t        j                  d       dgt        |      z  }| j                  ||      \  }}}|j                  |
      }|j                  |
      }|j                  |
      }|}|j                  d   }g }|j                  d   |j                  d   | j                  z   k7  r8t        d|j                  d    d|j                  d    d| j                         | j                   j"                  }| j                  |z   dz
  |z  }t%        |      D ]/  }|ddd|j                  d   f   }|dd|	 df   }|dd|	 df   } | j&                  d|||d	|}| j)                  |j*                  |j,                  |j.                  f      }|rl|dk(  rg|dddd
d| j                   j0                  ddf   }|j3                  |j5                  d      d
|j5                  d            }|j7                  |       |ddd
d|df   }|ddd
d|ddf   }|j7                  |       t        j8                  ||gd
      }2 |rHt        j8                  |d      ddd|| j                   j0                  z
  | j                  z   ddf   }n-t        j8                  |d      ddd| j                  ddf   }|dddddf   }|*|ddddf   |ddddf   z   }|ddddf   |ddddf   z   }|dk\  r.|r,t        j:                  |d      }t        j:                  |d      }d} |9t=        j>                  ||      }!| jA                  |ddddddf   |      }"|!|"z   } tC        j*                  |jD                  |jF                  |||       S c c}w c c}w )a  
        past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
            Past values of the time series that serves as input to the model.
        freq (`torch.LongTensor` of shape `(batch_size,)`):
            Frequency indices for the time series data.
        window_size (`int`, *optional*):
            Window size of trend + residual decomposition. If None then we do not do decomposition.
        future_values (`torch.Tensor`, *optional*):
            Optional future time series values to be used for loss computation.
        forecast_context_len (`int`, *optional*):
            Optional max context length.
        return_forecast_on_context (`bool`, *optional*):
            True to return the forecast on the context when available, i.e. after the first input patch.
        truncate_negative (`bool`, *optional*):
            Truncate to only non-negative values if any of the contexts have non-negative values,
            otherwise do nothing.

        Example:

        ```python
        >>> from transformers import TimesFmModelForPrediction

        >>> model = TimesFmModelForPrediction.from_pretrained("google/timesfm-2.0-500m-pytorch")

        >>> forecast_input = [torch.linspace(0, 20, 100).sin(), torch.linspace(0, 20, 200).sin(), torch.linspace(0, 20, 400).sin()]
        >>> frequency_input = torch.tensor([0, 1, 2], dtype=torch.long)

        >>> # Generate
        >>> with torch.no_grad():
        >>>     outputs = model(past_values=forecast_input, freq=frequency_input, return_dict=True)
        >>>     point_forecast_conv = outputs.mean_predictions
        >>>     quantile_forecast_conv = outputs.full_predictions
        ```
        Nr   re   z6No frequency provided via `freq`. Default to high (0).r   z=Length of paddings must match length of input + horizon_len: z != z + )r   r  r  rf   r   )axis.r   )r
  r   rc   r&   r'   r(   r!   )$rG  r   r   r   rQ  rb  extend_timesfm_moving_averageloggerinforK  rX  ri   rr   rI  r   r,   rH  r   rJ  r^  r
  r   r   r   r   sizerP  concatenatemaximumr?   mse_lossrg  r%   r   rc   )#r;   r   r  rh  ri  rj  rk  rl  r   fcontext_lenr   rT  r   inp_min
new_inputs	new_freqsrd  rR  rS  inp_freq	final_outrG  full_outputsoutput_patch_lennum_decode_patches
step_indexcurrent_paddingdecoder_outputfprop_outputsnew_full_tsnew_tsmean_outputsr(   rv  quantile_losss#                                      r#   rF   z!TimesFmModelForPrediction.forward  s   ^  '++L/LQ&&/:;"l]^$;;))EKK(H22(HIJ"JI"6* 42!!$">">r;"OP#$$d1gY]34  F <KKPQ3V$D,0,<,<VT,J)-;;v&%((0;;v&	ooa(q!Y__Q%7$:J:J%JJ!''*+4	0B/C3tGWGWFXZ   ;;55"..1AAAEJZZ 23 	HJ+Aq9??13E/E,EFO \MN!23H+A}~,=>M,8DLL -$$1- 	-N !4400##^%9%9:M
 *jAo+Ass4Ndkk6N6N4NPQ,QR)11+2B2B12Er;K[K[\]K^_##K0"1b*;+;*;Q#>?F'2/@0@/@!(CDK,))9f*=BGI/	H2 & ,,\BPkDKK$<$<<t?O?OOPRSSL !,,\B1a$JZJZFZ\]C]^L#Aq!G,"'1c	2\!$Q$)5LLL'1c	2\!$Q$)5LLLa<- ==s;L ==s;L$zz,>H //Q12X0FVMm+D),>>%00(66))
 	
c <(Hs   Q5Q:r"  c                 4   t        j                  | |dz
  dfdd      }t        j                  || j                  | j
                        |z  }t        j                  |j                  ddd      |j                  ddd            j                         }|| |z
  gS )zCCalculates the moving average using PyTorch's convolution function.r   r   constantr   rf   )	r?   r   r   r`   rh   r   conv1dr   squeeze)r"  rh  
arr_paddedkernelsmoothed_arrs        r#   rp  z1TimesFmModelForPrediction._timesfm_moving_average  s     UU3q! 4j!D
KsyyL{Zxx
1b 96;;q!R;PQYY[cL011r"   r   )NNNNFF)r   r   r   r   r   r2   r   r   r   r   rq   rX  r^  rg  r   r   rB  r   r   r%   rF   rA  listrp  rI   rJ   s   @r#   rD  rD  :  s   9} ( lp#u||,#4<SMD4H#^adh^h#	u||S 	!#JP!LLP16u||U\\7Q1RP	P*%,, * *RWR^R^ *  59"&-1+/+0"'L
ell+L
 u||c)*T1L
 4Z	L

 ||d*L
 "DjL
 %)L
  L
 +,L
 
$L
  L
\ 2U\\ 2 2U\\HZ 2 2r"   rD  )rD  r   r   )r   )9r|   collections.abcr   r   dataclassesr   r   torch.nnr5   torch.nn.functionalr   r?    r   r   integrationsr   modeling_flash_attention_utilsr	   modeling_outputsr
   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   utils.genericr   utils.output_capturingr   configuration_timesfmr   
get_loggerr   rq  r   r%   Moduler+   rL   r\   ru   r   r)   r   r   r   r   r   r   rD  __all__r!   r"   r#   <module>r     s  *  . !     & 7 B / F & R R 7 5 0 
		H	% 	&O 	&  	& - -  - ,!299 !, Y'JRYY J (J(+ +j %II%,,% % ,,	%
 LL4'% % S[% '(%,9)ryy 9)x")) @ _  > j) j jZk2 6 k2\ Rr"   