
    iB                        d dl Z d dl mZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZmZ d
dlmZ d Z G d dej(                        Z G d dej(                        Z G d dej(                        Z G d dej(                        Z G d dej(                        Z G d dej(                        Z G d dej(                        Ze G d de
             Z ed       G d d e             Zd dgZy)!    N)nn   )initialization)load_backbone)DepthEstimatorOutput)PreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuple   )CHMv2Configc                     | j                   ,t        | j                   d      r| j                   j                  S | j                  S )Nhidden_size)backbone_confighasattrr   )configs    y/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/transformers/models/chmv2/modeling_chmv2.py_get_backbone_hidden_sizer   "   s;    )gf6L6Lm.\%%111!!!    c                   2     e Zd Zdededef fdZd Z xZS )CHMv2ReassembleLayerr   channelsfactorc           	      \   t         |           t        |      }t        j                  ||d      | _        |dkD  r t        j                  ||||d      | _        y |dk(  rt        j                         | _        y |dk  r,t        j                  ||dt        d|z        d      | _        y y )Nr   )in_channelsout_channelskernel_sizer   r   stridepaddingr   )
super__init__r   r   Conv2d
projectionConvTranspose2dresizeIdentityint)selfr   r   r   r   	__class__s        r   r#   zCHMv2ReassembleLayer.__init__*   s    /7))(`ab A:,,XxV\blmnDKq[++-DKaZ))HhAcRSV\R\oghiDK r   c                 J    | j                  |      }| j                  |      }|S N)r%   r'   )r*   hidden_states     r   forwardzCHMv2ReassembleLayer.forward9   s$    |4{{<0r   )__name__
__module____qualname__r   r)   r#   r/   __classcell__r+   s   @r   r   r   )   s&    j{ jc j3 jr   r   c                   n     e Zd ZdZdef fdZddeej                     deej                     fdZ	 xZ
S )CHMv2ReassembleStagez
    Reassemble stage that processes hidden states from the backbone into image-like feature
    representations at various resolutions.
    r   c           	         t         |           || _        |j                  | _        t	        j
                         | _        t        |j                  |j                        D ],  \  }}| j                  j                  t        |||             . t        |      }| j                  dk(  rt	        j
                         | _        t        t        | j                              D ]Z  }| j                  j                  t	        j                   t	        j"                  d|z  |      t	        j$                                      \ y y )N)r   r   r   project   )r"   r#   r   readout_typer   
ModuleListlayerszippost_process_channelsreassemble_factorsappendr   r   readout_projectsrangelen
SequentialLinearGELU)r*   r   r   r   r   _r+   s         r   r#   zCHMv2ReassembleStage.__init__E   s    "//mmo$'(D(DfF_F_$` 	 L&KK$!)!	 07	)$&MMOD!3t{{+, p%%,,R]]299Q_Va;bdfdkdkdm-nop *r   hidden_statesreturnc                    g }t        |      D ]  \  }}t        |t        t        f      rt	        |      dk(  r|d   |d   }}|j
                  }| j                  dk(  r|j                  d      j                  dd      }|j                  d      j                  |      }	 | j                  |   t        j                  ||	fd            }|j                  ddd      j                  |      }n| j                  dk(  r|j                  d      |j                  d      z   }|j                  |      }nd|j!                         dk(  rQ|d d dd f   }|j
                  \  }
}}|j                  |
|||      }|j                  dddd      j#                         } | j$                  |   |      }|j'                  |        |S )Nr9   r   r   r8   addr   )	enumerate
isinstancetuplelistrC   shaper:   flatten	transpose	unsqueeze	expand_asrA   torchcatpermutereshapedim
contiguousr<   r@   )r*   rH   patch_heightpatch_widthout	layer_idxr.   	cls_tokenfeature_shapereadout
batch_sizerG   num_channelss                r   r/   zCHMv2ReassembleStage.forwardZ   s   '0'? 	%#I|,63|;LPQ;Q*6q/<?i , 2 2$$	1#/#7#7#:#D#DQ#JL'11!4>>|LG#C4#8#8#CEII|]dNegiDj#kL#/#7#71a#@#H#H#WL&&%/#/#7#7#:Y=P=PQS=T#TL#/#7#7#FL##%*#/12#6L2>2D2D/J<#/#7#7
LR]_k#lL#/#7#71a#C#N#N#PL14;;y1,?LJJ|$+	%. 
r   NN)r0   r1   r2   __doc__r   r#   rP   rV   Tensorr/   r3   r4   s   @r   r6   r6   ?   s?    
p{ p*T%,,%7 aefkfrfras r   r6   c                   Z     e Zd ZdZ fdZdej                  dej                  fdZ xZS )CHMv2PreActResidualLayerz
    ResidualConvUnit, pre-activate residual unit.

    Args:
        config (`[CHMv2Config]`):
            Model configuration class defining the model architecture.
    c                 V   t         |           t        j                         | _        t        j
                  |j                  |j                  dddd      | _        t        j                         | _        t        j
                  |j                  |j                  dddd      | _	        y )Nr   r   T)r   r    r!   bias)
r"   r#   r   ReLUactivation1r$   fusion_hidden_sizeconvolution1activation2convolution2r*   r   r+   s     r   r#   z!CHMv2PreActResidualLayer.__init__   s    779II%%%%
 779II%%%%
r   r.   rI   c                     |}| j                  |      }| j                  |      }| j                  |      }| j                  |      }||z   S r-   )rm   ro   rp   rq   )r*   r.   residuals      r   r/   z CHMv2PreActResidualLayer.forward   sR    ''5((6''5((6h&&r   )	r0   r1   r2   rf   r#   rV   rg   r/   r3   r4   s   @r   ri   ri   w   s(    
.'ELL 'U\\ 'r   ri   c                   2     e Zd Zddedef fdZddZ xZS )CHMv2FeatureFusionLayerr   is_first_layerc                     t         |           || _        t        j                  |j
                  |j
                  dd      | _        |st        |      | _        t        |      | _	        y )Nr   T)r   rk   )
r"   r#   rw   r   r$   rn   r%   ri   residual_layer1residual_layer2)r*   r   rw   r+   s      r   r#   z CHMv2FeatureFusionLayer.__init__   sW    ,))F$=$=v?X?Xfgnrs#;F#CD 7?r   c                    |o| j                   sc|j                  |j                  k7  r6|j                  \  }}}}t        j                  j	                  |||fdd      }|| j                  |      z   }| j                  |      }|ddind|i}t        j                  j                  |fi |ddd}| j                  |      }|S )	NbilinearF)sizemodealign_cornersscale_factorr9   r}   T)r~   r   )rw   rQ   r   
functionalinterpolatery   rz   r%   )r*   r.   rt   r}   rG   heightwidthmodifiers           r   r/   zCHMv2FeatureFusionLayer.forward   s    (;(;!!X^^3&2&8&8#1fe==44FE?SX 5  ($*>*>x*HHL++L9*.,NA&VTN}}00

 	
 |4r   )Fre   )r0   r1   r2   r   boolr#   r/   r3   r4   s   @r   rv   rv      s    	@{ 	@D 	@r   rv   c                   *     e Zd ZdZd fd	Zd Z xZS )CHMv2UpsampleConvHeadz
    Convolutional head with intermediate upsampling.

    Architecture: Conv3x3 -> 2x bilinear upsample -> Conv3x3 -> ReLU -> Conv1x1.
    c                 L   t         |           t        j                  t        j                  ||dz  ddd      t        j
                  ddd      t        j                  |dz  |ddd      t        j                         t        j                  ||ddd      g      | _        y )	Nr9   r   r   r   r|   T)r   r~   r   r   )r"   r#   r   r;   r$   Upsamplerl   head)r*   featuresnumber_output_channelsn_hidden_channelsr+   s       r   r#   zCHMv2UpsampleConvHead.__init__   s    MM		(HMqTUV4P		(a-):RS]^_			+-CQR[\fgh
	r   c                 8    | j                   D ]
  } ||      } |S r-   )r   )r*   rH   layers      r   r/   zCHMv2UpsampleConvHead.forward   s%    YY 	1E!-0M	1r   )   )r0   r1   r2   rf   r#   r/   r3   r4   s   @r   r   r      s    

r   r   c                        e Zd ZdZdef fdZdeej                     de	de	dej                  fdZ
deej                     de	de	dej                  fd	Z xZS )
	CHMv2Headz
    CHMv2 dense-prediction head adapted from DPT.

    Integrates reassemble, projection convs, feature fusion, and UpConv depth head.
    r   c           
      J   t         |           || _        t        |      | _        t        j                         | _        |j                  D ]?  }| j                  j                  t        j                  ||j                  ddd             A t        j                         | _        t        t        |j                              D ]+  }| j                  j                  t        ||dk(               - t!        |j                  |j"                  |j$                        | _        y )Nr   r   F)r   r!   rk   r   )rw   )r   r   r   )r"   r#   r   r6   reassemble_stager   r;   convsr>   r@   r$   rn   fusion_layersrB   rC   rv   r   r   head_hidden_size
conv_depth)r*   r   channelidxr+   s       r   r#   zCHMv2Head.__init__   s     4V <]]_
33 	sGJJbii1J1JXYcdkpqr	s  ]]_V99:; 	bC%%&=fVY]^V^&`a	b 0..#)#@#@$55
r   rH   r\   r]   rI   c                 b   | j                  |||      }t        |      D cg c]  \  }} | j                  |   |       }}}|j                           | j                  d   |d         }t        dt        | j                              D ]  } | j                  |   |||         } |S c c}}w )Nr   r   )r   rM   r   reverser   rB   rC   )r*   rH   r\   r]   ifeaturer   fused_hidden_states           r   forward_featureszCHMv2Head.forward_features   s    --m\;W=F}=UVzq'MDJJqM'*VV2T//28A;?q#d0012 	XA!6!3!3A!67I8TU;!W	X "! Ws   B+c                 N    | j                  |||      }| j                  |      }|S r-   )r   r   )r*   rH   r\   r]   r^   s        r   r/   zCHMv2Head.forward  s)    ##M<Mooc"
r   )r0   r1   r2   rf   r   r#   rP   rV   rg   r)   r   r/   r3   r4   s   @r   r   r      sy    
{ 
(
"d5<<.@ 
"PS 
"be 
"jojvjv 
"T%,,%7 s Y\ afamam r   r   c                        e Zd ZdZdef fdZdedej                  dej                  fdZ
dej                  d	ej                  dej                  fd
Zdej                  dej                  fdZ xZS )CHMv2FeaturesToDepthzJConverts raw logits from the CHMv2 head into a depth map using depth bins.r   c                     t         |           |j                  | _        |j                  | _        |j                  | _        |j
                  | _        d| _        d| _        d| _        y )Ng-C6?g:0yE>g-q=)	r"   r#   	min_depth	max_depthbins_strategynorm_strategy_mixlog_max_clamp_value_mixlog_eps_shift_mixlog_epsrr   s     r   r#   zCHMv2FeaturesToDepth.__init__  s\    ))))#11#11'+$!% r   n_binsdevicerI   c                    | j                   dz  }t        j                  | j                  |||      }t        j                  t        j                  t        j
                  t        j                  | j                  |            t        j
                  t        j                  ||            ||            }t        j                  dd||      }||z  d|z
  |z  z   }|S )z
        Creates mixed log bins interpolated between linear and log distributions.

        The max_depth is divided by 8.0 internally; this scaling is reversed in
        `_create_outputs_with_mixlog_norm` by multiplying by 8.0.
               @r         ?        )r   rV   linspacer   explogtensor)r*   r   r   scaled_max_depthlinearr   interp_weightbinss           r   _create_mixlog_binsz(CHMv2FeaturesToDepth._create_mixlog_bins  s      >>C/0@&QWXiiNN		%,,t~~fEF		%,,'7GH	
 sCGs"cM&9V%CCr   inputr   c                 &   t        j                  |      }|j                  dd      }| j                  d      j	                  | j
                        | j                  z   }||z   }|j                  dd      }t        j                  |ddd      j                  | j                        }||z  }|j                  dddd      j                  | j                        }	||	z  j                  dd      j                  | j                        }
|
dz  }
|
S )	zEConverts depth bin logits to depth values using mixlog normalization.r   TrZ   keepdimr   r   )nanposinfneginfrK   r   )rV   reluamin	clamp_min	clamp_maxr   r   sum
nan_to_numr   view)r*   r   r   logitsmin_per_sampleshift
logits_posdenomweightsbins_broadcastoutputs              r    _create_outputs_with_mixlog_normz5CHMv2FeaturesToDepth._create_outputs_with_mixlog_norm.  s    E"D9 ++C0::4;W;WX[_[q[qqe^
1d3  CCHRRSWScScdu$1b!Q/99$:J:JKN*//At/DNNtO_O_`#r   xc                 J   |j                   d   }|dkD  r| j                  dk(  r8t        j                  | j                  | j
                  ||j                        }n| j                  dk(  rt        j                  t        j                  t        j                  | j                              t        j                  t        j                  | j
                              ||j                        }t        j                  |      }n| j                  ||j                        }| j                  dv r| j                  dk(  r3t        j                  |      }d}||z   }||j                  dd      z  }nR| j                  d	k(  rt        j                  |d
      }n+t        j                  |      }||j                  dd      z  }t        j                   d||g      j#                  d
      }|S | j%                  ||      }|S t        j                  |      | j                  z   }|S )Nr   r   r   r   )r   softmaxsigmoidg?Tr   r   rZ   zikmn,k->imn)rQ   r   rV   r   r   r   r   r   r   r   r   r   r   r   r   r   einsumrT   r   )r*   r   r   r   logitepsr   s          r   r/   zCHMv2FeaturesToDepth.forwardA  s   A:!!X-~~dnndnnfUVU]U]^##u,~~IIell4>>:;IIell4>>:;88	 yy//A!!%EE%%1!JJqMEC!CKE!EII!TI$BBE''94!MM!3E!MM!,E!EII!TI$BBEmeT]CMMRSMT 	 >>q$G  ZZ]T^^3Fr   )r0   r1   r2   rf   r   r#   r)   rV   r   rg   r   r   r/   r3   r4   s   @r   r   r     sx    T!{ !# u||  *ell %,, [`[g[g &" "%,, "r   r   c                   L     e Zd ZU eed<   dZdZdZdZdZ	dZ
dZdZd fdZ xZS )CHMv2PreTrainedModelr   chmv2pixel_values)imageTc                 \   t         |   |       t        |t        j                  t        j
                  t        j                  f      rct        j                  |j                  d| j                  j                         |j                   t        j                  |j                         y y y )Nr   )meanstd)r"   _init_weightsrN   r   rE   r$   r&   inittrunc_normal_weightr   initializer_rangerk   zeros_)r*   moduler+   s     r   r   z"CHMv2PreTrainedModel._init_weightsr  st    f%fryy"))R5G5GHIv}}3DKK<Y<YZ{{&FKK( ' Jr   )rI   N)r0   r1   r2   r   __annotations__base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_supports_sdpa_supports_flash_attn_supports_flex_attn_supports_attention_backendr   r3   r4   s   @r   r   r   f  sA    $O!&*#N"&) )r   r   z
    CHMv2 Model with a depth estimation head on top (consisting of convolutional layers) e.g. for canopy height
    estimation.
    )custom_introc                        e Zd Zdef fdZd Zee	 d
dej                  dej                  dz  dee   defd	              Z xZS )CHMv2ForDepthEstimationr   c                     t         |   |       t        |      | _        t	        |      | _        t        |      | _        | j                          y r-   )	r"   r#   r   backboner   r   r   features_to_depth	post_initrr   s     r   r#   z CHMv2ForDepthEstimation.__init__  s?     %f-f%	!5f!=r   c                 6    | j                   j                         S r-   )r   get_input_embeddings)r*   s    r   r   z,CHMv2ForDepthEstimation.get_input_embeddings  s    }}1133r   Nr   labelskwargsrI   c                    d}|t        d      |j                  \  }}}}| j                  j                  }||z  }	||z  }
 | j                  |fi |}t        t        |j                  |j                              }| j                  ||	|
      }| j                  |      }|j                  d      }t        |||j                  |j                        S )z
        labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth depth estimation maps for computing the loss.
        NzTraining is not implemented yetr   r   )losspredicted_depthrH   
attentions)NotImplementedErrorrQ   r   
patch_sizer   rP   r=   feature_maps
cls_tokensr   r   squeezer   rH   r  )r*   r   r   r   r   rG   r   r   r  r\   r]   backbone_outputintermediate_featureshead_outputr  s                  r   r/   zCHMv2ForDepthEstimation.forward  s     %&GHH*001fe[[++
+z)'$--?? $S)E)EGaGa%b cii 5|[Q00=)11a18#+)77&11	
 	
r   r-   )r0   r1   r2   r   r#   r   r   r   rV   FloatTensor
LongTensorr	   r
   r   r/   r3   r4   s   @r   r   r   z  sr    { 4  +/ 
'' 
   4' 
 +,	 

 
 
   
r   r   )rV   r    r   r   backbone_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr	   utilsr
   r   r   configuration_chmv2r   r   Moduler   r6   ri   rv   r   r   r   r   r   __all__ r   r   <module>r     s   ,   & + 4 - & I I ,"299 ,5299 5p''ryy ''T"bii "JBII 2*		 *ZW299 Wt )? ) )& /
2 /
/
d %&<
=r   