"""PyTorch DPT (Dense Prediction Transformers) model.

This implementation is heavily inspired by OpenMMLab's implementation, found here:
https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/decode_heads/dpt_head.py.

"""

import collections.abc
from dataclasses import dataclass
from typing import Callable, Optional

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import CrossEntropyLoss

from ...activations import ACT2FN
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import BaseModelOutput, DepthEstimatorOutput, SemanticSegmenterOutput
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import ModelOutput, auto_docstring, logging, torch_int
from ...utils.backbone_utils import load_backbone
from ...utils.generic import can_return_tuple, check_model_inputs
from .configuration_dpt import DPTConfig


logger = logging.get_logger(__name__)


@dataclass
@auto_docstring(
    custom_intro="""
    Base class for model's outputs that also contains intermediate activations that can be used at later stages. Useful
    in the context of Vision models.
    """
)
class BaseModelOutputWithIntermediateActivations(ModelOutput):
    r"""
    last_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    intermediate_activations (`tuple(torch.FloatTensor)`, *optional*):
        Intermediate activations that can be used to compute hidden states of the model at various layers.
    """

    last_hidden_states: Optional[torch.FloatTensor] = None
    intermediate_activations: Optional[tuple[torch.FloatTensor, ...]] = None


@dataclass
@auto_docstring(
    custom_intro="""
    Base class for model's outputs that also contains a pooling of the last hidden states as well as intermediate
    activations that can be used by the model at later stages.
    """
)
class BaseModelOutputWithPoolingAndIntermediateActivations(ModelOutput):
    r"""
    pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`):
        Last layer hidden-state of the first token of the sequence (classification token) after further processing
        through the layers used for the auxiliary pretraining task. E.g. for BERT-family of models, this returns
        the classification token after processing through a linear layer and a tanh activation function. The linear
        layer weights are trained from the next sentence prediction (classification) objective during pretraining.
    intermediate_activations (`tuple(torch.FloatTensor)`, *optional*):
        Intermediate activations that can be used to compute hidden states of the model at various layers.
    """

    last_hidden_state: Optional[torch.FloatTensor] = None
    pooler_output: Optional[torch.FloatTensor] = None
    hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None
    attentions: Optional[tuple[torch.FloatTensor, ...]] = None
    intermediate_activations: Optional[tuple[torch.FloatTensor, ...]] = None


class DPTViTHybridEmbeddings(nn.Module):
    """
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    """

    def __init__(self, config: DPTConfig, feature_size: Optional[tuple[int, int]] = None):
        super().__init__()
        image_size, patch_size = config.image_size, config.patch_size
        num_channels, hidden_size = config.num_channels, config.hidden_size

        image_size = image_size if isinstance(image_size, collections.abc.Iterable) else (image_size, image_size)
        patch_size = patch_size if isinstance(patch_size, collections.abc.Iterable) else (patch_size, patch_size)
        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])

        self.backbone = load_backbone(config)
        feature_dim = self.backbone.channels[-1]
        if len(self.backbone.channels) != 3:
            raise ValueError(f"Expected backbone to have 3 output features, got {len(self.backbone.channels)}")
        self.residual_feature_map_index = [0, 1]  # Always take the output of the first and second backbone stage

        if feature_size is None:
            feat_map_shape = config.backbone_featmap_shape
            feature_size = feat_map_shape[-2:]
            feature_dim = feat_map_shape[1]
        else:
            feature_size = (
                feature_size if isinstance(feature_size, collections.abc.Iterable) else (feature_size, feature_size)
            )
            feature_dim = self.backbone.channels[-1]

        self.image_size = image_size
        self.patch_size = patch_size[0]
        self.num_channels = num_channels

        self.projection = nn.Conv2d(feature_dim, hidden_size, kernel_size=1)

        self.cls_token = nn.Parameter(torch.zeros(1, 1, config.hidden_size))
        self.position_embeddings = nn.Parameter(torch.zeros(1, num_patches + 1, config.hidden_size))

    def _resize_pos_embed(self, posemb, grid_size_height, grid_size_width, start_index=1):
        posemb_tok = posemb[:, :start_index]
        posemb_grid = posemb[0, start_index:]

        old_grid_size = torch_int(len(posemb_grid) ** 0.5)

        posemb_grid = posemb_grid.reshape(1, old_grid_size, old_grid_size, -1).permute(0, 3, 1, 2)
        posemb_grid = nn.functional.interpolate(posemb_grid, size=(grid_size_height, grid_size_width), mode="bilinear")
        posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, grid_size_height * grid_size_width, -1)

        posemb = torch.cat([posemb_tok, posemb_grid], dim=1)

        return posemb

    def forward(
        self, pixel_values: torch.Tensor, interpolate_pos_encoding: bool = False
    ) -> BaseModelOutputWithIntermediateActivations:
        batch_size, num_channels, height, width = pixel_values.shape
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )
        if not interpolate_pos_encoding:
            if height != self.image_size[0] or width != self.image_size[1]:
                raise ValueError(
                    f"Input image size ({height}*{width}) doesn't match model"
                    f" ({self.image_size[0]}*{self.image_size[1]})."
                )

        position_embeddings = self._resize_pos_embed(
            self.position_embeddings, height // self.patch_size, width // self.patch_size
        )

        backbone_output = self.backbone(pixel_values)

        features = backbone_output.feature_maps[-1]

        # Retrieve also the intermediate activations to use them at later stages
        output_hidden_states = [backbone_output.feature_maps[index] for index in self.residual_feature_map_index]

        embeddings = self.projection(features).flatten(2).transpose(1, 2)

        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        embeddings = torch.cat((cls_tokens, embeddings), dim=1)

        # add positional encoding to each token
        embeddings = embeddings + position_embeddings

        return BaseModelOutputWithIntermediateActivations(
            last_hidden_states=embeddings,
            intermediate_activations=output_hidden_states,
        )


class DPTViTEmbeddings(nn.Module):
    """
    Construct the CLS token, position and patch embeddings.

    """

    def __init__(self, config):
        super().__init__()

        self.cls_token = nn.Parameter(torch.zeros(1, 1, config.hidden_size))
        self.patch_embeddings = DPTViTPatchEmbeddings(config)
        num_patches = self.patch_embeddings.num_patches
        self.position_embeddings = nn.Parameter(torch.zeros(1, num_patches + 1, config.hidden_size))
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.config = config

    def _resize_pos_embed(self, posemb, grid_size_height, grid_size_width, start_index=1):
        posemb_tok = posemb[:, :start_index]
        posemb_grid = posemb[0, start_index:]

        old_grid_size = torch_int(posemb_grid.size(0) ** 0.5)

        posemb_grid = posemb_grid.reshape(1, old_grid_size, old_grid_size, -1).permute(0, 3, 1, 2)
        posemb_grid = nn.functional.interpolate(posemb_grid, size=(grid_size_height, grid_size_width), mode="bilinear")
        posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, grid_size_height * grid_size_width, -1)

        posemb = torch.cat([posemb_tok, posemb_grid], dim=1)

        return posemb

    def forward(self, pixel_values: torch.Tensor) -> BaseModelOutputWithIntermediateActivations:
        batch_size, num_channels, height, width = pixel_values.shape

        # possibly interpolate position encodings to handle varying image sizes
        patch_size = self.config.patch_size
        position_embeddings = self._resize_pos_embed(
            self.position_embeddings, height // patch_size, width // patch_size
        )

        embeddings = self.patch_embeddings(pixel_values)

        batch_size, seq_len, _ = embeddings.size()

        # add the [CLS] token to the embedded patch tokens
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        embeddings = torch.cat((cls_tokens, embeddings), dim=1)

        # add positional encoding to each token
        embeddings = embeddings + position_embeddings

        embeddings = self.dropout(embeddings)

        return BaseModelOutputWithIntermediateActivations(last_hidden_states=embeddings)


class DPTViTPatchEmbeddings(nn.Module):
    """
    Image to Patch Embedding.

    """

    def __init__(self, config: DPTConfig):
        super().__init__()
        image_size, patch_size = config.image_size, config.patch_size
        num_channels, hidden_size = config.num_channels, config.hidden_size

        image_size = image_size if isinstance(image_size, collections.abc.Iterable) else (image_size, image_size)
        patch_size = patch_size if isinstance(patch_size, collections.abc.Iterable) else (patch_size, patch_size)
        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_channels = num_channels
        self.num_patches = num_patches

        self.projection = nn.Conv2d(num_channels, hidden_size, kernel_size=patch_size, stride=patch_size)

    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
        batch_size, num_channels, height, width = pixel_values.shape
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )
        embeddings = self.projection(pixel_values).flatten(2).transpose(1, 2)
        return embeddings


def eager_attention_forward(
    module: nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    scaling: float,
    dropout: float = 0.0,
    **kwargs,
):
    # Take the dot product between "query" and "key" to get the raw attention scores.
    attn_weights = torch.matmul(query, key.transpose(-1, -2)) * scaling

    # Normalize the attention scores to probabilities.
    attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)

    # This is actually dropping out entire tokens to attend to, following the original Transformer paper.
    attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)

    # Mask heads if we want to
    if attention_mask is not None:
        attn_weights = attn_weights * attention_mask

    attn_output = torch.matmul(attn_weights, value)
    attn_output = attn_output.transpose(1, 2).contiguous()

    return attn_output, attn_weights


class DPTSelfAttention(nn.Module):
    def __init__(self, config: DPTConfig):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"):
            raise ValueError(
                f"The hidden size {config.hidden_size} is not a multiple of the number of attention "
                f"heads {config.num_attention_heads}."
            )

        self.config = config
        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size
        self.dropout_prob = config.attention_probs_dropout_prob
        self.scaling = self.attention_head_size**-0.5
        self.is_causal = False

        self.query = nn.Linear(config.hidden_size, self.all_head_size, bias=config.qkv_bias)
        self.key = nn.Linear(config.hidden_size, self.all_head_size, bias=config.qkv_bias)
        self.value = nn.Linear(config.hidden_size, self.all_head_size, bias=config.qkv_bias)

    def forward(
        self, hidden_states: torch.Tensor, head_mask: Optional[torch.Tensor] = None
    ) -> tuple[torch.Tensor, torch.Tensor]:
        batch_size = hidden_states.shape[0]
        new_shape = (batch_size, -1, self.num_attention_heads, self.attention_head_size)
        key_layer = self.key(hidden_states).view(*new_shape).transpose(1, 2)
        value_layer = self.value(hidden_states).view(*new_shape).transpose(1, 2)
        query_layer = self.query(hidden_states).view(*new_shape).transpose(1, 2)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        context_layer, attention_probs = attention_interface(
            self,
            query_layer,
            key_layer,
            value_layer,
            head_mask,
            is_causal=self.is_causal,
            scaling=self.scaling,
            dropout=0.0 if not self.training else self.dropout_prob,
        )

        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.reshape(new_context_layer_shape)

        return context_layer, attention_probs


class DPTViTSelfOutput(nn.Module):
    """
    The residual connection is defined in ViTLayer instead of here (as is the case with other models), due to the
    layernorm applied before each block.
    """

    def __init__(self, config: DPTConfig):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        return hidden_states


class DPTViTAttention(nn.Module):
    def __init__(self, config: DPTConfig):
        super().__init__()
        self.attention = DPTSelfAttention(config)
        self.output = DPTViTSelfOutput(config)
        self.pruned_heads = set()

    def prune_heads(self, heads: set[int]):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.attention.num_attention_heads, self.attention.attention_head_size, self.pruned_heads
        )

        # Prune linear layers
        self.attention.query = prune_linear_layer(self.attention.query, index)
        self.attention.key = prune_linear_layer(self.attention.key, index)
        self.attention.value = prune_linear_layer(self.attention.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.attention.num_attention_heads = self.attention.num_attention_heads - len(heads)
        self.attention.all_head_size = self.attention.attention_head_size * self.attention.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(self, hidden_states: torch.Tensor, head_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        self_attn_output, _ = self.attention(hidden_states, head_mask)
        attention_output = self.output(self_attn_output, hidden_states)
        return attention_output


class DPTViTIntermediate(nn.Module):
    def __init__(self, config: DPTConfig):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states


class DPTViTOutput(nn.Module):
    def __init__(self, config: DPTConfig):
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = hidden_states + input_tensor
        return hidden_states


class DPTViTLayer(GradientCheckpointingLayer):
    """This corresponds to the Block class in the timm implementation."""

    def __init__(self, config: DPTConfig):
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.seq_len_dim = 1
        self.attention = DPTViTAttention(config)
        self.intermediate = DPTViTIntermediate(config)
        self.output = DPTViTOutput(config)
        self.layernorm_before = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.layernorm_after = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    def forward(self, hidden_states: torch.Tensor, head_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        # in DPT, layernorm is applied before self-attention
        hidden_states_norm = self.layernorm_before(hidden_states)
        attention_output = self.attention(hidden_states_norm, head_mask)

        # first residual connection
        hidden_states = attention_output + hidden_states

        # in DPT, layernorm is also applied after self-attention
        layer_output = self.layernorm_after(hidden_states)
        layer_output = self.intermediate(layer_output)

        # second residual connection is done here
        layer_output = self.output(layer_output, hidden_states)

        return layer_output


class DPTViTEncoder(nn.Module):
    def __init__(self, config: DPTConfig):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList([DPTViTLayer(config) for _ in range(config.num_hidden_layers)])
        self.gradient_checkpointing = False

    def forward(
        self,
        hidden_states: torch.Tensor,
        head_mask: Optional[torch.Tensor] = None,
        output_hidden_states: bool = False,
    ) -> BaseModelOutput:
        all_hidden_states = [hidden_states] if output_hidden_states else None

        for i, layer_module in enumerate(self.layer):
            layer_head_mask = head_mask[i] if head_mask is not None else None

            hidden_states = layer_module(hidden_states, layer_head_mask)

            if output_hidden_states:
                all_hidden_states.append(hidden_states)

        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=tuple(all_hidden_states) if output_hidden_states else None,
        )


class DPTReassembleStage(nn.Module):
    """
    This class reassembles the hidden states of the backbone into image-like feature representations at various
    resolutions.

    This happens in 3 stages:
    1. Map the N + 1 tokens to a set of N tokens, by taking into account the readout ([CLS]) token according to
       `config.readout_type`.
    2. Project the channel dimension of the hidden states according to `config.neck_hidden_sizes`.
    3. Resizing the spatial dimensions (height, width).

    Args:
        config (`[DPTConfig]`):
            Model configuration class defining the model architecture.
    """

    def __init__(self, config):
        super().__init__()

        self.config = config
        self.layers = nn.ModuleList()
        if config.is_hybrid:
            self._init_reassemble_dpt_hybrid(config)
        else:
            self._init_reassemble_dpt(config)

        self.neck_ignore_stages = config.neck_ignore_stages

    def _init_reassemble_dpt_hybrid(self, config):
        r"""
        For DPT-Hybrid the first 2 reassemble layers are set to `nn.Identity()`, please check the official
        implementation: https://github.com/isl-org/DPT/blob/f43ef9e08d70a752195028a51be5e1aff227b913/dpt/vit.py#L438
        for more details.
        """
        for i, factor in zip(range(len(config.neck_hidden_sizes)), config.reassemble_factors):
            if i <= 1:
                self.layers.append(nn.Identity())
            elif i > 1:
                self.layers.append(DPTReassembleLayer(config, channels=config.neck_hidden_sizes[i], factor=factor))

        if config.readout_type != "project":
            raise ValueError(f"Readout type {config.readout_type} is not supported for DPT-Hybrid.")

        # When using DPT-Hybrid the readout type is set to "project"
        self.readout_projects = nn.ModuleList()
        hidden_size = _get_backbone_hidden_size(config)
        for i in range(len(config.neck_hidden_sizes)):
            if i <= 1:
                self.readout_projects.append(nn.Sequential(nn.Identity()))
            elif i > 1:
                self.readout_projects.append(
                    nn.Sequential(nn.Linear(2 * hidden_size, hidden_size), ACT2FN[config.hidden_act])
                )

    def _init_reassemble_dpt(self, config):
        for i, factor in zip(range(len(config.neck_hidden_sizes)), config.reassemble_factors):
            self.layers.append(DPTReassembleLayer(config, channels=config.neck_hidden_sizes[i], factor=factor))

        if config.readout_type == "project":
            self.readout_projects = nn.ModuleList()
            hidden_size = _get_backbone_hidden_size(config)
            for _ in range(len(config.neck_hidden_sizes)):
                self.readout_projects.append(
                    nn.Sequential(nn.Linear(2 * hidden_size, hidden_size), ACT2FN[config.hidden_act])
                )

    def forward(self, hidden_states: list[torch.Tensor], patch_height=None, patch_width=None) -> list[torch.Tensor]:
        """
        Args:
            hidden_states (`list[torch.FloatTensor]`, each of shape `(batch_size, sequence_length + 1, hidden_size)`):
                List of hidden states from the backbone.
        """
        out = []

        for i, hidden_state in enumerate(hidden_states):
            if i not in self.neck_ignore_stages:
                # reshape to (batch_size, num_channels, height, width)
                cls_token, hidden_state = hidden_state[:, 0], hidden_state[:, 1:]
                batch_size, sequence_length, num_channels = hidden_state.shape
                if patch_height is not None and patch_width is not None:
                    hidden_state = hidden_state.reshape(batch_size, patch_height, patch_width, num_channels)
                else:
                    size = torch_int(sequence_length**0.5)
                    hidden_state = hidden_state.reshape(batch_size, size, size, num_channels)
                hidden_state = hidden_state.permute(0, 3, 1, 2).contiguous()

                feature_shape = hidden_state.shape
                if self.config.readout_type == "project":
                    # reshape to (batch_size, height*width, num_channels)
                    hidden_state = hidden_state.flatten(2).permute((0, 2, 1))
                    readout = cls_token.unsqueeze(1).expand_as(hidden_state)
                    # concatenate the readout token to the hidden states and project
                    hidden_state = self.readout_projects[i](torch.cat((hidden_state, readout), -1))
                    # reshape back to (batch_size, num_channels, height, width)
                    hidden_state = hidden_state.permute(0, 2, 1).reshape(feature_shape)
                elif self.config.readout_type == "add":
                    hidden_state = hidden_state.flatten(2) + cls_token.unsqueeze(-1)
                    hidden_state = hidden_state.reshape(feature_shape)
                hidden_state = self.layers[i](hidden_state)
            out.append(hidden_state)

        return out


def _get_backbone_hidden_size(config):
    if config.backbone_config is not None and config.is_hybrid is False:
        return config.backbone_config.hidden_size
    else:
        return config.hidden_size


class DPTReassembleLayer(nn.Module):
    def __init__(self, config: DPTConfig, channels: int, factor: int):
        super().__init__()
        hidden_size = _get_backbone_hidden_size(config)
        # projection
        self.projection = nn.Conv2d(in_channels=hidden_size, out_channels=channels, kernel_size=1)

        # up/down sampling depending on factor
        if factor > 1:
            self.resize = nn.ConvTranspose2d(channels, channels, kernel_size=factor, stride=factor, padding=0)
        elif factor == 1:
            self.resize = nn.Identity()
        elif factor < 1:
            # so should downsample
            self.resize = nn.Conv2d(channels, channels, kernel_size=3, stride=int(1 / factor), padding=1)

    def forward(self, hidden_state):
        hidden_state = self.projection(hidden_state)
        hidden_state = self.resize(hidden_state)
        return hidden_state


class DPTFeatureFusionStage(nn.Module):
    def __init__(self, config: DPTConfig):
        super().__init__()
        self.layers = nn.ModuleList()
        for _ in range(len(config.neck_hidden_sizes)):
            self.layers.append(DPTFeatureFusionLayer(config))

    def forward(self, hidden_states):
        # reversing the hidden_states, we start from the last
        hidden_states = hidden_states[::-1]

        fused_hidden_states = []
        fused_hidden_state = None

        for hidden_state, layer in zip(hidden_states, self.layers):
            if fused_hidden_state is None:
                # first layer only uses the last hidden_state
                fused_hidden_state = layer(hidden_state)
            else:
                fused_hidden_state = layer(fused_hidden_state, hidden_state)
            fused_hidden_states.append(fused_hidden_state)

        return fused_hidden_states


class DPTPreActResidualLayer(nn.Module):
    """
    ResidualConvUnit, pre-activate residual unit.

    Args:
        config (`[DPTConfig]`):
            Model configuration class defining the model architecture.
    """

    def __init__(self, config: DPTConfig):
        super().__init__()

        self.use_batch_norm = config.use_batch_norm_in_fusion_residual
        use_bias_in_fusion_residual = (
            config.use_bias_in_fusion_residual
            if config.use_bias_in_fusion_residual is not None
            else not self.use_batch_norm
        )

        self.activation1 = nn.ReLU()
        self.convolution1 = nn.Conv2d(
            config.fusion_hidden_size,
            config.fusion_hidden_size,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=use_bias_in_fusion_residual,
        )

        self.activation2 = nn.ReLU()
        self.convolution2 = nn.Conv2d(
            config.fusion_hidden_size,
            config.fusion_hidden_size,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=use_bias_in_fusion_residual,
        )

        if self.use_batch_norm:
            self.batch_norm1 = nn.BatchNorm2d(config.fusion_hidden_size)
            self.batch_norm2 = nn.BatchNorm2d(config.fusion_hidden_size)

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        residual = hidden_state
        hidden_state = self.activation1(hidden_state)

        hidden_state = self.convolution1(hidden_state)

        if self.use_batch_norm:
            hidden_state = self.batch_norm1(hidden_state)

        hidden_state = self.activation2(hidden_state)
        hidden_state = self.convolution2(hidden_state)

        if self.use_batch_norm:
            hidden_state = self.batch_norm2(hidden_state)

        return hidden_state + residual


class DPTFeatureFusionLayer(nn.Module):
    """Feature fusion layer, merges feature maps from different stages.

    Args:
        config (`[DPTConfig]`):
            Model configuration class defining the model architecture.
        align_corners (`bool`, *optional*, defaults to `True`):
            The align_corner setting for bilinear upsample.
    """

    def __init__(self, config: DPTConfig, align_corners: bool = True):
        super().__init__()

        self.align_corners = align_corners

        self.projection = nn.Conv2d(config.fusion_hidden_size, config.fusion_hidden_size, kernel_size=1, bias=True)

        self.residual_layer1 = DPTPreActResidualLayer(config)
        self.residual_layer2 = DPTPreActResidualLayer(config)

    def forward(self, hidden_state: torch.Tensor, residual: Optional[torch.Tensor] = None) -> torch.Tensor:
        if residual is not None:
            if hidden_state.shape != residual.shape:
                residual = nn.functional.interpolate(
                    residual, size=(hidden_state.shape[2], hidden_state.shape[3]), mode="bilinear", align_corners=False
                )
            hidden_state = hidden_state + self.residual_layer1(residual)

        hidden_state = self.residual_layer2(hidden_state)
        hidden_state = nn.functional.interpolate(
            hidden_state, scale_factor=2, mode="bilinear", align_corners=self.align_corners
        )
        hidden_state = self.projection(hidden_state)

        return hidden_state


@auto_docstring
class DPTPreTrainedModel(PreTrainedModel):
    config: DPTConfig
    base_model_prefix = "dpt"
    main_input_name = "pixel_values"
    supports_gradient_checkpointing = True
    _supports_sdpa = True
    _supports_flash_attn = True
    _supports_flex_attn = True
    _supports_attention_backend = True
    _can_record_outputs = {"attentions": DPTSelfAttention}

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, (nn.LayerNorm, nn.BatchNorm2d)):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        if isinstance(module, (DPTViTEmbeddings, DPTViTHybridEmbeddings)):
            module.cls_token.data.zero_()
            module.position_embeddings.data.zero_()


@auto_docstring
class DPTModel(DPTPreTrainedModel):
    def __init__(self, config: DPTConfig, add_pooling_layer: bool = True):
        r"""
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        """
        super().__init__(config)
        self.config = config

        # vit encoder
        if config.is_hybrid:
            self.embeddings = DPTViTHybridEmbeddings(config)
        else:
            self.embeddings = DPTViTEmbeddings(config)
        self.encoder = DPTViTEncoder(config)

        self.layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.pooler = DPTViTPooler(config) if add_pooling_layer else None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        if self.config.is_hybrid:
            return self.embeddings
        else:
            return self.embeddings.patch_embeddings

    def _prune_heads(self, heads_to_prune):
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    @can_return_tuple
    @check_model_inputs
    @auto_docstring
    def forward(
        self,
        pixel_values: torch.FloatTensor,
        head_mask: Optional[torch.FloatTensor] = None,
        output_hidden_states: Optional[bool] = None,
        **kwargs,
    ) -> BaseModelOutputWithPoolingAndIntermediateActivations:
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        # Prepare head mask if needed
        head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)

        embedding_output = self.embeddings(pixel_values)

        embedding_last_hidden_states = embedding_output.last_hidden_states

        encoder_outputs: BaseModelOutput = self.encoder(
            embedding_last_hidden_states, head_mask=head_mask, output_hidden_states=output_hidden_states
        )

        sequence_output = encoder_outputs.last_hidden_state
        sequence_output = self.layernorm(sequence_output)
        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None

        return BaseModelOutputWithPoolingAndIntermediateActivations(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            intermediate_activations=embedding_output.intermediate_activations,
            hidden_states=encoder_outputs.hidden_states,
        )


class DPTViTPooler(nn.Module):
    def __init__(self, config: DPTConfig):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.pooler_output_size)
        self.activation = ACT2FN[config.pooler_act]

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # We "pool" the model by simply taking the hidden state corresponding
        # to the first token.
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output
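

# Illustrative sketch (not part of the upstream file): running the bare encoder on a
# dummy batch. With the default 384x384 image size and patch size 16, the encoder sees
# 24 * 24 patch tokens plus one CLS token. Values are hypothetical.
def _example_dpt_model() -> None:
    model = DPTModel(DPTConfig()).eval()
    with torch.no_grad():
        outputs = model(torch.zeros(1, 3, 384, 384))
    assert outputs.last_hidden_state.shape == (1, 1 + 24 * 24, 768)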


class DPTNeck(nn.Module):
    """
    DPTNeck. A neck is a module that is normally used between the backbone and the head. It takes a list of tensors as
    input and produces another list of tensors as output. For DPT, it includes 2 stages:

    * DPTReassembleStage
    * DPTFeatureFusionStage.

    Args:
        config (dict): config dict.
    """

    def __init__(self, config: DPTConfig):
        super().__init__()
        self.config = config

        # postprocessing: only required in case of a non-hierarchical backbone (e.g. ViT, BEiT)
        if config.backbone_config is not None and config.backbone_config.model_type in ["swinv2"]:
            self.reassemble_stage = None
        else:
            self.reassemble_stage = DPTReassembleStage(config)

        self.convs = nn.ModuleList()
        for channel in config.neck_hidden_sizes:
            self.convs.append(nn.Conv2d(channel, config.fusion_hidden_size, kernel_size=3, padding=1, bias=False))

        # fusion
        self.fusion_stage = DPTFeatureFusionStage(config)

    def forward(
        self,
        hidden_states: list[torch.Tensor],
        patch_height: Optional[int] = None,
        patch_width: Optional[int] = None,
    ) -> list[torch.Tensor]:
        """
        Args:
            hidden_states (`list[torch.FloatTensor]`, each of shape `(batch_size, sequence_length, hidden_size)` or `(batch_size, hidden_size, height, width)`):
                List of hidden states from the backbone.
        """
        if not isinstance(hidden_states, (tuple, list)):
            raise TypeError("hidden_states should be a tuple or list of tensors")

        if len(hidden_states) != len(self.config.neck_hidden_sizes):
            raise ValueError("The number of hidden states should be equal to the number of neck hidden sizes.")

        # postprocess hidden states
        if self.reassemble_stage is not None:
            hidden_states = self.reassemble_stage(hidden_states, patch_height, patch_width)

        features = [self.convs[i](feature) for i, feature in enumerate(hidden_states)]

        # fusion blocks
        output = self.fusion_stage(features)

        return output


class DPTDepthEstimationHead(nn.Module):
    """
    Output head consisting of 3 convolutional layers. It progressively halves the feature dimension and upsamples
    the predictions to the input resolution after the first convolutional layer (details can be found in the paper's
    supplementary material).
    """

    def __init__(self, config: DPTConfig):
        super().__init__()

        self.config = config

        self.projection = None
        if config.add_projection:
            self.projection = nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

        features = config.fusion_hidden_size
        self.head = nn.Sequential(
            nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1),
            nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True),
            nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 1, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )

    def forward(self, hidden_states: list[torch.Tensor]) -> torch.Tensor:
        # use last features
        hidden_states = hidden_states[self.config.head_in_index]

        if self.projection is not None:
            hidden_states = self.projection(hidden_states)
            hidden_states = nn.ReLU()(hidden_states)

        predicted_depth = self.head(hidden_states)

        predicted_depth = predicted_depth.squeeze(dim=1)

        return predicted_depth


@auto_docstring(
    custom_intro="""
    DPT Model with a depth estimation head on top (consisting of 3 convolutional layers) e.g. for KITTI, NYUv2.
    """
)
class DPTForDepthEstimation(DPTPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.backbone = None
        if config.is_hybrid is False and (config.backbone_config is not None or config.backbone is not None):
            self.backbone = load_backbone(config)
        else:
            self.dpt = DPTModel(config, add_pooling_layer=False)

        # Neck
        self.neck = DPTNeck(config)

        # Depth estimation head
        self.head = DPTDepthEstimationHead(config)

        # Initialize weights and apply final processing
        self.post_init()
    @can_return_tuple
    @check_model_inputs
    @auto_docstring
    def forward(
        self,
        pixel_values: torch.FloatTensor,
        head_mask: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_hidden_states: Optional[bool] = None,
        **kwargs,
    ) -> DepthEstimatorOutput:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth depth estimation maps for computing the loss.

        Examples:
        ```python
        >>> from transformers import AutoImageProcessor, DPTForDepthEstimation
        >>> import torch
        >>> import numpy as np
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("Intel/dpt-large")
        >>> model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")

        >>> # prepare image for the model
        >>> inputs = image_processor(images=image, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**inputs)

        >>> # interpolate to original size
        >>> post_processed_output = image_processor.post_process_depth_estimation(
        ...     outputs,
        ...     target_sizes=[(image.height, image.width)],
        ... )

        >>> # visualize the prediction
        >>> predicted_depth = post_processed_output[0]["predicted_depth"]
        >>> depth = predicted_depth * 255 / predicted_depth.max()
        >>> depth = depth.detach().cpu().numpy()
        >>> depth = Image.fromarray(depth.astype("uint8"))
        ```"""
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        loss = None
        if labels is not None:
            raise NotImplementedError("Training is not implemented yet")

        if self.backbone is not None:
            outputs = self.backbone.forward_with_filtered_kwargs(pixel_values, output_hidden_states=True, **kwargs)
            hidden_states = outputs.feature_maps
        else:
            outputs = self.dpt(pixel_values, head_mask=head_mask, output_hidden_states=True, **kwargs)
            hidden_states = outputs.hidden_states

            # only keep certain features based on config.backbone_out_indices
            # note that the hidden_states also include the initial embeddings
            if not self.config.is_hybrid:
                hidden_states = [
                    feature for idx, feature in enumerate(hidden_states[1:]) if idx in self.config.backbone_out_indices
                ]
            else:
                backbone_hidden_states = list(outputs.intermediate_activations)
                backbone_hidden_states.extend(
                    feature
                    for idx, feature in enumerate(hidden_states[1:])
                    if idx in self.config.backbone_out_indices[2:]
                )

                hidden_states = backbone_hidden_states

        patch_height, patch_width = None, None
        if self.config.backbone_config is not None and self.config.is_hybrid is False:
            _, _, height, width = pixel_values.shape
            patch_size = self.config.backbone_config.patch_size
            patch_height = height // patch_size
            patch_width = width // patch_size

        hidden_states = self.neck(hidden_states, patch_height, patch_width)

        predicted_depth = self.head(hidden_states)

        return DepthEstimatorOutput(
            loss=loss,
            predicted_depth=predicted_depth,
            hidden_states=outputs.hidden_states if output_hidden_states else None,
            attentions=outputs.attentions,
        )


class DPTSemanticSegmentationHead(nn.Module):
    def __init__(self, config: DPTConfig):
        super().__init__()

        self.config = config

        features = config.fusion_hidden_size
        self.head = nn.Sequential(
            nn.Conv2d(features, features, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(features),
            nn.ReLU(),
            nn.Dropout(config.semantic_classifier_dropout),
            nn.Conv2d(features, config.num_labels, kernel_size=1),
            nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True),
        )

    def forward(self, hidden_states: list[torch.Tensor]) -> torch.Tensor:
        # use last features
        hidden_states = hidden_states[self.config.head_in_index]

        logits = self.head(hidden_states)

        return logits


class DPTAuxiliaryHead(nn.Module):
    def __init__(self, config: DPTConfig):
        super().__init__()

        features = config.fusion_hidden_size
        self.head = nn.Sequential(
            nn.Conv2d(features, features, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(features),
            nn.ReLU(),
            nn.Dropout(0.1, False),
            nn.Conv2d(features, config.num_labels, kernel_size=1),
        )

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        logits = self.head(hidden_states)

        return logits


@auto_docstring
class DPTForSemanticSegmentation(DPTPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.dpt = DPTModel(config, add_pooling_layer=False)

        # Neck
        self.neck = DPTNeck(config)

        # Segmentation head(s)
        self.head = DPTSemanticSegmentationHead(config)
        self.auxiliary_head = DPTAuxiliaryHead(config) if config.use_auxiliary_head else None

        # Initialize weights and apply final processing
        self.post_init()
    @can_return_tuple
    @check_model_inputs
    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_hidden_states: Optional[bool] = None,
        **kwargs,
    ) -> SemanticSegmenterOutput:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

        Examples:
        ```python
        >>> from transformers import AutoImageProcessor, DPTForSemanticSegmentation
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("Intel/dpt-large-ade")
        >>> model = DPTForSemanticSegmentation.from_pretrained("Intel/dpt-large-ade")

        >>> inputs = image_processor(images=image, return_tensors="pt")

        >>> outputs = model(**inputs)
        >>> logits = outputs.logits
        ```"""
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        if labels is not None and self.config.num_labels == 1:
            raise ValueError("The number of labels should be greater than one")

        outputs = self.dpt(pixel_values, head_mask=head_mask, output_hidden_states=True, **kwargs)

        hidden_states = outputs.hidden_states

        # only keep certain features based on config.backbone_out_indices
        # note that the hidden_states also include the initial embeddings
        if not self.config.is_hybrid:
            hidden_states = [
                feature for idx, feature in enumerate(hidden_states[1:]) if idx in self.config.backbone_out_indices
            ]
        else:
            backbone_hidden_states = list(outputs.intermediate_activations)
            backbone_hidden_states.extend(
                feature
                for idx, feature in enumerate(hidden_states[1:])
                if idx in self.config.backbone_out_indices[2:]
            )

            hidden_states = backbone_hidden_states

        hidden_states = self.neck(hidden_states=hidden_states)

        logits = self.head(hidden_states)

        auxiliary_logits = None
        if self.auxiliary_head is not None:
            auxiliary_logits = self.auxiliary_head(hidden_states[-1])

        loss = None
        if labels is not None:
            # upsample logits to the images' original size
            upsampled_logits = nn.functional.interpolate(
                logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
            )
            if auxiliary_logits is not None:
                upsampled_auxiliary_logits = nn.functional.interpolate(
                    auxiliary_logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
                )
            # compute weighted loss
            loss_fct = CrossEntropyLoss(ignore_index=self.config.semantic_loss_ignore_index)
            main_loss = loss_fct(upsampled_logits, labels)
            auxiliary_loss = loss_fct(upsampled_auxiliary_logits, labels)
            loss = main_loss + self.config.auxiliary_loss_weight * auxiliary_loss

        return SemanticSegmenterOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states if output_hidden_states else None,
            attentions=outputs.attentions,
        )


__all__ = ["DPTForDepthEstimation", "DPTForSemanticSegmentation", "DPTModel", "DPTPreTrainedModel"]