
    hqH              	          d Z ddlmZ ddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ  ej0                  e      Zd-dej6                  dededej6                  fdZ G d dej>                        Z  G d dej>                        Z! G d dejD                        Z# G d dej>                        Z$ G d dej>                        Z% G d dej>                        Z& G d d ej>                        Z'e G d! d"e             Z(e G d# d$e(             Z) ed%&       G d' d(e(             Z* ed)&       G d* d+e(e             Z+g d,Z,y).zPyTorch ConvNextV2 model.    )OptionalN)nn   )ACT2FN)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging)BackboneMixin)can_return_tuple   )ConvNextV2Configinput	drop_probtrainingreturnc                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
            r   r   )r   )dtypedevice)shapendimtorchrandr   r   floor_div)r   r   r   	keep_probr   random_tensoroutputs          p/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/models/convnextv2/modeling_convnextv2.py	drop_pathr$   )   s     CxII[[^

Q 77E

5ELL YYMYYy!M1FM    c                   x     e Zd ZdZd	dee   ddf fdZdej                  dej                  fdZ	de
fdZ xZS )
ConvNextV2DropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 0    t         |           || _        y N)super__init__r   )selfr   	__class__s     r#   r+   zConvNextV2DropPath.__init__A   s    "r%   hidden_statesc                 D    t        || j                  | j                        S r)   )r$   r   r   )r,   r.   s     r#   forwardzConvNextV2DropPath.forwardE   s    FFr%   c                      d| j                    S )Nzp=)r   )r,   s    r#   
extra_reprzConvNextV2DropPath.extra_reprH   s    DNN#$$r%   r)   )__name__
__module____qualname____doc__r   floatr+   r   Tensorr0   strr2   __classcell__r-   s   @r#   r'   r'   >   sG    b#(5/ #T #GU\\ Gell G%C %r%   r'   c                   `     e Zd ZdZdef fdZdej                  dej                  fdZ xZ	S )ConvNextV2GRNz)GRN (Global Response Normalization) layerdimc                     t         |           t        j                  t	        j
                  ddd|            | _        t        j                  t	        j
                  ddd|            | _        y )Nr   )r*   r+   r   	Parameterr   zerosweightbias)r,   r>   r-   s     r#   r+   zConvNextV2GRN.__init__O   sL    ll5;;q!Q#<=LLQ1c!:;	r%   r.   r   c                     t         j                  j                  |ddd      }||j                  dd      dz   z  }| j                  ||z  z  | j
                  z   |z   }|S )N   )r   rE   T)ordr>   keepdim)r>   rG   ư>)r   linalgvector_normmeanrB   rC   )r,   r.   global_featuresnorm_featuress       r#   r0   zConvNextV2GRN.forwardT   si    ,,22=aV]a2b'?+?+?BPT+?+UX\+\]}}'DE		QTaar%   )
r3   r4   r5   r6   intr+   r   FloatTensorr0   r:   r;   s   @r#   r=   r=   L   s1    3<C <
U%6%6 5;L;L r%   r=   c                   f     e Zd ZdZddd fd
Zdej                  dej                  f fdZ xZS )	ConvNextV2LayerNormaA  LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
    width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
    rI   channels_lastepsdata_formatc                \    t        |   |fd|i| |dvrt        d|       || _        y )NrU   )rS   channels_firstzUnsupported data format: )r*   r+   NotImplementedErrorrV   )r,   normalized_shaperU   rV   kwargsr-   s        r#   r+   zConvNextV2LayerNorm.__init__d   s?    )=s=f=AA%(A+&OPP&r%   featuresr   c                     | j                   dk(  r9|j                  dddd      }t        |   |      }|j                  dddd      }|S t        |   |      }|S )z
        Args:
            features: Tensor of shape (batch_size, channels, height, width) OR (batch_size, height, width, channels)
        rX   r   rE   r   r   )rV   permuter*   r0   )r,   r\   r-   s     r#   r0   zConvNextV2LayerNorm.forwardj   sj    
 //''1a3Hwx0H''1a3H  wx0Hr%   	r3   r4   r5   r6   r+   r   r8   r0   r:   r;   s   @r#   rR   rR   ^   s4    
 15/ '   r%   rR   c                   Z     e Zd ZdZ fdZdej                  dej                  fdZ xZ	S )ConvNextV2EmbeddingszThis class is comparable to (and inspired by) the SwinEmbeddings class
    found in src/transformers/models/swin/modeling_swin.py.
    c                    t         |           t        j                  |j                  |j
                  d   |j                  |j                        | _        t        |j
                  d   dd      | _	        |j                  | _        y )Nr   kernel_sizestriderI   rX   rT   )
r*   r+   r   Conv2dnum_channelshidden_sizes
patch_sizepatch_embeddingsrR   	layernormr,   configr-   s     r#   r+   zConvNextV2Embeddings.__init__~   sr     "		!4!4Q!7VEVEV_e_p_p!
 -V-@-@-C[kl"//r%   pixel_valuesr   c                     |j                   d   }|| j                  k7  rt        d      | j                  |      }| j	                  |      }|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r   rg   
ValueErrorrj   rk   )r,   rn   rg   
embeddingss       r#   r0   zConvNextV2Embeddings.forward   sV    #))!,4,,,w  **<8
^^J/
r%   )
r3   r4   r5   r6   r+   r   rP   r8   r0   r:   r;   s   @r#   ra   ra   y   s*    0E$5$5 %,, r%   ra   c                   \     e Zd ZdZd fd	Zdej                  dej                  fdZ xZS )ConvNextV2Layera5  This corresponds to the `Block` class in the original implementation.

    There are two equivalent implementations: [DwConv, LayerNorm (channels_first), Conv, GELU,1x1 Conv]; all in (N, C,
    H, W) (2) [DwConv, Permute to (N, H, W, C), LayerNorm (channels_last), Linear, GELU, Linear]; Permute back

    The authors used (2) as they find it slightly faster in PyTorch.

    Args:
        config ([`ConvNextV2Config`]): Model configuration class.
        dim (`int`): Number of input channels.
        drop_path (`float`): Stochastic depth rate. Default: 0.0.
    c                    t         |           t        j                  ||dd|      | _        t        |d      | _        t        j                  |d|z        | _        t        |j                     | _        t        d|z        | _        t        j                  d|z  |      | _        |dkD  rt        |      | _        y t        j                          | _        y )N   r   )rd   paddinggroupsrI   rU      r   )r*   r+   r   rf   dwconvrR   rk   Linearpwconv1r   
hidden_actactr=   grnpwconv2r'   Identityr$   )r,   rm   r>   r$   r-   s       r#   r+   zConvNextV2Layer.__init__   s    iiSa3O,Sd;yya#g.&++, S)yyS#.:Cc/+I6r{{}r%   r\   r   c                 N   |}| j                  |      }|j                  dddd      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }| j                  |      }|j                  dddd      }|| j                  |      z   }|S )Nr   rE   r   r   )rz   r^   rk   r|   r~   r   r   r$   )r,   r\   residuals      r#   r0   zConvNextV2Layer.forward   s    ;;x(##Aq!Q/>>(+<<)88H%88H%<<)##Aq!Q/dnnX66r%   )r   r_   r;   s   @r#   rs   rs      s)    
]  r%   rs   c                   \     e Zd ZdZd fd	Zdej                  dej                  fdZ xZS )ConvNextV2Stagea  ConvNeXTV2 stage, consisting of an optional downsampling layer + multiple residual blocks.

    Args:
        config ([`ConvNextV2Config`]): Model configuration class.
        in_channels (`int`): Number of input channels.
        out_channels (`int`): Number of output channels.
        depth (`int`): Number of residual blocks.
        drop_path_rates(`list[float]`): Stochastic depth rates for each layer.
    c                    t         	|           ||k7  s|dkD  r@t        j                  t	        |dd      t        j
                  ||||      g      | _        nt        j                         | _        |xs dg|z  }t        j                  t        |      D cg c]  }t        ||||          c}      | _	        y c c}w )Nr   rI   rX   rT   rc   r   )r>   r$   )
r*   r+   r   
ModuleListrR   rf   downsampling_layerrangers   layers)
r,   rm   in_channelsout_channelsrd   re   depthdrop_path_ratesjr-   s
            r#   r+   zConvNextV2Stage.__init__   s    ,&&1*&(mm'K[\IIk<[Y_`'D# ')mmoD#):cUU]mm^cdi^jkYZ_VQRASTk
ks   B>r\   r   c                 j    | j                   D ]
  } ||      } | j                  D ]
  } ||      } |S r)   )r   r   )r,   r\   layers      r#   r0   zConvNextV2Stage.forward   sA    ,, 	'EXH	'[[ 	'EXH	'r%   )rE   rE   rE   Nr_   r;   s   @r#   r   r      s(    
"  r%   r   c                   P     e Zd Z fdZ	 ddej
                  dee   defdZ	 xZ
S )ConvNextV2Encoderc           
      ,   t         |           t        j                         | _        t        j                  d|j                  t        |j                        d      j                  |j                        D cg c]  }|j                          }}|j                  d   }t        |j                        D ]V  }|j                  |   }t        ||||dkD  rdnd|j                  |   ||         }| j                  j!                  |       |}X y c c}w )Nr   cpu)r   rE   r   )r   r   re   r   r   )r*   r+   r   r   stagesr   linspacedrop_path_ratesumdepthssplittolistrh   r   
num_stagesr   append)	r,   rm   xr   prev_chsiout_chsstager-   s	           r#   r+   zConvNextV2Encoder.__init__   s    mmo ^^Av'<'<c&-->PY^_eeflfsfst
 HHJ
 
 &&q)v(() 	A))!,G#$$EqqmmA& / 2E KKu%H	
s   :Dr.   output_hidden_statesr   c                     |r|gnd }| j                   D ]  } ||      }||j                  |         t        ||      S )N)last_hidden_stater.   )r   r   r   )r,   r.   r   all_hidden_stateslayer_modules        r#   r0   zConvNextV2Encoder.forward   sR     0D]O KK 	8L(7M ,!((7	8
 .]noor%   )F)r3   r4   r5   r+   r   r8   r   boolr   r0   r:   r;   s   @r#   r   r      s8    , SX
p"\\
pAI$
p	'
pr%   r   c                   ,    e Zd ZU eed<   dZdZdgZd Zy)ConvNextV2PreTrainedModelrm   
convnextv2rn   rs   c                    t        |t        j                  t        j                  f      rm|j                  j
                  j                  d| j                  j                         |j                  %|j                  j
                  j                          yyt        |t        j                  t        f      rJ|j                  j
                  j                          |j                  j
                  j                  d       yt        |t              rI|j                  j
                  j                          |j                  j
                  j                          yy)zInitialize the weightsr   )rL   stdNg      ?)
isinstancer   r{   rf   rB   datanormal_rm   initializer_rangerC   zero_	LayerNormrR   fill_r=   )r,   modules     r#   _init_weightsz'ConvNextV2PreTrainedModel._init_weights  s    fryy"))45 MM&&CT[[5R5R&S{{&  &&( '/B CDKK""$MM$$S).MM$$&KK""$ /r%   N)	r3   r4   r5   r   __annotations__base_model_prefixmain_input_name_no_split_modulesr    r%   r#   r   r     s!    $$O*+%r%   r   c            	       j     e Zd Z fdZee	 ddeej                     dee	   de
fd              Z xZS )ConvNextV2Modelc                     t         |   |       || _        t        |      | _        t        |      | _        t        j                  |j                  d   |j                        | _        | j                          y )NrH   rx   )r*   r+   rm   ra   rq   r   encoderr   r   rh   layer_norm_epsrk   	post_initrl   s     r#   r+   zConvNextV2Model.__init__  s`     .v6(0 f&9&9"&=6CXCXY 	r%   rn   r   r   c                     || j                   j                  }|t        d      | j                  |      }| j	                  ||      }|j
                  }| j                  |j                  ddg            }t        |||j                        S )Nz You have to specify pixel_valuesr   rH   )r   pooler_outputr.   )
rm   r   rp   rq   r   r   rk   rL   r	   r.   )r,   rn   r   embedding_outputencoder_outputsr   pooled_outputs          r#   r0   zConvNextV2Model.forward,  s    
  '#';;#C#C ?@@??<8:>,,3G ;G ;
 ,== '8'='=r2h'GH7/')77
 	
r%   NN)r3   r4   r5   r+   r   r   r   r   rP   r   r	   r0   r:   r;   s   @r#   r   r     sP     gk
$U%6%67
V^_cVd
	1
  
r%   r   z
    ConvNextV2 Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc            	       ~     e Zd Z fdZee	 ddeej                     deej                     de
fd              Z xZS ) ConvNextV2ForImageClassificationc                 <   t         |   |       |j                  | _        t        |      | _        |j                  dkD  r3t        j                  |j                  d   |j                        | _        nt        j                         | _        | j                          y )Nr   rH   )r*   r+   
num_labelsr   r   r   r{   rh   
classifierr   r   rl   s     r#   r+   z)ConvNextV2ForImageClassification.__init__O  su      ++)&1 q  ii(;(;B(?ARARSDO kkmDO 	r%   rn   labelsr   c                      | j                   |fi |}|j                  }| j                  |      }d}|| j                  ||| j                        }t        |||j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        N)r   pooled_logitsrm   )losslogitsr.   )r   r   r   loss_functionrm   r
   r.   )r,   rn   r   r[   outputsr   r   r   s           r#   r0   z(ConvNextV2ForImageClassification.forward^  sy     =LDOOL<c\b<c--/%%V6RVR]R]%^D3!//
 	
r%   r   )r3   r4   r5   r+   r   r   r   r   rP   
LongTensorr
   r0   r:   r;   s   @r#   r   r   G  sV     ei
$U%6%67
HPQVQaQaHb
	-
  
r%   r   zT
    ConvNeXT V2 backbone, to be used with frameworks like DETR and MaskFormer.
    c            	       h     e Zd ZdZ fdZee	 ddej                  de	e
   defd              Z xZS )ConvNextV2BackboneFc                    t         |   |       t         | 	  |       t        |      | _        t        |      | _        |j                  d   g|j                  z   | _        i }t        | j                  | j                        D ]  \  }}t        |d      ||<    t        j                  |      | _        | j!                          y )Nr   rX   )rV   )r*   r+   _init_backbonera   rq   r   r   rh   num_featureszip_out_featureschannelsrR   r   
ModuleDicthidden_states_normsr   )r,   rm   r   r   rg   r-   s        r#   r+   zConvNextV2Backbone.__init__  s     v&.v6(0#0034v7J7JJ !#&t'9'94==#I 	iE<)<\Wg)h&	i#%==1D#E  	r%   rn   r   r   c                 z   || j                   j                  }| j                  |      }| j                  |d      }|j                  }g }t        | j                  |      D ]:  \  }}|| j                  v s | j                  |   |      }|j                  |       < t        t        |      |r|      S d      S )ar  
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("facebook/convnextv2-tiny-1k-224")
        >>> model = AutoBackbone.from_pretrained("facebook/convnextv2-tiny-1k-224")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
        ```NTr   )feature_mapsr.   )rm   r   rq   r   r.   r   stage_namesout_featuresr   r   r   tuple)	r,   rn   r   r   r   r.   r   r   hidden_states	            r#   r0   zConvNextV2Backbone.forward  s    2  '#';;#C#C ??<8<@LLIYptL<u--#&t'7'7#G 	2E<)))>t77>|L##L1	2
 |,+?-
 	
EI
 	
r%   r)   )r3   r4   r5   has_attentionsr+   r   r   r   r8   r   r   r   r0   r:   r;   s   @r#   r   r   x  sT     N"  04'
ll'
 'tn'
 
	'
  '
r%   r   )r   r   r   r   )r   F)-r6   typingr   r   torch.utils.checkpointr   activationsr   modeling_outputsr   r   r	   r
   modeling_utilsr   utilsr   r   utils.backbone_utilsr   utils.genericr   configuration_convnextv2r   
get_loggerr3   loggerr8   r7   r   r$   Moduler'   r=   r   rR   ra   rs   r   r   r   r   r   r   __all__r   r%   r#   <module>r      sy         !  . , 1 - 6 
		H	%U\\ e T V[VbVb *% %BII $",, 6299 0(bii (X!bii !J p		  pF % % %, &
/ &
 &
R '
'@ '
'
T =
2M =
=
@ ur%   