"""PyTorch ConvNext model."""

from typing import Optional

import torch
import torch.utils.checkpoint
from torch import nn

from ...activations import ACT2FN
from ...modeling_outputs import (
    BackboneOutput,
    BaseModelOutputWithNoAttention,
    BaseModelOutputWithPoolingAndNoAttention,
    ImageClassifierOutputWithNoAttention,
)
from ...modeling_utils import PreTrainedModel
from ...utils import auto_docstring, logging
from ...utils.backbone_utils import BackboneMixin
from ...utils.generic import can_return_tuple
from .configuration_convnext import ConvNextConfig


logger = logging.get_logger(__name__)


def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    # work with tensors of any rank, not just 4D (batch, channels, height, width)
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize: keep the sample (1) or drop it (0)
    output = input.div(keep_prob) * random_tensor
    return output


class ConvNextDropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return f"p={self.drop_prob}"


class ConvNextLayerNorm(nn.LayerNorm):
    r"""LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
    width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
    """

    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last", **kwargs):
        super().__init__(normalized_shape, eps=eps, **kwargs)
        if data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError(f"Unsupported data format: {data_format}")
        self.data_format = data_format

    def forward(self, features: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features: Tensor of shape (batch_size, channels, height, width) OR (batch_size, height, width, channels)
        """
        if self.data_format == "channels_first":
            # nn.LayerNorm normalizes over the last dimension, so move channels there and back
            features = features.permute(0, 2, 3, 1)
            features = super().forward(features)
            features = features.permute(0, 3, 1, 2)
        else:
            features = super().forward(features)
        return features


class ConvNextEmbeddings(nn.Module):
    """This class is comparable to (and inspired by) the SwinEmbeddings class
    found in src/transformers/models/swin/modeling_swin.py.
    """

    def __init__(self, config):
        super().__init__()
        self.patch_embeddings = nn.Conv2d(
            config.num_channels, config.hidden_sizes[0], kernel_size=config.patch_size, stride=config.patch_size
        )
        self.layernorm = ConvNextLayerNorm(config.hidden_sizes[0], eps=1e-6, data_format="channels_first")
        self.num_channels = config.num_channels

    def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
        num_channels = pixel_values.shape[1]
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )
        embeddings = self.patch_embeddings(pixel_values)
        embeddings = self.layernorm(embeddings)
        return embeddings


class ConvNextLayer(nn.Module):
    """This corresponds to the `Block` class in the original implementation.

    There are two equivalent implementations: (1) [DwConv, LayerNorm (channels_first), Conv, GELU, 1x1 Conv]; all in
    (N, C, H, W); (2) [DwConv, Permute to (N, H, W, C), LayerNorm (channels_last), Linear, GELU, Linear]; Permute back

    The authors used (2) as they find it slightly faster in PyTorch.

    Args:
        config ([`ConvNextConfig`]): Model configuration class.
        dim (`int`): Number of input channels.
        drop_path (`float`): Stochastic depth rate. Default: 0.0.
    """

    def __init__(self, config, dim, drop_path=0.0):
        super().__init__()
        # depthwise conv
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)
        self.layernorm = ConvNextLayerNorm(dim, eps=1e-6)
        # pointwise/1x1 convs, implemented with linear layers
        self.pwconv1 = nn.Linear(dim, 4 * dim)
        self.act = ACT2FN[config.hidden_act]
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.layer_scale_parameter = (
            nn.Parameter(config.layer_scale_init_value * torch.ones(dim), requires_grad=True)
            if config.layer_scale_init_value > 0
            else None
        )
        self.drop_path = ConvNextDropPath(drop_path) if drop_path > 0.0 else nn.Identity()

    def forward(self, hidden_states: torch.FloatTensor) -> torch.Tensor:
        residual = hidden_states
        hidden_states = self.dwconv(hidden_states)
        # (N, C, H, W) -> (N, H, W, C) so LayerNorm and Linear act on the channel dimension
        hidden_states = hidden_states.permute(0, 2, 3, 1)
        hidden_states = self.layernorm(hidden_states)
        hidden_states = self.pwconv1(hidden_states)
        hidden_states = self.act(hidden_states)
        hidden_states = self.pwconv2(hidden_states)
        if self.layer_scale_parameter is not None:
            hidden_states = self.layer_scale_parameter * hidden_states
        # (N, H, W, C) -> (N, C, H, W)
        hidden_states = hidden_states.permute(0, 3, 1, 2)
        hidden_states = residual + self.drop_path(hidden_states)
        return hidden_states


class ConvNextStage(nn.Module):
    """ConvNeXT stage, consisting of an optional downsampling layer + multiple residual blocks.

    Args:
        config ([`ConvNextConfig`]): Model configuration class.
        in_channels (`int`): Number of input channels.
        out_channels (`int`): Number of output channels.
        depth (`int`): Number of residual blocks.
        drop_path_rates (`list[float]`): Stochastic depth rates for each layer.
    """

    def __init__(self, config, in_channels, out_channels, kernel_size=2, stride=2, depth=2, drop_path_rates=None):
        super().__init__()
        if in_channels != out_channels or stride > 1:
            self.downsampling_layer = nn.ModuleList(
                [
                    ConvNextLayerNorm(in_channels, eps=1e-6, data_format="channels_first"),
                    nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride),
                ]
            )
        else:
            self.downsampling_layer = nn.ModuleList()
        drop_path_rates = drop_path_rates or [0.0] * depth
        self.layers = nn.ModuleList(
            [ConvNextLayer(config, dim=out_channels, drop_path=drop_path_rates[j]) for j in range(depth)]
        )

    def forward(self, hidden_states: torch.FloatTensor) -> torch.Tensor:
        for layer in self.downsampling_layer:
            hidden_states = layer(hidden_states)
        for layer in self.layers:
            hidden_states = layer(hidden_states)
        return hidden_states


class ConvNextEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.stages = nn.ModuleList()
        drop_path_rates = [
            x.tolist()
            for x in torch.linspace(0, config.drop_path_rate, sum(config.depths), device="cpu").split(config.depths)
        ]
        prev_chs = config.hidden_sizes[0]
        for i in range(config.num_stages):
            out_chs = config.hidden_sizes[i]
            stage = ConvNextStage(
                config,
                in_channels=prev_chs,
                out_channels=out_chs,
                stride=2 if i > 0 else 1,
                depth=config.depths[i],
                drop_path_rates=drop_path_rates[i],
            )
            self.stages.append(stage)
            prev_chs = out_chs

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_hidden_states: Optional[bool] = False,
    ) -> BaseModelOutputWithNoAttention:
        all_hidden_states = [hidden_states] if output_hidden_states else None

        for layer_module in self.stages:
            hidden_states = layer_module(hidden_states)
            if output_hidden_states:
                all_hidden_states.append(hidden_states)

        return BaseModelOutputWithNoAttention(last_hidden_state=hidden_states, hidden_states=all_hidden_states)


@auto_docstring
class ConvNextPreTrainedModel(PreTrainedModel):
    config: ConvNextConfig
    base_model_prefix = "convnext"
    main_input_name = "pixel_values"
    _no_split_modules = ["ConvNextLayer"]
    _can_record_outputs = {}

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, (nn.LayerNorm, ConvNextLayerNorm)):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        elif isinstance(module, ConvNextLayer):
            if module.layer_scale_parameter is not None:
                module.layer_scale_parameter.data.fill_(self.config.layer_scale_init_value)


@auto_docstring
class ConvNextModel(ConvNextPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = ConvNextEmbeddings(config)
        self.encoder = ConvNextEncoder(config)

        # final layernorm layer
        self.layernorm = nn.LayerNorm(config.hidden_sizes[-1], eps=config.layer_norm_eps)

        # Initialize weights and apply final processing
        self.post_init()

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        output_hidden_states: Optional[bool] = None,
    ) -> BaseModelOutputWithPoolingAndNoAttention:
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        embedding_output = self.embeddings(pixel_values)
        encoder_outputs: BaseModelOutputWithNoAttention = self.encoder(
            embedding_output, output_hidden_states=output_hidden_states
        )
        last_hidden_state = encoder_outputs.last_hidden_state

        # global average pooling, (N, C, H, W) -> (N, C)
        pooled_output = self.layernorm(last_hidden_state.mean([-2, -1]))

        return BaseModelOutputWithPoolingAndNoAttention(
            last_hidden_state=last_hidden_state,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
        )
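
# Usage sketch for the bare model (illustrative; "facebook/convnext-tiny-224" is one
# public checkpoint, and any ConvNext checkpoint behaves the same). With a 224x224
# input, the four stages downsample by 4, 2, 2, 2, giving a 7x7 final feature map:
#
#     from transformers import AutoImageProcessor, ConvNextModel
#     processor = AutoImageProcessor.from_pretrained("facebook/convnext-tiny-224")
#     model = ConvNextModel.from_pretrained("facebook/convnext-tiny-224")
#     inputs = processor(images=image, return_tensors="pt")
#     outputs = model(**inputs)
#     outputs.last_hidden_state.shape  # (batch, 768, 7, 7)
#     outputs.pooler_output.shape      # (batch, 768): global average pool + layernorm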


@auto_docstring(
    custom_intro="""
    ConvNext Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    """
)
class ConvNextForImageClassification(ConvNextPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.num_labels = config.num_labels
        self.convnext = ConvNextModel(config)

        # Classifier head
        if config.num_labels > 0:
            self.classifier = nn.Linear(config.hidden_sizes[-1], config.num_labels)
        else:
            self.classifier = nn.Identity()

        # Initialize weights and apply final processing
        self.post_init()

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        **kwargs,
    ) -> ImageClassifierOutputWithNoAttention:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        outputs: BaseModelOutputWithPoolingAndNoAttention = self.convnext(pixel_values, **kwargs)
        pooled_output = outputs.pooler_output
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            loss = self.loss_function(labels=labels, pooled_logits=logits, config=self.config)

        return ImageClassifierOutputWithNoAttention(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
        )


@auto_docstring(
    custom_intro="""
    ConvNeXt backbone, to be used with frameworks like DETR and MaskFormer.
    """
)
class ConvNextBackbone(ConvNextPreTrainedModel, BackboneMixin):
    has_attentions = False

    def __init__(self, config):
        super().__init__(config)
        super()._init_backbone(config)

        self.embeddings = ConvNextEmbeddings(config)
        self.encoder = ConvNextEncoder(config)
        self.num_features = [config.hidden_sizes[0]] + config.hidden_sizes

        # Add layer norms to hidden states of out_features
        hidden_states_norms = {}
        for stage, num_channels in zip(self._out_features, self.channels):
            hidden_states_norms[stage] = ConvNextLayerNorm(num_channels, data_format="channels_first")
        self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)

        # initialize weights and apply final processing
        self.post_init()

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        pixel_values: torch.Tensor,
        output_hidden_states: Optional[bool] = None,
    ) -> BackboneOutput:
        """
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("facebook/convnext-tiny-224")
        >>> model = AutoBackbone.from_pretrained("facebook/convnext-tiny-224")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
        ```"""
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        embedding_output = self.embeddings(pixel_values)

        outputs: BaseModelOutputWithNoAttention = self.encoder(embedding_output, output_hidden_states=True)

        hidden_states = outputs.hidden_states

        feature_maps = []
        # the encoder returns [stem embedding, stage1, ..., stage4]; keep only the stages
        # requested in `out_features`, each normalized by its own layer norm
        for stage, hidden_state in zip(self.stage_names, hidden_states):
            if stage in self.out_features:
                hidden_state = self.hidden_states_norms[stage](hidden_state)
                feature_maps.append(hidden_state)

        return BackboneOutput(
            feature_maps=tuple(feature_maps),
            hidden_states=hidden_states if output_hidden_states else None,
        )


__all__ = ["ConvNextForImageClassification", "ConvNextModel", "ConvNextPreTrainedModel", "ConvNextBackbone"]