
    hn}                        d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
 d dlZd dlZd dlmc mZ d dlmZmZ d dlmZmZmZ d dlmZ d d	lmZmZ d d
lmZmZ d dlm Z  d dl!m"Z"m#Z# d dl$m%Z% g dZ&de'e(e(f   de(de(de(de'e(e(f   f
dZ)de'e(e(f   de(de*e'e(e(f      fdZ+de(de(dej$                  fdZ, G d dejZ                        Z. G d dejZ                        Z/ G d dejZ                        Z0 G d  d!ejZ                        Z1 G d" d#ejZ                        Z2 G d$ d%ejZ                        Z3 G d& d'ejZ                        Z4 G d( d)ejZ                        Z5 G d* d+ejZ                        Z6	 	 d=d,e(d-e*e(   d.e*e(   d/e7d0e(d1e(d2e
e   d3e8d4ede6fd5Z9 G d6 d7e      Z: e        ed8e:jv                  f9      dd:d;d2e
e:   d3e8d4ede6fd<              Z<y)>    N)OrderedDict)Sequence)partial)AnyCallableOptional)nnTensor)register_modelWeightsWeightsEnum)_IMAGENET_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)Conv2dNormActivationSqueezeExcitation)StochasticDepth)ImageClassificationInterpolationMode)_log_api_usage_once)MaxVitMaxVit_T_Weightsmaxvit_t
input_sizekernel_sizestridepaddingreturnc                 R    | d   |z
  d|z  z   |z  dz   | d   |z
  d|z  z   |z  dz   fS )Nr          )r   r   r   r   s       W/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/torchvision/models/maxvit.py_get_conv_output_shaper$      sJ    	A	$q7{	2v=A	A	$q7{	2v=A     n_blocksc                     g }t        | ddd      }t        |      D ]!  }t        |ddd      }|j                  |       # |S )zQUtil function to check that the input size is correct for a MaxVit configuration.   r    r!   )r$   rangeappend)r   r&   shapesblock_input_shape_s        r#   _make_block_input_shapesr.   !   sQ    F.z1aC8_ )23DaAN'() Mr%   heightwidthc                    t        j                  t        j                  t        j                  |       t        j                  |      gd            }t        j                  |d      }|d d d d d f   |d d d d d f   z
  }|j                  ddd      j                         }|d d d d dfxx   | dz
  z  cc<   |d d d d dfxx   |dz
  z  cc<   |d d d d dfxx   d|z  dz
  z  cc<   |j                  d      S )Nij)indexingr!   r    r   )torchstackmeshgridarangeflattenpermute
contiguoussum)r/   r0   coordscoords_flatrelative_coordss        r#   _get_relative_position_indexr@   +   s    [[f)=u||E?R(S^bcdF--*K!!Q*-AtQJ0GGO%--aA6AACOAq!G
*Aq!G	)Aq!GE	A-r""r%   c                        e Zd ZdZ	 ddededededededej                  f   d	edej                  f   d
eddf fdZ	de
de
fdZ xZS )MBConva=  MBConv: Mobile Inverted Residual Bottleneck.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        expansion_ratio (float): Expansion ratio in the bottleneck.
        squeeze_ratio (float): Squeeze ratio in the SE Layer.
        stride (int): Stride of the depthwise convolution.
        activation_layer (Callable[..., nn.Module]): Activation function.
        norm_layer (Callable[..., nn.Module]): Normalization function.
        p_stochastic_dropout (float): Probability of stochastic depth.
    in_channelsout_channelsexpansion_ratiosqueeze_ratior   activation_layer.
norm_layerp_stochastic_dropoutr   Nc	                    t         |           |  |dk7  xs ||k7  }	|	rTt        j                  ||ddd      g}
|dk(  rt        j                  d|d      g|
z   }
t        j
                  |
 | _        nt        j                         | _        t        ||z        }t        ||z        }|rt        |d      | _
        nt        j                         | _
        t               } ||      |d	<   t        ||ddd
||d       |d<   t        ||d|d|||d 	      |d<   t        ||t        j                        |d<   t        j                  ||dd      |d<   t        j
                  |      | _        y )Nr!   T)r   r   biasr    r(   r   r   r   rowmodepre_normr   )r   r   r   rG   rH   inplaceconv_a)r   r   r   rG   rH   groupsrQ   conv_b)
activationsqueeze_excitation)rC   rD   r   rK   conv_c)super__init__r	   Conv2d	AvgPool2d
SequentialprojIdentityintr   stochastic_depthr   r   r   SiLUlayers)selfrC   rD   rE   rF   r   rG   rH   rI   should_projr]   mid_channelssqz_channels_layers	__class__s                 r#   rY   zMBConv.__init__D   st    	 	k@[L%@IIk<QqW[\]D{61MNQUUt,DIDI</9:<-78$34Hu$UD!$&KKMD!-(5
0-!	
 1-!

 ):,acahah(i$%II,\ghostmmG,r%   xc                 n    | j                  |      }| j                  | j                  |            }||z   S )z
        Args:
            x (Tensor): Input tensor with expected layout of [B, C, H, W].
        Returns:
            Tensor: Output tensor with expected layout of [B, C, H / stride, W / stride].
        )r]   r`   rb   rc   ri   ress      r#   forwardzMBConv.forward   s2     iil!!$++a.1Qwr%   )        )__name__
__module____qualname____doc__r_   floatr   r	   ModulerY   r
   rm   __classcell__rh   s   @r#   rB   rB   6   s    , '*;-;- ;- 	;-
 ;- ;- #3		>2;- S"))^,;- $;- 
;-z	 	F 	r%   rB   c                   d     e Zd ZdZdedededdf fdZdej                  fdZd	edefd
Z	 xZ
S )$RelativePositionalMultiHeadAttentionzRelative Positional Multi-Head Attention.

    Args:
        feat_dim (int): Number of input features.
        head_dim (int): Number of features per head.
        max_seq_len (int): Maximum sequence length.
    feat_dimhead_dimmax_seq_lenr   Nc                 b   t         |           ||z  dk7  rt        d| d|       ||z  | _        || _        t        t        j                  |            | _        || _	        t        j                  || j                  | j                  z  dz        | _        |dz  | _        t        j                  | j                  | j                  z  |      | _        t        j                  j!                  t#        j$                  d| j                  z  dz
  d| j                  z  dz
  z  | j                  ft"        j&                              | _        | j+                  d	t-        | j                  | j                               t"        j                  j.                  j1                  | j(                  d
       y )Nr   z
feat_dim: z  must be divisible by head_dim: r(   g      r    r!   )dtyperelative_position_index{Gz?std)rX   rY   
ValueErrorn_headsrz   r_   mathsqrtsizer{   r	   Linearto_qkvscale_factormerge	parameter	Parameterr5   emptyfloat32relative_position_bias_tableregister_bufferr@   inittrunc_normal_)rc   ry   rz   r{   rh   s       r#   rY   z-RelativePositionalMultiHeadAttention.__init__   sU    	h!#z(3ST\S]^__8+ 		+./	&ii$,,*F*JK$dNYYt}}t||;XF
,.LL,B,BKK!dii-!+DII0ABDLLQY^YfYfg-
) 	68TUYU^U^`d`i`i8jk##D$E$E4#Pr%   c                    | j                   j                  d      }| j                  |   j                  | j                  | j                  d      }|j	                  ddd      j                         }|j                  d      S )Nr4   r    r   r!   )r~   viewr   r{   r:   r;   	unsqueeze)rc   
bias_indexrelative_biass      r#   get_relative_positional_biaszARelativePositionalMultiHeadAttention.get_relative_positional_bias   ss    1166r:
99*EJJ4K[K[]a]m]moqr%--aA6AAC&&q))r%   ri   c                    |j                   \  }}}}| j                  | j                  }}| j                  |      }t	        j
                  |dd      \  }	}
}|	j                  |||||      j                  ddddd      }	|
j                  |||||      j                  ddddd      }
|j                  |||||      j                  ddddd      }|
| j                  z  }
t	        j                  d|	|
      }| j                         }t        j                  ||z   d      }t	        j                  d	||      }|j                  ddddd      j                  ||||      }| j                  |      }|S )
z
        Args:
            x (Tensor): Input tensor with expected layout of [B, G, P, D].
        Returns:
            Tensor: Output tensor with expected layout of [B, G, P, D].
        r(   r4   )dimr   r!   r       z!B G H I D, B G H J D -> B G H I Jz!B G H I J, B G H J D -> B G H I D)shaper   rz   r   r5   chunkreshaper:   r   einsumr   Fsoftmaxr   )rc   ri   BGPDHDHqkvqkvdot_prodpos_biasouts                  r#   rm   z,RelativePositionalMultiHeadAttention.forward   sX    WW
1admm2kk!n++c1"-1aIIaAq"%--aAq!<IIaAq"%--aAq!<IIaAq"%--aAq!<!!!<< CQJ44699X0b9ll>!Lkk!Q1a(00Aq!<jjo
r%   )ro   rp   rq   rr   r_   rY   r5   r
   r   rm   ru   rv   s   @r#   rx   rx      s[    QQ Q 	Q
 
Q8*ell * F r%   rx   c                   h     e Zd ZdZdededdf fdZdej                  dej                  fdZ xZ	S )	SwapAxeszPermute the axes of a tensor.abr   Nc                 >    t         |           || _        || _        y N)rX   rY   r   r   )rc   r   r   rh   s      r#   rY   zSwapAxes.__init__   s    r%   ri   c                 \    t        j                  || j                  | j                        }|S r   )r5   swapaxesr   r   rk   s      r#   rm   zSwapAxes.forward   s!    nnQ/
r%   )
ro   rp   rq   rr   r_   rY   r5   r
   rm   ru   rv   s   @r#   r   r      s;    '# # $ 
 %,, r%   r   c                   8     e Zd ZdZd fdZdededefdZ xZS )WindowPartitionzB
    Partition the input tensor into non-overlapping windows.
    r   c                 "    t         |           y r   rX   rY   rc   rh   s    r#   rY   zWindowPartition.__init__       r%   ri   pc                     |j                   \  }}}}|}|j                  ||||z  |||z  |      }|j                  dddddd      }|j                  |||z  ||z  z  ||z  |      }|S )z
        Args:
            x (Tensor): Input tensor with expected layout of [B, C, H, W].
            p (int): Number of partitions.
        Returns:
            Tensor: Output tensor with expected layout of [B, H/P, W/P, P*P, C].
        r   r    r   r(      r!   r   r   r:   )rc   ri   r   r   Cr   Wr   s           r#   rm   zWindowPartition.forward   s|     WW
1aIIaAFAqAvq1IIaAq!Q'IIa!q&Q!V,a!eQ7r%   r   N	ro   rp   rq   rr   rY   r
   r_   rm   ru   rv   s   @r#   r   r      s'     C F r%   r   c            
       @     e Zd ZdZd	 fdZdededededef
dZ xZS )
WindowDepartitionzo
    Departition the input tensor of non-overlapping windows into a feature volume of layout [B, C, H, W].
    r   c                 "    t         |           y r   r   r   s    r#   rY   zWindowDepartition.__init__  r   r%   ri   r   h_partitionsw_partitionsc                     |j                   \  }}}}|}	||}}
|j                  ||
||	|	|      }|j                  dddddd      }|j                  |||
|	z  ||	z        }|S )ar  
        Args:
            x (Tensor): Input tensor with expected layout of [B, (H/P * W/P), P*P, C].
            p (int): Number of partitions.
            h_partitions (int): Number of vertical partitions.
            w_partitions (int): Number of horizontal partitions.
        Returns:
            Tensor: Output tensor with expected layout of [B, C, H, W].
        r   r   r!   r(   r    r   r   )rc   ri   r   r   r   r   r   PPr   r   HPWPs               r#   rm   zWindowDepartition.forward  st     gg1b!|BIIaRAq)IIaAq!Q'IIaBFBF+r%   r   r   rv   s   @r#   r   r      s6     C s # RX r%   r   c                        e Zd ZdZdededededeeef   deded	ej                  f   d
ed	ej                  f   de
de
de
ddf fdZdedefdZ xZS )PartitionAttentionLayera  
    Layer for partitioning the input tensor into non-overlapping windows and applying attention to each window.

    Args:
        in_channels (int): Number of input channels.
        head_dim (int): Dimension of each attention head.
        partition_size (int): Size of the partitions.
        partition_type (str): Type of partitioning to use. Can be either "grid" or "window".
        grid_size (Tuple[int, int]): Size of the grid to partition the input tensor into.
        mlp_ratio (int): Ratio of the  feature size expansion in the MLP layer.
        activation_layer (Callable[..., nn.Module]): Activation function to use.
        norm_layer (Callable[..., nn.Module]): Normalization function to use.
        attention_dropout (float): Dropout probability for the attention layer.
        mlp_dropout (float): Dropout probability for the MLP layer.
        p_stochastic_dropout (float): Probability of dropping out a partition.
    rC   rz   partition_sizepartition_type	grid_size	mlp_ratiorG   .rH   attention_dropoutmlp_dropoutrI   r   Nc           	         t         |           ||z  | _        || _        |d   |z  | _        || _        || _        |dvrt        d      |dk(  r|| j                  c| _        | _	        n| j                  |c| _        | _	        t               | _        t               | _        |dk(  rt        dd      nt        j                          | _        |dk(  rt        dd      nt        j                          | _        t        j&                   ||      t)        |||dz        t        j*                  |	            | _        t        j&                  t        j.                  |      t        j0                  |||z         |       t        j0                  ||z  |      t        j*                  |
            | _        t5        |d	
      | _        y )Nr   )gridwindowz0partition_type must be either 'grid' or 'window'r   r   r    rM   rN   )rX   rY   r   rz   n_partitionsr   r   r   r   gr   partition_opr   departition_opr   r	   r^   partition_swapdepartition_swapr\   rx   Dropout
attn_layer	LayerNormr   	mlp_layerr   stochastic_dropout)rc   rC   rz   r   r   r   r   rG   rH   r   r   rI   rh   s               r#   rY   z PartitionAttentionLayer.__init__-  s   " 	"h. %aLN:,"!33OPPX%+T->->NDFDF!..NDFDF+-/12@F2Jhr2.PRP[P[P]4Bf4LR 0RTR]R]R_--{# 1hXYHYZJJ()
 LL%IIk;#:;IIkI-{;JJ{#
 #22FU"Sr%   ri   c                    | j                   d   | j                  z  | j                   d   | j                  z  }}t        j                  | j                   d   | j                  z  dk(  xr | j                   d   | j                  z  dk(  dj	                  | j                   | j                               | j                  || j                        }| j                  |      }|| j                  | j                  |            z   }|| j                  | j                  |            z   }| j                  |      }| j                  || j                  ||      }|S )z
        Args:
            x (Tensor): Input tensor with expected layout of [B, C, H, W].
        Returns:
            Tensor: Output tensor with expected layout of [B, C, H, W].
        r   r!   z[Grid size must be divisible by partition size. Got grid size of {} and partition size of {})r   r   r5   _assertformatr   r   r   r   r   r   r   )rc   ri   ghgws       r#   rm   zPartitionAttentionLayer.forwardg  s!    "dff,dnnQ.?466.IBNN1&!+Oq0ADFF0Ja0Oipp	
 a("''(:;;''q(9::!!!$4662r2r%   )ro   rp   rq   rr   r_   strtupler   r	   rt   rs   rY   r
   rm   ru   rv   s   @r#   r   r     s    "8T8T 8T
 8T 8T c?8T 8T #3		>28T S"))^,8T !8T 8T $8T  
!8Tt F r%   r   c                        e Zd ZdZdededededededej                  f   d	edej                  f   d
edededededede	eef   ddf fdZ
dedefdZ xZS )MaxVitLayera  
    MaxVit layer consisting of a MBConv layer followed by a PartitionAttentionLayer with `window` and a PartitionAttentionLayer with `grid`.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        expansion_ratio (float): Expansion ratio in the bottleneck.
        squeeze_ratio (float): Squeeze ratio in the SE Layer.
        stride (int): Stride of the depthwise convolution.
        activation_layer (Callable[..., nn.Module]): Activation function.
        norm_layer (Callable[..., nn.Module]): Normalization function.
        head_dim (int): Dimension of the attention heads.
        mlp_ratio (int): Ratio of the MLP layer.
        mlp_dropout (float): Dropout probability for the MLP layer.
        attention_dropout (float): Dropout probability for the attention layer.
        p_stochastic_dropout (float): Probability of stochastic depth.
        partition_size (int): Size of the partitions.
        grid_size (Tuple[int, int]): Size of the input feature grid.
    rC   rD   rF   rE   r   rH   .rG   rz   r   r   r   rI   r   r   r   Nc                 2   t         |           t               }t        ||||||||      |d<   t	        |||d||	|t
        j                  ||
|      |d<   t	        |||d||	|t
        j                  ||
|      |d<   t        j                  |      | _        y )N)rC   rD   rE   rF   r   rG   rH   rI   MBconvr   )rC   rz   r   r   r   r   rG   rH   r   r   rI   window_attentionr   grid_attention)	rX   rY   r   rB   r   r	   r   r\   rb   )rc   rC   rD   rF   rE   r   rH   rG   rz   r   r   r   rI   r   r   rb   rh   s                   r#   rY   zMaxVitLayer.__init__  s    * 	)m "#%+'-!!5	
x &=$)#-||/#!5&
!" $;$)!-||/#!5$
  mmF+r%   ri   c                 (    | j                  |      }|S z
        Args:
            x (Tensor): Input tensor of shape (B, C, H, W).
        Returns:
            Tensor: Output tensor of shape (B, C, H, W).
        rb   )rc   ri   s     r#   rm   zMaxVitLayer.forward  s     KKNr%   )ro   rp   rq   rr   r_   rs   r   r	   rt   r   rY   r
   rm   ru   rv   s   @r#   r   r     s    (?, ?, 	?,
 ?, ?, ?, S"))^,?, #3		>2?, ?, ?, ?, !?,  $!?,$ %?,& c?'?,( 
)?,B F r%   r   c                        e Zd ZdZdedededededej                  f   dedej                  f   d	ed
edededede	eef   dede
e   ddf fdZdedefdZ xZS )MaxVitBlocka(  
    A MaxVit block consisting of `n_layers` MaxVit layers.

     Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        expansion_ratio (float): Expansion ratio in the bottleneck.
        squeeze_ratio (float): Squeeze ratio in the SE Layer.
        activation_layer (Callable[..., nn.Module]): Activation function.
        norm_layer (Callable[..., nn.Module]): Normalization function.
        head_dim (int): Dimension of the attention heads.
        mlp_ratio (int): Ratio of the MLP layer.
        mlp_dropout (float): Dropout probability for the MLP layer.
        attention_dropout (float): Dropout probability for the attention layer.
        p_stochastic_dropout (float): Probability of stochastic depth.
        partition_size (int): Size of the partitions.
        input_grid_size (Tuple[int, int]): Size of the input feature grid.
        n_layers (int): Number of layers in the block.
        p_stochastic (List[float]): List of probabilities for stochastic depth for each layer.
    rC   rD   rF   rE   rH   .rG   rz   r   r   r   r   input_grid_sizen_layersp_stochasticr   Nc                 p   t         |           t        |      |k(  st        d| d| d      t	        j
                         | _        t        |ddd      | _        t        |      D ]L  \  }}|dk(  rdnd}| xj                  t        |dk(  r|n||||||||||	|
|| j                  |	      gz  c_        N y )
Nz'p_stochastic must have length n_layers=z, got p_stochastic=.r(   r    r!   rL   r   )rC   rD   rF   rE   r   rH   rG   rz   r   r   r   r   r   rI   )rX   rY   lenr   r	   
ModuleListrb   r$   r   	enumerater   )rc   rC   rD   rF   rE   rH   rG   rz   r   r   r   r   r   r   r   idxr   r   rh   s                     r#   rY   zMaxVitBlock.__init__  s    , 	< H,FxjPcdpcqqrsttmmo/QWXbcd- 	FC(QFKK/2ax\!-"/$3!)%5%' +&7#1"nn)* K	r%   ri   c                 8    | j                   D ]
  } ||      } |S r   r   )rc   ri   layers      r#   rm   zMaxVitBlock.forward-  s%     [[ 	EaA	r%   )ro   rp   rq   rr   r_   rs   r   r	   rt   r   listrY   r
   rm   ru   rv   s   @r#   r   r     s    *1 1 	1
 1 1 S"))^,1 #3		>21 1 1 1 !1  !1" sCx#1& '1( 5k)1* 
+1f	 	F 	r%   r   c            !            e Zd ZdZdej
                  ddddddfdeeef   ded	ed
ee   dee   dede	de
edej                  f      dedej                  f   de	de	dede	de	deddf  fdZdedefdZd Z xZS )r   ay  
    Implements MaxVit Transformer from the `MaxViT: Multi-Axis Vision Transformer <https://arxiv.org/abs/2204.01697>`_ paper.
    Args:
        input_size (Tuple[int, int]): Size of the input image.
        stem_channels (int): Number of channels in the stem.
        partition_size (int): Size of the partitions.
        block_channels (List[int]): Number of channels in each block.
        block_layers (List[int]): Number of layers in each block.
        stochastic_depth_prob (float): Probability of stochastic depth. Expands to a list of probabilities for each layer that scales linearly to the specified value.
        squeeze_ratio (float): Squeeze ratio in the SE Layer. Default: 0.25.
        expansion_ratio (float): Expansion ratio in the MBConv bottleneck. Default: 4.
        norm_layer (Callable[..., nn.Module]): Normalization function. Default: None (setting to None will produce a `BatchNorm2d(eps=1e-3, momentum=0.01)`).
        activation_layer (Callable[..., nn.Module]): Activation function Default: nn.GELU.
        head_dim (int): Dimension of the attention heads.
        mlp_ratio (int): Expansion ratio of the MLP layer. Default: 4.
        mlp_dropout (float): Dropout probability for the MLP layer. Default: 0.0.
        attention_dropout (float): Dropout probability for the attention layer. Default: 0.0.
        num_classes (int): Number of classes. Default: 1000.
    Ng      ?r   rn   i  r   stem_channelsr   block_channelsblock_layersrz   stochastic_depth_probrH   .rG   rF   rE   r   r   r   num_classesr   c                    t         |           t        |        d}|t        t        j
                  dd      }t        |t        |            }t        |      D ]3  \  }}|d   |z  dk7  s|d   |z  dk7  st        d| d| d	| d
| d	       t	        j                  t        ||dd||	dd       t        ||ddd d d            | _        t        |ddd      }|| _        t	        j                         | _        |g|d d z   }|}t#        j$                  d|t'        |            j)                         }d}t+        |||      D ]\  \  }}}| j                   j-                  t/        |||
|||	|||||||||||z                 | j                   d   j0                  }||z  }^ t	        j                  t	        j2                  d      t	        j4                         t	        j6                  |d         t	        j8                  |d   |d         t	        j:                         t	        j8                  |d   |d            | _        | j?                          y )Nr(   gMbP?g{Gz?)epsmomentumr   r!   zInput size z
 of block z$ is not divisible by partition size zx. Consider changing the partition size or the input size.
Current configuration yields the following block input sizes: r   r    F)r   rH   rG   rK   rQ   T)r   rH   rG   rK   rL   r4   )rC   rD   rF   rE   rH   rG   rz   r   r   r   r   r   r   r   )rK   ) rX   rY   r   r   r	   BatchNorm2dr.   r   r   r   r\   r   stemr$   r   r   blocksnplinspacer<   tolistzipr*   r   r   AdaptiveAvgPool2dFlattenr   r   Tanh
classifier_init_weights)rc   r   r  r   r  r  rz   r  rH   rG   rF   rE   r   r   r   r	  input_channelsblock_input_sizesr   block_input_sizerC   rD   r   p_idx
in_channelout_channel
num_layersrh   s                              r#   rY   zMaxVit.__init__N  s   : 	D!  TDIJ
 5Z^ATU%./@%A 	!C!"^3q8<LQ<OR`<`de<e !"2!3:cUBfgufv wUUfTgghj 	 MM %!1	 !}ad]ahl
	" ,JAaYZ[
, mmo$os(;;%
 {{1&;S=NOVVX36{LR^3_ 	 /JZKK *!,"/$3)%5%' +&7#1$.'!-eej6H!I$ R22JZE)	 0 --  #JJLLL+,IInR(.*<=GGIIInR(+EB
 	r%   ri   c                 |    | j                  |      }| j                  D ]
  } ||      } | j                  |      }|S r   )r  r  r  )rc   ri   blocks      r#   rm   zMaxVit.forward  s>    IIaL[[ 	EaA	OOAr%   c                    | j                         D ]l  }t        |t        j                        rbt        j                  j                  |j                  d       |j                  Vt        j                  j                  |j                         t        |t        j                        rUt        j                  j                  |j                  d       t        j                  j                  |j                  d       t        |t        j                        st        j                  j                  |j                  d       |j                  Dt        j                  j                  |j                         o y )Nr   r   r!   r   )modules
isinstancer	   rZ   r   normal_weightrK   zeros_r  	constant_r   )rc   ms     r#   r  zMaxVit._init_weights  s     	+A!RYY'd366%GGNN166*Ar~~.!!!((A.!!!&&!,Aryy)d366%GGNN166*	+r%   )ro   rp   rq   rr   r	   GELUr   r_   r  rs   r   r   rt   rY   r
   rm   r  ru   rv   s   @r#   r   r   9  s%   J :>57WW#!" #&7t #s(Ot
 t t S	t 3it t  %t" Xc299n56#t$ #3		>2%t( )t* +t. /t0 1t2 !3t6 7t8 
9tl F +r%   r   r  r  r  r  r   rz   weightsprogresskwargsc                 d   |dt        |dt        |j                  d                |j                  d   d   |j                  d   d   k(  sJ t        |d|j                  d          |j                  dd      }	t	        d| ||||||	d|}
|"|
j                  |j                  |d	
             |
S )Nr	  
categoriesmin_sizer   r!   r      r2  )r  r  r  r  rz   r   r   T)r,  
check_hashr"   )r   r   metapopr   load_state_dictget_state_dict)r  r  r  r  r   rz   r+  r,  r-  r   models              r#   _maxvitr9    s    $ fmSl9S5TU||J'*gll:.Fq.IIIIflGLL4LML*5J 	#%!3%	 	E g44hSW4XYLr%   c                   j    e Zd Z ed eeddej                        edddddd	d
idddd      Z	e	Z
y)r   z9https://download.pytorch.org/models/maxvit_t-bc5ab103.pthr2  )	crop_sizeresize_sizeinterpolationir1  zLhttps://github.com/pytorch/vision/tree/main/references/classification#maxvitzImageNet-1KgT@g|?5.X@)zacc@1zacc@5gZd;@gK7]@zThese weights reproduce closely the results of the paper using a similar training recipe.
            They were trained with a BatchNorm2D momentum of 0.99 instead of the more correct 0.01.)r/  
num_paramsr0  recipe_metrics_ops
_file_size_docs)url
transformsr4  N)ro   rp   rq   r   r   r   r   BICUBICr   IMAGENET1K_V1DEFAULTr"   r%   r#   r   r     sb    G3CO`OhOh
 /""d##  !g
M. Gr%   r   
pretrained)r+  T)r+  r,  c                 \    t         j                  |       } t        ddg dg dddd| |d|S )	a  
    Constructs a maxvit_t architecture from
    `MaxViT: Multi-Axis Vision Transformer <https://arxiv.org/abs/2204.01697>`_.

    Args:
        weights (:class:`~torchvision.models.MaxVit_T_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.MaxVit_T_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.maxvit.MaxVit``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/maxvit.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.MaxVit_T_Weights
        :members:
    @   )rK        i   )r    r    r   r        g?   )r  r  r  rz   r  r   r+  r,  r"   )r   verifyr9  )r+  r,  r-  s      r#   r   r     sH    . %%g.G 
*!!
 
 
r%   )NF)=r   collectionsr   collections.abcr   	functoolsr   typingr   r   r   numpyr  r5   torch.nn.functionalr	   
functionalr   r
   torchvision.models._apir   r   r   torchvision.models._metar   torchvision.models._utilsr   r   torchvision.ops.miscr   r    torchvision.ops.stochastic_depthr   torchvision.transforms._presetsr   r   torchvision.utilsr   __all__r   r_   r$   r  r.   r@   rt   rB   rx   r   r   r   r   r   r   r   rs   boolr9  r   rG  r   r"   r%   r#   <module>ra     s[    # $  * *      H H 9 T H < R 1uS#X S RU `c hmnqsvnvhw sCx C DQVWZ\_W_Q`La # #S #U\\ #TRYY TnF299 FR
ryy 
bii 4		 <ebii eP^")) ^BR")) Rj^+RYY ^+Z &*'' I	'
 s)' !' ' ' k"' ' '  !'T{ 6 ,0@0N0N!OP6:T !"23 !d !]` !ek ! Q !r%   