
    h                        d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
 d dlZd dlZd dlmZ ddlmZmZ ddlmZ dd	lmZ d
dlmZmZmZ d
dlmZ d
dlmZmZ g dZe G d d             Z dee!   de!fdZ"dejF                  de!de!de$ejF                  e!f   fdZ%dejF                  de!de!de!dejF                  f
dZ&ejN                  jQ                  d       ejN                  jQ                  d        G d dejR                        Z*dejF                  de!dejF                  fd Z+d!ejF                  d"ejF                  d#e$e!e!e!f   d$e$e!e!e!f   d%ejF                  d&ejF                  d'ejF                  dejF                  fd(Z,dejF                  d)ejF                  d*e-fd+Z.ejN                  jQ                  d,       ejN                  jQ                  d-        G d. d/ejR                        Z/ G d0 d1ejR                        Z0 G d2 d3ejR                        Z1 G d4 d5ejR                        Z2d6e3e    d7e4d8e
e   d9e-d:ede2fd;Z5 G d< d=e      Z6 G d> d?e      Z7 e        ed@e6jp                  fA      ddBdCd8e
e6   d9e-d:ede2fdD              Z9 e        ed@e7jp                  fA      ddBdCd8e
e7   d9e-d:ede2fdE              Z:y)F    N)Sequence)	dataclass)partial)AnyCallableOptional   )MLPStochasticDepth)VideoClassification)_log_api_usage_once   )register_modelWeightsWeightsEnum)_KINETICS400_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)MViTMViT_V1_B_Weights	mvit_v1_bMViT_V2_S_Weights	mvit_v2_sc                   l    e Zd ZU eed<   eed<   eed<   ee   ed<   ee   ed<   ee   ed<   ee   ed<   y)	MSBlockConfig	num_headsinput_channelsoutput_channelskernel_q	kernel_kvstride_q	stride_kvN)__name__
__module____qualname__int__annotations__list     [/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/torchvision/models/video/mvit.pyr   r      s;    N3iCy3iCyr*   r   sreturnc                 "    d}| D ]  }||z  }	 |S N   r)   )r,   productvs      r+   _prodr3   '   s$    G 1Nr*   x
target_dim
expand_dimc                     | j                         }||dz
  k(  r| j                  |      } | |fS ||k7  rt        d| j                         | |fS )Nr0   zUnsupported input dimension )dim	unsqueeze
ValueErrorshaper4   r5   r6   
tensor_dims       r+   
_unsqueezer>   .   s^    JZ!^#KK
# j= 
z	!7yABBj=r*   r=   c                 8    ||dz
  k(  r| j                  |      } | S r/   )squeezer<   s       r+   _squeezerA   7   s!    Z!^#IIj!Hr*   r>   rA   c                        e Zd Z	 	 ddej                  deej                     deej                     deddf
 fdZdej                  d	e
eeef   de
ej                  e
eeef   f   fd
Z xZS )PoolNpoolnorm
activationnorm_before_poolr-   c                     t         |           || _        g }||j                  |       ||j                  |       |rt	        j
                  | nd | _        || _        y )N)super__init__rD   appendnn
Sequentialnorm_actrG   )selfrD   rE   rF   rG   layers	__class__s         r+   rJ   zPool.__init__B   s\     		MM$!MM*%28v.d 0r*   r4   thwc                    t        |dd      \  }}t        j                  |dd      \  }}|j                  dd      }|j                  d d \  }}}|j                  ||z  |f|z         j                         }| j                  r| j                  | j                  |      }| j                  |      }|j                  dd  \  }}	}
|j                  |||d      j                  dd      }t        j                  ||fd      }| j                  s| j                  | j                  |      }t        |dd|      }|||	|
ffS )	N   r0   )r0   r   )indicesr8   r	   r8   )r>   torchtensor_split	transposer;   reshape
contiguousrG   rN   rD   catrA   )rO   r4   rR   r=   class_tokenBNCTHWs              r+   forwardzPool.forwardS   s3   "1a+: ++AtCQKK1''"1+1aIIq1uaj3&'224   T]]%>a A IIaL''!"+1aIIaAr",,Q2II{A&A.$$)Ba AQ1j)1a)|r*   )NF)r#   r$   r%   rL   Moduler   boolrJ   rX   Tensortupler&   re   __classcell__rQ   s   @r+   rC   rC   A   s    
 +/!&1ii1 ryy!1 RYY'	1
 1 
1" E#sC-,@ U5<<Y^_bdgil_lYmKmEn r*   rC   	embeddingdc                     | j                   d   |k(  r| S t        j                  j                  | j	                  dd      j                  d      |d      j                  d      j	                  dd      S )Nr   r0   linear)sizemode)r;   rL   
functionalinterpolatepermuter9   r@   )rl   rm   s     r+   _interpolateru   m   sn    qQ 	!!a#--a0 	" 	

 
	Ar*   attnqq_thwk_thw	rel_pos_h	rel_pos_w	rel_pos_tc                    |\  }}}	|\  }
}}t        dt        ||      z  dz
        }t        dt        |	|      z  dz
        }t        dt        ||
      z  dz
        }t        ||z  d      }t        ||z  d      }t        j                  |      d d d f   |z  t        j                  |      d d d f   d|z
  z   |z  z
  }t        ||	z  d      }t        |	|z  d      }t        j                  |	      d d d f   |z  t        j                  |      d d d f   d|z
  z   |z  z
  }t        |
|z  d      }t        ||
z  d      }t        j                  |      d d d f   |z  t        j                  |
      d d d f   d|
z
  z   |z  z
  }t	        ||      }t	        ||      }t	        ||      }||j                            }||j                            }||j                            }|j                  \  }}}}|d d d d dd f   j                  |||||	|      } t        j                  d| |      }!t        j                  d| |      }"| j                  dddddd	      j                  |||z  |z  |	z  |      } t        j                  | |j                  dd            j                  dd      }#|#j                  ||||	||
      j                  dddddd	      }#|!d d d d d d d d d d d d d d f   |"d d d d d d d d d d d d d d f   z   |#d d d d d d d d d d d d d d f   z   j                  ||||z  |	z  |
|z  |z        }$| d d d d dd dd fxx   |$z  cc<   | S )
Nr   r0         ?zbythwc,hkc->bythwkzbythwc,wkc->bythwkr   r	   rT      )r&   maxrX   arangeru   longr;   r[   einsumrt   matmulrZ   view)%rv   rw   rx   ry   rz   r{   r|   q_tq_hq_wk_tk_hk_wdhdwdt	q_h_ratio	k_h_ratiodist_h	q_w_ratio	k_w_ratiodist_w	q_t_ratio	k_t_ratiodist_tRhRwRtr_   n_head_r8   r_qrel_h_qrel_w_qrel_q_trel_poss%                                        r+   _add_rel_posr   |   sy    MCcMCc	QS#"	#B	QS#"	#B	QS#"	#B C#Is#IC#Is#I\\#q$w')3u||C7Hq7QUX[^U^7_cl6llFC#Is#IC#Is#I\\#q$w')3u||C7Hq7QUX[^U^7_cl6llFC#Is#IC#Is#I\\#q$w')3u||C7Hq7QUX[^U^7_cl6llF Y+IY+IY+I	6;;=	!B	6;;=	!B	6;;=	!BAvq#
Aq!"H+

ac3
<Cll/b9Gll/b9G
++aAq!Q
'
/
/QZ#5E5KS
QCll3Q 23==aCGll1fc3S9AA!Q1aQRSG 	1aAtQ,-
!Q1atQ.
/	0
!Q1aD$.
/	0 gasS#)c/:	  	Aqr12'!Kr*   shortcutresidual_with_cls_embedc           	          |r| j                  |       | S | d d d d dd d d fxx   |d d d d dd d d f   z  cc<   | S r/   )add_)r4   r   r   s      r+   _add_shortcutr      sF    	x H 	
!QA+(1aQ;//Hr*   r   r   c                       e Zd Zdej                  fdee   dedededee   dee   dee   d	ee   d
edededede	dej                  f   ddf fdZdej                  deeeef   deej                  eeeef   f   fdZ xZS )MultiscaleAttention        
input_size	embed_dim
output_dimr   r   r    r!   r"   residual_poolr   rel_pos_embeddropout
norm_layer.r-   Nc                 "   t         |           || _        || _        || _        ||z  | _        dt        j                  | j
                        z  | _        |	| _	        |
| _
        t        j                  |d|z        | _        t        j                  ||      g}|dkD  r&|j                  t        j                  |d             t        j                   | | _        d | _        t'        |      dkD  st'        |      dkD  rt|D cg c]  }t)        |dz         }}t+        t        j,                  | j
                  | j
                  |||| j
                  d	       || j
                              | _        d | _        d | _        t'        |      dkD  st'        |      dkD  r|D cg c]  }t)        |dz         }}t+        t        j,                  | j
                  | j
                  |||| j
                  d	       || j
                              | _        t+        t        j,                  | j
                  | j
                  |||| j
                  d	       || j
                              | _        d | _        d | _        d | _        |rt9        |dd        }t;        |      d
kD  r||d   z  n|}t;        |      d
kD  r||d   z  n|}dt9        ||      z  dz
  }d|d
   z  dz
  }t        j<                  t?        j@                  || j
                              | _        t        j<                  t?        j@                  || j
                              | _        t        j<                  t?        j@                  || j
                              | _        t        jB                  jE                  | j2                  d       t        jB                  jE                  | j4                  d       t        jB                  jE                  | j6                  d       y y c c}w c c}w )Nr~   r	   r   Tinplacer0   r   F)stridepaddinggroupsbiasr   {Gz?std)#rI   rJ   r   r   r   head_dimmathsqrtscalerr   r   rL   LinearqkvrK   DropoutrM   projectpool_qr3   r&   rC   Conv3dpool_kpool_vrz   r{   r|   r   len	ParameterrX   zerosinittrunc_normal_)rO   r   r   r   r   r   r    r!   r"   r   r   r   r   r   rP   rw   	padding_qkv
padding_kvrp   q_sizekv_sizespatial_dimtemporal_dimrQ   s                           r+   rJ   zMultiscaleAttention.__init__   sL     	"$""i/DIIdmm44*'>$99YJ7#%99Z#D"ES=MM"**Wd;<}}f-+/?Q%/A"5.67Q!V7I7		MMMM#%== 4==)DK ,0+/a5#3a#71:;2#bAg,;J;		MMMM$&== 4==)DK 		MMMM$&== 4==)DK 261515z!"~&D,/MA,=TXa[(4F.1)nq.@dil*dGc&'22Q6Kz!},q0L\\%++k4==*QRDN\\%++k4==*QRDN\\%++lDMM*RSDNGG!!$..d!;GG!!$..d!;GG!!$..d!; ] 8" <s   P Pr4   rR   c           	         |j                   \  }}}| j                  |      j                  ||d| j                  | j                        j                  dd      j                  d      \  }}}| j                  | j                  ||      \  }}	n|}	| j                  | j                  ||      d   }| j                  | j                  ||      \  }}t        j                  | j                  |z  |j                  dd            }
| j                  G| j                  ;| j                  /t!        |
|||	| j                  | j                  | j                        }
|
j#                  d      }
t        j                  |
|      }| j$                  rt'        ||| j(                         |j                  dd      j                  |d| j*                        }| j-                  |      }||fS )Nr	   r0   r   rW   r   rV   )r;   r   r[   r   r   rZ   unbindr   r   r   rX   r   r   rz   r{   r|   r   softmaxr   r   r   r   r   )rO   r4   rR   r_   r`   ra   rw   kr2   ry   rv   s              r+   re   zMultiscaleAttention.forward!  s   ''1a((1+%%aAt~~t}}MWWXY[\]ddijdk1a;;"{{1c*HAuE;;"As#A&A;;"[[C(FAs||DKK!OQ[[A->?>>%$..*DIcD |||#LLq!!Q < <=KK1%%aT__=LLO#vr*   )r#   r$   r%   rL   	LayerNormr(   r&   rg   floatr   rf   rJ   rX   rh   ri   re   rj   rk   s   @r+   r   r      s    /1||Z<IZ< Z< 	Z<
 Z< s)Z< 9Z< s)Z< 9Z< Z< "&Z< Z< Z< S"))^,Z< 
Z<x   E#sC-,@  U5<<Y^_bdgil_lYmKmEn  r*   r   c                        e Zd Zddej                  fdee   dededededede	d	e	d
e
dej                  f   ddf fdZdej                  deeeef   deej                  eeeef   f   fdZ xZS )MultiscaleBlockr   r   cnfr   r   r   proj_after_attnr   stochastic_depth_probr   .r-   Nc
                    t         |           || _        d | _        t	        |j
                        dkD  ro|j
                  D 
cg c]  }
|
dkD  r|
dz   n|
 }}
|D cg c]  }t        |dz         }}t        t        j                  ||j
                  |      d       | _        |r|j                  n|j                  } |	|j                        | _         |	|      | _        t        | j                  t        j                        | _        t#        ||j                  ||j$                  |j&                  |j(                  |j
                  |j*                  |||||	      | _        t/        |d|z  |j                  gt        j0                  |d       | _        t5        |d      | _        d | _        |j                  |j                  k7  r0t        j:                  |j                  |j                        | _        y y c c}
w c c}w )Nr0   r   )r   r   )	r   r    r!   r"   r   r   r   r   r   rT   )activation_layerr   r   row)rI   rJ   r   	pool_skipr3   r!   r&   rC   rL   	MaxPool3dr   r   norm1norm2
isinstanceBatchNorm1dneeds_transposalr   r   r   r    r"   rv   r
   GELUmlpr   stochastic_depthr   r   )rO   r   r   r   r   r   r   r   r   r   r,   kernel_skipr   padding_skipattn_dimrQ   s                  r+   rJ   zMultiscaleBlock.__init__E  s    	..2":=,,GQAE1q5q0GKG1<=ACQK=L=![|TVZDN +:3&&s?Q?Q 2 23
)
 *4::r~~ F'MM\\mm\\mm''$;!
	 \3../WW
 !00Eu M,0!4!4499S%7%79L9LMDL 5M H=s   G9G>r4   rR   c                    | j                   r1| j                  |j                  dd            j                  dd      n| j                  |      }| j                  ||      \  }}| j                  | j
                  s|n| j	                  |      }| j                  |n| j                  ||      d   }|| j                  |      z   }| j                   r1| j                  |j                  dd            j                  dd      n| j                  |      }| j                  | j
                  r|n| j	                  |      }|| j                  | j                  |            z   |fS )Nr0   r   r   )
r   r   rZ   rv   r   r   r   r   r   r   )	rO   r4   rR   x_norm1x_attnthw_newx_skipx_norm2x_projs	            r+   re   zMultiscaleBlock.forward  s-   CGCXCX$**Q[[A./99!Q?^b^h^hij^k))GS1%T-A-AAt||T[G\nn,$..C2H2KT**622CGCXCX$**Q[[A./99!Q?^b^h^hij^kll*d.B.BU\H]--dhhw.?@@'IIr*   )r#   r$   r%   rL   r   r(   r&   r   rg   r   r   rf   rJ   rX   rh   ri   re   rj   rk   s   @r+   r   r   D  s     '*/1||8NI8N 8N 	8N
 "&8N 8N 8N 8N  %8N S"))^,8N 
8Nt
J 
JE#sC-,@ 
JU5<<Y^_bdgil_lYmKmEn 
Jr*   r   c            
       v     e Zd Zdedeeef   dededdf
 fdZdej                  dej                  fd	Z	 xZ
S )
PositionalEncoding
embed_sizespatial_sizetemporal_sizer   r-   Nc                 (   t         |           || _        || _        t	        j
                  t        j                  |            | _        d | _	        d | _
        d | _        |st	        j
                  t        j                  | j                  d   | j                  d   z  |            | _	        t	        j
                  t        j                  | j                  |            | _
        t	        j
                  t        j                  |            | _        y y )Nr   r0   )rI   rJ   r   r   rL   r   rX   r   r^   spatial_postemporal_pos	class_pos)rO   r   r   r   r   rQ   s        r+   rJ   zPositionalEncoding.__init__  s    (*<<J(?@374815!||EKK8I8I!8LtO`O`abOc8ceo,pqD "U[[9K9KZ-X YD\\%++j*ABDN r*   r4   c                    | j                   j                  |j                  d      d      j                  d      }t	        j
                  ||fd      }| j                  | j                  | j                  | j                  j                  \  }}t	        j                  | j                  |d      }|j                  | j                  j                  d      j                  | j                  dd      j                  d|             t	        j
                  | j                  j                  d      |fd      j                  d      }|j                  |       |S )Nr   rV   r0   rW   )r^   expandrp   r9   rX   r]   r   r   r   r;   repeat_interleaver   r   r[   )rO   r4   r^   hw_sizer   pos_embeddings         r+   re   zPositionalEncoding.forward  s   &&--affQi<FFqIII{A&A.'D,=,=,IdnnNh"&"2"2"8"8GZ!33D4E4EwTUVMt//99!<CCDDVDVXZ\^_gghjlvwx!IIt~~'?'?'BM&RXYZddefgMFF=!r*   )r#   r$   r%   r&   ri   rg   rJ   rX   rh   re   rj   rk   s   @r+   r   r     sW    C3 CeCHo CVY Cjn Csw C %,, r*   r   c            $       ,    e Zd Z	 	 	 	 	 	 	 	 	 ddeeef   dedee   dedededed	ed
ededede	e
dej                  f      de	e
dej                  f      deeeef   deeeef   deeeef   ddf" fdZdej                  dej                  fdZ xZS )r   Nr   r   block_settingr   r   r   r   r   attention_dropoutr   num_classesblock.r   patch_embed_kernelpatch_embed_stridepatch_embed_paddingr-   c                    t         |           t        |        t        |      }|dk(  rt	        d      |t
        }|t        t        j                  d      }t        j                  d|d   j                  |||      | _        t        |f|z   | j                  j                        D cg c]
  \  }}||z   }}}t        |d   j                  |d   |d	   f|d   |
      | _        t        j                          | _        t%        |      D ]~  \  }}|
|z  |dz
  z  }| j"                  j'                   ||||||||	||	             t        |j(                        dkD  sTt        ||j(                        D cg c]
  \  }}||z   }}}  ||d   j*                        | _        t        j.                  t        j0                  |d      t        j2                  |d   j*                  |            | _        | j7                         D ]l  }t9        |t        j2                        r~t        j:                  j=                  |j>                  d       t9        |t        j2                        sd|j@                  qt        j:                  jC                  |j@                  d       t9        |t        j                        ro|j>                  *t        j:                  jC                  |j>                  d       |j@                  t        j:                  jC                  |j@                  d       %t9        |t              s7|jE                         D ]#  }t        j:                  j=                  |d       % o yc c}}w c c}}w )a  
        MViT main class.

        Args:
            spatial_size (tuple of ints): The spacial size of the input as ``(H, W)``.
            temporal_size (int): The temporal size ``T`` of the input.
            block_setting (sequence of MSBlockConfig): The Network structure.
            residual_pool (bool): If True, use MViTv2 pooling residual connection.
            residual_with_cls_embed (bool): If True, the addition on the residual connection will include
                the class embedding.
            rel_pos_embed (bool): If True, use MViTv2's relative positional embeddings.
            proj_after_attn (bool): If True, apply the projection after the attention.
            dropout (float): Dropout rate. Default: 0.0.
            attention_dropout (float): Attention dropout rate. Default: 0.0.
            stochastic_depth_prob: (float): Stochastic depth rate. Default: 0.0.
            num_classes (int): The number of classes.
            block (callable, optional): Module specifying the layer which consists of the attention and mlp.
            norm_layer (callable, optional): Module specifying the normalization layer to use.
            patch_embed_kernel (tuple of ints): The kernel of the convolution that patchifies the input.
            patch_embed_stride (tuple of ints): The stride of the convolution that patchifies the input.
            patch_embed_padding (tuple of ints): The padding of the convolution that patchifies the input.
        r   z+The configuration parameter can't be empty.Ngư>)epsr	   )in_channelsout_channelskernel_sizer   r   r0   r   )r   r   r   r   r~   )	r   r   r   r   r   r   r   r   r   rV   Tr   r   r   r   )#rI   rJ   r   r   r:   r   r   rL   r   r   r   	conv_projzipr   r   pos_encoding
ModuleListblocks	enumeraterK   r!   r   rE   rM   r   r   headmodulesr   r   r   weightr   	constant_
parameters)rO   r   r   r  r   r   r   r   r   r  r   r  r  r   r	  r
  r  total_stage_blocksrp   r   r   stage_block_idr   sd_probmweightsrQ   s                             r+   rJ   zMViT.__init__  s   R 	
 	D! /"JKK=#E 48J &q)88*%'
 :=m=MP\=\^b^l^l^s^s9tuvdfnu
u /$Q'66$Q-A7$Q-'	
 mmo#,]#; 	`NC+n<@RUX@XYGKK)"/,C"/$3-*1)
 3<< 1$ADZQTQ]Q]A^_vdfn_
_'	`( }R0@@A	 MMJJw-IImB'77E
	
  	=A!RYY'%%ahhD%9a+0BGG%%affc2Ar||,88'GG%%ahh466%GG%%affc2A12 ||~ =GGG))'t)<=	=Q v> `s   .M==Nr4   c                    t        |dd      d   }| j                  |      }|j                  d      j                  dd      }| j	                  |      }| j                  j
                  f| j                  j                  z   }| j                  D ]  } |||      \  }} | j                  |      }|d d df   }| j                  |      }|S )Nr   r   r   r0   )
r>   r  flattenrZ   r  r   r   r  rE   r  )rO   r4   rR   r  s       r+   re   zMViT.forward"  s    q!Q"NN1IIaL""1a( a    ..043D3D3Q3QQ[[ 	#E1c]FAs	#IIaL adGIIaLr*   )	g      ?r   r   i  NN)r	      r#  )r   rT   rT   )r0   r	   r	   )r#   r$   r%   ri   r&   r   r   rg   r   r   r   rL   rf   rJ   rX   rh   re   rj   rk   s   @r+   r   r     sM    #&'*489=3<3<4=#v=CHov= v=  .	v=
 v= "&v= v= v= v= !v=  %v= v= bii01v= Xc299n56v= "#sC-0v=  "#sC-0!v=" #3S=1#v=$ 
%v=p %,, r*   r   r  r   r   progresskwargsc                 >   |~t        |dt        |j                  d                |j                  d   d   |j                  d   d   k(  sJ t        |d|j                  d          t        |d|j                  d          |j                  dd	      }|j                  dd
      }t	        d||| |j                  dd      |j                  dd      |j                  dd      |j                  dd      |d|}|"|j                  |j                  |d             |S )Nr  
categoriesmin_sizer   r0   r   r   min_temporal_size   r+     r   Fr   Tr   r   )r   r   r  r   r   r   r   r   )r$  
check_hashr)   )r   r   metapopr   load_state_dictget_state_dict)r  r   r   r$  r%  r   r   models           r+   _mvitr3  9  s$    fmSl9S5TU||J'*gll:.Fq.IIIIfngll:6NOfow||DW7XY::nj9LJJ3M 
!##jj%8 &

+Dd Kjj%8

#4e<3
 
E g44hSW4XYLr*   c                   Z    e Zd Z ed eedddd      ddedd	d
ddddiddd	      ZeZy)r   z:https://download.pytorch.org/models/mvit_v1_b-dbeb1030.pthr*     ?r8  r8  ?r:  r:  	crop_sizeresize_sizemeanr   r,  zShttps://github.com/facebookresearch/pytorchvideo/blob/main/docs/source/model_zoo.mdThe weights were ported from the paper. The accuracies are estimated on video-level with parameters `frame_rate=7.5`, `clips_per_video=5`, and `clip_len=16`ip.Kinetics-400gJ+S@gh|?eW@zacc@1zacc@5guVQ@g rxa@	r(  r)  r'  recipe_docs
num_params_metrics_ops
_file_sizeurl
transformsr.  N	r#   r$   r%   r   r   r   r   KINETICS400_V1DEFAULTr)   r*   r+   r   r   Z  sf    H #%
 #!#1k[ ###! !#
N: Gr*   r   c                   Z    e Zd Z ed eedddd      ddedd	d
ddddiddd	      ZeZy)r   z:https://download.pytorch.org/models/mvit_v2_s-ae3be167.pthr*  r5  r7  r9  r;  r,  zChttps://github.com/facebookresearch/SlowFast/blob/main/MODEL_ZOO.mdr?  ir@  g r0T@g(\W@rA  guVP@g?5^I|`@rB  rI  NrL  r)   r*   r+   r   r   {  sf    H #%
 #!#1[[ ###! !#
N: Gr*   r   
pretrained)r   T)r   r$  c                 ~   t         j                  |       } g dg dg dg g dg g dg g g g g g g g g g g dg gg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgg g dg g dg g g g g g g g g g g dg gg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgd	}g }t        t        |d
               D ]M  }|j	                  t        |d
   |   |d   |   |d   |   |d   |   |d   |   |d   |   |d   |   	             O t        ddd|dd|j                  dd      | |d|S )a  
    Constructs a base MViTV1 architecture from
    `Multiscale Vision Transformers <https://arxiv.org/abs/2104.11227>`__.

    .. betastatus:: video module

    Args:
        weights (:class:`~torchvision.models.video.MViT_V1_B_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.MViT_V1_B_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.MViT``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/mvit.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.MViT_V1_B_Weights
        :members:
    r0   r   r   rT   rT   rT   rT   rT   rT   rT   rT   rT   rT   rT      rS  `      rV    rW  rW  rW  rW  rW  rW  rW  rW  rW  rW     rX  )rV  rV  rW  rW  rW  rW  rW  rW  rW  rW  rW  rW  rW  rX  rX  rX  r	   r	   r	   r0   r   r   r0   rS  rS  r0   rT   rT   r0   r0   r0   r   r   r   r   r    r!   r"   r   r   r   r   r    r!   r"   r*  r,  Fr   皙?)r   r   r  r   r   r   r   r$  r)   )r   verifyranger   rK   r   r3  r/  r   r$  r%  configr  is         r+   r   r     s   2  &&w/G FikB	2r2r2r2rSUWY[dfhi!
$ B	2r2r2r2rSUWY[dfhi!
1*FX M3vk*+, 
 -a0%&67: &'8 9! <
+A. -a0
+A. -a0
	

  
# %$jj)@#F
 
 
r*   c                    t         j                  |       } g dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgd	}g }t        t        |d
               D ]M  }|j	                  t        |d
   |   |d   |   |d   |   |d   |   |d   |   |d   |   |d   |   	             O t        ddd|dddd|j                  dd      | |d
|S )aC  Constructs a small MViTV2 architecture from
    `Multiscale Vision Transformers <https://arxiv.org/abs/2104.11227>`__ and
    `MViTv2: Improved Multiscale Vision Transformers for Classification
    and Detection <https://arxiv.org/abs/2112.01526>`__.

    .. betastatus:: video module

    Args:
        weights (:class:`~torchvision.models.video.MViT_V2_S_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.MViT_V2_S_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.MViT``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/mvit.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.MViT_V2_S_Weights
            :members:
    rR  )rU  rU  rV  rV  rW  rW  rW  rW  rW  rW  rW  rW  rW  rW  rW  rX  rT  rY  r]  rZ  r[  r\  r^  r   r   r   r   r    r!   r"   r*  r,  TFr   r_  )
r   r   r  r   r   r   r   r   r   r$  r)   )r   r`  ra  r   rK   r   r3  r/  rb  s         r+   r   r     s   4  &&w/G Fhj!
& !
& !
& !
uLF\ M3vk*+, 
 -a0%&67: &'8 9! <
+A. -a0
+A. -a0
	

  # %$jj)@#F  r*   );r   collections.abcr   dataclassesr   	functoolsr   typingr   r   r   rX   torch.fxtorch.nnrL   opsr
   r   transforms._presetsr   utilsr   _apir   r   r   _metar   _utilsr   r   __all__r   r&   r3   rh   ri   r>   rA   fxwraprf   rC   ru   r   rg   r   r   r   r   r   r(   r   r3  r   r   rM  r   r   r)   r*   r+   <module>ru     s[    $ !  * *    ' 6 ( 7 7 + C   Xc] s %,, C S U5<<Y\K\E]  # 3 C TYT`T`  l  j )299 )XELL S U\\ 9
,,9||9 c39 c3	9
 ||9 ||9 ||9 \\9xU\\ U\\ TX  n  o }")) }@EJbii EJP :M299 M`&  k" 	
  
B B B ,0A0P0P!QR8<t ](#45 ] ]_b ]gk ] S ]@ ,0A0P0P!QR8<t B(#45 B B_b Bgk B S Br*   