
    hE                     V   d dl mZmZmZ  e       r
ddlZddlmZ  e       rddlmZ ddlZ ej                  e
      Zg dZd Zd Zej                  d	d
dej                   dedej$                  fdZ G d dej(                        Zd Zd Zd Zd Zd Z	 	 	 	 	 ddZ	 	 	 	 ddZy)   )is_accelerate_availableis_torch_availablelogging    N)nn)init_empty_weights)g        g      ?g      ?g      ?g       @g      @g      @g      @g       g      g      g      g       g      g      g      c                     |j                   j                  j                  } || j                  t        j
                        t        j                  d      \  } }| |fS )N   )axis)numerics_detailsmxfpdowncast_to_mxfp_torchtotorchbfloat16uint8)wtriton_kernels_hubr   w_scales       ]/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/integrations/mxfp4.pyquantize_to_mxfp4r   3   sH    /@@EE\\'U^^(<ekkPQRJAwg:    c                 f   |j                   j                  |j                   j                  |j                   j                  }}}|j                  j
                  }|j                  j
                  j                  }|j                  d      \  }}	 | || |      |fi |	}  | ||      |      }| |fS )zE
    Changes the layout of the tensors depending on the hardware
    r
   )mx_axisdtype)tensorFP4convert_layoutwrap_torch_tensortensor_detailslayoutStridedLayout"make_default_matmul_mxfp4_w_layout)
r   r   r   r   r   r    r"   r#   value_layoutvalue_layout_optss
             r   swizzle_mxfp4r'   9   s    
 	!!%%!!00!!33 +C
  ..55F&55<<JJM&,&O&OXY&O&Z#L#(#6ZHYZA.w7GGg:r   i   )r   rows_per_chunkr   r(   returnc                   ddl }| j                  s>t        j                  j	                         r | j                         } |j                         }|j                  t        j                        dz
  }| j                  dd |j                  k(  s$J d| j                  dd d|j                         t        j                  t        || j                        }| j                  ^ }}}|j                  |      |z  }	| j                  |	|      } |j                  |	d      }t        j                  |	|d	z  || j                        }
t        d|	|      D ]  }t        ||z   |	      }| || }||| }|d
z  j                  t        j                         }|dz	  j                  t        j                         }|
|| }||   |ddddd	f<   ||   |ddddd	f<   t        j"                  |||       ~~~~~   |
j                  g |||d	z   j$                  g |||z  d	z   }
~ ~~|
j'                  dd	      j)                         S )zw
    Convert the mxfp4 weights again, dequantizing and makes them compatible with the forward
    pass of GPT_OSS.
    r   N   zblocks.shape[:-1]=z does not match scales.shape=)r   devicer
   r         )out)mathis_cudar   cudais_availabler   int32shaper   
FP4_VALUESr-   prodreshapeemptyrangeminlongldexpview	transpose
contiguous)blocksscalesr   r(   r1   lutprefix_shapeGB
rows_totalr0   r0r1blkexpidx_loidx_hisubs                     r   convert_moe_packed_tensorsrP   M   s(     >>ejj557YYu{{#c)F<<,d1Ccr1B0DDbU[UaUaTc.dd,
,,zv}}
EC ,,\1a<(1,J^^J*F^^J*F
++j!a%uV]]
KCAz>2 *n$j1RmRm *,(uzz*"Rj6{Aqt!tG6{Aqt!tGC#&FCc*" 4+#++
.|
.Q
.A
.
3
3
M\
M1q519
MC==A))++r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )Mxfp4GptOssExpertsc           	      r   t         |           |j                  | _        |j                  | _        |j
                  | _        t        j                  t        j                  | j                  d| j                  z  | j
                  dz  dt        j                        d      | _        t        j                  t        j                  | j                  d| j                  z  | j
                  dz  t        j                        d      | _        t        j                  t        j                  | j                  d| j                  z  t        j                        d      | _        t        j                  t        j                  | j                  | j
                  | j                  dz  dft        j                        d      | _        t        j                  t        j                  | j                  | j
                  | j                  dz  t        j                        d      | _        t        j                  t        j                  | j                  | j
                  t        j                        d      | _        d| _        t'        |dd	      | _        d | _        d | _        t'        |dd	      | _        y )
Nr          r   Frequires_gradgZd;?swiglu_limitg      @)super__init__num_local_expertsnum_expertsintermediate_sizehidden_sizer   	Parameterr   zerosr   gate_up_proj_blocksgate_up_proj_scalesfloat32gate_up_proj_biasdown_proj_blocksdown_proj_scalesdown_proj_biasalphagetattrlimitgate_up_proj_precision_configdown_proj_precision_config)selfconfig	__class__s     r   rZ   zMxfp4GptOssExperts.__init__   s   !33!'!9!9!--#%<<KK((!d.D.D*DdFVFVZ\F\^`hmhshst$
  $&<<KK((!d.D.D*DdFVFVZ\F\didodop$
  "$KK((!d.D.D*DEMMZjo"
 !#KK))4+;+;T=S=SWY=Y[]^fkfqfqr!
 !#KK(($*:*:D<R<RVX<X`e`k`kl!
 !llKK(($*:*:%--P`e
 
V^S9
-1**.'V^S9
r   hidden_statesr)   c                    t         j                  j                  t         j                  j                  t         j                  j                  }}}t         j                  j
                  }t        j                  j                  |j                        5   | |d|d      | j                  | j                  fd      }	 ||| j                  | j                  j                  t        j                        ||| j                  d |	      }
 ||
| j                   | j"                  j                  t        j                        ||| j$                  |j&                        }d d d        |S # 1 sw Y   S xY w)Nswiglu)rh   rj   r   )gather_indxprecision_configgammasfused_activation)scatter_indxrt   ru   )r   
matmul_ogsFnSpecsFusedActivationrr   	swiglu_fnr   r3   r-   rh   rj   gate_up_projrd   r   rc   rk   	down_projrg   rl   	gate_scal)rm   rp   routing_data
gather_idxscatter_idxry   rz   rx   r{   actintermediate_cache1intermediate_cache3s               r   forwardzMxfp4GptOssExperts.forward   s:   ))11))99))44 #-
 '--77	ZZ}334 	!'(I?Q"RUYU_U_aeakakTlnopC",!!&&))%--8&!%!C!C!$	# #-###&&u}}5(!%!@!@#--#	. #"/	. #"s   CE$$E.)__name__
__module____qualname__rZ   r   Tensorr   __classcell__)ro   s   @r   rR   rR      s'    ":H#U\\ #]b]i]i #r   rR   c                 j   dd l }t        j                  j                  t        j                  j                  t        j                  j
                  t        j                  j                  f\  }}}}t        j                  j                  | j                        5  t        j                  j                         }t        |j                  j                  dd            }d}	| j                  d   }
| j                  d   }||z  }||z  }|dz   |z  }|
|z  }d } || |      \  }}t        j                   |d      }t        j"                  |d      \  }}t        j$                  |d|      }|j'                  d      }t        j(                  |||dz
        || }|j+                  d      j-                  t        j.                        }d	}t        j0                  ||k  ||      }t        j2                  |d
      j-                  t        j.                        }t        j2                  |      j-                  t        j.                        }t        j0                  ||k  ||	      }t        j0                  ||k  ||	      }t        j0                  ||	k(  |	|      }||   }t        j0                  ||   |	k(  |	|      } ||j                         |j                               } ||j                         |j                               } ||||      }|}d d d         |      fS # 1 sw Y   xY w)Nr   
LOCAL_RANK0r,   r
   c                     t        j                  |  dd      d d d |f   }|j                         }t        j                  | |d      }||j	                         fS )Nr
   T)dimstabler   )r   argsortr=   take_along_dimint)valsktk_indxtk_vals       r   topkz routing_torch_dist.<locals>.topk   sS    mmTEq>q"1"uEGllnG))$Q?F7;;=((r   r   )binsmaxi  T)r   )src_indxdst_indx)osr   routing
GatherIndxRoutingDataScatterIndxcompute_expt_data_torchr   r3   r-   distributedget_world_sizer   environgetr6   softmaxsortgatherr9   histcr?   r   r5   wherer   )logitsn_expts_actr   r   r   r   r   
world_sizerankreplace_valuen_tokensn_expts_totn_local_expertslocal_expert_startlocal_expert_endn_gates_padr   	expt_scal	expt_indxsort_indiceshistvar	topk_indx	gate_indxr~   rs   rw   	expt_datahit_expertss                                r   routing_torch_distr      s     	""--""..""..""::	EAJ[*A 
		6==	) 3"&&557
2::>>,45<<?ll1o%3!O3 1H7,	)  $FK8	9MM)4	"'**YA">	<LLA|<	 %%b)	{{9;K!OLM_`pqNN2&))%++6	 KK	,> >YO	MM)D9<<U[[I	MM),//<	KK	,< <iW	KK 2i ?MZ	KK	] :M9U	i(	KK	) 4 E}V_`	 !)--/IMMOT"IMMOimmoV+D/;O	!g3"h y$iPR]_kkki3" 3"s   I9L))L2c                    dd l m} |j                         r#|j                         rt	        | d      rt
        }nt        j                  j                  }|j                  d   }|j                  d| j                  j                        }t        j                  j                  || j                  j                  | j                  j                         }t"        j$                  j'                  |j&                        5   ||| j                  j(                        \  }}}d d d        | j+                  |      }	|	j                  |d| j                  j                        }	|	|fS # 1 sw Y   HxY w)Nr   
_is_hookedr,   )torch.distributedr   r4   is_initializedhasattrr   r   r   r6   r9   router
hidden_dimr   
functionallinearweightbiasr   r3   r-   top_kexperts)
rm   rp   distr   
batch_sizerouter_logitsr   r   r   
routed_outs
             r   mlp_forwardr     s    $t224|9T$$,,44$$Q'J!))"dkk.D.DEMMM((8J8JDKKL\L\]M			=//	0 Z07t{{GXGX0Y-j+Z m\:{SJ##JDKK4J4JKJ}$$Z Zs   ;"E$$E-c                 R    dj                  |       t        fd|D              syy)N.c              3      K   | ]6  }t        j                  | d       xs t        j                  |        8 yw)z\.N)rematch).0keycurrent_key_name_strs     r   	<genexpr>z(should_convert_module.<locals>.<genexpr>&  s>      dgC523_rxx3%J^7__s   <?TF)joinany)current_key_namepatternsr   s     @r   should_convert_moduler   $  s0    88$45 ks  r   c                 
   ddl m} |j                  d      }|j                  d      }|j                  d      }	|j                  d      }
|j                  d      }|j                  d      }d	D ]  }||v s	| ||||||	|
||d
	      }| d}| d}t        | |j	                  dd      d   |       t        | |      sSt        | |      s`t        t        | |      t        | |            }|dk(  r<t        j                  j                         rt        j                  j                          t        | |t        j                  j                  |j                  |                   t        | |       t        | |        y )Nr   shard_and_distribute_modulemodelempty_paramcasting_dtypeto_contiguousr   device_mesh)r|   r}   F)	set_param_blocks_scalesr   r
   cpu)integrations.tensor_parallelr   r   setattrrsplitr   rP   ri   r   r3   r4   empty_cacher   r_   r   delattr)module
param_nameparam_valuetarget_devicedq_param_namekwargsr   r   r   r   r   r   r   projblocks_attrscales_attrdequantizeds                    r   
dequantizer   -  sb   JJJwE**]+KJJ/MJJ/M::fD**]+K- -:&9!!!#
 "F'*K!F'*KFJ--c15a8+Fv{+0L89UW^_egrWst E)ejj.E.E.GJJ**,ehh&8&89V&WX,,/-r   c                    |j                   j                  |j                   j                  |j                   j                  }}}ddlm}	 |j                  d      }
|j                  d      }|j                  d      }|j                  d      }|j                  d      }|j                  d      }d	|v r&|j                  d
      d   j                  d      d   }d|v r&|j                  d
      d   j                  d      d   }| |	|
|||||||       n?t        | |j                  d
d      d   t        j                  j                  |d              d}| d}t        | |      }t        | |      }|j                  j                  dk7  r|j                  j                  dk7  r|j!                  d      }|dk(  r!|j#                  || j$                  dz  d      }n |j#                  |d| j$                  dz        }t        |d|      dk(  rd}|j'                  |      j)                         }|j'                  |      j)                         }t        j*                  j                  |      5  t-        |j/                  dd      |j/                  dd      |      \  }}ddd       |dk(  r5t        j0                  || j2                  | j$                  dz  g      _        n1t        j0                  || j$                  | j2                  g      _        t        | ||       t        | | d | | |                          t7        | |       t7        | |       ~yyy# 1 sw Y   xY w)zq
    This transforms the weights obtained using `convert_gpt_oss.py` to load them into `Mxfp4GptOssExperts`.
    r   r   r   r   r   r   r   r   rB   r   r,   r   r   rC   r   Nr
   FrV   metar|   typer   r3   _precision_config)rhs_data)weight_scaleflex_ctx)rx   PrecisionConfigFlexCtx
InFlexDatar   r   r   splitr   r   r   r   r_   ri   r-   r   sizer9   r]   r   rA   r3   r'   r@   Sizer^   r6   r   )r   r   r   r   r   r   r  r  r  r   r   r   r   r   r   r   r   r   r   rB   rC   local_expertstriton_weight_tensorr  s                           r   load_and_swizzle_mxfp4r  Q  s<   
 	%%55%%--%%00 )WO
 KJJwE**]+KJJ/MJJ/M::fD**]+K:$R(..y9!<:$R(..y9!<#;ZW[]h	
 	
))#q1!4ehh6H6Hdi6H6jkF'"KF'"KV[)FV[)F}}V#(:(:f(DA>!^^M63K3Ka3OQSTF^^M2v7O7OST7TUF=&-8EA"M=)446=)446ZZ}- 	1>  R(&*:*:2r*BDV2. ,	 >!).]FDVDVX^XpXpstXt4u)v &).]FD\D\^d^p^p4q)r & 	23f%&Q[Q]@^_	
 	$$A )E#	 	s   01M!!M*c           	      n   |g }| j                         D ]  \  }}|j                  |       t        ||      s|j                  d       6|j                  j
                  dk(  r9|j                  s-t               5  t        |      | j                  |<   d}d d d        |j                  j
                  dk(  r$|j                  sddl
m}  |t        |      |_        t        t        |j!                                     dkD  rt#        ||||||      \  }	}|j                  d        | |fS # 1 sw Y   xY w)Nr,   GptOssExpertsT	GptOssMLPr   )
MethodType)has_been_replacedrn   )named_childrenappendr   popro   r   r   r   rR   _modulestypesr  r   r   lenlistchildren_replace_with_mxfp4_linear)
r   modules_to_not_convertr   quantization_configr  rn   namer   r  _s
             r   r  r    s1    ,,. !f%$%57MN  $$$7@S@^@^#% )'9&'At$$(!) $$3<O<Z<Z('V<FNtFOO%&'!+#=& #"3$ A  	R -!. ####) )s   :D++D4	c                    |j                   r| S ddlm}  |d      a|dgn|}|j                  |j                  |j                         t        t        |            }t        | ||||      \  } }|st        j                  d       | S )Nr   )
get_kernelz kernels-community/triton_kernelslm_head)rn   zYou are loading your model using mixed-precision FP4 quantization but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)r   kernelsr"  r   r  extendr  setr  loggerwarning)r   r  r   r  rn   r"  r  s          r   replace_with_mxfp4_linearr)    s     %%& ((JK,B,Ji[Pf11=%%&9&P&PQ!#&<"=>9 E 	
 Lr   )NNNFN)NNNN)utilsr   r   r   r   r   
accelerater   r   
get_loggerr   r'  r7   r   r'   r   r   r   r   rP   ModulerR   r   r   r   r   r  r  r)   r   r   <module>r/     s    I H - 	 
		H	%
*0 &3, ;;	3,
 3, \\3,lD# D#RAlH%(!-H@J  "$N  "r   