
    h                        d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlmZ d dlmZmZ d dlmZmZmZ g dZg dZeez   Zg dZed	gz   Zeez   Zed	gz   Zd
dgZdgez   dgz   ez   d	gz   Z e ee      h dz
        Zd Z d Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d+dee   fdZ,d,dejZ                  deeej\                  f   fdZ/de0dejb                  fd Z2de0fd!Z3d" Z4d-d#Z5d-d$Z6e
jn                  jq                   e5         e6       %      Z9d&e:d'e:fd(Z;d)eej\                  ejx                  jz                  j"                  f   dej\                  fd*Z>y).    N)knobs)RandomState)OptionalUnion)TensorWrapperreinterprettype_canonicalisation_dict)int8int16int32int64)uint8uint16uint32uint64)float16float32float64bfloat16float8_e4m3fnfloat8_e5m2boolr   >   r   r   r   c                  H    t         j                  j                  dd      dk(  S )NTRITON_INTERPRET01)osenvironget     V/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/triton/_internal_testing.pyis_interpreterr#      s    ::>>,c2c99r!   c                  |    t               ry t        j                  j                  j                  j                         S N)r#   tritonruntimedriveractiveget_current_targetr    r!   r"   r*   r*      s*    >>  ''::<<r!   c                  <    t               } | dS | j                  dk(  S )NFcudar*   backendtargets    r"   is_cudar1   $   s"    !FN5@&(@@r!   c                  b    t               xr$ t        j                  j                         d   dk\  S )Nr   	   )r1   torchr,   get_device_capabilityr    r!   r"   	is_hopperr6   )   s&    9C99;A>!CCr!   c                  <    t               } | dS | j                  dk(  S )NFhipr-   r/   s    r"   is_hipr9   -   "    !FN5?%(??r!   c                  b    t               } | d uxr  | j                  dk(  xr | j                  dk(  S )Nr8   gfx90ar*   r.   archr/   s    r"   is_hip_cdna2r?   2   1    !FU&..E"9UfkkX>UUr!   c                  b    t               } | d uxr  | j                  dk(  xr | j                  dk(  S )Nr8   gfx942r=   r/   s    r"   is_hip_cdna3rC   7   r@   r!   c                  b    t               } | d uxr  | j                  dk(  xr | j                  dk(  S )Nr8   gfx950r=   r/   s    r"   is_hip_cdna4rF   <   r@   r!   c                      t               } t        | j                         | d uxr | j                  dk(  xr d| j                  v S )Nr8   gfx12)r*   printr>   r.   r/   s    r"   is_hip_gfx12rJ   A   s=    !F	&++T&..E"9Tg>TTr!   c                  F    t               xs t               xs
 t               S r%   )r?   rC   rF   r    r!   r"   is_hip_cdnarL   G   s    >=\^=|~=r!   c                  <    t               } | dS | j                  dk(  S )NFxpur-   r/   s    r"   is_xpurO   K   r:   r!   c                  H    t               } | dS t        | j                        S )N )r*   strr>   r/   s    r"   get_archrS   P   s"    !F25S%55r!   rsc                 F   t        | t              r| f} |t        d      }|t        t        z   v rt        j                  t        t
        |            }||j                  nt        ||j                        }||j                  nt        ||j                        }t        t
        |      }|j                  ||| |      }d||dk(  <   |S |r)d|v r%|j                  dd| t
        j                        }|S |t        v r"|j                  dd|       j                  |      S |d	k(  rV|j                  dd|       j                  d
      j                  d      t        j                   d      z  j                  d
      S |dv r|j                  dd|       dkD  S t#        d|       )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
       )seed)dtype   r   float8   (   r   r   r   l      )r   int1bool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr
   float_dtypesnormalastypeviewr   RuntimeError)shape	dtype_strrT   lowhighrd   rX   xs           r"   numpy_randomrs   U   s|   
 %		zb!J,,Y/0;eiiCUYY,? Luyyc$		.BI&JJsD%uJ5!q&		x9,JJr2uBGGJ4	l	"yyAu%,,Y77	j	 		!Q&--i8==hG"))T^J__eefopp	/	/yyAu%++^I;788r!   rr   returnc                    | j                   j                  }|t        v r_|j                  d      }| j	                  t        t        |            }t        t        j                  ||      t        t        |            S |r3d|v r/t        t        j                  | |      t        t        |            S |dk(  r*|dk(  r%t        j                  | |      j                         S t        j                  | |      S )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicerZ   r   r   )rX   namerb   lstriprk   re   rc   r   r4   tensortlr   )rr   rw   dst_typetsigned_type_namex_signeds         r"   	to_tritonr   s   s     	
AK88C=88GB(89:5<<@'"a.QQH,u||Af=wr8?TUU	>h*4<<&1::<<||Af--r!   c                 :    t        j                  t        |          S r%   )r{   	str_to_tyr	   rr   s    r"   str_to_triton_dtyper      s    <<21566r!   c                 $   t        | t        j                  j                        r| j                  S t        | t
        j                        r0t        j                  dt        |             }|j                  d      S t        dt        |              )Nz^torch\.(\w+)$rY   znot a triton or torch dtype: )r_   r&   languagerX   rx   r4   rematchrR   group	TypeErrortype)rX   ms     r"   torch_dtype_namer      sh    %../zz	E5;;	'HH&E
3wwqz7U}EFFr!   c                    t        | t              rX| j                  j                         j	                         j                  t        t        t        | j                                    S t        | t        j                        rf| j                  t        j                  u r,| j                         j                         j	                         S | j                         j	                         S t        d|        )Nz Not a triton-compatible tensor: )r_   r   basecpunumpyrk   re   rc   r   rX   r4   Tensorr   float
ValueErrorr   s    r"   to_numpyr      s    !]#vvzz|!!#**727G7P+QRR	Au||	$77enn$557==?((**uuw}};A3?@@r!   c                 R   t               ryt               syt        j                  j                  j
                  }| rdnd}t        t        t        |j                  d                  }t        |      dk(  sJ |       t        j                  j                         d   dk\  xr ||k\  S )	NTF)   r   )r      .   r   r3   )r#   r1   r   nvidiaptxasversiontuplemapr`   splitlenr4   r,   r5   )
byval_onlycuda_versionmin_cuda_versioncuda_version_tuples       r"   supports_tmar      s    9<<%%--L",w's3(:(:3(?@A!"a';);;'::++-a0A5`:LP`:``r!   c                 
    | ryy)NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r    )r   s    r"   tma_skip_msgr      s    f]r!   )reasonsizealignc                 N    t        j                  | t         j                  d      S )Nr,   )rX   rw   )r4   emptyr
   )r   r   _s      r"   default_alloc_fnr      s    ;;t5::f==r!   r}   c                 z    t        | t        j                  j                  j                        r| j
                  S | S r%   )r_   r&   r'   jitr   r   )r}   s    r"   unwrap_tensorr      s*    !V^^''556vvHr!   )NNNr%   )F)?r   r   r   rc   r4   r&   triton.languager   r{   r   pytestnumpy.randomr   typingr   r   triton.runtime.jitr   r   r	   ra   rb   integral_dtypesri   float_dtypes_with_bfloat16dtypesdtypes_with_bfloat16torch_float8_dtypestorch_dtypessortedset
tma_dtypesr#   r*   r1   r6   r9   r?   rC   rF   rJ   rL   rO   rS   rs   ndarrayr   r   rR   rX   r   r   r   r   r   markskipifrequires_tmar`   r   r'   r   r   r    r!   r"   <module>r      s   	 	       $ " U U0
5{*0)ZL8 	<	', &6 x*$y0<?:,NC,-0NNO
:=A
D@
V
V
V
U>@
6
9x'< 9<. .u]ELL=X7Y .&73 7288 7Gs GA	a^ {{!!ln"4\^!L>3 >s >U5<<););)I)IIJ u|| r!   