
    "hi                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZ d dlmZ dZedk(  r` ee	      Zej)                  d
d       ej)                  ddeddd       ej)                  ddedd       ej)                  ddedd       ej)                  ddedd       ej)                  dd edd!       ej)                  d"d#ed$d%       ej)                  d&d'ed(d%       ej/                         Zej2                  rej2                  nej4                  Zej6                  rej6                  n ee      Z eej8                        Zej8                  j=                  d  eej>                               ej@                  jC                  ejD                  e      Z#ej@                  jI                  e#      Z%e#jL                  jO                  e%        e(e%ej4                        Z)ejT                  jW                  d)      Z* e,e*      dk(  sJ  e- e.d* ej^                  jW                  d)                  Z/d+e
e   fd,Z0d-ejb                   d.ejd                   Z3 e0e/e3gz         Z4d/ Z5 e6e/      D  ci c]%  \  } }d0|v s|  e5|jW                  d0      d         ' c}} Z7e7jq                         D ci c]  \  }}|	|| c}}Z7 e6e/      D  ci c]  \  } }e)jr                  |     e5|       c}} Z:e:jq                         D ci c]  \  }}|	|| c}}Z: e6e/      D  ci c]8  \  } }e)jr                  |    e:vr"e)jr                  |    |jW                  d0      d    : c}} Z/d1jw                  e:jy                         D cg c]
  } e|       c}      Z=e:jq                         D cg c]  \  }}| d2|  c}}Z>e>d3ejb                   d4ejd                   gz  Z>e7jy                         D ]  Z?e?d5v rJ d6e?         ej                  j                  j                  j                  e7      ZDeDj                         jq                         D ]%  \  ZFZGe:j                  e)jr                  eF   eGi       ' ej                  j                  e)e:e/eD7      ZJejb                  ejd                  d8ZK ej                  eJeK9      ZMg Z9g ZNg ZOg ZP e6e)jr                        D ]  \  ZQZReRe:vrKe9j                  eR       eNj                  e/eR          eOj                  eR       ePj                  e/eR          UeQeDj                  v sde9j                  eR       eNj                  e/eR            ee/jy                         eD      ZUd:jw                  ee4eUg      ZV e e j                  eMj                  d;               d<d= ZYi d>eVd?ej4                  d@ e,eY      dAdBjw                   eZeYddd<   eYddd<         D cg c]  \  }}dC| |  c}}      d+dBjw                   eZeOeP      D cg c]  \  }} e|       dD|  c}}      dEdBjw                   eZe9eN      D cg c]  \  }} e|       dD|  c}}      dFdBjw                  eOD cg c]  }dG| 	 c}      dH e,eO      dIe>dJeMj                  j                  dKejb                  dLd:jw                  e=e3g      dMe*d    dNe*d   dOe*d<   dPdZ]dQD ]  Z^ ee_      j>                  dRe^ z  Z`ej                  dSe4 d:eU dSe^       j                  dT      5 Zcecj                    ee`      j                         j                  dUi e]       ddd        yyc c}} w c c}}w c c}} w c c}}w c c}} w c c}w c c}}w c c}}w c c}}w c c}}w c c}w # 1 sw Y   xY w)V    N)ArgumentParser)Path)List)kernel_suffix)	ty_to_cppa  
Triton ahead-of-time compiler:

This program compiles the kernel with name `kernel-name` in the file at the
provided `path` into self-contained C source-code that embeds the `cubin`
data along with utilities to load, unload and launch the kernel.

signature is provided as a list of (optionally divisibility-hinted) types
or constexpr values, e.g.

`compile.py --kernel-name kernel --signature "*fp32:16, i32:16, 1024, i32" --out-name kernel /path/to/kernel.py`

will compile triton.JITFunction of name `kernel` inside the file `/path/to/kernel.py`.
Said kernel will be specialized such that argument 0, 1 are assumed to be multiple of 16,
and argument 2 is assumed to be a compile-time constant of value 1024, i.e. it won't be part of the generated prototype.

The resulting entry point will have signature

CUresult kernel_{specialization_suffix}(CUstream stream, unsigned gX, unsigned gY, unsigned gZ, float* arg0, int32_t arg1, int32_t arg2)

Different such specialized entry points can be combined using the `linker.py` script.

NOTE: when resolving the scope of /path/to/kernel.py, the file will be executed from within its parent directory with the python interpreter
used to run this `compile.py` script
__main__)descriptionpathzTPath to Python source containing desired kernel in its scope. File will be executed.)helpz--kernel-namez-n zName of the kernel to compileT)typedefaultr   requiredz--num-warpsz-w   z$Number of warps to launch the kernel)r   r   r   z--num-stagesz-ns   z/Number of stages (meta-parameter of the kernel)z
--out-namez-onz Out name for the compiled kernelz
--out-pathz-ozOut filenamez--signaturez-szSignature of the kernel)r   r   r   z--gridz-gzLaunch grid of the kernel,c                 $    | j                  d      S )N )strip)ss    Q/var/www/html/sandstorm/venv/lib/python3.12/site-packages/triton/tools/compile.py<lambda>r   G   s    1773<     	signaturec                     t        j                         }|j                  dj                  |       j	                                |j                         d d S )Nr      )hashlibsha256updatejoinencode	hexdigest)r   ms     r   hash_signaturer$   I   s?    NN	)$++-.{{}Ra  r   warpsxstagesc                 v    	 t        |       }|S # t        $ r Y nw xY w	 t        |       }|S # t        $ r Y y w xY w)N)int
ValueErrorfloat)r   rets     r   	constexprr,   Q   sO    	a&CJ 			(CJ 		s    	, 	88:x=z
num_warps=znum_stages=)r      z#Only 1 and 16 are valid hints, got )fn	constantsr   attrs)	num_warps
num_stages)options_cubin   kernel_nametriton_kernel_namebin_sizebin_dataz, 0xr   full_signaturearg_pointers&num_argskernel_docstringsharedr4   	algo_infogridXgridYgridZ_placeholder)hczcompile..w )gbinasciir   importlib.util	importlibsysargparser   pathlibr   typingr   tritontriton.backendstriton.compiler.code_generatorr   triton.backends.nvidia.driverr   desc__name__parseradd_argumentstrr(   
parse_argsargsout_namer;   out_pathr
   arg_pathinsertparentutilspec_from_file_locationstemspecmodule_from_specmodloaderexec_modulegetattrkernelgridsplitlenlistmapr   r$   r4   r5   meta_sigsig_hashr,   	enumeratehintsitems	arg_namesr2   r    values	const_sig
doc_stringrK   backendscompilerAttrsDescriptor
from_hintsr3   get_constantspvr   	ASTSourcesrcoptscompileccinfo	arg_typesarg_names_not_1arg_types_not_1iarg_nameappend
equal_to_1suffix	func_namehexlifyasmhex_zipmetadatarE   paramsext__file__template_pathwith_suffixopenfpwrite	read_textformat)	r   r   kr   r.   ynametyargs	   000000000r   <module>r      s,      
 #     8 34 z -F
s  u
CJi!%  '
t#qGmn
CN  P
e#tJlm
dt.Y
t#<U`de
$S7R]abD $t}}43C3CH $t}}4>H DIIHHHOOAs8??+,>>11(--JD
..
)
)$
/CKKC S$**+F99??3Dt9>> S/1E1Ec1JKLI!$s) !
 t~~&gdoo->?Hi8*45H 8A7KXtq!sVWxQ	!''#,q/**XE#kkm=daq}QT=E?H?STtq!!!!$il2TI"+//"3E$!Qq}AEI i(AqAi/ 	QWWS\!_,I
 )*:*:*<=Q#a&=>I)2):;AQCq*;JZ/0K?P2QRRJ \\^ GG|FB1#FF|GOO$$44??FE##%++- 31&**1-q123
//
#
#vi_d
#
eCtGDV^^C.FIIOO !1!12 289$X&Yx01""8,""9X#67%"""X&Yx012 9++-u5F(Hf56Ix

7 345a;Dyd.. 	CI 	DIIs4!9d14a4j7QRtq!A3qc{RS	
 	TYY#o_nJophdB9R=/4& 9pq 	$))sS\^gOh$i84	"av%>$ij 			"HQse9"HI 	C( 	J 	&//(( 	T^^ 	SXXy(34 	a 	a 	a  	!F$  GX--(3%0@@!!AhZq#"?@EEcJ 	GbHH;T-(224;;EfEF	G 	GGa l Y=TE
 >;D Sp$i"H	G 	Gs`   ^ ^ 
^&^&' ^,
^2)^2==^8^>__	_<_+_6_  _)	