
    "hK?                     
   d dl mZmZmZmZ d dlmZmZmZm	Z	 d dl
mZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ defd	Z ed
       G d d             Ze G d de             Z G d de      Zy)    )BaseBackend	GPUTargetAttrsDescriptorregister_descriptor)irpassesllvmamd)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                 <    | j                   }d|v rd S d|v rd S d S )Ngfx94c                 J    | j                         s|j                         rdS dS )N   r   r   r   r      )is_int8lhsTyperhsTypes     Y/var/www/html/sandstorm/venv/lib/python3.12/site-packages/triton/backends/amd/compiler.py<lambda>zmin_dot_size.<locals>.<lambda>   s    9JgooN_ fq     gfx9c                      y)Nr    r   s     r   r   zmin_dot_size.<locals>.<lambda>       r   c                      y)Nr   r"   r   s     r   r   zmin_dot_size.<locals>.<lambda>   r#   r   )arch)r   arch_strs     r   min_dot_sizer'      s/    {{H (qq3300r   T)frozenc                   |   e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed	<   dZ
eed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZee   ed<   dZee   ed<   dZeed<   dZee   ed<   dZeed<   dZeed<   dZeed <   dZeed!<   dZeed"<   d#Z eed$<   d%Z!eed&<   d' Z"d( Z#y))
HIPOptions   	num_warps   waves_per_eu   
num_stagesnum_ctasr   num_buffers_warp_specnum_consumer_groupsreg_dec_producerreg_inc_consumerNextern_libs)r-   r-   r-   cluster_dimsFdebugTsanitize_overflowr%   )fp8e5supported_fp8_dtypesr"   deprecated_fp8_dtypesieeedefault_dot_input_precision)r=   allowed_dot_input_precisionsenable_fp_fusionmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namedefaultinstruction_sched_variantc                    t        t              j                  dz  }| j                  i nt	        | j                        }d| j
                  v sd| j
                  v sd| j
                  v rdnd}t        j                  | d|       dd	g}|D ]  }t        || d
z        ||<    t        j                  | dt        |j                                      | j                  dkD  r| j                  | j                  dz
  z  dk(  sJ d       y )Nlibgfx10gfx11gfx12    @   	warp_sizeocmlocklz.bcr6   r   r-   znum_warps must be a power of 2)r   __file__parentr6   dictr%   object__setattr__strtupleitemsr,   )selfdefault_libdirr6   rP   libsrJ   s         r   __post_init__zHIPOptions.__post_init__=   s    h..6 ,,4b$t?O?O:P!TYY.'TYY2F'UYU^U^J^Bdf	4i8 	AC">se3K#?@K	A4k6G6G6I0JK~~!t~~!9K'LQR&R 	0/	0R&Rr   c           	          dj                  | j                  j                         D cg c]  \  }}| d|  c}}      }t        j                  |j                  d            j                         S c c}}w )N_-utf-8)join__dict__rZ   hashlibsha256encode	hexdigest)r[   namevalkeys       r   hashzHIPOptions.hashJ   s]    hh9L9L9NOID#4&#OP~~cjj12<<>> Ps   A4
)$__name__
__module____qualname__r,   int__annotations__r.   r0   r1   r2   r3   r4   r5   r6   rU   r7   rY   r8   boolr9   r%   rX   r;   r   r<   r>   r?   r@   rA   rB   rC   rD   rF   rH   r^   rl   r"   r   r   r*   r*      s   IsL#JHc!"3"  ccK#L%#E4"t"D#'2%*2(*5:*'--/9 %*9!d! !#!E3N$$)*!3*L# &/s.0?r   r*   c                   8    e Zd ZdZddZed        Zed        Zy)HIPAttrsDescriptorpointer_range_32Nc                     d| j                   d<   ||y t        ||      D cg c]?  \  }}t        j                  |      s|j                  s|j
                  s|j                  A c}}| j                  d<   y c c}}w )NrN   ztt.pointer_range)property_valuesziprt   is_within2gbdo_not_specializedo_not_specialize_on_alignmentnumarg_properties)r[   paramsvaluesparamargs        r   _add_backend_propertiesz*HIPAttrsDescriptor._add_backend_propertiesZ   sy    35/0>V^ ),FF(;3
$%?Q?^?^_b?c++E4X4X II3
./ 3
s   A:&A:c                     t        | d      r| j                         dk  S dt        t        |             v r-t        | d      r!| j	                         j                         dk  S y)N	ptr_rangeiztorch.Tensoruntyped_storageF)hasattrr   rX   typer   size)r   s    r   ry   zHIPAttrsDescriptor.is_within2gbd   sW    3$==?i//Sc^+=N0O&&(--/9<<r   c                     t        j                  | |      }t        j                  |       rdnd}||z   j	                  dd      }|r|S dS )NSN )r   get_property_keyrt   ry   replace)rj   aligngeneric_keyhip_keyrk   s        r   r   z#HIPAttrsDescriptor.get_property_keym   sL    %66sEB+88=#3W$--c26s"s"r   )NN)rm   rn   ro   	__slots__r   staticmethodry   r   r"   r   r   rt   rt   O   s6     $I
   # #r   rt   c                       e Zd Zedefd       Zdeddf fdZdefdZd Z	d Z
deeef   fd	Zd
 Zd Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zd Z ej4                         d        Z xZS )
HIPBackendr   c                      | j                   dk(  S )NrE   )backend)r   s    r   supports_targetzHIPBackend.supports_targetw   s    ~~&&r   returnNc                 j    t         |   |       t        |j                  t              sJ d| _        y )Nhsaco)super__init__
isinstancer%   rX   
binary_ext)r[   r   	__class__s     r   r   zHIPBackend.__init__{   s+     &++s+++!r   c                    d| j                   j                  i}d|vr[t        t        j                        }| j                   j                  dv r|j                  ddh       t        t        |            |d<   d|vrt        j                  dd      dk(  |d<   |j                  t        j                  j                         D ci c]  }||v s|||    c}       t        d	i |S c c}w )
Nr%   r;   )gfx940gfx941gfx942fp8e4b8fp8e5b16r@   TRITON_DEFAULT_FP_FUSION1r"   )r   r%   setr*   r;   updaterY   sortedosgetenv__dataclass_fields__keys)r[   optsargsr;   ks        r   parse_optionszHIPBackend.parse_options   s    (()!-#&z'F'F#G {{#AA$++Y
,CD+08L1M+ND'(T)')yy1KS'QUX'XD#$)H)H)M)M)O]ASTX\S\QQZ]^!D!! ^s   	C'C'c                     |j                   |j                  |j                  |j                  d   |j                  d   |j                  d   fS )Nr   r-   r/   )r,   r1   sharedr7   )r[   metadatas     r   pack_metadatazHIPBackend.pack_metadata   sO    OO!!!$!!!$!!!$
 	
r   c                 4    dt        | j                        i}|S )Nr'   )r'   r   )r[   codegen_fnss     r   get_codegen_implementationz%HIPBackend.get_codegen_implementation   s    %|DKK'@Ar   c                     ddl m} d|iS )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )r[   r   s     r   get_module_mapzHIPBackend.get_module_map   s    719==r   c                 .    t        j                  |       y N)r
   load_dialects)r[   ctxs     r   r   zHIPBackend.load_dialects   s    #r   c                     t        ||      S r   )rt   )r[   r~   r   s      r   get_attrs_descriptorzHIPBackend.get_attrs_descriptor   s    !&$//r   c                 .    t         j                  | |      S r   )rt   r   )r   r   s     r   compute_spec_keyzHIPBackend.compute_spec_key   s    !223>>r   c                  P   t        j                  d      } | t        |       }|j                         r|S t        t              j
                  dz  }|j                         r|S t        d      }|j                         r|S t        d      }|j                         r|S t        d      )NTRITON_HIP_LLD_PATHzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r   r   r   is_filerS   rT   	Exception)lld_env_pathllds     r   path_to_rocm_lldzHIPBackend.path_to_rocm_lld   s     yy!67#|$C{{}
8n##&77;;=J./;;=J$%;;=Jqrrr   c                    t        j                  | j                        }|j                          t        j
                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j
                  j                  |       t        j
                  j                  |       t        j                  j                  |       |j!                  |        | S r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointeradd_combineadd_canonicalizeradd_reorder_broadcastadd_cseadd_licmadd_symbol_dceadd_loop_unrollrunmodr   optionspms       r   	make_ttirzHIPBackend.make_ttir   s    __S[[)
!!"%..r2#''+))"-b!r"$$R(##B'
s
r   c                 P   t        j                  | j                        }|j                          t        j
                  j                  |d|j                   |j                  |j                  |j                         |j                  |        t        j                  | j                        }|j                          t        j                  j                  |       t        j                  j                  |       t        j                  j                  |       t         j                  j                  j#                  ||j                  |j$                  |j&                         t        j                  j                  |       t         j                  j                  j)                  |       t        j                  j+                  |d       t!        j,                  |j                        ri|j.                  dk7  sJ d       t         j                  j                  j1                  ||j.                         t        j2                  j5                  |       t         j                  j                  j7                  |       t        j                  j+                  |d       t        j                  j                  |       t        j                  j9                  |       t!        j,                  |j                        r)t         j                  j                  j;                  |       t<        j>                  jA                  dd      dk(  rqt         j                  j                  jC                  |       t        j2                  j5                  |       t         j                  j                  jE                  |       t        j2                  j5                  |       t        j2                  jG                  |       t        j2                  jI                  |       |j                  |        | S )Nzhip:Tr   zTriton AMD backend pipeliner has been updated. We used to trigger software pipelining with num_stages == 0. Now it will not happen anymore; please update to use num_stages == 2 for equivalent behavior in the past.AMDGCN_USE_BUFFER_OPS0r   )%r   r   r   r   r   r   add_convert_to_ttgpuirr%   r,   rP   r1   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr
   add_accelerate_matmulrA   rB   add_optimize_epilogueadd_optimize_dot_operandshas_matrix_core_featurer0   add_stream_pipelinev2r   r   insert_instruction_sched_hintsadd_reduce_data_duplicationadd_reorder_instructionsr   environgetadd_canonicalize_pointersadd_convert_to_buffer_opsr   r   r   s       r   
make_ttgirzHIPBackend.make_ttgir   s   __S[[)
**2gll^/DgFWFWY`YjYj+2+;+;	=
s__S[[)
##B'44R833B7

00W\\7C_C_ahanano44R8

00400T:&&w||4%%* Q .P Q*
 JJ44R9K9KLMM++B/

99"=00T:44R82226&&w||4JJ77;::>>1373>JJ88<MM++B/JJ88<''+b!$$R(
s
r   c                    | }t        j                  |j                        }|j                          t        j
                  j                  j                  ||j                         d}t        j
                  j                  j                  ||j                  |       t
        j                  j                  |       t
        j                  j                  |       t
        j                  j                  |       d}t        j
                  j                  j                  ||j                  |       t
        j                  j!                  |       t
        j                  j#                  |       t
        j                  j%                  |       t
        j                  j'                  |       t
        j                  j!                  |       t
        j                  j#                  |       t
        j                  j)                  |       t        j
                  j                  j+                  ||j,                         t.        j0                  j3                  dd      dk(  rt
        j4                  j7                  |       t        j
                  j                  j9                  ||       |j;                  |       t=        j>                          t=        j                         }t=        j@                  ||      }t	        jB                  |       t=        jD                  |t        jF                  |j                  d       t	        jH                  ||j                         t	        jJ                  |d       t	        jL                  |dd       t	        jL                  |d	d       t	        jL                  |d
d       t	        jL                  |d|jN                  dk(         |jQ                         D 	cg c]  }	|	jS                         r|	 }
}	|
d   jU                  t        jV                         |
d   jY                  dd|jZ                  |jN                  z          |
d   jY                  d|j\                          |j^                  rdnd}|
d   jY                  d|       t	        j`                  |
d          |jb                  rK|jb                  D cg c]  \  }}t	        jd                  ||      s|  }}}t=        jf                  ||       t=        jh                  |t<        jj                  |j                  dg |jl                         | jo                  d      |d<   t	        jp                  |       ts        |      S c c}	w c c}}w )Nr   TTRITON_DISABLE_LINE_INFOr   r   i  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rO   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr=   zdenormal-fp-math-f32ztriton_gpu.sharedr   ):r   r   r   r   r
   r   r   %add_decompose_unsupported_conversionsr%   add_optimize_lds_usageconvertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   lower_instruction_sched_hintsrH   r   r   r   llvmiradd_di_scopeadd_builtin_func_to_llvmirr   r	   init_targets	to_moduleattach_target_tripleattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrP   get_functionsis_declarationset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr,   r.   rC   set_all_fn_arg_inregr6   need_extern_liblink_extern_libsoptimize_moduleOPTIMIZE_O3r@   get_int_attrcleanup_bitcode_metadatarX   )srcr   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   llvm_modfnfnsdenormal_moderi   pathpathss                  r   	make_llirzHIPBackend.make_llir   s   __S[[)


@@W\\R 

11"gllOT$$R(**2.11"5 	

((W\\9E''+b!''+**2.''+b!$$R(

88W=^=^_::>>4c:cAMM&&r*

55b)D
s 	,,.>>#w/  *x):):GLL"M 	Hgll3Hc*%%h0H%P%%h0QSWX%%h0H%P%%h0H'J[J[_aJab %224PbB<M<M<OrPPA > >?A8Bw?P?PQXQbQb?b>c:deA0W5I5I4JL+2+E+E6A1=A
 	  Q(.5.A.AiltTSEXEXYacgEhTiEi!!(E2Xt'7'7r2wOgOgh !--.AB$$X.8}/ Q js   /V V V3Vc           	      N   t        j                  d|       }t        |      dk(  sJ |d   |d<   t        j                  | t
        j                  |j                  dg |j                  d      }t        j                  j                  dd      d	k(  rt        d
       t        |       |S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r-   r   ri   r   FAMDGCN_ENABLE_DUMPr   r   z!// -----// AMDGCN Dump //----- //)refindalllenr	   translate_to_asmr
   r  r%   r@   r   r   r   print)r  r   r   namesamdgcns        r   make_amdgcnzHIPBackend.make_amdgcnI  s    
 

QSVW5zQ 8&&sC,=,=w||RQSU\UmUmotu::>>.4;56&Mr   c                 d   t        j                  | |j                  d      }t        j	                         }t        j                         5 }t        j                         5 }t        |j                  d      5 }|j                  |       d d d        t        j                  |ddd|j                  d|j                  g       d d d        t        |j                  d      5 }|j                         }	d d d        d d d        	S # 1 sw Y   zxY w# 1 sw Y   NxY w# 1 sw Y   +xY w# 1 sw Y   	S xY w)Nr   wbz-flavorgnuz-sharedz-orb)r
   assemble_amdgcnr%   r   r   tempfileNamedTemporaryFileopenri   write
subprocess
check_callread)
r  r   r   r   	rocm_pathtmp_outtmp_infd_infd_outrets
             r   
make_hsacozHIPBackend.make_hsacoX  s   ##Cr://1	((* 	$g,,. q&&++t, 'KK&'%%y)UIv{{\`bibnbn&opq gllD) $Vkkm$	$ 
' 'q q$ $	$ 
sT   
D%D6D8D D%D/D%D
DD	D%D"	D%%D/c                 b      fd|d<    fd|d<    fd|d<    fd|d<    fd	|d
<   y )Nc                 *    j                  | |      S r   )r   r  r   r   r[   s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>g      t~~c8W/U r   r   c                 *    j                  | |      S r   )r   rH  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>h      Xw0W r   ttgirc                 *    j                  | |      S r   )r(  rH  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>i  rI  r   llirc                 *    j                  | |      S r   )r2  rH  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>j  s    1A1A#xQX1Y r   r1  c                 *    j                  | |      S r   )rE  rH  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>k  rK  r   r   r"   )r[   stagesr   s   ` `r   
add_stageszHIPBackend.add_stagesf  s1    UvWwUvYxWwr   c                 z    t        j                  t        j                         dgd      }| d| j                   S )Nz	--versionrb   )encodingra   )r<  check_outputr   r   r   )r[   versions     r   rl   zHIPBackend.hashm  s8    )):+F+F+H+*Vahi!DKK=))r   )rm   rn   ro   r   r   r   r   r   r   r   r   r   rX   r   r   r   r   r   r   r   r   r(  r2  rE  rR  	functools	lru_cacherl   __classcell__)r   s   @r   r   r   u   s   '	 ' '"y "T "
"S "
>S*_ 5 >0 ? ? s s&   % %N P Pd    X Y* *r   r   )triton.backends.compilerr   r   r   r   triton._C.libtritonr   r   r	   r
   dataclassesr   typingr   r   r   typesr   re   r8  r   r+  r<  rW  pathlibr   r'   r*   rt   r   r"   r   r   <module>r`     s    a a 5 5 ! # #    	 	   
1 
1 $/? /? /?d "# "# "#J{* {*r   