
    ##h                     "   d dl mZmZ d dlZddlmZmZ  e       Z	 	 	 	 ddej                  j                  dej                  dej                  dej                  d	eej                     d
edee   dee   dee   deej                  df   fdZy)    )OptionalTupleN   )_flash_attention_forward!flash_attn_supports_top_left_maskmodulequerykeyvalueattention_maskdropoutscalingsliding_windowsoftcapreturnc	                 V   |j                   d   }
|j                  dd      }|j                  dd      }|j                  dd      }d }|j                  t        j                  k(  rt        j
                         rt        j                         }nat        | j                  d      r| j                  j                  }n4t        d | j                         D              j                  j                  }|	j                  dd        t        ||||f|
| j                  ||||t         |d|	}|d fS )Nr      _pre_quantization_dtypec              3   j   K   | ]+  }t        |t        j                  j                        s(| - y w)N)
isinstancetorchnnLinear).0layers     f/var/www/html/sandstorm/venv/lib/python3.12/site-packages/transformers/integrations/flash_attention.py	<genexpr>z*flash_attention_forward.<locals>.<genexpr>,   s'     j%zRWY^YaYaYhYhGijs   )33	is_causal)query_lengthr   r   softmax_scaler   r   use_top_left_masktarget_dtype)shape	transposedtyper   float32is_autocast_enabledget_autocast_gpu_dtypehasattrconfigr   nextmodulesweightpopr   r   _use_top_left_mask)r   r	   r
   r   r   r   r   r   r   kwargsseq_lenr"   attn_outputs                r   flash_attention_forwardr3      s    kk!nG OOAq!E
--1
COOAq!E L{{emm#$$& 779LV]]$=>!==@@Lj6>>3CjjqqwwL JJ{D!*	
 ""%,! K      )g        NNN)typingr   r   r   modeling_flash_attention_utilsr   r   r/   r   ModuleTensorfloatintr3    r4   r   <module>r<      s    "  h 78  #$(#6HHOO6<<6 
6 <<	6
 U\\*6 6 e_6 SM6 e_6 5<<6r4   