
    h]                         d dl Z d dlZd dlZddlmZ  e       rd dlmZ dZdZ e	 ej                  de            Zeeefvr ed      i Zd	 Zd
 Z	 	 	 ddZ	 	 	 	 	 ddZy)    N   )is_torch_npu_available)npu_fusion_attention   NPU_FA2_SPARSE_MODE)defaultzEnvironment variable `NPU_FA2_SPARSE_MODE` can only be set as 2 (top-left aligned causal mask) or 3 (down-right aligned causal mask).c                     | t         vrCt        j                  t        j                  ddg|       d      j	                         t         | <   t         |    S )z6Get or create attention mask for the specified device.i   device   )diagonal)ATTN_MASK_NPU_CACHEtorchtriuonesboolr
   s    k/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/integrations/npu_flash_attention.pyget_attn_mask_npur   (   sF    ((&+jjT4LQW1Xcd&e&j&j&lF#v&&    c                  4    t               rt        t        k(  S dS )NF)r   SPARSE_MODE!TOP_LEFT_ALIGNED_CAUSAL_MASK_MODE r   r   'is_npu_fa2_top_left_aligned_causal_maskr   /   s    ?U?W;;;b]bbr   c                 ,   d|z
  }|%dt        j                  | j                  d         z  }|s&| j                  d   }t        | |||d||      d   }	|	S t	        | j
                        }
| j                  d   }t        | |||d|||
t        	      d   }	|	S )N      ?r   BSND)	keep_probscaler   )r   r    
atten_masksparse_mode)mathsqrtshaper   r   r   r   )qkv	dropout_psoftmax_scalecausalkwargsr   head_numoutputattn_mask_npus              r   npu_flash_attn_funcr0   3   s     iIdii44771:%aAx9\ijklm  M *!((3771:%$#

 
 Mr   c
                    d|z
  }|%dt        j                  | j                  d         z  }|	s| j                  d   }t        | |||d d ||dt	        |dd  j                         j                         j                               t	        |dd  j                         j                         j                                     d   }|S t        | j                        }| j                  d   }t        | |||d d |||dt	        |dd  j                         j                         j                               t	        |dd  j                         j                         j                               t              d   }|S )Nr   r   r   TND)pser!   r    r   input_layoutactual_seq_qlenactual_seq_kvlenr   )	r3   padding_maskr!   r    r   r4   r5   r6   r"   )r#   r$   r%   r   tuplecpunumpytolistr   r   r   )r&   r'   r(   cu_seqlens_qcu_seqlens_kmax_seqlen_qmax_seqlen_kr)   r*   r+   r,   r   r-   r.   r/   s                  r   npu_flash_attn_varlen_funcr@   V   sv    iIdii44771:%!,qr"2"6"6"8">">"@"G"G"IJ"<#3#7#7#9#?#?#A#H#H#JK
 @ M% *!((3771:%$!,qr"2"6"6"8">">"@"G"G"IJ"<#3#7#7#9#?#?#A#H#H#JK#
   Mr   )        NF)NNrA   NF)r#   osr   utils.import_utilsr   	torch_npur   r   #DOWN_RIGHT_ALIGNED_CAUSAL_MASK_MODEintgetenvr   
ValueErrorr   r   r   r0   r@   r   r   r   <module>rI      s     	  7 .
 %& !&' #)"))1;^_`8:]^^
	1 
  'c  R 4r   