
    h                        d dl mZmZ d dlZd dlmZ d dlmZ ddlmZm	Z	 ddl
mZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' ddl(m)Z)  ejT                  e+      Z, G d de#      Z- G d de      Z. G d de      Z/ G d de%      Z0 G d de$      Z1 G d de       Z2 G d d e"      Z3 G d! d"e!      Z4 G d# d$ee0      Z5g d%Z6y)&    )CallableOptionalN)nn)check_model_inputs   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)GenericForQuestionAnswering)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)deprecate_kwarg   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLP
LlamaModelLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward   )MistralConfigc                        e Zd Z fdZ xZS )
MistralMLPc                 J   t         |   |       t        j                  | j                  | j
                  d      | _        t        j                  | j                  | j
                  d      | _        t        j                  | j
                  | j                  d      | _        y )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_proj)selfconfig	__class__s     i/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/models/mistral/modular_mistral.pyr(   zMistralMLP.__init__&   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )__name__
__module____qualname__r(   __classcell__r1   s   @r2   r#   r#   %   s    Y Yr3   r#   c                   ,    e Zd Zdedef fdZ eddd      	 	 ddej                  d	e	ej                  ej                  f   d
e
ej                     de
e   de
ej                     dee   de	ej                  e
ej                     f   fd       Z xZS )MistralAttentionr0   	layer_idxc                 p   t         |   ||       t        |dd       xs |j                  |j                  z  | _        t        j                  |j                  |j                  | j
                  z  d      | _        t        j                  |j                  |j                  | j
                  z  d      | _
        t        j                  |j                  |j                  | j
                  z  d      | _        t        j                  |j                  | j
                  z  |j                  d      | _        y )Nhead_dimFr%   )r'   r(   getattrr*   num_attention_headsr=   r   r)   q_projnum_key_value_headsk_projv_projo_projr/   r0   r;   r1   s      r2   r(   zMistralAttention.__init__.   s    +
D9mV=O=OSYSmSm=mii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii : :T]] JFL^L^ejkr3   past_key_valuepast_key_valuesz4.58)new_nameversionhidden_statesposition_embeddingsattention_maskcache_positionkwargsreturnc           
      `   |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        }| j                  j                  dk7  rt        | j                  j                     } || |	|
||f| j                  sdn| j                  | j                   t#        | j                  dd       d|\  }} |j$                  g |d j'                         }| j)                  |      }||fS )	Nr    r   )sincosrM   eagerg        sliding_window)dropoutscalingrU   )shaper=   r@   view	transposerB   rC   r   updater;   r   r0   _attn_implementationr   trainingattention_dropoutrW   r>   reshape
contiguousrD   )r/   rJ   rK   rL   rG   rM   rN   input_shapehidden_shapequery_states
key_statesvalue_statesrS   rR   cache_kwargsattention_interfaceattn_outputattn_weightss                     r2   forwardzMistralAttention.forward6   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&#&snUL'6'='=j,X\XfXfht'u$J(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ *k));;;;FFHkk+.L((r3   )NN)r4   r5   r6   r!   intr(   r   torchTensortupler   r   
LongTensorr   r   rj   r7   r8   s   @r2   r:   r:   -   s    l} l l %0A6R ,059*)||*) #5<<#=>*) !.	*)
 "%*) !!1!12*) -.*) 
u||Xell33	4*) S*)r3   r:   c                   (     e Zd Zdedef fdZ xZS )MistralDecoderLayerr0   r;   c                 j    t         |   ||       t        ||      | _        t	        |      | _        y )N)r0   r;   )r'   r(   r:   	self_attnr#   mlprE   s      r2   r(   zMistralDecoderLayer.__init__e   s,    +)9Mf%r3   )r4   r5   r6   r!   rk   r(   r7   r8   s   @r2   rq   rq   d   s    &} & & &r3   rq   c                       e Zd ZeedZy)MistralPreTrainedModel)rJ   
attentionsN)r4   r5   r6   rq   r:   _can_record_outputs r3   r2   rv   rv   k   s    ,&r3   rv   c                       e Zd Zee	 	 	 	 	 	 	 ddeej                     deej                     deej                     dee	   deej                     dee   deej                     d	ee   d
efd              Zy)MistralModelN	input_idsrL   position_idsrG   inputs_embeds	use_cacherM   rN   rO   c                    |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|F||j	                         nd}	t        j                  |	|	|j                  d   z   |j                        }||j                  d      }| j                  j                  t        nt        }
 |
| j                  |||||      }|}| j                  ||      }| j                  d | j                  j                   D ]  } ||f||||||d|} | j!                  |      }t#        ||r|      S d       S )	Nz:You must specify exactly one of input_ids or inputs_embeds)r0   r   r    )device)r0   input_embedsrL   rM   rG   r}   )rL   r}   rG   r   rM   rK   )last_hidden_staterG   )
ValueErrorembed_tokensr	   r0   get_seq_lengthrl   arangerX   r   	unsqueezerU   r
   r   
rotary_emblayersnum_hidden_layersnormr   )r/   r|   rL   r}   rG   r~   r   rM   rN   past_seen_tokensmask_functioncausal_maskrJ   rK   decoder_layers                  r2   rj   zMistralModel.forwards   s    -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L.2kk.H.H.P*Vw#;;&))+%
 &"oom\J![[)H4;;+H+HI 
	M)	*) /#-$7	 	M
	 		-0&+/8O
 	
>B
 	
r3   )NNNNNNN)r4   r5   r6   r   r   r   rl   ro   rm   r   FloatTensorboolr   r   r   rj   ry   r3   r2   r{   r{   r   s     151537+/59$(599
E,,-9
 !.9
 u//0	9

 "%9
   1 129
 D>9
 !!1!129
 +,9
 
!9
  9
r3   r{   c                       e Zd Zy)MistralForCausalLMNr4   r5   r6   ry   r3   r2   r   r          r3   r   c                       e Zd Zy)MistralForTokenClassificationNr   ry   r3   r2   r   r      r   r3   r   c                       e Zd Zy) MistralForSequenceClassificationNr   ry   r3   r2   r   r      r   r3   r   c                       e Zd Zy)MistralForQuestionAnsweringNr   ry   r3   r2   r   r      s    r3   r   )r   r   r{   rv   r   r   )7typingr   r   rl   r   transformers.utils.genericr   cache_utilsr   r	   masking_utilsr
   r   modeling_flash_attention_utilsr   modeling_layersr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.deprecationr   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   configuration_mistralr!   
get_loggerr4   loggerr#   r:   rq   rv   r{   r   r   r   r   __all__ry   r3   r2   <module>r      s    %   9 . R B 8 5 & @ @ 0   1 
		H	%Y Y4)~ 4)n&+ &1 <
: <
~	) 		$? 		'E 	 \"=?U [r3   