
    h8%                        d dl mZmZ d dlZd dlZd dlmZ d dlmZ ddlm	Z	m
Z
 ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, ddl-m.Z. ddl/m0Z0  ejb                  e2      Z3 G d de)      Z4 G d de#      Z5 ejl                   e!              ejl                  d      k\  r G d dejn                        Z8n  ed       G d dejr                               Z8 G d d e$      Z: G d! d"e*      Z; G d# d$e.      Z< G d% d&e%      Z= G d' d(e'      Z> G d) d*e(      Z? G d+ d,e&      Z@g d-ZAy).    )CallableOptionalN)version)nn   )CacheDynamicCache)use_kernel_forward_from_hub)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)deprecate_kwarg)check_model_inputs)get_torch_version   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForQuestionAnsweringLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLPLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward)MistralModel   )Qwen2Configc                        e Zd Z fdZ xZS )Qwen2MLPc                 J   t         |   |       t        j                  | j                  | j
                  d      | _        t        j                  | j                  | j
                  d      | _        t        j                  | j
                  | j                  d      | _        y )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_projselfconfig	__class__s     e/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/models/qwen2/modular_qwen2.pyr+   zQwen2MLP.__init__)   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )__name__
__module____qualname__r+   __classcell__r5   s   @r6   r&   r&   (   s    Y Yr7   r&   c                   ,    e Zd Zdedef fdZ eddd      	 	 ddej                  d	e	ej                  ej                  f   d
e
ej                     de
e   de
ej                     dee   de	ej                  e
ej                     f   fd       Z xZS )Qwen2Attentionr4   	layer_idxc                 l   t         |   ||       t        j                  |j                  |j
                  | j                  z  d      | _        t        j                  |j                  |j                  | j                  z  d      | _	        t        j                  |j                  |j                  | j                  z  d      | _
        t        j                  |j
                  | j                  z  |j                  d      | _        |j                  |   dk(  r|j                  | _        y d | _        y )NTr(   Fsliding_attention)r*   r+   r   r,   r-   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projo_projlayer_typessliding_windowr3   r4   r?   r5   s      r6   r+   zQwen2Attention.__init__1   s    +ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii : :T]] JFL^L^ejk7=7I7I)7TXk7kf33qur7   past_key_valuepast_key_valuesz4.58)new_namer   hidden_statesposition_embeddingsattention_maskcache_positionkwargsreturnc                 J   |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        }| j                  j                  dk7  rt        | j                  j                     } || |	|
||f| j                  sdn| j                  | j                   | j"                  d|\  }} |j$                  g |d j'                         }| j)                  |      }||fS )Nr#   r   )sincosrR   eagerg        )dropoutscalingrJ   )shaperC   rD   view	transposerF   rG   r    updater?   r!   r4   _attn_implementationr   trainingattention_dropoutr[   rJ   reshape
contiguousrH   )r3   rO   rP   rQ   rM   rR   rS   input_shapehidden_shapequery_states
key_statesvalue_statesrX   rW   cache_kwargsattention_interfaceattn_outputattn_weightss                     r6   forwardzQwen2Attention.forward9   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&#&snUL'6'='=j,X\XfXfht'u$J(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL..
%
 
%
!\ *k));;;;FFHkk+.L((r7   )NN)r8   r9   r:   r$   intr+   r   torchTensortupler   r   
LongTensorr   r   rn   r;   r<   s   @r6   r>   r>   0   s    v{ vs v %0A6R ,059*)||*) #5<<#=>*) !.	*)
 "%*) !!1!12*) -.*) 
u||Xell33	4*) S*)r7   r>   z2.3.0c                   *     e Zd Zddeddf fdZ xZS )Qwen2RMSNormepsrT   Nc                 *    t         |   ||d       y )NT)normalized_shaperv   elementwise_affine)r*   r+   r3   r-   rv   r5   s      r6   r+   zQwen2RMSNorm.__init__j   s    GksW[\r7   gư>)r8   r9   r:   floatr+   r;   r<   s   @r6   ru   ru   i   s    	]U 	]d 	] 	]r7   ru   RMSNormc                   h     e Zd Zddeddf fdZdej                  dej                  fdZd Z xZ	S )	ru   rv   rT   Nc                     t         |           t        j                  t	        j
                  |            | _        || _        y)zC
            Qwen2RMSNorm is equivalent to T5LayerNorm
            N)r*   r+   r   	Parameterrp   onesweightvariance_epsilonrz   s      r6   r+   zQwen2RMSNorm.__init__q   s1     G,,uzz+'>?DK$'D!r7   rO   c                 "   |j                   }|j                  t        j                        }|j	                  d      j                  dd      }|t        j                  || j                  z         z  }| j                  |j                  |      z  S )Nr   rV   T)keepdim)	dtypetorp   float32powmeanrsqrtr   r   )r3   rO   input_dtypevariances       r6   rn   zQwen2RMSNorm.forwardy   sy    '--K),,U]];M$((+00T0BH)EKK4CXCX8X,YYM;;!1!1+!>>>r7   c                 ^    t        | j                  j                         d| j                   S )Nz, eps=)rr   r   r\   r   )r3   s    r6   
extra_reprzQwen2RMSNorm.extra_repr   s*    DKK--./vd6K6K5LMMr7   r{   )
r8   r9   r:   r|   r+   rp   rq   rn   r   r;   r<   s   @r6   ru   ru   o   s7    	(U 	(d 	(	? 	?%,, 	?	Nr7   c                   (     e Zd Zdedef fdZ xZS )Qwen2DecoderLayerr4   r?   c                 P    t         |   ||       |j                  |   | _        y )N)r4   r?   )r*   r+   rI   attention_typerK   s      r6   r+   zQwen2DecoderLayer.__init__   s(    )<$00;r7   )r8   r9   r:   r$   ro   r+   r;   r<   s   @r6   r   r      s    <{ <s < <r7   r   c                       e Zd Zy)Qwen2PreTrainedModelNr8   r9   r:    r7   r6   r   r          r7   r   c                       e Zd Zdef fdZee	 	 	 	 	 	 	 ddeej                     deej                     deej                     dee   deej                     dee   d	eej                     d
ee   defd              Z xZS )
Qwen2Modelr4   c                 ^    t         |   |       d| j                  j                  v | _        y )NrA   )r*   r+   r4   rI   has_sliding_layersr2   s     r6   r+   zQwen2Model.__init__   s'     "59P9P"Pr7   	input_idsrQ   position_idsrM   inputs_embeds	use_cacherR   rS   rT   c                    |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|F||j	                         nd}	t        j                  |	|	|j                  d   z   |j                        }||j                  d      }t        |x}
t              s:| j                  |||||d}dt        di |i}
| j                  rt        di ||
d<   |}| j                  ||      }| j                   d | j                  j"                   D ]  } ||f|
|j$                     |||||d	|}! | j'                  |      }t)        ||r|
      S d 
      S )Nz:You must specify exactly one of input_ids or inputs_embeds)r4   r   r#   )device)r4   input_embedsrQ   rR   rM   r   full_attentionrA   )rQ   r   rM   r   rR   rP   )last_hidden_staterM   r   )
ValueErrorembed_tokensr	   r4   get_seq_lengthrp   aranger\   r   	unsqueeze
isinstancedictr   r   r   
rotary_emblayersnum_hidden_layersr   normr   )r3   r   rQ   r   rM   r   r   rR   rS   past_seen_tokenscausal_mask_mappingmask_kwargsrO   rP   decoder_layers                  r6   rn   zQwen2Model.forward   s    -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L ?-F ++ -"0"0#2 ,K !"4"C{"C# &&;\;k_j;k#$78% #oom\J![[)H4;;+H+HI 
	M)	2=3O3OP) /#-$7	 	M
	 		-0&+/8O
 	
>B
 	
r7   )NNNNNNN)r8   r9   r:   r$   r+   r   r   r   rp   rs   rq   r   FloatTensorboolr   r   r   rn   r;   r<   s   @r6   r   r      s    Q{ Q  151537+/59$(59E
E,,-E
 !.E
 u//0	E

 "%E
   1 12E
 D>E
 !!1!12E
 +,E
 
!E
  E
r7   r   c                       e Zd Zy)Qwen2ForCausalLMNr   r   r7   r6   r   r      r   r7   r   c                       e Zd Zy)Qwen2ForSequenceClassificationNr   r   r7   r6   r   r      r   r7   r   c                       e Zd Zy)Qwen2ForTokenClassificationNr   r   r7   r6   r   r      r   r7   r   c                       e Zd Zy)Qwen2ForQuestionAnsweringNr   r   r7   r6   r   r      r   r7   r   )r   r   r   ru   r   r   r   )Btypingr   r   rp   torch.utils.checkpoint	packagingr   r   cache_utilsr   r	   integrationsr
   masking_utilsr   r   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.deprecationr   utils.genericr   utils.import_utilsr   llama.modeling_llamar   r   r   r   r   r   r   r   r    r!   mistral.modeling_mistralr"   configuration_qwen2r$   
get_loggerr8   loggerr&   r>   parser}   ru   Moduler   r   r   r   r   r   r   __all__r   r7   r6   <module>r      sC   %     . 7 R B 6 & @ @ 0 / 3   4 , 
		H	%Yx Y4)^ 4)n 7=="$%w)??]rzz ] !+Nryy N ,N(<) <	/ 	L
 L
^	' 		%C 		"= 		 9 	r7   