
    h&                        d Z ddlmZmZmZ ddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddlm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( ddl)m*Z*  ejV                  e,      Z- G d dej\                        Z/ G d de       Z0 G d de!      Z1 G d de&      Z2 G d de%      Z3 G d de"      Z4 G d  d!e#      Z5 G d" d#e$      Z6g d$Z7y)%zPyTorch Starcoder2 model.    )CallableOptionalUnionN)nn)check_model_inputs   )ACT2FN)CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)deprecate_kwarg   )	MistralAttentionMistralDecoderLayerMistralForCausalLM MistralForSequenceClassificationMistralForTokenClassificationMistralModelMistralRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )Starcoder2Configc                   h     e Zd Zdef fdZdeeej                        dej                  fdZ	 xZ
S )Starcoder2MLPconfigc                 P   t         |           |j                  }t        j                  ||j
                  |j                        | _        t        j                  |j
                  ||j                        | _        t        |j                     | _        |j                  | _        y )Nbias)super__init__hidden_sizer   Linearintermediate_sizeuse_biasc_fcc_projr	   
hidden_actactresidual_dropout)selfr#   	embed_dim	__class__s      o/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/transformers/models/starcoder2/modular_starcoder2.pyr(   zStarcoder2MLP.__init__9   su    &&	IIi)A)AX	ii 8 8)&//Z&++, & 7 7    hidden_statesreturnc                     | j                  |      }| j                  |      }| j                  |      }t        j                  j                  || j                  | j                        }|S )Nptraining)r-   r0   r.   r   
functionaldropoutr1   r<   )r2   r7   s     r5   forwardzStarcoder2MLP.forwardA   sZ    		-0/M2--mt?T?T_c_l_l-mr6   )__name__
__module____qualname__r    r(   r   tupletorchFloatTensorr?   __classcell__r4   s   @r5   r"   r"   8   s9    8/ 8XeE4E4E.F%G EL]L] r6   r"   c                   V    e Zd Zddedee   f fdZ eddd      	 	 ddej                  d	e
ej                  ej                  f   d
eej                     dee   deej                     dee   de
ej                  eej                     ee
ej                        f   fd       Z xZS )Starcoder2Attentionr#   	layer_idxc                    t         |   ||       |j                  | _        t        j                  |j
                  |j                  | j                  z  |j                        | _	        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j                  | j                  z  |j
                  |j                        | _        y )Nr#   rJ   r%   )r'   r(   r1   r   r*   r)   num_attention_headshead_dimr,   q_projnum_key_value_headsk_projv_projo_projr2   r#   rJ   r4   s      r5   r(   zStarcoder2Attention.__init__J   s    )< & 7 7ii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii : :T]] JFL^L^eketetur6   past_key_valuepast_key_valuesz4.58)new_nameversionr7   position_embeddingsattention_maskcache_positionkwargsr8   c           
         |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        }| j                  j                  dk7  rt        | j                  j                     } || |	|
||f| j                  sdn| j                  | j                   t#        | j                  dd       d|\  }} |j$                  g |d j'                         }| j)                  |      }t*        j,                  j/                  || j0                  | j                  	      }||fS )
Nr   r   )sincosr[   eagerg        sliding_window)r>   scalingrb   r:   )shaperN   rO   view	transposerQ   rR   r   updaterJ   r   r#   _attn_implementationr   r<   attention_dropoutrc   getattrreshape
contiguousrS   r   r=   r>   r1   )r2   r7   rY   rZ   rV   r[   r\   input_shapehidden_shapequery_states
key_statesvalue_statesr`   r_   cache_kwargsattention_interfaceattn_outputattn_weightss                     r5   r?   zStarcoder2Attention.forwardR   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&#&snUL'6'='=j,X\XfXfht'u$J(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ *k));;;;FFHkk+.mm++4004== , 
 L((r6   )N)NN)r@   rA   rB   r    r   intr(   r   rD   TensorrC   r
   
LongTensorr   r   r?   rF   rG   s   @r5   rI   rI   I   s    v/ vHSM v %0A6R ,059.)||.) #5<<#=>.) !.	.)
 "%.) !!1!12.) -..) 
u||Xell3XeELL>Q5RR	S.) S.)r6   rI   c                   (     e Zd Zdedef fdZ xZS )Starcoder2DecoderLayerr#   rJ   c                 *   t         |   ||       t        ||      | _        t	        |      | _        t        j                  |j                  |j                        | _
        t        j                  |j                  |j                        | _        y )NrL   eps)r'   r(   rI   	self_attnr"   mlpr   	LayerNormr)   norm_epsiloninput_layernormpost_attention_layernormrT   s      r5   r(   zStarcoder2DecoderLayer.__init__   sj    +,FiP (!||F,>,>FDWDWX(*V5G5GVM`M`(a%r6   )r@   rA   rB   r    rv   r(   rF   rG   s   @r5   rz   rz      s     b/ bC b br6   rz   c                       e Zd Zy)Starcoder2RotaryEmbeddingNr@   rA   rB    r6   r5   r   r          r6   r   c                   "    e Zd Zdef fdZe	 	 	 	 	 	 	 ddeej                     deej                     deej                     dee
eeej                     f      deej                     dee   d	eej                     d
ee   defd       Z xZS )Starcoder2Modelr#   c           	      :   t         |   |       t        j                  t	        |j
                        D cg c]  }t        ||       c}      | _        t        j                  |j                  |j                        | _        |j                  | _        y c c}w )Nr|   )r'   r(   r   
ModuleListrangenum_hidden_layersrz   layersr   r)   r   normembedding_dropoutrT   s      r5   r(   zStarcoder2Model.__init__   su     mmHMfNfNfHgh9#FI6h
 LL!3!39L9LM	!'!9!9 is   B	input_idsrZ   position_idsrV   inputs_embeds	use_cacher[   r\   r8   c                     |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|F||j	                         nd}	t        j                  |	|	|j                  d   z   |j                        }||j                  d      }| j                  j                  t        nt        }
 |
| j                  |||||      }|}t        j                  j                  || j                   | j"                        }| j%                  ||      }| j&                  d | j                  j(                   D ]  } ||f||||||d|} | j+                  |      }t-        ||r|	      S d 	      S )
Nz:You must specify exactly one of input_ids or inputs_embeds)r#   r   r   )device)r#   input_embedsrZ   r[   rV   r   r:   )rZ   r   rV   r   r[   rY   )last_hidden_staterV   )
ValueErrorembed_tokensr   r#   get_seq_lengthrD   arangerd   r   	unsqueezerb   r   r   r   r=   r>   r   r<   
rotary_embr   r   r   r   )r2   r   rZ   r   rV   r   r   r[   r\   past_seen_tokensmask_functioncausal_maskr7   rY   decoder_layers                  r5   r?   zStarcoder2Model.forward   s    -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L.2kk.H.H.P*Vw#;;&))+%
 &--T33dmm . 

 #oom\J![[)H4;;+H+HI 
	M)	*) /#-$7	 	M
	 		-0&+/8O
 	
>B
 	
r6   )NNNNNNN)r@   rA   rB   r    r(   r   r   rD   rx   rw   r   r
   listrE   boolr   r   r   r?   rF   rG   s   @r5   r   r      s    :/ :  151537KO59$(59?
E,,-?
 !.?
 u//0	?

 "%tE4E4E/F(F"GH?
   1 12?
 D>?
 !!1!12?
 +,?
 
!?
 ?
r6   r   c                       e Zd Zy)Starcoder2ForCausalLMNr   r   r6   r5   r   r      r   r6   r   c                       e Zd Zy)#Starcoder2ForSequenceClassificationNr   r   r6   r5   r   r      r   r6   r   c                       e Zd Zy) Starcoder2ForTokenClassificationNr   r   r6   r5   r   r      r   r6   r   )r   r   Starcoder2PreTrainedModelr   r   )8__doc__typingr   r   r   rD   torch.utils.checkpointr   transformers.utils.genericr   activationsr	   cache_utilsr
   r   masking_utilsr   r   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   utils.deprecationr   mistral.modeling_mistralr   r   r   r   r   r   r   r   r   configuration_starcoder2r    
get_loggerr@   loggerModuler"   rI   rz   r   r   r   r   r   __all__r   r6   r5   <module>r      s   (   , ,    9 ! . R B 7 5 & 0 0
 
 
 7 
		H	%BII "8)* 8)vb0 b	 6 	I
l I
X	. 		*J 		'D 	r6   