
from __future__ import annotations

import inspect
import logging
import os
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable

from sentence_transformers.backend import load_onnx_model, load_openvino_model

try:
    from typing import Self
except ImportError:
    from typing_extensions import Self

import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer, MT5Config, PretrainedConfig, T5Config
from transformers.utils.import_utils import is_peft_available
from transformers.utils.peft_utils import find_adapter_config_file

from sentence_transformers.models.InputModule import InputModule

logger = logging.getLogger(__name__)

if TYPE_CHECKING and is_peft_available():
    from peft import PeftConfig


def _save_pretrained_wrapper(_save_pretrained_fn: Callable, subfolder: str) -> Callable[..., None]:
    def wrapper(save_directory: str | Path, **kwargs) -> None:
        os.makedirs(Path(save_directory) / subfolder, exist_ok=True)
        return _save_pretrained_fn(Path(save_directory) / subfolder, **kwargs)

    return wrapper
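

# Editor's note (sketch, not part of the original module): `_save_pretrained_wrapper` redirects an
# existing Hugging Face `save_pretrained` callable into a subfolder of the target directory, e.g.:
#
#     tokenizer.save_pretrained = _save_pretrained_wrapper(tokenizer.save_pretrained, subfolder="onnx")
#     tokenizer.save_pretrained("output")  # now writes into output/onnx/ instead of output/
#
# The "onnx" subfolder name above is only an illustrative value.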
ed<   	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 	 	 	 	 	 	 	 	 	 ddZ		 	 	 	 	 	 	 	 	 	 	 	 ddZ
ddZddZddZddZd dZ	 d!	 	 	 	 	 d"dZd!d#dZe	 	 	 	 	 	 	 	 	 	 d$	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d%d       Ze	 	 	 	 	 	 	 	 	 	 d$	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d&d       Ze	 	 	 	 	 	 d'	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d( fd       Z xZS ))Transformera  Hugging Face AutoModel to generate token embeddings.
    Loads the correct class, e.g. BERT / RoBERTa etc.

    Args:
        model_name_or_path: Hugging Face models name
            (https://huggingface.co/models)
        max_seq_length: Truncate any inputs longer than max_seq_length
        model_args: Keyword arguments passed to the Hugging Face
            Transformers model
        tokenizer_args: Keyword arguments passed to the Hugging Face
            Transformers tokenizer
        config_args: Keyword arguments passed to the Hugging Face
            Transformers config
        cache_dir: Cache dir for Hugging Face Transformers to store/load
            models
        do_lower_case: If true, lowercases the input (independent if the
            model is cased or not)
        tokenizer_name_or_path: Name or path of the tokenizer. When
            None, then model_name_or_path is used
        backend: Backend used for model inference. Can be `torch`, `onnx`,
            or `openvino`. Default is `torch`.
    """

    config_file_name: str = "sentence_bert_config.json"
    config_keys: list[str] = ["max_seq_length", "do_lower_case"]
    save_in_root: bool = True
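
    # Usage sketch (editor's addition, not part of the original module). This module is normally
    # created for you inside `SentenceTransformer`, but it can also be built directly, e.g.:
    #
    #     from sentence_transformers import models
    #     word_embedding = models.Transformer("sentence-transformers/all-MiniLM-L6-v2", max_seq_length=256)
    #     pooling = models.Pooling(word_embedding.get_word_embedding_dimension())
    #
    # The checkpoint name above is only an example.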

    def __init__(
        self,
        model_name_or_path: str,
        max_seq_length: int | None = None,
        model_args: dict[str, Any] | None = None,
        tokenizer_args: dict[str, Any] | None = None,
        config_args: dict[str, Any] | None = None,
        cache_dir: str | None = None,
        do_lower_case: bool = False,
        tokenizer_name_or_path: str | None = None,
        backend: str = "torch",
    ) -> None:
        super().__init__()
        self.do_lower_case = do_lower_case
        self.backend = backend
        if model_args is None:
            model_args = {}
        if tokenizer_args is None:
            tokenizer_args = {}
        if config_args is None:
            config_args = {}

        config, is_peft_model = self._load_config(model_name_or_path, cache_dir, backend, config_args)
        self._load_model(model_name_or_path, config, cache_dir, backend, is_peft_model, **model_args)

        # Parameters accepted by the underlying model's forward(); used in forward() below to filter
        # the tokenized features before calling the model
        forward_params = list(inspect.signature(self.auto_model.forward).parameters)
        self.model_forward_params = set(forward_params) | {
            "input_ids",
            "attention_mask",
            "token_type_ids",
            "inputs_embeds",
        }

        if max_seq_length is not None and "model_max_length" not in tokenizer_args:
            tokenizer_args["model_max_length"] = max_seq_length
        self.tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_name_or_path if tokenizer_name_or_path is not None else model_name_or_path,
            cache_dir=cache_dir,
            **tokenizer_args,
        )

        # No max_seq_length set. Try to infer it from the model and tokenizer
        if max_seq_length is None:
            if (
                hasattr(self.auto_model, "config")
                and hasattr(self.auto_model.config, "max_position_embeddings")
                and hasattr(self.tokenizer, "model_max_length")
            ):
                max_seq_length = min(self.auto_model.config.max_position_embeddings, self.tokenizer.model_max_length)

        self.max_seq_length = max_seq_length

        if tokenizer_name_or_path is not None:
            self.auto_model.config.tokenizer_class = self.tokenizer.__class__.__name__
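
    # Editor's note (not part of the original module): when `max_seq_length` is omitted, `__init__`
    # above infers it as
    #     min(self.auto_model.config.max_position_embeddings, self.tokenizer.model_max_length)
    # so e.g. a config with max_position_embeddings=512 and a tokenizer with model_max_length=512
    # results in max_seq_length=512.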
     @   t        |||j                  d      |j                  d      |j                  dd            	 Dt               st        d      |dk7  rt	        d      d	d
lm}  |j                  |fi |d|idfS t        j                  |fi |d|idfS )a  Loads the transformers or PEFT configuration

        Args:
            model_name_or_path (str): The model name on Hugging Face (e.g. 'sentence-transformers/all-MiniLM-L6-v2')
                or the path to a local model directory.
            cache_dir (str | None): The cache directory to store the model configuration.
            backend (str): The backend used for model inference. Can be `torch`, `onnx`, or `openvino`.
            config_args (dict[str, Any]): Keyword arguments passed to the Hugging Face Transformers config.

        Returns:
            tuple[PretrainedConfig, bool]: The model configuration and a boolean indicating whether the model is a PEFT model.
        tokenrevisionlocal_files_onlyF)r6   rU   rV   rW   zgLoading a PEFT model requires installing the `peft` package. You can install it via `pip install peft`.torcha  PEFT models can currently only be loaded with the `torch` backend. To use other backends, load the model with `backend="torch"`, call `model.transformers_model.merge_and_unload()`, save that model with `model.save_pretrained()` and then load the model with the desired backend.r   r   r6   T)	r   getr   	Exception
ValueErrorpeftr   rF   r   )rM   rN   r6   r;   rQ   r   s         r   r<   zTransformer._load_configz   s      %"#!oog.$4!,1CU!K  %&}  '! w 
 (-:--.@eKe[degkkk))*<aaW`achhhr!   c                   |dk(  r|rdD ]  }|j                  |d        t        |t              r | j                  |||fi | yt        |t              r | j
                  |||fi | yt        j                  |f||d|| _        y|dk(  rt        d||dd|| _        y|dk(  rt        d||dd|| _        yt        d	| d

    def _load_model(
        self,
        model_name_or_path: str,
        config: PeftConfig | PretrainedConfig,
        cache_dir: str,
        backend: str,
        is_peft_model: bool,
        **model_args,
    ) -> None:
        """Loads the transformers or PEFT model into the `auto_model` attribute

        Args:
            model_name_or_path (str): The model name on Hugging Face (e.g. 'sentence-transformers/all-MiniLM-L6-v2')
                or the path to a local model directory.
            config ("PeftConfig" | PretrainedConfig): The model configuration.
            cache_dir (str | None): The cache directory to store the model configuration.
            backend (str): The backend used for model inference. Can be `torch`, `onnx`, or `openvino`.
            is_peft_model (bool): Whether the model is a PEFT model.
            model_args (dict[str, Any]): Keyword arguments passed to the Hugging Face Transformers model.
        """
        if backend == "torch":
            # Drop keyword arguments that only apply to the adapter, not to the base model
            if is_peft_model:
                for adapter_only_kwarg in ["revision"]:
                    model_args.pop(adapter_only_kwarg, None)

            if isinstance(config, T5Config):
                self._load_t5_model(model_name_or_path, config, cache_dir, **model_args)
            elif isinstance(config, MT5Config):
                self._load_mt5_model(model_name_or_path, config, cache_dir, **model_args)
            else:
                self.auto_model = AutoModel.from_pretrained(
                    model_name_or_path, config=config, cache_dir=cache_dir, **model_args
                )
        elif backend == "onnx":
            self.auto_model = load_onnx_model(
                model_name_or_path=model_name_or_path,
                config=config,
                task_name="feature-extraction",
                **model_args,
            )
        elif backend == "openvino":
            self.auto_model = load_openvino_model(
                model_name_or_path=model_name_or_path,
                config=config,
                task_name="feature-extraction",
                **model_args,
            )
        else:
            raise ValueError(f"Unsupported backend '{backend}'. `backend` should be `torch`, `onnx`, or `openvino`.")

    def _load_t5_model(self, model_name_or_path: str, config, cache_dir: str, **model_args) -> None:
        """Loads the encoder model from T5"""
        from transformers import T5EncoderModel

        T5EncoderModel._keys_to_ignore_on_load_unexpected = ["decoder.*"]
        self.auto_model = T5EncoderModel.from_pretrained(
            model_name_or_path, config=config, cache_dir=cache_dir, **model_args
        )

    def _load_mt5_model(self, model_name_or_path: str, config, cache_dir: str, **model_args) -> None:
        """Loads the encoder model from T5"""
        from transformers import MT5EncoderModel

        MT5EncoderModel._keys_to_ignore_on_load_unexpected = ["decoder.*"]
        self.auto_model = MT5EncoderModel.from_pretrained(
            model_name_or_path, config=config, cache_dir=cache_dir, **model_args
        )

    def __repr__(self) -> str:
        return f"Transformer({dict(self.get_config_dict())}) with Transformer model: {self.auto_model.__class__.__name__} "

    def forward(self, features: dict[str, torch.Tensor], **kwargs) -> dict[str, torch.Tensor]:
        """
        Forward pass through the transformer model.

        This method processes the input features through the underlying transformers model
        and returns the token embeddings along with any other relevant outputs.

        Notes:
            - Only passes arguments that are expected by the underlying transformer model

        Args:
            features (dict[str, torch.Tensor]): Input features dictionary containing at least
                'input_ids' and 'attention_mask'. May also contain other tensors required by
                the underlying transformer model.
            **kwargs: Additional keyword arguments to pass to the underlying transformer model.

        Returns:
            dict[str, torch.Tensor]: Updated features dictionary containing the input features, plus:
                - 'token_embeddings': Token-level embeddings from the transformer model
                - 'attention_mask': Possibly modified attention mask if using PeftModel with prompt learning
                - 'all_layer_embeddings': If the model outputs hidden states, contains embeddings from all layers
        return_dictTr   token_embeddings)PeftModelForFeatureExtractionr3   )device   )dimhidden_statesall_layer_embeddingsr$   )itemsrE   rA   r   r\   ry   rc   active_peft_configis_prompt_learningsizerX   onesnum_virtual_tokensrz   catr7   output_hidden_states)rM   featuresr   keyvaluetrans_featuresoutputsrx   ry   
batch_sizer3   prefix_attention_masks               r   rB   zTransformer.forward   s2   , 8@~~7Gle3RVRkRkKk#u*ll!$//ONOfO$O"1:'7#$ : 4??,IJOO66II-2215
!)*:!;(-

 B B U U^l^s^s)% .3YY8M~7^de-f)*??!!66?g;U/6/GH+,3 ms
   D;D;c                B    | j                   j                  j                  S )N)rA   r7   hidden_sizert   s    r   get_word_embedding_dimensionz(Transformer.get_word_embedding_dimension  s    %%111r!   c           
        i }t        |d   t              r|g}nt        |d   t              r\g }g |d<   |D ]L  }t        t	        |j                                     \  }}|j                  |       |d   j                  |       N |g}n7g g }	}|D ]*  }
|j                  |
d          |	j                  |
d          , ||	g}|D cg c])  }|D cg c]  }t        |      j                          c}+ }}}| j                  r-|D cg c]   }|D cg c]  }|j                          c}" }}}|j                   | j                  ||dd| j                  d       |S c c}w c c}}w c c}w c c}}w )z-Tokenizes a text and maps tokens to token-idsr   	text_keysr{   longest_firstpt)padding
truncationreturn_tensors
max_length)rc   r)   rr   nextiterr   appendstripr,   lowerupdaterG   r+   )rM   textsr   outputto_tokenizelookuptext_keytextbatch1batch2
text_tuplecolss                r   tokenizezTransformer.tokenize!  sr    eAh$ 'Ka$'K"$F; 5!%d6<<>&:!;$""4({#**845 '-KFF# -
jm,jm,- "6*K ALL41A4LL ?JKs3!AGGI3KKKDNN*#..	
  5L 4Ks0   	E$ E.E$	E/E*'E/E$*E/c                    | j                   j                  ||       | j                  j                  |       | j                  |       y )N)safe_serialization)rA   save_pretrainedrG   save_config)rM   output_pathr   r   s       r   savezTransformer.saveI  s:    ''HZ'[&&{3%r!   c                N    | j                  |||||||||	|
|      } | dd|i|S )N)rN   r   rU   cache_folderrV   rW   trust_remote_codemodel_kwargstokenizer_kwargsconfig_kwargsr;   rN   r$   )_load_init_kwargs)clsrN   r   rU   r   rV   rW   r   r   r   r   r;   r   init_kwargss                 r   loadzTransformer.loadN  sP    $ ++1%-/%-' , 
 H&8HKHHr!   c                   | j                  ||||||      }|||||d}d|vri |d<   d|vri |d<   d|vri |d<   |d   j                  |       |d   j                  |       |d   j                  |       |r|d   j                  |       |	r|d   j                  |	       |
r|d   j                  |
       i |||dS )N)rN   r   rU   r   rV   rW   )r   rU   rV   rW   r   rO   rP   rQ   )r6   r;   )load_configr   )r   rN   r   rU   r   rV   rW   r   r   r   r   r;   r   r7   
hub_kwargss                  r   r   zTransformer._load_init_kwargso  s   $ 1%- ! 
 #  0!2

 v%#%F< 6)')F#$&$&F=! 	|##J/ ''
3}$$Z0 < ''5#$++,<==!((7H&H|HHr!   c           
        |r|gng d}|D ]  }t         
|   |||||||      }	|	s n d	v rd|	d   v r|	d   j                  d       d|	v rd|	d   v r|	d   j                  d       d|	v rd|	d   v r|	d   j                  d       |	S )N)r(   zsentence_roberta_config.jsonzsentence_distilbert_config.jsonzsentence_camembert_config.jsonzsentence_albert_config.jsonz sentence_xlm-roberta_config.jsonzsentence_xlnet_config.json)rN   r   config_filenamerU   r   rV   rW   rO   r   rP   rQ   )r9   r   rb   )r   rN   r   r   rU   r   rV   rW   config_filenamesr7   rJ   s             r   r   zTransformer.load_config  s       	  0 	OW(#5# /)!!1 ) F 	 6!&9VL=Q&Q< $$%89v%*=HXAY*Y#$(()<=F"':f]>S'S=!%%&9:r!   )NNNNNFNrX   )rN   r)   r+   z
int | NonerO   dict[str, Any] | NonerP   r   rQ   r   r6   
str | Noner,   r.   rR   r   r;   r)   r"   r#   )
rN   r)   r6   r   r;   r)   rQ   dict[str, Any]r"   z*tuple[PeftConfig | PretrainedConfig, bool])rN   r)   r7   zPeftConfig | PretrainedConfigr6   r)   r;   r)   rS   r.   r"   r#   )rN   r)   r7   r   r6   r)   r"   r#   )r"   r)   )r   dict[str, torch.Tensor]r"   r   )r"   int)T)r   z.list[str] | list[dict] | list[tuple[str, str]]r   z
str | boolr"   r   )r   r)   r   r.   r"   r#   )
 NNNFFNNNrX   )rN   r)   r   r)   rU   bool | str | Noner   r   rV   r   rW   r.   r   r.   r   r   r   r   r   r   r;   r)   r"   r
   )rN   r)   r   r)   rU   r   r   r   rV   r   rW   r.   r   r.   r   r   r   r   r   r   r;   r)   r"   r   )r   NNNNF)rN   r)   r   r)   r   r   rU   r   r   r   rV   r   rW   r.   r"   r   )rK   
__module____qualname____doc__r*   __annotations__r-   r/   r:   r<   r=   rd   re   ru   rB   r   r   r   classmethodr   r   r   __classcell__)rJ   s   @r   r'   r'   %   sc   . 8c7.@K@L$
 &*,004-1 $#-17W7W #7W *	7W
 .7W +7W 7W 7W !+7W 7W 
7Wr(i"%(i2<(iGJ(iYg(i	3(iT2v2v .2v 	2v
 2v 2v 
2vh

o/b2 \`&C&NX&	 &P&
 
 #'#'#!&"'.226/3II 	I
 !I !I I I  I ,I 0I -I I  
!I I@ 
 #'#'#!&"'.226/37I7I 	7I
 !7I !7I 7I 7I  7I ,7I 07I -7I 7I  
!7I 7Ir  &*#'#'#!&++ + $	+
 !+ !+ + + 
+ +r!   r'   )r   r   r   r)   r"   zCallable[..., None])&
__future__r   r?   loggingr   pathlibr   typingr   r   r   sentence_transformers.backendr   r	   r
   ImportErrortyping_extensionsrX   rk   r   r   r   r   r   r   transformers.utils.import_utilsr   transformers.utils.peft_utilsr   (sentence_transformers.models.InputModuler   	getLoggerrK   loggerr\   r   r%   r'   r$   r!   r   <module>r      s~    "   	  / / N'  d d = B @			8	$&(p+ p1  '&'s   B	 	BB
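

# ---------------------------------------------------------------------------
# Editor's demo (not part of the original module): a minimal sketch of using
# this module directly. The checkpoint name is only an example; running this
# downloads the model from the Hugging Face Hub.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    module = Transformer("sentence-transformers/all-MiniLM-L6-v2", max_seq_length=128)
    features = module.tokenize(["This is a test sentence.", "And a second one."])
    with torch.no_grad():
        output = module(features)
    print(module)
    # token_embeddings has shape [batch_size, seq_len, hidden_size]
    print(output["token_embeddings"].shape, module.get_word_embedding_dimension())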