
    h"                     v    d dl Z d dlmZmZmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ dZdZd	Z G d
 de	      Zy)    N)AnyListMappingOptional)CallbackManagerForLLMRun)LLM)
ConfigDict)enforce_stop_tokenszgoogle/flan-t5-largetext2text-generation)r   text-generationsummarizationc                   P   e Zd ZU dZdZeed<   eZe	ed<   	 dZ
ee   ed<   	 dZee   ed<   	  ed      Ze	 	 	 	 	 	 	 dde	d	e	d
ee   dee	   dee   dee   dee   dee   dee   dedefd       Zedee	ef   fd       Zede	fd       Z	 	 dde	deee	      dee   dede	f
dZy)WeightOnlyQuantPipelinea  Weight only quantized model.

    To use, you should have the `intel-extension-for-transformers` package and
        `transformers` package installed.
    intel-extension-for-transformers:
        https://github.com/intel/intel-extension-for-transformers

    Example using from_model_id:
        .. code-block:: python

            from langchain_community.llms import WeightOnlyQuantPipeline
            from intel_extension_for_transformers.transformers import (
                WeightOnlyQuantConfig
            )
            config = WeightOnlyQuantConfig()
            hf = WeightOnlyQuantPipeline.from_model_id(
                model_id="google/flan-t5-large",
                task="text2text-generation"
                pipeline_kwargs={"max_new_tokens": 10},
                quantization_config=config,
            )
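
    Example requesting quantization without an explicit config object,
    via the `load_in_4bit` flag (a sketch; 4-bit support depends on the
    installed intel-extension-for-transformers version):
        .. code-block:: python

            from langchain_community.llms import WeightOnlyQuantPipeline
            hf = WeightOnlyQuantPipeline.from_model_id(
                model_id="google/flan-t5-large",
                task="text2text-generation",
                load_in_4bit=True,
            )
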
    Example passing a pipeline in directly:
        .. code-block:: python

            from langchain_community.llms import WeightOnlyQuantPipeline
            from intel_extension_for_transformers.transformers import (
                AutoModelForSeq2SeqLM
            )
            from intel_extension_for_transformers.transformers import (
                WeightOnlyQuantConfig
            )
            from transformers import AutoTokenizer, pipeline

            model_id = "google/flan-t5-large"
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            config = WeightOnlyQuantConfig()
            model = AutoModelForSeq2SeqLM.from_pretrained(
                model_id,
                quantization_config=config,
            )
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=10,
            )
            hf = WeightOnlyQuantPipeline(pipeline=pipe)
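
    The wrapped pipeline's task must be one of "text2text-generation",
    "text-generation", or "summarization"; any other task raises a
    ValueError.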
    Npipelinemodel_idmodel_kwargspipeline_kwargsallow)extrataskdevice
device_mapload_in_4bitload_in_8bitquantization_configkwargsreturnc
           	         | t        |t              r|dkD  rt        d      t        j                  j                  d      t        d      	 ddlm}m} ddl	m
} dd	lm} dd
lm} t        |t              r&|dk\  r! |       st        d      dt        |      z   }nt        |t              r|dk  rd}||d}|xs i } |j                   |fi |}	 |dk(  r |j                   |f|||	d|d|}n4|dv r |j                   |f|||	d|d|}nt        d| dt"         d      d|v r)|j%                         D ci c]  \  }}|dk7  s|| }}}|xs i } |d|||||d|}|j&                  t"        vr t        d|j&                   dt"         d       | d||||d|
S # t        $ r t        d      w xY w# t        $ r}t        d| d      |d}~ww xY wc c}}w )z5Construct the pipeline object from model_id and task.Nz7`Device` and `device_map` cannot be set simultaneously!torchz;Weight only quantization pipeline only support PyTorch now!r   )AutoModelForCausalLMAutoModelForSeq2SeqLM)is_ipex_available)AutoTokenizer)r   zCould not import transformers python package. Please install it with `pip install transformers` and `pip install intel-extension-for-transformers`.z)Don't find out Intel GPU on this machine!zxpu:cpur   F)r   r   r   use_llm_runtimer   )r   r   Got invalid task , currently only  are supportedzCould not load the z# model due to missing dependencies.trust_remote_code)r   model	tokenizerr   r   )r   r   r   r    )
isinstanceint
ValueError	importlibutil	find_spec-intel_extension_for_transformers.transformersr!   r"   ,intel_extension_for_transformers.utils.utilsr#   transformersr$   r   ImportErrorstrfrom_pretrainedVALID_TASKSitemsr   )clsr   r   r   r   r   r   r   r   r   r   r!   r"   r#   r$   hf_pipeline_model_kwargsr,   r+   ekv_pipeline_kwargsr   s                           o/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/llms/weight_only_quantization.pyfrom_model_idz%WeightOnlyQuantPipeline.from_model_idO   s    !z&#'>6B;VWW>>##G,4M 	 W2< fc"v{$& !LMM#f+-J$!F>!"
$*1M11(LmL		((<,<<!-!-(;$)) $ BB=-==!-!-(;$)) $ !'v .&&1].B  -/!.!4!4!6A!?R:R1M  +0b 
&
 
 ==+#HMM? 3""-n>   
&,	

 
 	
K  	F 	\  	%dV+NO	s1   F( AG  ?G!G!(F= 	G	GGc                 J    | j                   | j                  | j                  dS )zGet the identifying parameters.r   r   r   rF   selfs    rC   _identifying_paramsz+WeightOnlyQuantPipeline._identifying_params   s'      --#33
 	
    c                      y)zReturn type of llm.weight_only_quantizationr-   rG   s    rC   	_llm_typez!WeightOnlyQuantPipeline._llm_type   s     *rJ   promptstoprun_managerc                 |   | j                  |      }| j                   j                  dk(  r|d   d   t        |      d }nn| j                   j                  dk(  r	|d   d   }nL| j                   j                  dk(  r	|d   d   }n*t        d| j                   j                   d	t         d
      |rt        ||      }|S )ab  Call the HuggingFace model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.

        Returns:
            The generated text.

        Example:
            .. code-block:: python

                from langchain_community.llms import WeightOnlyQuantPipeline
                llm = WeightOnlyQuantPipeline.from_model_id(
                    model_id="google/flan-t5-large",
                    task="text2text-generation",
                )
                llm.invoke("This is a prompt.")
        """
        response = self.pipeline(prompt)
        if self.pipeline.task == "text-generation":
            # Text-generation pipelines echo the prompt, so strip it from
            # the front of the generated text.
            text = response[0]["generated_text"][len(prompt) :]
        elif self.pipeline.task == "text2text-generation":
            text = response[0]["generated_text"]
        elif self.pipeline.task == "summarization":
            text = response[0]["summary_text"]
        else:
            raise ValueError(
                f"Got invalid task {self.pipeline.task}, "
                f"currently only {VALID_TASKS} are supported"
            )
        if stop:
            # Cut the text at the first occurrence of any stop sequence.
            text = enforce_stop_tokens(text, stop)
        return text
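

# ---------------------------------------------------------------------------
# Usage sketch (not part of the library API): a minimal end-to-end smoke
# test, assuming `torch`, `transformers`, and
# `intel-extension-for-transformers` are installed and that the default
# WeightOnlyQuantConfig() settings are acceptable on this machine.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from intel_extension_for_transformers.transformers import (
        WeightOnlyQuantConfig,
    )

    llm = WeightOnlyQuantPipeline.from_model_id(
        model_id="google/flan-t5-large",
        task="text2text-generation",
        quantization_config=WeightOnlyQuantConfig(),
        pipeline_kwargs={"max_new_tokens": 10},
    )
    # CPU inference through the weight-only quantized pipeline.
    print(llm.invoke("Translate English to German: How old are you?"))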