
    hg$                         d dl mZ d dlmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ  G d d	ee      Z G d
 de      Z G d de
      Zy)    )Enum)AnyIteratorListOptional)CallbackManagerForLLMRun)LLM)GenerationChunk)	BaseModel
ConfigDict)enforce_stop_tokensc                       e Zd ZdZdZdZy)Devicez,The device to use for inference, cuda or cpucudacpuN)__name__
__module____qualname____doc__r   r        d/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_community/llms/titan_takeoff.pyr   r      s    6D
Cr   r   c                       e Zd ZU dZ ed      Zeed<   	 ej                  Z
eed<   	 dZeed<   	 dZee   ed	<   	 d
Zeed<   	 dZeed<   y)ReaderConfigzAConfiguration for the reader to be deployed in Titan Takeoff API.r   )protected_namespaces
model_namedeviceprimaryconsumer_groupNtensor_paralleli   max_seq_length   max_batch_size)r   r   r   r   r   model_configstr__annotations__r   r   r   r   r    r   intr!   r#   r   r   r   r   r      se    KL O&[[FF 6#NC#5%)OXc])INCKNC@r   r   c                        e Zd ZU dZdZeed<   	 dZeed<   	 dZ	eed<   	 dZ
eed	<   	 d
Zeed<   	 ddddg fdededed	edee   f
 fdZedefd       Z	 	 ddedeee      dee   dedef
dZ	 	 ddedeee      dee   dedee   f
dZ xZS )TitanTakeoffa  Titan Takeoff API LLMs.

    Titan Takeoff is a wrapper to interface with Takeoff Inference API for
    generative text to text language models.

    You can use this wrapper to send requests to a generative language model
    and to deploy readers with Takeoff.

    Examples:
        This is an example how to deploy a generative language model and send
        requests.

        .. code-block:: python
            # Import the TitanTakeoff class from community package
            import time
            from langchain_community.llms import TitanTakeoff

            # Specify the embedding reader you'd like to deploy
            reader_1 = {
                "model_name": "TheBloke/Llama-2-7b-Chat-AWQ",
                "device": "cuda",
                "tensor_parallel": 1,
                "consumer_group": "llama"
            }

            # For every reader you pass into models arg Takeoff will spin
            # up a reader according to the specs you provide. If you don't
            # specify the arg no models are spun up and it assumes you have
            # already done this separately.
            llm = TitanTakeoff(models=[reader_1])

            # Wait for the reader to be deployed, time needed depends on the
            # model size and your internet speed
            time.sleep(60)

            # Returns the query, ie a List[float], sent to `llama` consumer group
            # where we just spun up the Llama 7B model
            print(embed.invoke(
                "Where can I see football?", consumer_group="llama"
            ))

            # You can also send generation parameters to the model, any of the
            # following can be passed in as kwargs:
            # https://docs.titanml.co/docs/next/apis/Takeoff%20inference_REST_API/generate#request
            # for instance:
            print(embed.invoke(
                "Where can I see football?", consumer_group="llama", max_new_tokens=100
            ))
    zhttp://localhostbase_urli  porti  	mgmt_portF	streamingNclientmodelsc                 
   t         |   ||||       	 ddlm}  || j
                  | j                  | j                        | _        |D ]  }| j                  j                  |        y# t        $ r t	        d      w xY w)a  Initialize the Titan Takeoff language wrapper.

        Args:
            base_url (str, optional): The base URL where the Takeoff
                Inference Server is listening. Defaults to `http://localhost`.
            port (int, optional): What port is Takeoff Inference API
                listening on. Defaults to 3000.
            mgmt_port (int, optional): What port is Takeoff Management API
                listening on. Defaults to 3001.
            streaming (bool, optional): Whether you want to by default use the
                generate_stream endpoint over generate to stream responses.
                Defaults to False. In reality, this is not significantly different
                as the streamed response is buffered and returned similar to the
                non-streamed response, but the run manager is applied per token
                generated.
            models (List[ReaderConfig], optional): Any readers you'd like to
                spin up on. Defaults to [].

        Raises:
            ImportError: If you haven't installed takeoff-client, you will
            get an ImportError. To remedy run `pip install 'takeoff-client==0.4.0'`
        )r*   r+   r,   r-   r   )TakeoffClientzjtakeoff-client is required for TitanTakeoff. Please install it with `pip install 'takeoff-client>=0.4.0'`.)r+   r,   N)
super__init__takeoff_clientr1   ImportErrorr*   r+   r,   r.   create_reader)	selfr*   r+   r,   r-   r/   r1   model	__class__s	           r   r3   zTitanTakeoff.__init__o   s    < 	DI 	 	
	4 $MM		T^^
  	-EKK%%e,	-  	P 	s   A- -Breturnc                      y)zReturn type of llm.titan_takeoffr   )r7   s    r   	_llm_typezTitanTakeoff._llm_type   s     r   promptstoprun_managerkwargsc                     | j                   r,d}| j                  |||      D ]  }||j                  z  } |S  | j                  j                  |fi |}|d   }|t        ||      }|S )a  Call out to Titan Takeoff (Pro) generate endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager to use when streaming.

        Returns:
            The string generated by the model.

        Example:
            .. code-block:: python

                model = TitanTakeoff()

                prompt = "What is the capital of the United Kingdom?"

                # Use of model(prompt), ie `__call__` was deprecated in LangChain 0.1.7,
                # use model.invoke(prompt) instead.
                response = model.invoke(prompt)

         )r>   r?   r@   text)r-   _streamrD   r.   generater   )	r7   r>   r?   r@   rA   text_outputchunkresponserD   s	            r   _callzTitanTakeoff._call   s    : >>K' &  *
 uzz)* '4;;''9&9&tT2Dr   c              +     K    | j                   j                  |fi |}d}|D ]  }||j                  z  }d|v s|j                  d      rd}t	        |j                  dd            dk(  r&|j                  dd      \  }}	|j                  d      }|sqt        |      }
d}|r|j                  |
j                         |
  |r?t        |j                  dd            }
|r|j                  |
j                         |
 y	y	w)
a  Call out to Titan Takeoff (Pro) stream endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager to use when streaming.

        Yields:
            A dictionary like object containing a string token.

        Example:
            .. code-block:: python

                model = TitanTakeoff()

                prompt = "What is the capital of the United Kingdom?"
                response = model.stream(prompt)

                # OR

                model = TitanTakeoff(streaming=True)

                response = model.invoke(prompt)

        rC   zdata:      
)rD   )tokenz</s>N)r.   generate_streamdata
startswithlensplitrstripr
   on_llm_new_tokenrD   replace)r7   r>   r?   r@   rA   rI   bufferrD   content_rH   s              r   rE   zTitanTakeoff._stream   s    @ /4;;..v@@ 	 DdiiF& $$W-Fv||GQ/0A5!'gq!9JGQ$^^D1F+8EF"#445::4FK	 $ #)CDE,,5::,>K	 s   8D	AD	A4D	)NN)r   r   r   r   r*   r%   r&   r+   r'   r,   r-   boolr.   r   r   r   r3   propertyr=   r   r   rJ   r   r
   rE   __classcell__)r9   s   @r   r)   r)   -   sQ   0d 'Hc&WD#EIsPIt8FCE +%',-,- ,- 	,-
 ,- \",-\ 3   %):>	,, tCy!, 67	,
 , 
,b %):>	88 tCy!8 67	8
 8 
/	"8r   r)   N)enumr   typingr   r   r   r   langchain_core.callbacksr   #langchain_core.language_models.llmsr	   langchain_core.outputsr
   pydanticr   r   langchain_community.llms.utilsr   r%   r   r   r)   r   r   r   <module>re      sG     0 0 = 3 2 * >S$ A9 A4[3 [r   