
    h k                       d dl mZ d dlZd dlZd dlmZmZmZ d dlm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZmZmZ d dlmZmZmZmZmZ d d	lmZ  ej@                  e!      Z"	 	 	 	 	 	 	 	 	 	 	 	 dd
Z# G d dee      Z$y)    )annotationsN)IterableMappingSequence)AnyLiteralOptionalUnioncast)
Embeddings)run_in_executor)from_envget_pydantic_field_namessecret_from_env)	BaseModel
ConfigDictField	SecretStrmodel_validator)Selfc                
   t        |       D cg c]  }g  }}t        |       D cg c]  }g  }}t        t        |            D ]S  }|rt        ||         dk(  r|||      j                  ||          |||      j                  t        ||                U g }	t        |       D ]  }||   }
t        |
      dk(  r|	j                  d        (t        |
      dk(  r|	j                  |
d          Kt        ||         }t	        |
 D cg c]$  }t        d t	        |||         D              |z  & }}t        d |D              dz  }|	j                  |D cg c]  }||z  	 c}        |	S c c}w c c}w c c}w c c}w )N   r   c              3  ,   K   | ]  \  }}||z    y wN ).0valweights      ^/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain_openai/embeddings/base.py	<genexpr>z6_process_batched_chunked_embeddings.<locals>.<genexpr>@   s       #V &Ls   c              3  &   K   | ]	  }|d z    yw)   Nr   )r   r   s     r   r    z6_process_batched_chunked_embeddings.<locals>.<genexpr>J   s     6sCF6   g      ?)rangelenappendsumzip)	num_textstokensbatched_embeddingsindices
skip_empty_resultsnum_tokens_in_batchi
embeddings_resulttotal_weight	embeddingaverage	magnituder   s                   r   #_process_batched_chunked_embeddingsr8      s    5:)4D'Eq'EG'E
 9>i8H+I1B+I+I3w<  ?#0349
""#5a#89GAJ'..s6!9~>	? /1J9  D%,QZw<1 d#\Qgaj) 2156L "%g   '*96I!6L'M  	G  6g66#=I'B3sYBCA DD c (F
 ,J< Cs   	E1	E6)E;F 
c                     e Zd ZU dZ edd      Zded<    edd      Zded<   dZd	ed
<   dZ	ded<   	 eZ
ded<    e edd      d      Zded<   	  ed edd            Zded<   	  e edd            Zded<    e edd            Zded<   dZded<   	  ed  ed!d            Zd"ed#<   	  ed$ ed%d&gd            Zded'<   	 dZd(ed)<   dZd*ed+<   d,Zded-<   	 d.Zded/<   	  edd01      Zd2ed3<   	 dZded4<   dZd5ed6<   	 dZded7<   	 d8Zd5ed9<   	  ee      Zd:ed;<   	 d8Zd5ed<<   	 dZ d=ed><   dZ!d?ed@<   dAZ"dedB<   	 dCZ#dedD<   	 dZ$dEedF<   	 dZ%dEedG<   	 dZ&d5edH<   	  e'dIddJK      Z( e)dLM      e*dZdN              Z+ e)dOM      d[dP       Z,e-d\dQ       Z.	 	 	 	 	 	 d]dRZ/ddS	 	 	 	 	 	 	 	 	 d^dTZ0ddS	 	 	 	 	 	 	 	 	 d^dUZ1	 d_	 	 	 	 	 	 	 d`dVZ2	 d_	 	 	 	 	 	 	 d`dWZ3dadXZ4dadYZ5y)bOpenAIEmbeddingsu	  OpenAI embedding model integration.

    Setup:
        Install ``langchain_openai`` and set environment variable ``OPENAI_API_KEY``.

        .. code-block:: bash

            pip install -U langchain_openai
            export OPENAI_API_KEY="your-api-key"

    Key init args — embedding params:
        model: str
            Name of OpenAI model to use.
        dimensions: Optional[int] = None
            The number of dimensions the resulting output embeddings should have.
            Only supported in ``'text-embedding-3'`` and later models.

    Key init args — client params:
        api_key: Optional[SecretStr] = None
            OpenAI API key.
        organization: Optional[str] = None
            OpenAI organization ID. If not passed in will be read
            from env var ``OPENAI_ORG_ID``.
        max_retries: int = 2
            Maximum number of retries to make when generating.
        request_timeout: Optional[Union[float, Tuple[float, float], Any]] = None
            Timeout for requests to OpenAI completion API

    See full list of supported init args and their descriptions in the params section.

    Instantiate:
        .. code-block:: python

            from langchain_openai import OpenAIEmbeddings

            embed = OpenAIEmbeddings(
                model="text-embedding-3-large"
                # With the `text-embedding-3` class
                # of models, you can specify the size
                # of the embeddings you want returned.
                # dimensions=1024
            )

    Embed single text:
        .. code-block:: python

            input_text = "The meaning of life is 42"
            vector = embeddings.embed_query("hello")
            print(vector[:3])

        .. code-block:: python

            [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915]

    Embed multiple texts:
        .. code-block:: python

            vectors = embeddings.embed_documents(["hello", "goodbye"])
            # Showing only the first 3 coordinates
            print(len(vectors))
            print(vectors[0][:3])

        .. code-block:: python

            2
            [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915]

    Async:
        .. code-block:: python

            await embed.aembed_query(input_text)
            print(vector[:3])

            # multiple:
            # await embed.aembed_documents(input_texts)

        .. code-block:: python

            [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188]

    NT)defaultexcluder   clientasync_clientztext-embedding-ada-002strmodelOptional[int]
dimensionszOptional[str]
deploymentOPENAI_API_VERSION)r;   api_version)default_factoryaliasopenai_api_versionbase_urlOPENAI_API_BASE)rG   rF   openai_api_baseOPENAI_API_TYPE)rF   openai_api_typeOPENAI_PROXYopenai_proxyi  intembedding_ctx_lengthapi_keyOPENAI_API_KEYzOptional[SecretStr]openai_api_keyorganizationOPENAI_ORG_IDOPENAI_ORGANIZATIONopenai_organizationz%Union[Literal['all'], set[str], None]allowed_specialz4Union[Literal['all'], set[str], Sequence[str], None]disallowed_speciali  
chunk_sizer"   max_retriestimeout)r;   rG   z0Optional[Union[float, tuple[float, float], Any]]request_timeoutheadersbooltiktoken_enabledtiktoken_model_nameFshow_progress_bardict[str, Any]model_kwargsr-   zUnion[Mapping[str, str], None]default_headersz!Union[Mapping[str, object], None]default_query   retry_min_seconds   retry_max_secondszUnion[Any, None]http_clienthttp_async_clientcheck_embedding_ctx_lengthforbidr   )extrapopulate_by_nameprotected_namespacesbefore)modec           
     `   t        |       }|j                  di       }t        |      D ]M  }||v rt        d| d      ||vst	        j
                  d| d| d| d       |j                  |      ||<   O |j                  |j                               }|rt        d| d	      ||d<   |S )
z>Build extra kwargs from additional params that were passed in.re   zFound z supplied twice.z	WARNING! z/ is not default parameter.
                    zJ was transferred to model_kwargs.
                    Please confirm that z is what you intended.zParameters za should be specified explicitly. Instead they were passed in as part of `model_kwargs` parameter.)	r   getlist
ValueErrorwarningswarnpopintersectionkeys)clsvaluesall_required_field_namesrp   
field_nameinvalid_model_kwargss         r   build_extrazOpenAIEmbeddings.build_extra  s     $<C#@ 

>2.v, 		;JU" 6*5E!FGG!99!* .L !))34JN
 %+JJz$:j!		;  8DDUZZ\R23 4S T 
 "'~    afterc                4   | j                   dv rt        d      | j                  r| j                  j                         nd| j                  | j
                  | j                  | j                  | j                  | j                  d}| j                  rP| j                  s| j                  r8| j                  }| j                  }| j                  }t        d|d|d|      | j                  sr| j                  r2| j                  s&	 ddl}|j!                  | j                  
      | _        d| j                  i}t#        j$                  di ||j&                  | _        | j(                  sr| j                  r2| j                  s&	 ddl}|j+                  | j                  
      | _        d| j                  i}t#        j,                  di ||j&                  | _        | S # t        $ r}t        d	      |d}~ww xY w# t        $ r}t        d	      |d}~ww xY w)z?Validate that api key and python package exists in environment.)azureazure_adazureadzEIf you are using Azure, please use the `AzureOpenAIEmbeddings` class.N)rR   rU   rI   r]   r\   rf   rg   zwCannot specify 'openai_proxy' if one of 'http_client'/'http_async_client' is already specified. Received:
openai_proxy=z
http_client=z
http_async_client=r   zRCould not import httpx python package. Please install it with `pip install httpx`.)proxyrl   r   )rM   rx   rT   get_secret_valuerX   rK   r^   r\   rf   rg   rO   rl   rm   r=   httpxImportErrorClientopenaiOpenAIr2   r>   AsyncClientAsyncOpenAI)	selfclient_paramsrO   rl   rm   r   esync_specificasync_specifics	            r   validate_environmentz%OpenAIEmbeddings.validate_environment!  s*    #CCW 
 ;?:M:M##446SW 44,,++++#33!//

 $"2"2d6L6L,,L**K $ 6 6!/K>1F4E3GI 
 {{  )9)9  $)<<d6G6G<#H *D,<,<=M --I-I=ITTDK    )?)?  */):):ARAR):)S&+T-C-CDN & 2 2 !! ! j  / # %F  # %F s0   G  G=  	G:)G55G:=	HHHc                p    d| j                   i| j                  }| j                  | j                  |d<   |S )Nr@   rB   )r@   re   rB   )r   paramss     r   _invocation_paramsz#OpenAIEmbeddings._invocation_paramsZ  s8    At/@/@A??&#'??F< r   c                2   g }g }| j                   xs | j                  }| j                  s	 ddlm} |j                  |      }t        |      D ]  \  }}	|j                  |	d      }
t        dt        |
      | j                        D ]G  }|
||| j                  z    }|j                  |      }|j                  |       |j                  |       I  n	 t        j                   |      }| j&                  | j(                  dj+                         D ci c]
  \  }}||| }}}t        |      D ]  \  }}	| j                  j-                  d	      r|	j/                  d
d      }	|r |j                  |	fi |}n|j1                  |	      }t        dt        |      | j                        D ]4  }|j                  |||| j                  z           |j                  |       6  | j2                  r$	 ddlm}  |t        dt        |      |            }nt        dt        |      |      }|||fS # t
        $ r t        d      w xY w# t"        $ r t        j$                  d      }Y ~w xY wc c}}w # t
        $ r t        dt        |      |      }Y iw xY w)a  
        Take the input `texts` and `chunk_size` and return 3 iterables as a tuple:

        We have `batches`, where batches are sets of individual texts
        we want responses from the openai api. The length of a single batch is
        `chunk_size` texts.

        Each individual text is also split into multiple texts based on the
        `embedding_ctx_length` parameter (based on number of tokens).

        This function returns a 3-tuple of the following:

        _iter: An iterable of the starting index in `tokens` for each *batch*
        tokens: A list of tokenized texts, where each text has already been split
            into sub-texts based on the `embedding_ctx_length` parameter. In the
            case of tiktoken, this is a list of token arrays. In the case of
            HuggingFace transformers, this is a list of strings.
        indices: An iterable of the same length as `tokens` that maps each token-array
            to the index of the original text in `texts`.
        r   )AutoTokenizerzCould not import transformers python package. This is needed for OpenAIEmbeddings to work without `tiktoken`. Please install it with `pip install transformers`. )pretrained_model_name_or_pathF)add_special_tokenscl100k_base)rY   rZ   001
 )tqdm)rb   r@   ra   transformersr   r   rx   from_pretrained	enumerateencoder$   r%   rQ   decoder&   tiktokenencoding_for_modelKeyErrorget_encodingrY   rZ   itemsendswithreplaceencode_ordinaryrc   	tqdm.autor   )r   textsr[   r*   r,   
model_namer   	tokenizerr1   text	tokenizedjtoken_chunk
chunk_textencodingkvencoder_kwargstokenr   _iters                        r   	_tokenizezOpenAIEmbeddings._tokenizea  s   . /1--;
 $$6 &55.8 6 I %U+ &4'0'7'7QV'7'W	 q#i.$2K2KL &A-6A 9 99.K
 '0&6&6{&CJMM*-NN1%&&@#66zB (,';';*.*A*A %'.Aq = 1.N . %U+ &4::&&u-  <<c2D!+HOODCNCE$44T:E q#e*d.G.GH &AMM%A0I0I,I"JKNN1%&&" !!:*"&uQFZ'H"I !S[*5Efg%%   V 6  @#00?@.<  :aVj9:s5   H1 I	 I.3"I4 1I	I+*I+4JJ)r[   c          	         |xs  j                   }i  j                  | j                  ||      \  }}}g }	|D ]a  }
  j                  j                  dd||
|
|z    i}t        |t              s|j                         }|	j                  d |d   D               c t        t        |      ||	| j                        }dd fd}|D cg c]  }||n |        c}S c c}w )al  
        Generate length-safe embeddings for a list of texts.

        This method handles tokenization and embedding generation, respecting the
        set embedding context length and chunk size. It supports both tiktoken
        and HuggingFace tokenizer based on the tiktoken_enabled flag.

        Args:
            texts (List[str]): A list of texts to embed.
            engine (str): The engine or model to use for embeddings.
            chunk_size (Optional[int]): The size of chunks for processing embeddings.

        Returns:
            List[List[float]]: A list of embeddings for each input text.
        inputc              3  &   K   | ]	  }|d      ywr5   Nr   r   rs     r   r    z<OpenAIEmbeddings._get_len_safe_embeddings.<locals>.<genexpr>       %Oan%Or#   dataNc                     I j                   j                  dddi} t        | t              s| j	                         } | d   d   d   S Nr    r   r   r5   r   )r=   create
isinstancedict
model_dumpaverage_embedded_cached_empty_embeddingclient_kwargsr   s    r   empty_embeddingzBOpenAIEmbeddings._get_len_safe_embeddings.<locals>.empty_embedding  s]    &.#54;;#5#5#PB#P-#P !"2D9'7'B'B'D$*:6*B1*Ek*R'**r   r   returnlist[float])r[   r   r   r=   r   r   r   r   extendr8   r%   r-   r   r   enginer[   kwargs_chunk_sizer   r*   r,   r+   r1   responser2   r   r   r   r   s   `              @@r   _get_len_safe_embeddingsz)OpenAIEmbeddings._get_len_safe_embeddings  s   . !3DOO=422=f=!%{!Cvw02 	PA)t{{)) Q[15BH h-#..0%%%Ohv>N%OO	P 9J 2GT__

 :>	+ DNNaQ](99NNNs   C'c          	     P   K   |xs  j                   }i  j                  |t        d j                  ||       d{   \  }}}g }	t	        dt        |      |      D ]i  }
  j                  j                  dd||
|
|z    i d{   }t        |t              s|j                         }|	j                  d |d   D               k t        t        |      ||	| j                        }dd fd}|D cg c]  }||n |        d{    c}S 7 7 7 c c}w w)	a  
        Asynchronously generate length-safe embeddings for a list of texts.

        This method handles tokenization and asynchronous embedding generation,
        respecting the set embedding context length and chunk size. It supports both
        `tiktoken` and HuggingFace `tokenizer` based on the tiktoken_enabled flag.

        Args:
            texts (List[str]): A list of texts to embed.
            engine (str): The engine or model to use for embeddings.
            chunk_size (Optional[int]): The size of chunks for processing embeddings.

        Returns:
            List[List[float]]: A list of embeddings for each input text.
        Nr   r   c              3  &   K   | ]	  }|d      ywr   r   r   s     r   r    z=OpenAIEmbeddings._aget_len_safe_embeddings.<locals>.<genexpr>  r   r#   r   c                    K   Q j                   j                  dddi d {   } t        | t              s| j	                         } | d   d   d   S 7 1wr   )r>   r   r   r   r   r   s    r   r   zCOpenAIEmbeddings._aget_len_safe_embeddings.<locals>.empty_embedding$  sw     &.)A):):)A)A ** -* $  ""2D9'7'B'B'D$*:6*B1*Ek*R'**$s   $AA2Ar   r   )r[   r   r   r   r$   r%   r>   r   r   r   r   r   r8   r-   r   s   `              @@r   _aget_len_safe_embeddingsz*OpenAIEmbeddings._aget_len_safe_embeddings  sF    0 !3DOO=422=f='6$..%(
 "
vw 13q#f+{3 	PA5T..55 Q[15B H h-#..0%%%Ohv>N%OO	P 9J 2GT__

 :>		+ JTTAQ]o.?(??TT;"

0 )@TsI   <D&DA
D&DA1D&>D!D
D!D&D&D!!D&c           	        |xs | j                   }i | j                  |}| j                  s~g }t        dt	        |      |      D ]a  } | j
                  j                  dd||||z    i|}t        |t              s|j                         }|j                  d |d   D               c |S t        t        | j                        }	 | j                  |f|	|d|S )a  Call out to OpenAI's embedding endpoint for embedding search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings. If None, will use the chunk size
                specified by the class.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            List of embeddings, one for each text.
        r   r   c              3  &   K   | ]	  }|d      ywr   r   r   s     r   r    z3OpenAIEmbeddings.embed_documents.<locals>.<genexpr>I       !KQ!K.!Kr#   r   r   r[   r   )r[   r   rn   r$   r%   r=   r   r   r   r   r   r   r?   rC   r   
r   r   r[   r   chunk_size_r   r2   r1   r   r   s
             r   embed_documentsz OpenAIEmbeddings.embed_documents1  s     !3DOO=422=f=..,.J1c%j+6 L-4;;-- AO48E "(D1'224H!!!K(6:J!KKL  c4??+,t,,
 Z
;A
 	
r   c           	       K   |xs | j                   }i | j                  |}| j                  sg }t        dt	        |      |      D ]i  } | j
                  j                  dd||||z    i| d{   }t        |t              s|j                         }|j                  d |d   D               k |S t        t        | j                        }	 | j                  |f|	|d| d{   S 7 {7 w)a  Call out to OpenAI's embedding endpoint async for embedding search docs.

        Args:
            texts: The list of texts to embed.
            chunk_size: The chunk size of embeddings. If None, will use the chunk size
                specified by the class.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            List of embeddings, one for each text.
        r   r   Nc              3  &   K   | ]	  }|d      ywr   r   r   s     r   r    z4OpenAIEmbeddings.aembed_documents.<locals>.<genexpr>k  r   r#   r   r   r   )r[   r   rn   r$   r%   r>   r   r   r   r   r   r   r?   rC   r   r   s
             r   aembed_documentsz!OpenAIEmbeddings.aembed_documentsS  s     !3DOO=422=f=..,.J1c%j+6 L!9!2!2!9!9 "AO4"8E"  "(D1'224H!!!K(6:J!KKL  c4??+3T33
 Z
;A
 
 	

s%   A/C11C-2A6C1(C/)C1/C1c                0     | j                   |gfi |d   S )a  Call out to OpenAI's embedding endpoint for embedding query text.

        Args:
            text: The text to embed.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            Embedding for the text.
        r   )r   )r   r   r   s      r   embed_queryzOpenAIEmbeddings.embed_queryu  s#     $t##TF5f5a88r   c                P   K    | j                   |gfi | d{   }|d   S 7 	w)a	  Call out to OpenAI's embedding endpoint async for embedding query text.

        Args:
            text: The text to embed.
            kwargs: Additional keyword arguments to pass to the embedding API.

        Returns:
            Embedding for the text.
        Nr   )r   )r   r   r   r2   s       r   aembed_queryzOpenAIEmbeddings.aembed_query  s4      1400$B6BB
!} Cs   &$
&)r   rd   r   r   )r   r   )r   rd   )r   	list[str]r[   rP   r   z<tuple[Iterable[int], list[Union[list[int], str]], list[int]])
r   r   r   r?   r[   rA   r   r   r   list[list[float]]r   )r   r   r[   rA   r   r   r   r   )r   r?   r   r   r   r   )6__name__
__module____qualname____doc__r   r=   __annotations__r>   r@   rB   rC   r   rH   rK   rM   rO   rQ   r   rT   rX   rY   rZ   r[   r\   r^   r_   ra   rb   rc   r   re   r-   rf   rg   ri   rk   rl   rm   rn   r   model_configr   classmethodr   r   propertyr   r   r   r   r   r   r   r   r   r   r   r:   r:   P   s   Pd d3FC3dD9L#9)E3) $J$
 !&J%(- !5tD)  T%*(3Dd*S&O]  &+ !2DA&O]  #( >#L-  !%#$8*/9ISW)X+N'  R). 34d
*  Q=AO:AOSLSJ8K<HMIIOE GS!d!K)--J $t#8#(#>L.>VJ!6:O3:7;M4; s7s7$(K!( +/'.W'++- BL (#  $2 '"6 #6p  ^&^&,/^&	E^&N %)1O1O 	1O
 "1O 1O 
1Ot %)7U7U 	7U
 "7U 7U 
7Ut =A 
 
,9 
LO 
	 
F =A 
 
,9 
LO 
	 
D
9r   r:   )r)   rP   r*   zlist[Union[list[int], str]]r+   r   r,   z	list[int]r-   r`   r   zlist[Optional[list[float]]])%
__future__r   loggingry   collections.abcr   r   r   typingr   r   r	   r
   r   r   r   langchain_core.embeddingsr   langchain_core.runnables.configr   langchain_core.utilsr   r   r   pydanticr   r   r   r   r   typing_extensionsr   	getLoggerr   loggerr8   r:   r   r   r   <module>r     s    "   7 7 6 6   0 ; T T M M "			8	$::': *: 	:
 : !:z|y* |r   