
    hJ                     ~   d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZ ddlmZmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZmZ ddlmZ defdZ ej<                  e      Z  ejB                  d      de"fd       Z#defdZ$ G d de%e      Z& G d de      Z' G d de'e      Z( G d de'e      Z)y)z@A chain for comparing the output of two models using embeddings.    N)Enum)util)AnyOptional)	Callbacks)AsyncCallbackManagerForChainRunCallbackManagerForChainRun)
Embeddings)pre_init)
ConfigDictField)Chain)PairwiseStringEvaluatorStringEvaluatorRUN_KEYreturnc                  N    	 dd l } | S # t        $ r}d}t        |      |d }~ww xY w)Nr   z@Could not import numpy, please install with `pip install numpy`.)numpyImportError)npemsgs      j/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain/evaluation/embedding_distance/base.py_import_numpyr      s5    & I  &P#A%&s    	$$   )maxsizec                  l    t        t        j                  d            ryt        j	                  d       y)Nr   Ta  NumPy not found in the current Python environment. langchain will use a pure Python implementation for embedding distance operations, which may significantly impact performance, especially for large datasets. For optimal speed and efficiency, consider installing NumPy: pip install numpyF)boolr   	find_specloggerwarning     r   _check_numpyr%   #   s,    DNN7#$
NN	 r$   c                      	 ddl m}   |        S # t        $ r0 	 ddlm}  n# t        $ r}d}t        |      |d}~ww xY wY  |        S w xY w)zaCreate an Embeddings object.
    Returns:
        Embeddings: The created Embeddings object.
    r   OpenAIEmbeddingstCould not import OpenAIEmbeddings. Please install the OpenAIEmbeddings package using `pip install langchain-openai`.N)langchain_openair(   r   %langchain_community.embeddings.openai)r(   r   r   s      r   _embedding_factoryr,   1   sl    *5   
*		*  	*Q  c")	* 
*s)    	A A	<7<AAc                   $    e Zd ZdZdZdZdZdZdZy)EmbeddingDistancea  Embedding Distance Metric.

    Attributes:
        COSINE: Cosine distance metric.
        EUCLIDEAN: Euclidean distance metric.
        MANHATTAN: Manhattan distance metric.
        CHEBYSHEV: Chebyshev distance metric.
        HAMMING: Hamming distance metric.
    cosine	euclidean	manhattan	chebyshevhammingN)	__name__
__module____qualname____doc__COSINE	EUCLIDEAN	MANHATTAN	CHEBYSHEVHAMMINGr#   r$   r   r.   r.   I   s"     FIIIGr$   r.   c                      e Zd ZU dZ ee      Zeed<    ee	j                        Ze	ed<   edeeef   deeef   fd       Z ed	
      Zedee   fd       ZdedefdZde	defdZedededefd       Zedededefd       Zedededefd       Zedededefd       Zedededefd       ZdedefdZy)_EmbeddingDistanceChainMixina0  Shared functionality for embedding distance evaluators.

    Attributes:
        embeddings (Embeddings): The embedding objects to vectorize the outputs.
        distance_metric (EmbeddingDistance): The distance metric to use
                                            for comparing the embeddings.
    )default_factory
embeddings)defaultdistance_metricvaluesr   c                 \   |j                  d      }g }	 ddlm} |j                  |       	 ddlm} |j                  |       |sd}t	        |      t        |t        |            r	 ddl}|S |S # t        $ r Y Pw xY w# t        $ r Y Gw xY w# t        $ r}d}t	        |      |d}~ww xY w)zValidate that the TikTok library is installed.

        Args:
            values (Dict[str, Any]): The values to validate.

        Returns:
            Dict[str, Any]: The validated values.
        r@   r   r'   r)   NzThe tiktoken library is required to use the default OpenAI embeddings with embedding distance evaluators. Please either manually select a different Embeddings object or install tiktoken using `pip install tiktoken`.)	getr*   r(   appendr   r+   
isinstancetupletiktoken)clsrC   r@   types_r(   r   rI   r   s           r   _validate_tiktoken_installedz9_EmbeddingDistanceChainMixin._validate_tiktoken_installedg   s     ZZ-
	9MM*+	 MM*+ Q  c""j%-0	. v=  		  		  .I  "#&A-.s:   A1 B  )B 1	A=<A= 	BB	B+B&&B+T)arbitrary_types_allowedc                     dgS )zgReturn the output keys of the chain.

        Returns:
            List[str]: The output keys.
        scorer#   selfs    r   output_keysz(_EmbeddingDistanceChainMixin.output_keys   s     yr$   resultc                 D    d|d   i}t         |v r|t            |t         <   |S )NrO   r   )rQ   rS   parseds      r   _prepare_outputz,_EmbeddingDistanceChainMixin._prepare_output   s*    6'?+f$WoF7Or$   metricc           
      <   t         j                  | j                  t         j                  | j                  t         j
                  | j                  t         j                  | j                  t         j                  | j                  i}||v r||   S d| }t        |      )zGet the metric function for the given metric name.

        Args:
            metric (EmbeddingDistance): The metric name.

        Returns:
            Any: The metric function.
        zInvalid metric: )r.   r8   _cosine_distancer9   _euclidean_distancer:   _manhattan_distancer;   _chebyshev_distancer<   _hamming_distance
ValueError)rQ   rW   metricsr   s       r   _get_metricz(_EmbeddingDistanceChainMixin._get_metric   s     $$d&;&;'')A)A'')A)A'')A)A%%t'='=
 W6?" )or$   abc                 f    	 ddl m} d || |      z
  S # t        $ r}d}t        |      |d}~ww xY w)zCompute the cosine distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.ndarray: The cosine distance.
        r   )cosine_similarityzThe cosine_similarity function is required to compute cosine distance. Please install the langchain-community package using `pip install langchain-community`.Ng      ?)langchain_community.utils.mathrd   r   )ra   rb   rd   r   r   s        r   rY   z-_EmbeddingDistanceChainMixin._cosine_distance   sJ    	*H &q!,,,  	*6 
 c")	*s    	0+0c                     t               r"ddl}|j                  j                  | |z
        S t	        d t        | |      D              dz  S )zCompute the Euclidean distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Euclidean distance.
        r   Nc              3   8   K   | ]  \  }}||z
  ||z
  z    y wNr#   .0xys      r   	<genexpr>zC_EmbeddingDistanceChainMixin._euclidean_distance.<locals>.<genexpr>   s!     ;AAEa!e$;s   g      ?)r%   r   linalgnormsumzipra   rb   r   s      r   rZ   z0_EmbeddingDistanceChainMixin._euclidean_distance   s?     >99>>!a%((;Q;;sBBr$   c                     t               r-t               }|j                  |j                  | |z
              S t        d t	        | |      D              S )zCompute the Manhattan distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Manhattan distance.
        c              3   >   K   | ]  \  }}t        ||z
          y wrh   absri   s      r   rm   zC_EmbeddingDistanceChainMixin._manhattan_distance.<locals>.<genexpr>        4$!Q3q1u:4   )r%   r   rp   rv   rq   rr   s      r   r[   z0_EmbeddingDistanceChainMixin._manhattan_distance   B     >B66"&&Q-((4#a)444r$   c                     t               r-t               }|j                  |j                  | |z
              S t        d t	        | |      D              S )zCompute the Chebyshev distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Chebyshev distance.
        c              3   >   K   | ]  \  }}t        ||z
          y wrh   ru   ri   s      r   rm   zC_EmbeddingDistanceChainMixin._chebyshev_distance.<locals>.<genexpr>  rw   rx   )r%   r   maxrv   rq   rr   s      r   r\   z0_EmbeddingDistanceChainMixin._chebyshev_distance   ry   r$   c                     t               rt               }|j                  | |k7        S t        d t	        | |      D              t        |       z  S )zCompute the Hamming distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Hamming distance.
        c              3   2   K   | ]  \  }}||k7  sd   yw)r   Nr#   ri   s      r   rm   zA_EmbeddingDistanceChainMixin._hamming_distance.<locals>.<genexpr>  s     5Aa1f15s   )r%   r   meanrp   rq   lenrr   s      r   r]   z._EmbeddingDistanceChainMixin._hamming_distance	  sB     >B7716?"5Q55A>>r$   vectorsc                 L   | j                  | j                        }t               rft        |t	               j
                        rH ||d   j                  dd      |d   j                  dd            j                         }t        |      S  ||d   |d         }t        |      S )zCompute the score based on the distance metric.

        Args:
            vectors (np.ndarray): The input vectors.

        Returns:
            float: The computed score.
        r   r   )	r`   rB   r%   rG   r   ndarrayreshapeitemfloat)rQ   r   rW   rO   s       r   _compute_scorez+_EmbeddingDistanceChainMixin._compute_score  s     !!$"6"67>j-/2I2IJ71:--a4gaj6H6HB6OPUUWE U| 71:wqz2EU|r$   N) r4   r5   r6   r7   r   r,   r@   r
   __annotations__r.   r8   rB   r   dictstrr   rL   r   model_configpropertylistrR   rV   r`   staticmethodrY   rZ   r[   r\   r]   r   r   r#   r$   r   r>   r>   [   s    #3EFJ
F).7H7O7O)PO&P-$sCx. -T#s(^ - -^  $L T#Y  d t "3  * -C -C -C - -* Cs Cs Cs C C" 5s 5s 5s 5 5  5s 5s 5s 5 5  ?S ?S ?S ? ? c e r$   r>   c                   l   e Zd ZdZedefd       Zedefd       Zede	e   fd       Z
	 ddeeef   dee   deeef   fd	Z	 ddeeef   dee   deeef   fd
Zdddddddedee   dedee	e      deeeef      dededefdZdddddddedee   dedee	e      deeeef      dededefdZy)EmbeddingDistanceEvalChaina"  Use embedding distances to score semantic difference between
    a prediction and reference.

    Examples:
        >>> chain = EmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_strings(prediction="Hello", reference="Hi")
        >>> print(result)
        {'score': 0.5}
    r   c                      y)zReturn whether the chain requires a reference.

        Returns:
            bool: True if a reference is required, False otherwise.
        Tr#   rP   s    r   requires_referencez-EmbeddingDistanceEvalChain.requires_reference6  s     r$   c                 6    d| j                   j                   dS )N
embedding_	_distancerB   valuerP   s    r   evaluation_namez*EmbeddingDistanceEvalChain.evaluation_name?  s    D00667yAAr$   c                 
    ddgS )eReturn the input keys of the chain.

        Returns:
            List[str]: The input keys.
        
prediction	referencer#   rP   s    r   
input_keysz%EmbeddingDistanceEvalChain.input_keysC  s     k**r$   Ninputsrun_managerc                     | j                   j                  |d   |d   g      }t               rt               }|j	                  |      }| j                  |      }d|iS )a0  Compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (Optional[CallbackManagerForChainRun], optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   rO   r@   embed_documentsr%   r   arrayr   rQ   r   r   r   r   rO   s         r   _callz EmbeddingDistanceEvalChain._callL  sa     //11L!6+#67
 >Bhhw'G##G,r$   c                    K   | j                   j                  |d   |d   g       d{   }t               rt               }|j	                  |      }| j                  |      }d|iS 7 >w)a:  Asynchronously compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   NrO   r@   aembed_documentsr%   r   r   r   r   s         r   _acallz!EmbeddingDistanceEvalChain._acalld  sr      88|${#
 
 >Bhhw'G##G,
   'A*A(?A*F)r   	callbackstagsmetadatainclude_run_infor   r   r   r   r   r   kwargsc                D     | ||d||||      }| j                  |      S )a  Evaluate the embedding distance between a prediction and
        reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required)
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   r   r   r   r   r   rV   	rQ   r   r   r   r   r   r   r   rS   s	            r   _evaluate_stringsz,EmbeddingDistanceEvalChain._evaluate_strings  s5    2 ",9E-
 ##F++r$   c                r   K   | j                  ||d||||       d{   }| j                  |      S 7 w)a  Asynchronously evaluate the embedding distance between
        a prediction and reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   NacallrV   r   s	            r   _aevaluate_stringsz-EmbeddingDistanceEvalChain._aevaluate_strings  sL     2 zz",9E- " 
 
 ##F++
   757rh   )r4   r5   r6   r7   r   r   r   r   r   r   r   r   r   r   r	   r   r   r   r   r   r   r#   r$   r   r   r   +  s    D   B B B +DI + + =A S#X  89  
c3h	 6 BF S#X  =>  
c3h	 > $(#$(-1!& ,  , C=	 ,
  , tCy! , 4S>* ,  ,  , 
 ,L $(#$(-1!& ,  , C=	 ,
  , tCy! , 4S>* ,  ,  , 
 ,r$   r   c                   F   e Zd ZdZedee   fd       Zedefd       Z	 dde	ee
f   dee   de	ee
f   fdZ	 dde	ee
f   dee   de	ee
f   fd	Zdddd
ddedededeee      dee	ee
f      dede
de	fdZdddd
ddedededeee      dee	ee
f      dede
de	fdZy)"PairwiseEmbeddingDistanceEvalChaina  Use embedding distances to score semantic difference between two predictions.

    Examples:
    >>> chain = PairwiseEmbeddingDistanceEvalChain()
    >>> result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
    >>> print(result)
    {'score': 0.5}
    r   c                 
    ddgS )r   r   prediction_br#   rP   s    r   r   z-PairwiseEmbeddingDistanceEvalChain.input_keys  s     n--r$   c                 6    d| j                   j                   dS )Npairwise_embedding_r   r   rP   s    r   r   z2PairwiseEmbeddingDistanceEvalChain.evaluation_name  s    $T%9%9%?%?$@	JJr$   Nr   r   c                     | j                   j                  |d   |d   g      }t               rt               }|j	                  |      }| j                  |      }d|iS )a  Compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (CallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   rO   r   r   s         r   r   z(PairwiseEmbeddingDistanceEvalChain._call  sd     //11|$~&
 >Bhhw'G##G,r$   c                    K   | j                   j                  |d   |d   g       d{   }t               rt               }|j	                  |      }| j                  |      }d|iS 7 >w)a/  Asynchronously compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        r   r   NrO   r   r   s         r   r   z)PairwiseEmbeddingDistanceEvalChain._acall  sr      88|$~&
 
 >Bhhw'G##G,
r   F)r   r   r   r   r   r   r   r   r   r   r   c                D     | ||d||||      }| j                  |      S )a  Evaluate the embedding distance between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   r   r   	rQ   r   r   r   r   r   r   r   rS   s	            r   _evaluate_string_pairsz9PairwiseEmbeddingDistanceEvalChain._evaluate_string_pairs  s5    4 ",lK-
 ##F++r$   c                r   K   | j                  ||d||||       d{   }| j                  |      S 7 w)a  Asynchronously evaluate the embedding distance

        between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to traces
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   Nr   r   s	            r   _aevaluate_string_pairsz:PairwiseEmbeddingDistanceEvalChain._aevaluate_string_pairs7  sL     8 zz",lK- " 
 
 ##F++
r   rh   )r4   r5   r6   r7   r   r   r   r   r   r   r   r   r	   r   r   r   r   r   r   r   r#   r$   r   r   r     s    .DI . . K K K =A S#X  89  
c3h	 < BF S#X  =>  
c3h	 @  $$(-1!&!, !, 	!,
 !, tCy!!, 4S>*!, !, !, 
!,P  $$(-1!&#, #, 	#,
 #, tCy!#, 4S>*#, #, #, 
#,r$   r   )*r7   	functoolsloggingenumr   	importlibr   typingr   r   langchain_core.callbacksr    langchain_core.callbacks.managerr   r	   langchain_core.embeddingsr
   langchain_core.utilsr   pydanticr   r   langchain.chains.baser   langchain.evaluation.schemar   r   langchain.schemar   r   	getLoggerr4   r!   	lru_cacher   r%   r,   r   r.   r>   r   r   r#   r$   r   <module>r      s    F       . 1 ) & ' P $s  
		8	$ Q
d 
  
J 0T $M5 M`V,!= V,rV, V,r$   