
    h                       d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	m
Z
 ddlmZmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ 	 dd
lmZmZmZmZ ddlZddl m!Z! ddl"m#Z#m$Z$m%Z%m&Z&  ejN                  e(      Z) G d de      Z* G d ded      Z+ G d de      Z, G d ded      Z- G d d      Z.ee,e-e/f   Z0 G d de      Z1ee1e/f   Z2 G d de.      Z3	 	 d'dZ4dZ5d(dZ6 G d  d!      Z7	 	 	 	 d)d"Z8	 	 	 	 d*d#Z9	 	 	 	 d+d$Z:	 	 	 	 d,d%Z;eee
ejL                     e
ejJ                     gee,e-f   f   ee<ejL                     e<ejJ                     gee,e-f   f   f   Z=d-d&Z>y# e$ r dd
lmZmZmZmZ Y )w xY w).z?This module contains the evaluator classes for evaluating runs.    )annotationsN)abstractmethod)	AwaitableSequence)AnyCallableLiteralOptionalUnioncast)	TypedDictrun_helpers)schemas)	BaseModelFieldValidationError	validator)wraps)
SCORE_TYPE
VALUE_TYPEExampleRunc                  (    e Zd ZU dZded<   	 ded<   y)Categoryz$A category for categorical feedback.Optional[Union[float, int]]valuestrlabelN__name__
__module____qualname____doc____annotations__     \/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langsmith/evaluation/evaluator.pyr   r   /   s    .&&CJ&r'   r   c                  @    e Zd ZU dZded<   	 ded<   	 ded<   	 ded<   y	)
FeedbackConfigziConfiguration to define a type of feedback.

    Applied on on the first creation of a feedback_key.
    z0Literal['continuous', 'categorical', 'freeform']typer   minmaxz%Optional[list[Union[Category, dict]]]
categoriesNr    r&   r'   r(   r*   r*   8   s*    
 ;:	$$;	$$A55r'   r*   F)totalc                      e Zd ZU dZded<   	 dZded<   	 dZded<   	 dZd	ed
<   	 dZded<   	  e	e
      Zded<   	 dZded<   	 dZded<   	 dZded<   	 dZded<   	  G d d      Z edd      d        Zy)EvaluationResultzEvaluation result.r   keyNr   scorer   r   zOptional[str]commentzOptional[dict]
correction)default_factorydictevaluator_infoz%Optional[Union[FeedbackConfig, dict]]feedback_configOptional[Union[uuid.UUID, str]]source_run_idtarget_run_idextrac                      e Zd ZdZdZy)EvaluationResult.ConfigzPydantic model configuration.FN)r!   r"   r#   r$   allow_extrar&   r'   r(   Configr?   b   s
    +r'   rA   T)prec                t    d|vs|d   .t        |t        t        f      rt        j	                  d|        |S )z$Check that the value is not numeric.r3   zJNumeric values should be provided in the 'score' field, not 'value'. Got: )
isinstanceintfloatloggerwarning)clsvvaluess      r(   check_value_non_numericz(EvaluationResult.check_value_non_numericg   sE    
 & F7O$;!c5\*C!
 r'   )r!   r"   r#   r$   r%   r3   r   r4   r5   r   r7   r8   r9   r;   r<   r=   rA   r   rL   r&   r'   r(   r1   r1   G   s    	H@E:0E:8!G]!2!%J%: 6ND65=AO:A;59M29659M29 !E> ) 
 wD! "r'   r1   c                      e Zd ZU dZded<   y)EvaluationResultszqBatch evaluation results.

    This makes it easy for your evaluator to return multiple
    metrics at once.
    zlist[EvaluationResult]resultsNr    r&   r'   r(   rN   rN   v   s     $#!r'   rN   c                  R    e Zd ZdZe	 	 d	 	 	 	 	 	 	 dd       Z	 	 d	 	 	 	 	 	 	 ddZy)RunEvaluatorzEvaluator interface class.Nc                     y)zEvaluate an example.Nr&   )selfrunexampleevaluator_run_ids       r(   evaluate_runzRunEvaluator.evaluate_run   s    r'   c                    K   t        j                          fd}t        j                         j	                  d|       d{   S 7 w)z#Evaluate an example asynchronously.c                     t        j                  di  5  j                        cd d d        S # 1 sw Y   y xY w)Nr&   )rhtracing_contextrW   )current_contextrV   rU   rT   rS   s   r(   _run_with_contextz5RunEvaluator.aevaluate_run.<locals>._run_with_context   s@    ##6o6 I((g7GHI I Is   4=N)rZ   get_tracing_contextasyncioget_running_looprun_in_executor)rS   rT   rU   rV   r]   r\   s   ```` @r(   aevaluate_runzRunEvaluator.aevaluate_run   sH      002	I 	I --/??FWXXXXs   AAAANNrT   r   rU   Optional[Example]rV   Optional[uuid.UUID]return*Union[EvaluationResult, EvaluationResults])r!   r"   r#   r$   r   rW   rb   r&   r'   r(   rQ   rQ      s}    $ &*04	## ## .	#
 
4# # &*04	YY #Y .	Y
 
4Yr'   rQ   c                  H    e Zd ZU dZded<   	 ded<   	 dZded<   	 dZd	ed
<   y)ComparisonEvaluationResultzFeedback scores for the results of comparative evaluations.

    These are generated by functions that compare two or more runs,
    returning a ranking or other feedback.
    r   r2   z'dict[Union[uuid.UUID, str], SCORE_TYPE]scoresNr:   r;   z6Optional[Union[str, dict[Union[uuid.UUID, str], str]]]r4   )r!   r"   r#   r$   r%   r;   r4   r&   r'   r(   rj   rj      s8     
H@33459M296FJGCJ:r'   rj   c                       e Zd ZdZ	 d	 	 	 ddZ	 d	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 ddZ	 	 	 	 	 	 ddZedd       Z		 	 d	 	 	 	 	 	 	 ddZ
	 	 d	 	 	 	 	 d fdZ	 d	 	 	 	 	 dd	Zdd
Z xZS )DynamicRunEvaluatora  A dynamic evaluator that wraps a function and transforms it into a `RunEvaluator`.

    This class is designed to be used with the `@run_evaluator` decorator, allowing
    functions that take a `Run` and an optional `Example` as arguments, and return
    an `EvaluationResult` or `EvaluationResults`, to be used as instances of `RunEvaluator`.

    Attributes:
        func (Callable): The function that is wrapped by this evaluator.
    c                   t        |      \  }|rt        |      \  }d	fd} t        |      |        ddlm} |*|j	                  ||      | _        t        |dd      | _        t        j                  |      r8|t        d      |j	                  ||      | _        t        |dd      | _        y|j	                  t        t        t        t        t           gt         f   |      |      | _        t        |dd      | _        y)
zInitialize the DynamicRunEvaluator with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns a dict or `ComparisonEvaluationResult`.
        c                f    | S  | j                  d      | j                  d            \  }}}|S )NrT   rU   getinputs_traced_inputsprepare_inputss      r(   process_inputsz4DynamicRunEvaluator.__init__.<locals>.process_inputs   s>    %$2

5!6::i#8%!Q= ! r'   r   r   Nrw   r!   rm   Func was provided as a coroutine function, but afunc was also provided. If providing both, func should be a regular function to avoid ambiguity.rs   r7   rg   r7   )_normalize_evaluator_funcr   	langsmithr   ensure_traceableafuncgetattr_nameinspectiscoroutinefunction	TypeErrorr   r   r   r
   r   _RUNNABLE_OUTPUTfuncrS   r   r~   rw   r   rv   s        @r(   __init__zDynamicRunEvaluator.__init__   s   ( ";4!@~&?&F#UN	! 	dD)$55n 6 DJ !
4IJDJ&&t, 3 
 %55^ 6 DJ !z3HIDJ#44XsHW$568HHI4P- 5 DI !z3HIDJr'   c                <   t        t              rj                  s|_        S 	 st        d       dvr|r| j                  d<   t        fddD              rt        d       t        di d|iS # t        $ r}t        d       |d }~ww xY w)	NziExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got empty result: r2   c              3  &   K   | ]  }|v 
 y wNr&   ).0kresults     r(   	<genexpr>z@DynamicRunEvaluator._coerce_evaluation_result.<locals>.<genexpr>  s     Jq1F?J   )r3   r   r4   zrExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score' or categorical 'value'; got r;   z[Expected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got r&   )rD   r1   r;   
ValueErrorr   allr   )rS   r   r;   allow_no_keyes    `   r(   _coerce_evaluation_resultz-DynamicRunEvaluator._coerce_evaluation_result   s     f./'''4$M	 FFLXO  F"| $

uJ,IJJ OOUhX  $Q&P&PQQ 	44:8= 	s   AA> >	BBBc                    d|v rB|j                         }|d   D cg c]  }| j                  ||       c}|d<   t        di |S | j                  t        t        |      |d      S c c}w )NrO   )r;   T)r;   r   r&   )copyr   rN   r   r7   )rS   rO   r;   cprs        r(   _coerce_evaluation_resultsz.DynamicRunEvaluator._coerce_evaluation_results  s    
 B !+ ..q.NByM %*r**--w}4 . 
 	
s   A)c                    t        |t              r|j                  s||_        |S t        |      }| j	                  ||      S r   )rD   r1   r;   _format_evaluator_resultr   )rS   r   r;   s      r(   _format_resultz"DynamicRunEvaluator._format_result)  sB     f./'''4$M)&1..v}EEr'   c                    t        | d      S zCheck if the evaluator function is asynchronous.

        Returns:
            bool: True if the evaluator function is asynchronous, False otherwise.
        r~   hasattrrS   s    r(   is_asynczDynamicRunEvaluator.is_async7       tW%%r'   c                   t        | d      sPt        j                         }|j                         rt	        d      |j                  | j                  ||            S |t        j                         }d|j                  i}t        |dd      rt        |j                        |d<   | j                  ||||d      }| j                  ||      S )	a  Evaluate a run using the wrapped function.

        This method directly invokes the wrapped function with the provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        r   tCannot call `evaluate_run` on an async run evaluator from within an running event loop. Use `aevaluate_run` instead.Nr<   
session_id
experimentrun_idmetadatalangsmith_extra)r   r_   get_event_loop
is_runningRuntimeErrorrun_until_completerb   uuiduuid4idr   r   r   r   r   )rS   rT   rU   rV   running_loopr   r   s          r(   rW   z DynamicRunEvaluator.evaluate_run@  s    " tV$"113L&&("R 
 $66t7I7I#w7WXX##zz|$3SVV#<3d+%(%8H\"'7XN  

 ""6+;<<r'   c                P  K   t        | d      st        | 	  ||       d{   S |t        j                         }d|j
                  i}t        |dd      rt        |j                        |d<   | j                  ||||d       d{   }| j                  ||      S 7 ~7 w)a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        r~   Nr<   r   r   r   r   )r   superrb   r   r   r   r   r   r   r~   r   )rS   rT   rU   rV   r   r   	__class__s         r(   rb   z!DynamicRunEvaluator.aevaluate_runf  s     & tW%.sG<<<##zz|$3SVV#<3d+%(%8H\"zz'7XN " 
 

 ""6+;<< =
s"    B&B"A'B&B$B&$B&c                &    | j                  ||      S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
        )rW   )rS   rT   rU   s      r(   __call__zDynamicRunEvaluator.__call__  s       g..r'   c                "    d| j                    dS ))Represent the DynamicRunEvaluator object.z<DynamicRunEvaluator >r   r   s    r(   __repr__zDynamicRunEvaluator.__repr__  s    &tzzl!44r'   r   )r   XCallable[[Run, Optional[Example]], Union[_RUNNABLE_OUTPUT, Awaitable[_RUNNABLE_OUTPUT]]]r~   zIOptional[Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]])F)r   zUnion[EvaluationResult, dict]r;   	uuid.UUIDr   boolrg   r1   )rO   zUnion[dict, EvaluationResults]r;   r   rg   rh   )r   zMUnion[EvaluationResult, EvaluationResults, dict, str, int, bool, float, list]r;   r   rg   rh   rg   r   rc   rd   )rT   r   rU   re   rV   rf   )rT   r   rU   re   rg   rh   rg   r   )r!   r"   r#   r$   r   r   r   r   propertyr   rW   rb   r   r   __classcell__)r   s   @r(   rm   rm      s;   , 8J
8J
8J| #	- ! 	
 
<
/
 !
 
4	
"F
F
 !F 
4F & & &*04	$=$= #$= .	$=
 
4$=R &*04	== #= .	=D 6://!2/	3/"5r'   rm   c                    t        |       S )zmCreate a run evaluator from a function.

    Decorator that transforms a function into a `RunEvaluator`.
    )rm   r   s    r(   run_evaluatorr     s     t$$r'   i'  c                ^    t        |       }t        |      t        kD  r|d t        dz
   dz   }|S )N   z...))reprlen_MAXSIZE)objss     r(   _maxsize_reprr     s1    S	A
1vn1&Hr'   c                      e Zd ZdZ	 d	 	 	 ddZedd       Z	 d	 	 	 	 	 ddZ	 d	 	 	 	 	 ddZ	 d	 	 	 	 	 ddZ	ddZ
edd	       Z	 	 	 	 	 	 	 	 dd
Zy)DynamicComparisonRunEvaluatorz4Compare predictions (as traces) from 2 or more runs.Nc                    t        |      \  }|rt        |      \  }d	fd} t        |      |        ddlm} |*|j	                  ||      | _        t        |dd      | _        t        j                  |      r8|t        d      |j	                  ||      | _        t        |dd      | _        y|j	                  t        t        t        t           t        t            gt"        f   |      |      | _        t        |dd      | _        y)
zInitialize the DynamicRunEvaluator with a given function.

        Args:
            func (Callable): A function that takes a `Run` and an optional `Example` as
            arguments, and returns an `EvaluationResult` or `EvaluationResults`.
        c                f    | S  | j                  d      | j                  d            \  }}}|S )NrunsrU   rp   rr   s      r(   rw   z>DynamicComparisonRunEvaluator.__init__.<locals>.process_inputs  s>    %$2

6"FJJy$9%!Q= ! r'   r   r   Nrx   r!   rm   ry   rz   )$_normalize_comparison_evaluator_funcr   r|   r   r}   r~   r   r   r   r   r   r   r   r   r   r
   r   _COMPARISON_OUTPUTr   r   s        @r(   r   z&DynamicComparisonRunEvaluator.__init__  s(   ( "Fd!K~&J5&Q#UN	! 	dD)$55n 6 DJ !
4IJDJ&&t, 3 
 %55^ 6 DJ !z3HIDJ#44!#(9:*,   . 5 	DI !z3HIDJr'   c                    t        | d      S r   r   r   s    r(   r   z&DynamicComparisonRunEvaluator.is_async  r   r'   c                X   t        | d      sPt        j                         }|j                         rt	        d      |j                  | j                  ||            S t        j                         }| j                  |      }| j                  ||||d      }| j                  |||      S )zCompare runs to score preferences.

        Args:
            runs: A list of runs to compare.
            example: An optional example to be used in the evaluation.

        r   r   r   tagsr   )r   r_   r   r   r   r   acompare_runsr   r   	_get_tagsr   _format_results)rS   r   rU   r   r;   r   r   s          r(   compare_runsz*DynamicComparisonRunEvaluator.compare_runs  s     tV$"113L&&("R 
 $66&&tW5  

~~d#'4dC  

 ##FM4@@r'   c                   K   t        | d      s| j                  ||      S t        j                         }| j	                  |      }| j                  ||||d       d{   }| j                  |||      S 7 w)a  Evaluate a run asynchronously using the wrapped async function.

        This method directly invokes the wrapped async function with the
            provided arguments.

        Args:
            runs (Run): The runs to be evaluated.
            example (Optional[Example]): An optional example to be used
                in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        r~   r   r   N)r   r   r   r   r   r~   r   )rS   r   rU   r;   r   r   s         r(   r   z+DynamicComparisonRunEvaluator.acompare_runs  s       tW%$$T733

~~d#zz'4dC " 
 

 ##FM4@@
s   AA: A8!A:c                &    | j                  ||      S )a  Make the evaluator callable, allowing it to be used like a function.

        This method enables the evaluator instance to be called directly, forwarding the
        call to `evaluate_run`.

        Args:
            run (Run): The run to be evaluated.
            example (Optional[Example]): An optional example to be used in the evaluation.

        Returns:
            ComparisonEvaluationResult: The result of the evaluation.
        )r   )rS   r   rU   s      r(   r   z&DynamicComparisonRunEvaluator.__call__:  s       w//r'   c                "    d| j                    dS )r   z<DynamicComparisonRunEvaluator r   r   r   s    r(   r   z&DynamicComparisonRunEvaluator.__repr__K  s    0A>>r'   c                    g }| D ]^  }|j                  dt        |j                        z          t        |dd      s8|j                  dt        |j                        z          ` |S )zExtract tags from runs.zrun:r   Nzexperiment:)appendr   r   r   r   )r   r   rT   s      r(   r   z'DynamicComparisonRunEvaluator._get_tagsO  s`      	ACKKSVV,-sL$/MC,??@	A r'   c                   t        |t              r|j                  s||_        |S t        |t              r9t	        ||      D ci c]  \  }}|j
                  | c}}| j                  |d}n4t        |t              rd|vr | j                  |d<   nd|}t        |      	 t        di d|i|S c c}}w # t        $ r}t        d|       |d }~ww xY w)N)rk   r2   r;   r2   zXExpected 'dict', 'list' or 'ComparisonEvaluationResult' result object. Received: result=r;   zExpected a dictionary with a 'key' and dictionary of scores mappingrun IDs to numeric scores, or ComparisonEvaluationResult object, got r&   )
rD   rj   r;   listzipr   r   r7   r   r   )rS   r   r;   r   rT   r3   msgr   s           r(   r   z-DynamicComparisonRunEvaluator._format_resultsZ  s     f89'''4$M%;>tV;LMZS%3665=Mzz!.F
 %F" $

u-%+I/  S/!		- "M<V<  N"  	x! 		s   B3$B9 9	CCCr   )r   fCallable[[Sequence[Run], Optional[Example]], Union[_COMPARISON_OUTPUT, Awaitable[_COMPARISON_OUTPUT]]]r~   zUOptional[Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]]r   )r   Sequence[Run]rU   re   rg   rj   r   )r   r   rg   z	list[str])r   z-Union[dict, list, ComparisonEvaluationResult]r;   r   r   r   rg   rj   )r!   r"   r#   r$   r   r   r   r   r   r   r   staticmethodr   r   r&   r'   r(   r   r     s    > >J
>J
>J@ & & AEA!A,=A	#A@ AEA!A,=A	#A: AE0!0,=0	#0"?  "=" !" 	"
 
$"r'   r   c                    t        |       S )z.Create a comaprison evaluator from a function.)r   r   s    r(   comparison_evaluatorr     s     )..r'   c                   	
 dt        j                         

j                  j                         D cg c]!  \  }}|j                  |j
                  k7  s |# }}}
j                  j                         D cg c]-  \  }}|j                  t         j                  j                  ur|/ c}}	|r6t        	fd|D              s2t        |D cg c]	  }|	vs| c}      dk7  rd d}t        |      t        	fd|D              r|ddgk(  r d fS t        j                         rF	 	 	 	 	 	 d
fd		 	 	 	 	 	 d fd
}t         d      rt         d      n|j                  |_        |fS 	 	 	 	 	 	 d
fdd fd}t         d      rt         d      n|j                  |_        |fS c c}}w c c}}w c c}w )N)rT   rU   rs   outputsreference_outputsattachmentsc              3  2   K   | ]  }|v xs |v   y wr   r&   r   pnameargs_with_defaultssupported_argss     r(   r   z,_normalize_evaluator_func.<locals>.<genexpr>  )      
GLE^#Bu0B'BB
      UInvalid evaluator function. Must have at least one argument. Supported arguments are . Please see https://docs.smith.langchain.com/evaluation/how_to_guides/evaluation/evaluate_llm_application#use-custom-evaluatorsc              3  2   K   | ]  }|v xs |v   y wr   r&   r   s     r(   r   z,_normalize_evaluator_func.<locals>.<genexpr>  )      CH>5,>#>>r   rT   rU   c                   | ||r|j                   ni | j                  xs i |r|j                  xs i ni |r|j                  xs i ni d}i }g }i }j                  j	                         D ]e  \  }}||v s|j
                  |j                  |j                  fv r|j                  ||          n||   ||<   |dv rt        ||         n||   ||<   g |||fS N)rT   rU   rs   r   r   r   )rT   rU   
rs   r   r   
parametersitemskindPOSITIONAL_OR_KEYWORDPOSITIONAL_ONLYr   r   	rT   rU   arg_mapkwargsargsru   
param_nameparamsigs	           r(   _prepare_inputsz2_normalize_evaluator_func.<locals>._prepare_inputs      &07gnnR"{{0b@G7#6#6#<"RBI)>Br  "),)=)=)? %J!W, ::!77!11*  !KK
(;<181DF:.  *-?? *'**=>!(!4 &j1 V]22r'   c                J   K    | |      \  }}} |i | d {   S 7 wr   r&   rT   rU   r	  r  rt   r  r   s        r(   awrapperz+_normalize_evaluator_func.<locals>.awrapper  s2      %4C$A!vq!4262222   #!#r!   c                   | ||r|j                   ni | j                  xs i |r|j                  xs i ni |r|j                  xs i ni d}i }g }i }j                  j	                         D ]e  \  }}||v s|j
                  |j                  |j                  fv r|j                  ||          n||   ||<   |dv rt        ||         n||   ||<   g |||fS r   r   r  s	           r(   r  z2_normalize_evaluator_func.<locals>._prepare_inputs  r  r'   c                .     | |      \  }}} |i |S r   r&   r  s        r(   wrapperz*_normalize_evaluator_func.<locals>.wrapper  s&    $3C$A!vqT,V,,r'   )rT   r   rU   re   rg   tuple[list, dict, dict])rT   r   rU   re   rg   r   r   	signaturer  r  r  VAR_KEYWORDdefault	Parameteremptyr   r   r   r   r   r   r!   r   r   pall_argsar   r  r  r  r   r  r   s   `       @@@@r(   r{   r{     s   N 

D
!C&)nn&:&:&<X(%!--@WXHX ,,.E199G--333 	
  
PX
 
 HDq1C(CDEJ11?0@ AFG 	 o  LT 	 
 Tz&&t,33#43(3>33#43!3 4, j)&& 
 o..33#43(3>- 4, j)%% 
 _--k Y Es   !F:F: 2G 	G Gc                   	
 dt        j                         

j                  j                         D cg c]!  \  }}|j                  |j
                  k7  s |# }}}
j                  j                         D cg c]-  \  }}|j                  t         j                  j                  ur|/ c}}	|r6t        	fd|D              s2t        |D cg c]	  }|	vs| c}      dk7  rd d}t        |      t        	fd|D              r|ddgk(  r d fS t        j                         rF	 	 	 	 	 	 d
fd		 	 	 	 	 	 d fd
}t         d      rt         d      n|j                  |_        |fS 	 	 	 	 	 	 d
fd	 	 	 	 	 	 d fd}t         d      rt         d      n|j                  |_        |fS c c}}w c c}}w c c}w )Nr   rU   rs   r   r   c              3  2   K   | ]  }|v xs |v   y wr   r&   r   s     r(   r   z7_normalize_comparison_evaluator_func.<locals>.<genexpr>%  r   r   r   r   r   c              3  2   K   | ]  }|v xs |v   y wr   r&   r   s     r(   r   z7_normalize_comparison_evaluator_func.<locals>.<genexpr>3  r   r   r   rU   c                   | ||r|j                   ni | D cg c]  }|j                  xs i  c}|r|j                  xs i ni d}i }g }i }	j                  j                         D ]e  \  }}||v s|j                  |j
                  |j                  fv r|j                  ||          n||   ||<   |dv rt        ||         n||   ||<   g |||fS c c}w Nr"  )r   rU   	rs   r   r  r  r  r  r  r   r   
r   rU   rT   r  r  r	  ru   r
  r  r  s
            r(   r  z=_normalize_comparison_evaluator_func.<locals>._prepare_inputs=      !&07gnnR=ABc 1r 1BBI)>Br  "),)=)=)? %J!W, ::!77!11*  !KK
(;<181DF:.  *-@@ *'**=>!(!4 &j1 V]22+  C   C
c                J   K    | |      \  }}} |i | d {   S 7 wr   r&   r   rU   r	  r  rt   r  r   s        r(   r  z6_normalize_comparison_evaluator_func.<locals>.awrapper[  s2      %4D'$B!vq!4262222r  r!   c                   | ||r|j                   ni | D cg c]  }|j                  xs i  c}|r|j                  xs i ni d}i }g }i }	j                  j                         D ]e  \  }}||v s|j                  |j
                  |j                  fv r|j                  ||          n||   ||<   |dv rt        ||         n||   ||<   g |||fS c c}w r&  r'  r(  s
            r(   r  z=_normalize_comparison_evaluator_func.<locals>._prepare_inputsj  r)  r*  c                .     | |      \  }}} |i |S r   r&   r,  s        r(   r  z5_normalize_comparison_evaluator_func.<locals>.wrapper  s(     %4D'$B!vqT,V,,r'   )r   r   rU   re   rg   r  )r   r   rU   re   rg   r   r  r  s   `       @@@@r(   r   r     s-    SN


D
!C&)nn&:&:&<X(%!--@WXHX ,,.E199G--333 	
  
PX
 
 HDq1C(CDEJ11?0@ AFG 	 o  LT 	 
 Tz&&t,3#3.?3(3<3#3.?3#3 4, j)&& 
 _,,3#3.?3(3<-#-.?-#- 4, j)%% 
 O++k Y Es   !G G  2G	G Gc                @   t        | t        t        t        f      rd| i} | S | st	        d|        t        | t
              r't        d | D              st	        d|  d      d| i} | S t        | t              rd| i} | S t        | t              r	 | S t	        d|        )	Nr3   zdExpected a non-empty dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got c              3  <   K   | ]  }t        |t                y wr   )rD   r7   )r   xs     r(   r   z+_format_evaluator_result.<locals>.<genexpr>  s     71:a&7s   z8Expected a list of dicts or EvaluationResults. Received .rO   r   zZExpected a dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got )	rD   r   rF   rE   r   r   r   r   r7   )r   s    r(   r   r     s     &4,-6"* M) ;;A(D
 	
 
FD	!777J6(RST  V$ M 
FC	 6" M 
FD	! M	 &&,X/
 	
r'   c                   	 d	t        j                         j                  j                         D cg c]  \  }}|	 }}}j                  j                         D cg c]-  \  }}|j                  t         j
                  j                  ur|/ c}}|r6t        	fd|D              s=t        |D cg c]	  }|vs| c}      dk7  rd	 d}|r	|d| dz  }t        |      t        	fd|D              r|dd	gk(  r S 	 	 	 	 	 	 d fd
}t         d      rt         d      |_        |S |j                  |_        |S c c}}w c c}}w c c}w )Nr   examplesrs   r   r   c              3  2   K   | ]  }|v xs |v   y wr   r&   r   s     r(   r   z/_normalize_summary_evaluator.<locals>.<genexpr>  r   r   r   r   r2  z Received arguments c              3  &   K   | ]  }|v  
 y wr   r&   )r   r   r   s     r(   r   z/_normalize_summary_evaluator.<locals>.<genexpr>  s     ?U.(?r   r   r5  c           	        | ||D cg c]  }|j                    c}| D cg c]  }|j                  xs i  c}|D cg c]  }|j                  xs i  c}d}i }g }j                  j                         D ]K  \  }}||v s|j                  |j
                  |j                  fv r|j                  ||          D||   ||<   M  
|i |}	t        |	t              r|	S t        |	      S c c}w c c}w c c}w )Nr4  )rs   r   r  r  r  r  r  r   rD   r1   r   )r   r5  rU   rT   r  r  r	  r
  r  r   r   r  s             r(   r  z-_normalize_summary_evaluator.<locals>.wrapper  s    $9ABg7>>B9=>#CKK-2->KS%Tgoo&;&;%TG FD%(^^%9%9%; 	A!
E(zz33--&  GJ$78-4Z-@z*	A 4*6*F&"23+F33) C>%Ts   C+C0
C5r!   )r   zSequence[schemas.Run]r5  zSequence[schemas.Example]rg   rh   )r   r  r  r  r  r  r  r   r   r   r   r   r!   )
r   r   r  r  r   r   r  r   r  r   s
   `      @@@r(   _normalize_summary_evaluatorr9    s   SN


D
!C&)nn&:&:&<=(%=H= ,,.E199G--333 	
  
PX
 
 HDq1C(CDEJ11?0@C 	 )(155Co ?h??8P D 	4'	43L	47	4: *1z)BGD*% 	  IPHXHX 	 w > Es   E%2E;	EE)r   r   )r   r   )r   r   rg   r   )r   r   rg   ztuple[Union[Callable[[Run, Optional[Example]], _RUNNABLE_OUTPUT], Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]], Optional[Callable[..., dict]]])r   r   rg   ztuple[Union[Callable[[Sequence[Run], Optional[Example]], _COMPARISON_OUTPUT], Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]], Optional[Callable[..., dict]]])r   z;Union[EvaluationResults, dict, str, int, bool, float, list]rg   zUnion[EvaluationResults, dict])r   r   rg   SUMMARY_EVALUATOR_T)?r$   
__future__r   r_   r   r   abcr   collections.abcr   r   typingr   r   r	   r
   r   r   typing_extensionsr   r|   r   rZ   r   pydantic.v1r   r   r   r   ImportErrorpydanticlogging	functoolsr   langsmith.schemasr   r   r   r   	getLoggerr!   rG   r   r*   r1   rN   rQ   r7   r   rj   r   rm   r   r   r   r   r   r{   r   r   r   r:  r9  r&   r'   r(   <module>rG     s   E "     /  ( '     B B			8	$'y '6Ye 6,y ,^"	 "Y Y8 )+<dBC : :$ 5t;< e5, e5P	%	% I IX//
 #/G.
G.G.T@,
@,@,FG#8 	'++	 9: 112	4 	gkk	D12 112	4		 >   s   E' 'E<;E<