
    h*                       d Z ddlmZ ddlZddlZddlmZ ddlmZm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZmZ ddlmZ ddZddZ  G d deee      Z! G d deee      Z" G d de"      Z#y)z-LLM Chains for evaluating question answering.    )annotationsN)Sequence)AnyOptional)	Callbacks)BaseLanguageModel)PromptTemplate)
ConfigDict)override)LLMChain)CONTEXT_PROMPT
COT_PROMPTPROMPT)LLMEvalChainStringEvaluator)RUN_KEYc                   t        j                  d| j                         t         j                        }|rF|j	                  d      j                         dk(  ry|j	                  d      j                         dk(  ry	 | j                         j                         d   j                  t        j                  ddt        j                              }|j                         dk(  ry|j                         dk(  ry| j                         j                         d	   j                  t        j                  ddt        j                              }|j                         dk(  ry|j                         dk(  ry	 y # t        $ r Y y w xY w)
Nzgrade:\s*(correct|incorrect)   CORRECT)r   r   	INCORRECT)r   r   r    )researchstrip
IGNORECASEgroupuppersplit	translatestr	maketransstringpunctuation
IndexError)textmatch
first_word	last_words       `/var/www/html/eduruby.in/venv/lib/python3.12/site-packages/langchain/evaluation/qa/eval_chain.py
_get_scorer+      sC   II5tzz|R]]SE;;q>!Y.;;q>![0!JJL #--cmmBFDVDV.WX 	 *,!JJLUWRYs}}RV-?-?@A 	
 ??	)??+! ,   s&   =A&E5 $E5 8A&E5 E5 5	F Fc                ^    | j                         }t        |      }|d\  }}n|\  }}|||dS )zParse the output text.

    Args:
        text (str): The output text to parse.

    Returns:
        Any: The parsed output.
    )NN)	reasoningvaluescore)r   r+   )r&   r-   parsed_scoresr.   r/   s        r*   _parse_string_eval_outputr1   3   sD     

Iy)M!u$u     c                  F   e Zd ZU dZdZded<    ed      Zedd       Z	e
dd       Ze
dd	       Ze
dd
       Ze	 d	 	 	 	 	 	 	 dd       Z	 	 	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZeddddd	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zeddddd	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zy)QAEvalChainz,LLM Chain for evaluating question answering.resultsr!   
output_keyignoreextrac                     yNF clss    r*   is_lc_serializablezQAEvalChain.is_lc_serializableR       r2   c                     y)Ncorrectnessr<   selfs    r*   evaluation_namezQAEvalChain.evaluation_nameV   s    r2   c                     yNTr<   rC   s    r*   requires_referencezQAEvalChain.requires_referenceZ       r2   c                     yrG   r<   rC   s    r*   requires_inputzQAEvalChain.requires_input^   rI   r2   Nc                    |xs t         }h d}|t        |j                        k7  rd| d|j                   }t        |       | d||d|S )a  Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables:
            'input', 'answer' and 'result' that will be used as the prompt
            for evaluation.
            Defaults to PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            QAEvalChain: the loaded QA eval chain.
        >   queryanswerresultInput variables should be 
, but got llmpromptr<   )r   setinput_variables
ValueError)r>   rS   rT   kwargsexpected_input_varsmsgs         r*   from_llmzQAEvalChain.from_llmb   sm    , !6;#f&<&<"==,-@,A B!1124  S/!4s64V44r2   	callbacksc                   t        |      D cg c]  \  }}||   ||   ||   |   d }	}}| j                  |	|      S c c}}w )5Evaluate question answering examples and predictions.rM   rN   rO   r\   	enumerateapply)
rD   examplespredictionsquestion_key
answer_keyprediction_keyr]   iexampleinputss
             r*   evaluatezQAEvalChain.evaluate   sd    " (1
 7	 !.!*-%a.8
 
 zz&Iz66
   Ac                f    t        || j                           }t        |v r|t           |t        <   |S Nr1   r6   r   rD   rO   parsed_results      r*   _prepare_outputzQAEvalChain._prepare_output   1    1&2IJf%+G_M'"r2   F	referenceinputr]   include_run_infoc               B     | |||d||      }| j                  |      S )a  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): the LLM or chain prediction to evaluate.
            reference (Optional[str], optional): the reference label
                to evaluate against.
            input (Optional[str], optional): the input to consider during evaluation
            callbacks (Callbacks, optional): the callbacks to use for tracing.
            include_run_info (bool, optional): whether to include run info in the
                returned results.
            **kwargs: additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        r`   r]   rx   rs   rD   
predictionrv   rw   r]   rx   rX   rO   s           r*   _evaluate_stringszQAEvalChain._evaluate_strings   s7    2 #$
  -
 ##F++r2   c               p   K   | j                  |||d||       d {   }| j                  |      S 7 w)Nr`   rk   r]   rx   acallrs   r|   s           r*   _aevaluate_stringszQAEvalChain._aevaluate_strings   sH      zz"i:N- " 
 

 ##F++
   646returnboolr   r!   ro   )rS   r   rT   Optional[PromptTemplate]rX   r   r   r4   r`   )rd   Sequence[dict]re   r   rf   r!   rg   r!   rh   r!   r]   r   r   
list[dict]rO   dictr   r   r}   r!   rv   Optional[str]rw   r   r]   r   rx   r   rX   r   r   r   )__name__
__module____qualname____doc__r6   __annotations__r
   model_configclassmethodr?   propertyrE   rH   rK   r[   rl   rs   r   r~   r   r<   r2   r*   r4   r4   I   s   6JL          ,055 )5 	5
 
5 5F $"&7  $7 7 $7 	7
 7 7 7 
7, 
 $(##!&!, !, !	!,
 !, !, !, !, 
!, !,F 
 $(##!&, , !	,
 , , , , 
, ,r2   r4   c                  H   e Zd ZdZedd       Zedd       Zedd       Z e	d      Z
edd       Zedd       Ze	 d	 	 	 	 	 	 	 dd
       Z	 	 	 dd	d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZed	d	d	dd	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zed	d	d	dd	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zy	)ContextQAEvalChainz3LLM Chain for evaluating QA w/o GT based on contextc                     yr;   r<   r=   s    r*   r?   z%ContextQAEvalChain.is_lc_serializable   r@   r2   c                     y)z.Whether the chain requires a reference string.Tr<   rC   s    r*   rH   z%ContextQAEvalChain.requires_reference        r2   c                     y)z+Whether the chain requires an input string.Tr<   rC   s    r*   rK   z!ContextQAEvalChain.requires_input   r   r2   r7   r8   c                v    h d}|t        |j                        k7  rd| d|j                   }t        |      y )N>   rM   rO   contextrP   rQ   )rU   rV   rW   )r>   rT   rY   rZ   s       r*   _validate_input_varsz'ContextQAEvalChain._validate_input_vars   sQ    <#f&<&<"==,-@,A B!1124  S/! >r2   c                     y)NzContextual Accuracyr<   rC   s    r*   rE   z"ContextQAEvalChain.evaluation_name   s    $r2   Nc                N    |xs t         }| j                  |        | d||d|S )a  Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables:
            'query', 'context' and 'result' that will be used as the prompt
            for evaluation.
            Defaults to PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            ContextQAEvalChain: the loaded QA eval chain.
        rR   r<   )r   r   r>   rS   rT   rX   s       r*   r[   zContextQAEvalChain.from_llm   s1    , )>  (4s64V44r2   r\   c                   t        |      D cg c]  \  }}||   ||   ||   |   d }	}}| j                  |	|      S c c}}w )r_   rM   r   rO   r\   ra   )
rD   rd   re   rf   context_keyrh   r]   ri   rj   rk   s
             r*   rl   zContextQAEvalChain.evaluate  sd    " (1
 7	 !.";/%a.8
 
 zz&Iz66
rm   c                f    t        || j                           }t        |v r|t           |t        <   |S ro   rp   rq   s      r*   rs   z"ContextQAEvalChain._prepare_output(  rt   r2   Fru   c               B     | |||d||      }| j                  |      S )Nr   rz   r{   r|   s           r*   r~   z$ContextQAEvalChain._evaluate_strings.  s7     $$
  -
 ##F++r2   c               p   K   | j                  |||d||       d {   }| j                  |      S 7 w)Nr   r   r   r|   s           r*   r   z%ContextQAEvalChain._aevaluate_stringsD  sH      zz"yJO- " 
 

 ##F++
r   r   )rT   r	   r   Noner   ro   )rS   r   rT   r   rX   r   r   r   r   )rd   r   re   r   rf   r!   r   r!   rh   r!   r]   r   r   r   r   r   )r   r   r   r   r   r?   r   rH   rK   r
   r   r   rE   r[   rl   rs   r   r~   r   r<   r2   r*   r   r      s   =      L " " % %  ,055 )5 	5
 
5 5: $$&7  $77  7 	7
 7 7 7 
7, 
 $(##!&, , !	,
 , , , , 
, ,* 
 $(##!&, , !	,
 , , , , 
, ,r2   r   c                  X    e Zd ZdZedd       Zedd       Ze	 d	 	 	 	 	 	 	 d	d       Zy)
CotQAEvalChainz=LLM Chain for evaluating QA using chain of thought reasoning.c                     yr;   r<   r=   s    r*   r?   z!CotQAEvalChain.is_lc_serializableZ  r@   r2   c                     y)NzCOT Contextual Accuracyr<   rC   s    r*   rE   zCotQAEvalChain.evaluation_name^  s    (r2   Nc                N    |xs t         }| j                  |        | d||d|S )zLoad QA Eval Chain from LLM.rR   r<   )r   r   r   s       r*   r[   zCotQAEvalChain.from_llmb  s1     %:  (4s64V44r2   r   r   ro   )rS   r   rT   r   rX   r   r   r   )	r   r   r   r   r   r?   r   rE   r[   r<   r2   r*   r   r   W  sj    G  ) )  ,0	5	5 )	5 		5
 
	5 	5r2   r   )r&   r!   r   zOptional[tuple[str, int]])r&   r!   r   r   )$r   
__future__r   r   r#   collections.abcr   typingr   r   langchain_core.callbacksr   langchain_core.language_modelsr   langchain_core.promptsr	   pydanticr
   typing_extensionsr   langchain.chains.llmr   #langchain.evaluation.qa.eval_promptr   r   r   langchain.evaluation.schemar   r   langchain.schemar   r+   r1   r4   r   r   r<   r2   r*   <module>r      sr    3 " 	  $   . < 1  & ) R R E $:,I,(O\ I,X,?L ,D5' 5r2   