
    ghG                    d   d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
 ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ  ej        e          Z G d dee
          Z G d de          Z G d d          Z G d dee          Z  G d dee          Z! G d dee          Z"dS )z3Interfaces to be implemented by general evaluators.    )annotationsN)ABCabstractmethod)Sequence)Enum)AnyOptionalUnion)warn)AgentAction)BaseLanguageModel)run_in_executor)Chainc                      e Zd ZdZdZ	 dZ	 dZ	 dZ	 dZ	 dZ		 dZ
	 d	Z	 d
Z	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZdS )EvaluatorTypezThe types of the evaluators.qacot_qa
context_qapairwise_stringscore_stringlabeled_pairwise_stringlabeled_score_string
trajectorycriterialabeled_criteriastring_distanceexact_matchregex_matchpairwise_string_distanceembedding_distancepairwise_embedding_distancejson_validityjson_equalityjson_edit_distancejson_schema_validationN)__name__
__module____qualname____doc__QACOT_QA
CONTEXT_QAPAIRWISE_STRINGSCORE_STRINGLABELED_PAIRWISE_STRINGLABELED_SCORE_STRINGAGENT_TRAJECTORYCRITERIALABELED_CRITERIASTRING_DISTANCEEXACT_MATCHREGEX_MATCHPAIRWISE_STRING_DISTANCEEMBEDDING_DISTANCEPAIRWISE_EMBEDDING_DISTANCEJSON_VALIDITYJSON_EQUALITYJSON_EDIT_DISTANCEJSON_SCHEMA_VALIDATION     X/var/www/FlaskApp/flask-venv/lib/python3.11/site-packages/langchain/evaluation/schema.pyr   r      s        &&	BF% JS'O!L7H1@#VH<)7'OPKIKN9=-M"?;#M.#M=-T5IIr?   r   c                  :    e Zd ZdZeed	d                        ZdS )
LLMEvalChainz,A base class for evaluators that use an LLM.llmr   kwargsr   returnc                    dS )z#Create a new evaluator from an LLM.Nr>   )clsrC   rD   s      r@   from_llmzLLMEvalChain.from_llmN         r?   N)rC   r   rD   r   rE   rB   )r&   r'   r(   r)   classmethodr   rH   r>   r?   r@   rB   rB   K   sB        662 2 2 ^ [2 2 2r?   rB   c                      e Zd ZdZedd            Zedd            Zedd            Zedd            Z	 	 dddZ	d	S )_EvalArgsMixinz(Mixin for checking evaluation arguments.rE   boolc                    dS z2Whether this evaluator requires a reference label.Fr>   selfs    r@   requires_referencez!_EvalArgsMixin.requires_referenceW   	     ur?   c                    dS )0Whether this evaluator requires an input string.Fr>   rP   s    r@   requires_inputz_EvalArgsMixin.requires_input\   rS   r?   strc                "    d| j         j         dS )z&Warning to show when input is ignored.zIgnoring input in , as it is not expected.	__class__r&   rP   s    r@   _skip_input_warningz"_EvalArgsMixin._skip_input_warninga   s     VDN$;UUUUr?   c                "    d| j         j         dS )z*Warning to show when reference is ignored.zIgnoring reference in rY   rZ   rP   s    r@   _skip_reference_warningz&_EvalArgsMixin._skip_reference_warningf   s     WT^%<VVV	
r?   N	referenceOptional[str]inputNonec                   | j         r|t          | j        j         d          || j         st	          | j                   | j        r|t          | j        j         d          || j        st	          | j                   dS dS dS )a  Check if the evaluation arguments are valid.

        Args:
            reference (Optional[str], optional): The reference label.
            input (Optional[str], optional): The input string.
        Raises:
            ValueError: If the evaluator requires an input string but none is provided,
                or if the evaluator requires a reference label but none is provided.
        Nz requires an input string.z requires a reference string.)rV   
ValueErrorr[   r&   r   r\   rR   r^   )rQ   r_   ra   s      r@   _check_evaluation_argsz%_EvalArgsMixin._check_evaluation_argsm   s      	+5= 7SSSTTTt':)***" 	/y'8 7VVVWWW"4+B"-..... #"""r?   rE   rM   rE   rW   )NN)r_   r`   ra   r`   rE   rb   )
r&   r'   r(   r)   propertyrR   rV   r\   r^   re   r>   r?   r@   rL   rL   T   s        22   X    X V V V XV 
 
 
 X
 $(#/ / / / / / /r?   rL   c                      e Zd ZdZedd            Zedd            Zeddddd            ZdddddZ	dddddZ
dddddZdS )StringEvaluatorzcGrade, tag, or otherwise evaluate predictions relative to their inputs
    and/or reference labels.rE   rW   c                    | j         j        S )zThe name of the evaluation.rZ   rP   s    r@   evaluation_namezStringEvaluator.evaluation_name   s     ~&&r?   rM   c                    dS rO   r>   rP   s    r@   rR   z"StringEvaluator.requires_reference   rS   r?   Nr_   ra   
predictionUnion[str, Any]r_   Optional[Union[str, Any]]ra   rD   r   dictc                   dS )a:  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr>   rQ   ro   r_   ra   rD   s        r@   _evaluate_stringsz!StringEvaluator._evaluate_strings   rI   r?   c               @   K   t          d| j        f|||d| d{V S )aI  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nro   r_   ra   )r   ru   rt   s        r@   _aevaluate_stringsz"StringEvaluator._aevaluate_strings   s`      , %"
 "
 
 
 
 
 
 
 
 
 
 	
r?   r`   c               R    |                      ||            | j        d|||d|S )a  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        rn   rw   r>   )re   ru   rt   s        r@   evaluate_stringsz StringEvaluator.evaluate_strings   sM    $ 	##iu#EEE%t% 
!Ye
 
GM
 
 	
r?   c               b   K   |                      ||            | j        d|||d| d{V S )a	  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        rn   rw   Nr>   )re   rx   rt   s        r@   aevaluate_stringsz!StringEvaluator.aevaluate_strings   so      $ 	##iu#EEE,T, 
!Ye
 
GM
 
 
 
 
 
 
 
 	
r?   rg   rf   )
ro   rp   r_   rq   ra   rq   rD   r   rE   rr   )
ro   rW   r_   r`   ra   r`   rD   r   rE   rr   )r&   r'   r(   r)   rh   rl   rR   r   ru   rx   rz   r|   r>   r?   r@   rj   rj      s           ' ' ' X'    X 
 04+/     ^6 04+/
 
 
 
 
 
F $(#
 
 
 
 
 
6 $(#
 
 
 
 
 
 
 
r?   rj   c                  b    e Zd ZdZeddddd            ZdddddZdddddZdddddZdS )PairwiseStringEvaluatorzDCompare the output of two models (or two outputs of the same model).Nrn   ro   rW   prediction_br_   r`   ra   rD   r   rE   rr   c                   dS )1  Evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr>   rQ   ro   r   r_   ra   rD   s         r@   _evaluate_string_pairsz.PairwiseStringEvaluator._evaluate_string_pairs   rI   r?   c               B   K   t          d| j        f||||d| d{V S )@  Asynchronously evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nro   r   r_   ra   )r   r   r   s         r@   _aevaluate_string_pairsz/PairwiseStringEvaluator._aevaluate_string_pairs  sc      ( %'
 "%
 
 
 
 
 
 
 
 
 
 	
r?   c               T    |                      ||            | j        d||||d|S )r   rn   r   r>   )re   r   r   s         r@   evaluate_string_pairsz-PairwiseStringEvaluator.evaluate_string_pairs0  sT    ( 	##iu#EEE*t* 
!%	
 

 
 
 	
r?   c               d   K   |                      ||            | j        d||||d| d{V S )r   rn   r   Nr>   )re   r   r   s         r@   aevaluate_string_pairsz.PairwiseStringEvaluator.aevaluate_string_pairsM  sv      ( 	##iu#EEE1T1 
!%	
 

 
 
 
 
 
 
 
 
 	
r?   )ro   rW   r   rW   r_   r`   ra   r`   rD   r   rE   rr   )	r&   r'   r(   r)   r   r   r   r   r   r>   r?   r@   r~   r~      s        NN $(#     ^4 $(#
 
 
 
 
 
F $(#
 
 
 
 
 
D $(#
 
 
 
 
 
 
 
r?   r~   c                  r    e Zd ZdZedd            Zedddd            ZddddZddddZ	ddddZ
dS )AgentTrajectoryEvaluatorz,Interface for evaluating agent trajectories.rE   rM   c                    dS )rU   Tr>   rP   s    r@   rV   z'AgentTrajectoryEvaluator.requires_inputn  s	     tr?   N)r_   ro   rW   agent_trajectory!Sequence[tuple[AgentAction, str]]ra   r_   r`   rD   r   rr   c                   dS )  Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        Nr>   rQ   ro   r   ra   r_   rD   s         r@   _evaluate_agent_trajectoryz3AgentTrajectoryEvaluator._evaluate_agent_trajectorys  rI   r?   c               B   K   t          d| j        f||||d| d{V S )  Asynchronously evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        N)ro   r   r_   ra   )r   r   r   s         r@   _aevaluate_agent_trajectoryz4AgentTrajectoryEvaluator._aevaluate_agent_trajectory  sc      * %+
 "-
 
 
 
 
 
 
 
 
 
 	
r?   c               T    |                      ||            | j        d||||d|S )r   rn   ro   ra   r   r_   r>   )re   r   r   s         r@   evaluate_agent_trajectoryz2AgentTrajectoryEvaluator.evaluate_agent_trajectory  sT    * 	##iu#EEE.t. 
!-	
 

 
 
 	
r?   c               d   K   |                      ||            | j        d||||d| d{V S )r   rn   r   Nr>   )re   r   r   s         r@   aevaluate_agent_trajectoryz3AgentTrajectoryEvaluator.aevaluate_agent_trajectory  sv      * 	##iu#EEE5T5 
!-	
 

 
 
 
 
 
 
 
 
 	
r?   rf   )ro   rW   r   r   ra   rW   r_   r`   rD   r   rE   rr   )r&   r'   r(   r)   rh   rV   r   r   r   r   r   r>   r?   r@   r   r   k  s        66   X  $(     ^8 $(
 
 
 
 
 
J $(
 
 
 
 
 
H $(
 
 
 
 
 
 
 
r?   r   )#r)   
__future__r   loggingabcr   r   collections.abcr   enumr   typingr   r	   r
   warningsr   langchain_core.agentsr   langchain_core.language_modelsr   langchain_core.runnables.configr   langchain.chains.baser   	getLoggerr&   loggerrW   r   rB   rL   rj   r~   r   r>   r?   r@   <module>r      s   9 9 " " " " " "  # # # # # # # # $ $ $ $ $ $       ' ' ' ' ' ' ' ' ' '       - - - - - - < < < < < < ; ; ; ; ; ; ' ' ' ' ' '		8	$	$3J 3J 3J 3J 3JC 3J 3J 3Jl2 2 2 2 25 2 2 2./ ./ ./ ./ ./ ./ ./ ./bq
 q
 q
 q
 q
nc q
 q
 q
ho
 o
 o
 o
 o
nc o
 o
 o
dx
 x
 x
 x
 x
~s x
 x
 x
 x
 x
r?   