
    ghe6                     (   d Z ddlZddlmZ ddlmZmZmZmZm	Z	 ddl
mZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$  G d de          Z% G d de          Z& G d de#e$          Z'dS )zA chain for evaluating ReAct style agents.

This chain is used to evaluate ReAct style agents by reasoning about
the sequence of actions taken and their outcomes. It uses a language model
chain (LLMChain) to generate the reasoning and scores.
    N)Sequence)AnyOptional	TypedDictUnioncast)AgentAction)AsyncCallbackManagerForChainRunCallbackManagerForChainRun	Callbacks)OutputParserException)BaseLanguageModel)BaseChatModel)BaseOutputParser)BaseTool)
ConfigDictField)LLMChain)EVAL_CHAT_PROMPTTOOL_FREE_EVAL_CHAT_PROMPT)AgentTrajectoryEvaluatorLLMEvalChainc                   *    e Zd ZU dZeed<   	 eed<   dS )TrajectoryEvalzBA named tuple containing the score and reasoning for a trajectory.score	reasoningN)__name__
__module____qualname____doc__float__annotations__str     n/var/www/FlaskApp/flask-venv/lib/python3.11/site-packages/langchain/evaluation/agents/trajectory_eval_chain.pyr   r   '   s-         LLLLL?NNN&&r%   r   c                   >    e Zd ZdZedefd            ZdedefdZdS )TrajectoryOutputParserzTrajectory output parser.returnc                     dS )Nagent_trajectoryr$   selfs    r&   _typezTrajectoryOutputParser._type3   s    !!r%   textc                    d|vrt          d|           |                    dd          \  }}|                                |                                }}t          j        d|          }|d|                    d          v rt          d	|           t          |                    d                    }d|cxk    rd
k    sn t          d|           |dz
  dz  }t          ||          S )a  Parse the output text and extract the score and reasoning.

        Args:
            text (str): The output text to parse.

        Returns:
            TrajectoryEval: A named tuple containing the normalized score and reasoning.

        Raises:
            OutputParserException: If the score is not found in the output text or
                if the LLM's score is not a digit in the range 1-5.
        zScore:z+Could not find score in model eval output: zScore:    )maxsplitz(\d+(\.\d+)?)N.z0Score is not an integer digit in the range 1-5:    z'Score is not a digit in the range 1-5:    )r   r   )r   splitstripresearchgroupintr   )r-   r/   r   	score_str_scorer   normalized_scores          r&   parsezTrajectoryOutputParser.parse7   s     4'DdDD    $zz)az@@	9(00)//2C2C9	 +Y77>SFLLOO33'I4II   FLLOO$$EQ'@$@@   "AI?$4	JJJJr%   N)	r   r   r   r    propertyr#   r.   r   r?   r$   r%   r&   r(   r(   0   si        ##"s " " " X"(K# (K. (K (K (K (K (K (Kr%   r(   c                       e Zd ZU dZdZeee                  ed<   	 e	ed<   	  e
e          Zeed<   	 dZeed<   	  ed	
          Zedefd            Zedefd            Zedeeeeeef                  f         defd            Zedee         defd            Ze	 	 d&dedeee                  dee         dedd f
d            Zedee         fd            Z edee         fd            Z!dee"eef         ef         de"eef         f fdZ#	 d'de"eef         dee$         de"eef         fdZ%	 d'de"eef         dee&         de"eef         fdZ'dddddddededeeeef                  dee         d e(d!eee                  d"ee"eef                  d#edede"fd$Z)dddddddededeeeef                  dee         d e(d!eee                  d"ee"eef                  d#edede"fd%Z* xZ+S )(TrajectoryEvalChaina-  A chain for evaluating ReAct style agents.

    This chain is used to evaluate ReAct style agents by reasoning about
    the sequence of actions taken and their outcomes.
    Based on the paper "ReAct: Synergizing Reasoning and Acting in Language Models"
    (https://arxiv.org/abs/2210.03629)

    Example:

    .. code-block:: python

        from langchain.agents import AgentType, initialize_agent
        from langchain_community.chat_models import ChatOpenAI
        from langchain.evaluation import TrajectoryEvalChain
        from langchain.tools import tool

        @tool
        def geography_answers(country: str, question: str) -> str:
            """Very helpful answers to geography questions."""
            return f"{country}? IDK - We may never know {question}."

        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
        agent = initialize_agent(
            tools=[geography_answers],
            llm=llm,
            agent=AgentType.OPENAI_FUNCTIONS,
            return_intermediate_steps=True,
        )

        question = "How many dwell in the largest minor region in Argentina?"
        response = agent(question)

        eval_chain = TrajectoryEvalChain.from_llm(
            llm=llm, agent_tools=[geography_answers], return_reasoning=True
        )

        result = eval_chain.evaluate_agent_trajectory(
            input=question,
            agent_trajectory=response["intermediate_steps"],
            prediction=response["output"],
            reference="Paris",
        )
        print(result["score"])  # noqa: T201
        # 0
    Nagent_tools
eval_chain)default_factoryoutput_parserFreturn_reasoningignore)extrar)   c                     dS )z2Whether this evaluator requires a reference label.Fr$   r,   s    r&   requires_referencez&TrajectoryEvalChain.requires_reference   s	     ur%   c                 x    | j         dS d                    d t          | j         d          D                       S )zwGet the description of the agent tools.

        Returns:
            str: The description of the agent tools.
        N 

c                 >    g | ]\  }}d | d|j          d|j         S )zTool z: z
Description: )namedescription).0itools      r&   
<listcomp>z:TrajectoryEvalChain._tools_description.<locals>.<listcomp>   sX        At#! # #ty # ## #  r%   r1   )rC   join	enumerater,   s    r&   _tools_descriptionz&TrajectoryEvalChain._tools_description   sS     #2{{   ))91==  
 
 	
r%   stepsc                     t          | t                    r| S d                    d t          | d          D                       S )zGet the agent trajectory as a formatted string.

        Args:
            steps (Union[str, List[Tuple[AgentAction, str]]]): The agent trajectory.

        Returns:
            str: The formatted agent trajectory.
        rN   c           
      J    g | ] \  }\  }}d | d|j          d|j         d| !S )zStep z:
Tool used: z
Tool input: z
Tool output: )rT   
tool_input)rR   rS   actionoutputs       r&   rU   z<TrajectoryEvalChain.get_agent_trajectory.<locals>.<listcomp>   sm       
 (A'	!  K      r%   r1   )
isinstancer#   rV   rW   )rY   s    r&   get_agent_trajectoryz(TrajectoryEvalChain.get_agent_trajectory   sY     eS!! 	L{{ 
 ,5UA+>+>  
 
 	
r%   	referencec                     | sdS d|  dS )zFormat the reference text.

        Args:
            reference (str): The reference text.

        Returns:
            str: The formatted reference text.
        rM   zX

The following is the expected answer. Use this to measure correctness:
[GROUND_TRUTH]
z
[END_GROUND_TRUTH]
r$   )ra   s    r&   _format_referencez%TrajectoryEvalChain._format_reference   s/      	2 	   	r%   llmkwargsc                     t          |t                    st          d          |rt          }nt          }t          ||          } | d|||pt                      d|S )a  Create a TrajectoryEvalChain object from a language model chain.

        Args:
            llm (BaseChatModel): The language model chain.
            agent_tools (Optional[Sequence[BaseTool]]): A list of tools
                available to the agent.
            output_parser (Optional[TrajectoryOutputParser]): The output parser
                used to parse the chain output into a score.
        Returns:
            TrajectoryEvalChain: The TrajectoryEvalChain object.
        z9Only chat models supported by the current trajectory eval)rd   prompt)rC   rD   rF   r$   )r_   r   NotImplementedErrorr   r   r   r(   )clsrd   rC   rF   re   rg   rD   s          r&   from_llmzTrajectoryEvalChain.from_llm   s    & #}-- 	%K    	0%FF/F#f555
s 
#!'C+A+C+C
 
 	
 
 	
r%   c                 
    g dS )zcGet the input keys for the chain.

        Returns:
            List[str]: The input keys.
        questionr+   answerra   r$   r,   s    r&   
input_keyszTrajectoryEvalChain.input_keys  s     GFFFr%   c                 
    ddgS )zeGet the output keys for the chain.

        Returns:
            List[str]: The output keys.
        r   r   r$   r,   s    r&   output_keyszTrajectoryEvalChain.output_keys  s     %%r%   inputsc                     |                      |                    d                    |d<   t                                          |          S )zValidate and prep inputs.ra   )rc   getsuperprep_inputs)r-   rr   	__class__s     r&   rv   zTrajectoryEvalChain.prep_inputs  s?    "44VZZ5L5LMM{ww""6***r%   run_managerc                    i |}| j         r
| j        |d<   |pt          j                    }| j                            ||                                          }t          t          | j	        
                    |                    S )I  Run the chain and generate the output.

        Args:
            inputs (Dict[str, str]): The input values for the chain.
            run_manager (Optional[CallbackManagerForChainRun]): The callback
                manager for the chain run.

        Returns:
            Dict[str, Any]: The output values of the chain.
        tool_descriptions	callbacks)rC   rX   r   get_noop_managerrD   run	get_childr   dictrF   r?   r-   rr   rx   chain_input_run_manager
raw_outputs         r&   _callzTrajectoryEvalChain._call  s     !j 	G/3/FK+,"S&@&Q&S&S_((<#9#9#;#; ) 
 

 D$,22:>>???r%   c                   K   i |}| j         r
| j        |d<   |pt          j                    }| j                            ||                                           d{V }t          t          | j	        
                    |                    S )rz   r{   r|   N)rC   rX   r
   r~   rD   arunr   r   r   rF   r?   r   s         r&   _acallzTrajectoryEvalChain._acall5  s       !j 	G/3/FK+,"X&E&V&X&X?//<#9#9#;#; 0 
 
 
 
 
 
 
 

 D$,22:>>???r%   )ra   r}   tagsmetadatainclude_run_info
predictioninputr+   r}   r   r   r   c                l    ||                      |          ||d}
|                     |
||||d          S )a7  Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            input (str): The input to the agent.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            reference (Optional[str]): The reference answer.
            callbacks (Callbacks): Callbacks to use for this chain run.

        Returns:
            dict: The evaluation result, which includes the score and optionally
                the reasoning for reaching that.
        rl   Trr   r}   r   r   r   return_only_outputs)r`   __call__r-   r   r   r+   ra   r}   r   r   r   re   rr   s              r&   _evaluate_agent_trajectoryz.TrajectoryEvalChain._evaluate_agent_trajectoryM  sZ    8  $ 9 9:J K K "	
 
 }}- $  
 
 	
r%   c                |   K   ||                      |          ||d}
|                     |
||||d           d{V S )aF  Asynchronously evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            input (str): The input to the agent.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            reference (Optional[str]): The reference answer.
            callbacks (Callbacks): Callbacks to use for this chain run.

        Returns:
            dict: The evaluation result, which includes the score and optionally
                the reasoning for reaching that.
        rl   Tr   N)r`   acallr   s              r&   _aevaluate_agent_trajectoryz/TrajectoryEvalChain._aevaluate_agent_trajectoryw  s|      8  $ 9 9:J K K "	
 
 ZZ- $   
 
 
 
 
 
 
 
 	
r%   )NN)N),r   r   r   r    rC   r   listr   r"   r   r   r(   rF   rG   boolr   model_configr@   rK   r#   rX   staticmethodr   r   tupler	   r`   rc   classmethodr   r   rj   ro   rq   r   rv   r   r   r
   r   r   r   r   __classcell__)rw   s   @r&   rB   rB   b   s        , ,\ -1K$x.)00017,1E.- - -M)    6"d"""0:  L D    X 
C 
 
 
 X
  
S(5c)9#:;;<
	
 
 
 \
0 Xc] s    \&  59:>	 
  
 
 hx01 
   67	 

  
 
 
  
  
 [ 
D GDI G G G XG &T#Y & & & X&+%S#X(;"< +c3h + + + + + + =A@ @S#X@ 89@ 
c3h	@ @ @ @6 BF@ @S#X@ =>@ 
c3h	@ @ @ @< $(#$(-1!&(
 (
 (
 (
 	(

 #5c)9#:;(
 C=(
 (
 tCy!(
 4S>*(
 (
 (
 
(
 (
 (
 (
` $(#$(-1!&(
 (
 (
 (
 	(

 #5c)9#:;(
 C=(
 (
 tCy!(
 4S>*(
 (
 (
 
(
 (
 (
 (
 (
 (
 (
 (
r%   rB   )(r    r8   collections.abcr   typingr   r   r   r   r   langchain_core.agentsr	    langchain_core.callbacks.managerr
   r   r   langchain_core.exceptionsr   langchain_core.language_modelsr   *langchain_core.language_models.chat_modelsr   langchain_core.output_parsersr   langchain_core.toolsr   pydanticr   r   langchain.chains.llmr   2langchain.evaluation.agents.trajectory_eval_promptr   r   langchain.evaluation.schemar   r   r   r(   rB   r$   r%   r&   <module>r      s     
			 $ $ $ $ $ $              . - - - - -         
 < ; ; ; ; ; < < < < < < D D D D D D : : : : : : ) ) ) ) ) ) & & & & & & & & ) ) ) ) ) )        O N N N N N N N' ' ' ' 'Y ' ' '/K /K /K /K /K- /K /K /Kd}
 }
 }
 }
 }
2L }
 }
 }
 }
 }
r%   