
    gh                        d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ  ej        e          Z G d dee
          ZdS )zDHypothetical Document Embeddings.

https://arxiv.org/abs/2212.10496
    )annotationsN)AnyOptional)CallbackManagerForChainRun)
Embeddings)BaseLanguageModel)StrOutputParser)BasePromptTemplate)Runnable)
ConfigDict)Chain)
PROMPT_MAP)LLMChainc                      e Zd ZU dZded<   ded<    edd          Zed'd            Zed'd            Z	d(dZ
d)dZd*dZ	 d+d,dZe	 	 d-d.d%            Zed/d&            ZdS )0HypotheticalDocumentEmbedderzrGenerate hypothetical document for query, and then embed that.

    Based on https://arxiv.org/abs/2212.10496
    r   base_embeddingsr   	llm_chainTforbid)arbitrary_types_allowedextrareturn	list[str]c                J    | j         j                                        d         S )z Input keys for Hyde's LLM chain.required)r   input_schemamodel_json_schemaselfs    W/var/www/FlaskApp/flask-venv/lib/python3.11/site-packages/langchain/chains/hyde/base.py
input_keysz'HypotheticalDocumentEmbedder.input_keys(   s      ~*<<>>zJJ    c                T    t          | j        t                    r| j        j        S dgS )z!Output keys for Hyde's LLM chain.text)
isinstancer   r   output_keysr   s    r   r%   z(HypotheticalDocumentEmbedder.output_keys-   s*     dnh// 	>--8Or!   textslist[list[float]]c                6    | j                             |          S )zCall the base embeddings.)r   embed_documents)r   r&   s     r   r)   z,HypotheticalDocumentEmbedder.embed_documents5   s    #33E:::r!   
embeddingslist[float]c                "   	 ddl }t          |                    |                              d                    S # t          $ rG t
                              d           |sg cY S t          |          fdt          | D             cY S w xY w)z)Combine embeddings into final embeddings.r   N)axisa*  NumPy not found in the current Python environment. HypotheticalDocumentEmbedder will use a pure Python implementation for internal calculations, which may significantly impact performance, especially for large datasets. For optimal speed and efficiency, consider installing NumPy: pip install numpyc                4    g | ]}t          |          z  S  )sum).0
dim_valuesnum_vectorss     r   
<listcomp>zCHypotheticalDocumentEmbedder.combine_embeddings.<locals>.<listcomp>J   s$    UUUjC
OOk1UUUr!   )	numpylistarraymeanImportErrorloggerwarninglenzip)r   r*   npr3   s      @r   combine_embeddingsz/HypotheticalDocumentEmbedder.combine_embeddings9   s    	V,,11q199::: 	V 	V 	VNNK    			j//KUUUUCDTUUUUUU	Vs   9= (B'$BBr#   strc                   | j         d         }| j                            ||i          }t          | j        t                    r|| j        d                  g}n|g}|                     |          }|                     |          S )z1Generate a hypothetical document and embedded it.r   )r    r   invoker$   r   r%   r)   r?   )r   r#   var_nameresult	documentsr*   s         r   embed_queryz(HypotheticalDocumentEmbedder.embed_queryL   s    ?1%&&$'788dnh// 	! 0 345III)))44
&&z222r!   Ninputsdict[str, Any]run_manager$Optional[CallbackManagerForChainRun]dict[str, str]c                    |pt          j                    }| j                            |d|                                i          S )zCall the internal llm chain.	callbacks)config)r   get_noop_managerr   rB   	get_child)r   rG   rI   _run_managers       r   _callz"HypotheticalDocumentEmbedder._callW   sM     #S&@&Q&S&S~$$K)?)?)A)AB % 
 
 	
r!   llmr   
prompt_keyOptional[str]custom_promptOptional[BasePromptTemplate]kwargsr   c                    ||}nJ||t           v rt           |         }n1t          dt          t          j                               d          ||z  t	                      z  } | d||d|S )zILoad and use LLMChain with either a specific prompt key or custom prompt.NzHMust specify prompt_key if custom_prompt not provided. Should be one of .)r   r   r/   )r   
ValueErrorr6   keysr	   )clsrS   r   rT   rV   rX   promptr   s           r   from_llmz%HypotheticalDocumentEmbedder.from_llmb   s     $"FF#
j(@(@
+FF1:?,,--1 1 1  
 SL?#4#44	sR?iRR6RRRr!   c                    dS )N
hyde_chainr/   r   s    r   _chain_typez(HypotheticalDocumentEmbedder._chain_typey   s    |r!   )r   r   )r&   r   r   r'   )r*   r'   r   r+   )r#   r@   r   r+   )N)rG   rH   rI   rJ   r   rK   )NN)rS   r   r   r   rT   rU   rV   rW   rX   r   r   r   )r   r@   )__name__
__module____qualname____doc____annotations__r   model_configpropertyr    r%   r)   r?   rF   rR   classmethodr_   rb   r/   r!   r   r   r      sK         
  : $  L
 K K K XK    X; ; ; ;V V V V&	3 	3 	3 	3 =A	
 	
 	
 	
 	
 
 %)6:S S S S [S,    X  r!   r   )rf   
__future__r   loggingtypingr   r   langchain_core.callbacksr   langchain_core.embeddingsr   langchain_core.language_modelsr   langchain_core.output_parsersr	   langchain_core.promptsr
   langchain_core.runnablesr   pydanticr   langchain.chains.baser   langchain.chains.hyde.promptsr   langchain.chains.llmr   	getLoggerrc   r:   r   r/   r!   r   <module>ry      sJ   
 # " " " " "                  ? ? ? ? ? ? 0 0 0 0 0 0 < < < < < < 9 9 9 9 9 9 5 5 5 5 5 5 - - - - - -       ' ' ' ' ' ' 4 4 4 4 4 4 ) ) ) ) ) )		8	$	$a a a a a5* a a a a ar!   