from __future__ import annotations

import json
from typing import (
    TYPE_CHECKING,
    Any,
    AsyncIterator,
    Dict,
    Generator,
    Iterator,
    List,
    Mapping,
    Optional,
    Union,
)

import aiohttp
import requests
from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk

if TYPE_CHECKING:
    from xinference.client import (
        RESTfulChatModelHandle,
        RESTfulGenerateModelHandle,
    )
    from xinference.model.llm.core import LlamaCppGenerateConfig


class Xinference(LLM):
    """`Xinference` large-scale model inference service.

    To use, you should have the xinference library installed:

    .. code-block:: bash

       pip install "xinference[all]"

    If you're simply using the services provided by Xinference, you can use the lighter xinference_client package instead:

    .. code-block:: bash

        pip install xinference_client

    Check out: https://github.com/xorbitsai/inference
    To run a Xinference cluster, you need to start a Xinference supervisor on one server and Xinference workers on the other servers.

    Example:
        To start a local instance of Xinference, run

        .. code-block:: bash

           $ xinference

        You can also deploy Xinference in a distributed cluster. Here are the steps:

        Starting the supervisor:

        .. code-block:: bash

           $ xinference-supervisor

        Starting the worker:

        .. code-block:: bash

           $ xinference-worker

    Then, launch a model using the command line interface (CLI).

    Example:

    .. code-block:: bash

       $ xinference launch -n orca -s 3 -q q4_0

    It will return a model UID. Then, you can use Xinference with LangChain.

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace with the model UID returned from launching the model
        )

        llm.invoke(
            prompt="Q: where can we visit in the capital of France? A:",
            generate_config={"max_tokens": 1024, "stream": True},
        )

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference
        from langchain.prompts import PromptTemplate

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace with the model UID returned from launching the model
            stream=True
        )
        prompt = PromptTemplate(
            input_variables=['country'],
            template="Q: Where can we visit in the capital of {country}? A:"
        )
        chain = prompt | llm
        chain.stream(input={'country': 'France'})

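    The wrapper also supports LangChain's async streaming interface via
    ``astream``. A minimal sketch (assuming the same server URL and model UID
    as above; the ``main`` coroutine is only an illustrative driver, and
    passing ``generate_config={"stream": True}`` selects the server's
    streaming completions path):

    .. code-block:: python

        import asyncio

        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace with the model UID returned from launching the model
        )

        async def main() -> None:
            async for token in llm.astream(
                "Q: Where can we visit in the capital of France? A:",
                generate_config={"stream": True, "max_tokens": 256},
            ):
                print(token, end="", flush=True)

        asyncio.run(main())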

    To view all the supported builtin models, run:

    .. code-block:: bash

        $ xinference list --all
    """

    client: Optional[Any] = None
    server_url: Optional[str]
    """URL of the xinference server"""
    model_uid: Optional[str]
    """UID of the launched model"""
    model_kwargs: Dict[str, Any]
    """Keyword arguments to be passed to xinference.LLM"""
    api_key: Optional[str] = None
    """API key for authenticating against a cluster with auth enabled"""

    def __init__(
        self,
        server_url: Optional[str] = None,
        model_uid: Optional[str] = None,
        api_key: Optional[str] = None,
        **model_kwargs: Any,
    ):
        try:
            from xinference.client import RESTfulClient
        except ImportError:
            try:
                from xinference_client import RESTfulClient
            except ImportError as e:
                raise ImportError(
                    "Could not import RESTfulClient from xinference. Please install it"
                    " with `pip install xinference` or `pip install xinference_client`."
                ) from e

        model_kwargs = model_kwargs or {}

        super().__init__(
            **{  # type: ignore[arg-type]
                "server_url": server_url,
                "model_uid": model_uid,
                "model_kwargs": model_kwargs,
            }
        )

        if self.server_url is None:
            raise ValueError("Please provide server URL")

        if self.model_uid is None:
            raise ValueError("Please provide the model UID")

        self._headers: Dict[str, str] = {}
        self._cluster_authed = False
        self._check_cluster_authenticated()
        if api_key is not None and self._cluster_authed:
            self._headers["Authorization"] = f"Bearer {api_key}"

        self.client = RESTfulClient(server_url, api_key)

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "xinference"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            **{"server_url": self.server_url},
            **{"model_uid": self.model_uid},
            **{"model_kwargs": self.model_kwargs},
        }

    def _check_cluster_authenticated(self) -> None:
        # Probe the server's auth endpoint. A 404 means the server predates
        # (or has disabled) cluster authentication.
        url = f"{self.server_url}/v1/cluster/auth"
        response = requests.get(url)
        if response.status_code == 404:
            self._cluster_authed = False
        else:
            if response.status_code != 200:
                raise RuntimeError(
                    f"Failed to get cluster information, "
                    f"detail: {response.json()['detail']}"
                )
            response_data = response.json()
            self._cluster_authed = bool(response_data["auth"])

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call the xinference model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Returns:
            The generated string by the model.
        """
        if self.client is None:
            raise ValueError("Client is not initialized!")
        model = self.client.get_model(self.model_uid)
        generate_config: "LlamaCppGenerateConfig" = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}
        if stop:
            generate_config["stop"] = stop

        if generate_config and generate_config.get("stream"):
            # Streaming was requested: concatenate the streamed tokens into
            # one string, since _call must return the full completion.
            combined_text_output = ""
            for token in self._stream_generate(
                model=model,
                prompt=prompt,
                run_manager=run_manager,
                generate_config=generate_config,
            ):
                combined_text_output += token
            return combined_text_output
        else:
            completion = model.generate(prompt=prompt, generate_config=generate_config)
            return completion["choices"][0]["text"]

    def _stream_generate(
        self,
        model: Union["RESTfulGenerateModelHandle", "RESTfulChatModelHandle"],
        prompt: str,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        generate_config: Optional["LlamaCppGenerateConfig"] = None,
    ) -> Generator[str, None, None]:
        """
        Args:
            prompt: The prompt to use for generation.
            model: The model used for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Yields:
            A string token.
        """
        streaming_response = model.generate(
            prompt=prompt, generate_config=generate_config
        )
        for chunk in streaming_response:
            if isinstance(chunk, dict):
                choices = chunk.get("choices", [])
                if choices:
                    choice = choices[0]
                    if isinstance(choice, dict):
                        token = choice.get("text", "")
                        log_probs = choice.get("logprobs")
                        if run_manager:
                            run_manager.on_llm_new_token(
                                token=token, verbose=self.verbose, log_probs=log_probs
                            )
                        yield token

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        generate_config = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}
        if stop:
            generate_config["stop"] = stop
        for stream_resp in self._create_generate_stream(prompt, generate_config):
            if stream_resp:
                chunk = self._stream_response_to_generation_chunk(stream_resp)
                if run_manager:
                    run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    def _create_generate_stream(
        self,
        prompt: str,
        generate_config: Optional[Dict[str, List[str]]] = None,
    ) -> Iterator[str]:
        if self.client is None:
            raise ValueError("Client is not initialized!")
        model = self.client.get_model(self.model_uid)
        yield from model.generate(prompt=prompt, generate_config=generate_config)

    @staticmethod
    def _stream_response_to_generation_chunk(
        stream_response: str,
    ) -> GenerationChunk:
        """Convert a stream response to a generation chunk."""
        token = ""
        if isinstance(stream_response, dict):
            choices = stream_response.get("choices", [])
            if choices:
                choice = choices[0]
                if isinstance(choice, dict):
                    token = choice.get("text", "")
                    return GenerationChunk(
                        text=token,
                        generation_info=dict(
                            finish_reason=choice.get("finish_reason", None),
                            logprobs=choice.get("logprobs", None),
                        ),
                    )
                else:
                    raise TypeError("choice type error!")
            else:
                return GenerationChunk(text=token)
        else:
            raise TypeError("stream_response type error!")

    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        generate_config = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}
        if stop:
            generate_config["stop"] = stop
        async for stream_resp in self._acreate_generate_stream(
            prompt, generate_config
        ):
            if stream_resp:
                chunk = self._stream_response_to_generation_chunk(stream_resp)
                if run_manager:
                    await run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    async def _acreate_generate_stream(
        self, prompt: str, generate_config: Optional[Dict[str, List[str]]] = None
    ) -> AsyncIterator[str]:
        request_body: Dict[str, Any] = {"model": self.model_uid, "prompt": prompt}
        if generate_config is not None:
            for key, value in generate_config.items():
                request_body[key] = value

        stream = bool(generate_config and generate_config.get("stream"))
        async with aiohttp.ClientSession() as session:
            async with session.post(
                url=f"{self.server_url}/v1/completions",
                json=request_body,
            ) as response:
                if response.status != 200:
                    if response.status == 404:
                        raise FileNotFoundError(
                            "astream call failed with status code 404."
                        )
                    else:
                        # Read the error body for a useful message.
                        optional_detail = await response.text()
                        raise ValueError(
                            f"astream call failed with status code "
                            f"{response.status}."
                            f" Details: {optional_detail}"
                        )

                async for line in response.content:
                    if not stream:
                        # Non-streaming responses are a single JSON body.
                        yield json.loads(line)
                    else:
                        # Streaming responses are server-sent events; each
                        # payload line is prefixed with "data:".
                        json_str = line.decode("utf-8")
                        if line.startswith(b"data:"):
                            json_str = json_str[len("data:") :].strip()
                            if not json_str:
                                continue
                            yield json.loads(json_str)
   r   r   r   r   r?   langchain_core.callbacksr   r   #langchain_core.language_models.llmsr   langchain_core.outputsr   r"   r   r   xinference.model.llm.corer   r   r!   r5   r.   <module>r      sk   " " " " " "                                  4 3 3 3 3 3 2 2 2 2 2 2 ATTTTTTTT@@@@@@j7 j7 j7 j7 j7 j7 j7 j7 j7 j7r5   