
from typing import TYPE_CHECKING, Any, Dict, Generator, List, Mapping, Optional, Union

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM

if TYPE_CHECKING:
    from xinference.client import RESTfulChatModelHandle, RESTfulGenerateModelHandle
    from xinference.model.llm.core import LlamaCppGenerateConfig


class Xinference(LLM):
    """`Xinference` large-scale model inference service.

    To use, you should have the xinference library installed:

    .. code-block:: bash

       pip install "xinference[all]"

    If you are only using the services provided by Xinference, you can use the lighter xinference_client package instead:

    .. code-block:: bash

        pip install xinference_client

    Check out: https://github.com/xorbitsai/inference
    To run, you need to start a Xinference supervisor on one server and Xinference workers on the other servers.

    Example:
        To start a local instance of Xinference, run

        .. code-block:: bash

           $ xinference

        You can also deploy Xinference in a distributed cluster. Here are the steps:

        Starting the supervisor:

        .. code-block:: bash

           $ xinference-supervisor

        Starting the worker:

        .. code-block:: bash

           $ xinference-worker

    Then, launch a model using the command-line interface (CLI).

    Example:

    .. code-block:: bash

       $ xinference launch -n orca -s 3 -q q4_0

    It will return a model UID. Then, you can use Xinference with LangChain.

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace model_uid with the model UID returned from launching the model
        )

        llm.invoke(
            "Q: where can we visit in the capital of France? A:",
            generate_config={"max_tokens": 1024, "stream": True},
        )
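
    When stream is set in generate_config, generated tokens are also forwarded to any
    registered callback handlers while the complete string is still returned at the
    end. A minimal sketch, assuming StreamingStdOutCallbackHandler from
    langchain_core.callbacks is a suitable handler for your setup:

    .. code-block:: python

        from langchain_core.callbacks import StreamingStdOutCallbackHandler
        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace model_uid with your model UID
            callbacks=[StreamingStdOutCallbackHandler()],
        )

        # Tokens are printed to stdout as they arrive; invoke() still returns
        # the complete generated string once streaming finishes.
        llm.invoke(
            "Q: where can we visit in the capital of France? A:",
            generate_config={"max_tokens": 1024, "stream": True},
        )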

    To view all the supported builtin models, run:

    .. code-block:: bash

        $ xinference list --all
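
    Any extra keyword arguments passed to the Xinference constructor are stored as
    model_kwargs and merged into generate_config on every call, so they act as
    per-model defaults. A small sketch under that assumption (temperature is only an
    illustrative option; valid keys depend on the launched model):

    .. code-block:: python

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid={model_uid},  # replace model_uid with your model UID
            temperature=0.7,  # kept in model_kwargs and applied to each call
        )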

    """

    client: Any
    server_url: Optional[str]
    """URL of the xinference server"""
    model_uid: Optional[str]
    """UID of the launched model"""
    model_kwargs: Dict[str, Any]
    """Keyword arguments to be passed to xinference.LLM"""

    def __init__(
        self,
        server_url: Optional[str] = None,
        model_uid: Optional[str] = None,
        **model_kwargs: Any,
    ):
        try:
            from xinference.client import RESTfulClient
        except ImportError:
            try:
                from xinference_client import RESTfulClient
            except ImportError as e:
                raise ImportError(
                    "Could not import RESTfulClient from xinference. Please install it"
                    " with `pip install xinference` or `pip install xinference_client`."
                ) from e

        model_kwargs = model_kwargs or {}

        super().__init__(
            **{
                "server_url": server_url,
                "model_uid": model_uid,
                "model_kwargs": model_kwargs,
            }
        )

        if self.server_url is None:
            raise ValueError("Please provide server URL")

        if self.model_uid is None:
            raise ValueError("Please provide the model UID")

        self.client = RESTfulClient(server_url)

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "xinference"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            **{"server_url": self.server_url},
            **{"model_uid": self.model_uid},
            **{"model_kwargs": self.model_kwargs},
        }

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call the xinference model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Returns:
            The generated string by the model.
        """
        model = self.client.get_model(self.model_uid)

        generate_config: "LlamaCppGenerateConfig" = kwargs.get("generate_config", {})

        # Constructor-level model_kwargs act as defaults; per-call values win.
        generate_config = {**self.model_kwargs, **generate_config}

        if stop:
            generate_config["stop"] = stop

        if generate_config and generate_config.get("stream"):
            # Streaming: accumulate tokens while forwarding them to callbacks.
            combined_text_output = ""
            for token in self._stream_generate(
                model=model,
                prompt=prompt,
                run_manager=run_manager,
                generate_config=generate_config,
            ):
                combined_text_output += token
            return combined_text_output
        else:
            completion = model.generate(prompt=prompt, generate_config=generate_config)
            return completion["choices"][0]["text"]

    def _stream_generate(
        self,
        model: Union["RESTfulGenerateModelHandle", "RESTfulChatModelHandle"],
        prompt: str,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        generate_config: Optional["LlamaCppGenerateConfig"] = None,
    ) -> Generator[str, None, None]:
        """
        Args:
            prompt: The prompt to use for generation.
            model: The model used for generation.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Yields:
            A string token.
        """
        streaming_response = model.generate(
            prompt=prompt, generate_config=generate_config
        )
        for chunk in streaming_response:
            if isinstance(chunk, dict):
                choices = chunk.get("choices", [])
                if choices:
                    choice = choices[0]
                    if isinstance(choice, dict):
                        token = choice.get("text", "")
                        log_probs = choice.get("logprobs")
                        if run_manager:
                            # Notify callback handlers of each new token.
                            run_manager.on_llm_new_token(
                                token=token, verbose=self.verbose, log_probs=log_probs
                            )
                        yield token