
    +#h(E                     j   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlZd dlZd dl	m
Z
mZ d dlmZmZ eZd Z G d de      Z e
j&                  dg	      d
        Z ej&                  dg	      d        Ze
j&                  d        Z G d de      Z e
j&                  dg	      d        Z ej&                  dg	      d        Ze
j&                  d        Ze
j&                  d        Z G d d      Z G d de      Z G d de      Z G d de      Z G d de      Z e
j&                  d        Z!y)    N)Literal)CacheMemoryNotebookCacheMemory)HFModelopenai_to_hfc                 :    t         dj                  di |        y)z.Handler from https://pypi.org/project/backoff/zbBacking off {wait:0.1f} seconds after {tries} tries calling function {target} with kwargs {kwargs}N )printformat)detailss    R/var/www/html/sandstorm/venv/lib/python3.12/site-packages/dsp/modules/hf_client.pybackoff_hdlrr      s&    		 	6	%#	%    c                   &     e Zd Zd fd	Zd Z xZS )HFClientTGIc           	          t         |   |d       || _        t        |t              r|n|g| _        |xs i | _        ddi| _        |||ddddd	d
gd|| _        y )NTmodel	is_clientContent-Typeapplication/json{Gz?K   
ףp=
?   


)r   porturltemperature
max_tokenstop_pnstop)	super__init__r   
isinstancelistportshttp_request_kwargsheaderskwargs)selfr   r   r   r*   r,   	__class__s         r   r&   zHFClientTGI.__init__   s{    u5'd3T$
#6#<" &(:; 6N

 

r   c                    i | j                   |}||d   dkD  |d   |d   dkD  d|d}t        di |d   |d<   t        d|d   d         |d   d<   t        | j                   dt        j                         j                  | j                         d	z   f| j                  t        | j                        || j                  d
| j                  }	 |j                         }|d   g}d|v r#d|d   v r||d   d   D cg c]  }|d   	 c}z  }||D cg c]  }d|i c}d}|S c c}w c c}w # t        $ r" t        d|j                         t        d      w xY w)Nr#   r   )	do_samplebest_ofr   )inputs
parametersr3   g?r    :z	/generate)r   r)   jsonr+   generated_textr   best_of_sequencestextpromptchoicesFailed to parse JSON response:*Received invalid JSON response from serverr	   )r,   r   maxsend_hftgi_request_v01_wrappedr   randomRandomchoicer)   tupler+   r*   r5   	Exceptionr
   r8   )	r-   r:   r,   payloadresponsejson_responsecompletionsxcs	            r   	_generatezHFClientTGI._generate1   s   *DKK*6* qc{c{Q 

 !- Ew|/D E/2&}50
m, 2xxj&--/00<=>L


#LL
 &&
	J$MMOM ))9:;K ]*'=+CC*956IJ  &'  
 #);5Wavqk5WXHO 
 6X 	J2HMMBHII	Js*   ,D2  D(
D2 D-!D2 (
D2 2+E)zhttp://future-hgx-1N__name__
__module____qualname__r&   rK   __classcell__r.   s   @r   r   r      s    
.5Jr   r   arg)ignorec                 .    t        j                  | fi |S NrequestspostrR   r   r)   r,   s       r   send_hftgi_request_v01rZ   i       =='''r   c                     t        | ||fi |S rU   rZ   rY   s       r   r?   r?   n   s    !#sE<V<<r   c                 .    t        j                  | fi |S rU   rV   rR   r,   s     r   send_hftgi_request_v00r`   s   r[   r   c                   2     e Zd Zdded   f fdZd Z xZS )HFClientVLLM
model_type)chatr8   c                    t         |   |d       t        |t              r|| _        n6t        |t
              r| d| g| _        nt        dt        |       d      t        | j                        | _	        || _
        || _        ddi| _        | xj                  |z  c_        | j                  j                  || j                  d       y )	NTr   r4   z\The url provided to `HFClientVLLM` is neither a string nor a list of strings. It is of type .r   r   )r   r   )r%   r&   r'   r(   urlsstr
ValueErrortyperC   
urls_constr   rc   r+   r,   update)r-   r   r   rc   r   r,   r.   s         r   r&   zHFClientVLLM.__init__y   s    u5c4 DIS!5$)DI {  }A  BE  }F  |G  GH  I  J  J		*	$&(:;v??
 	r   c                    i | j                   |}| j                  j                  d      }| j                  j                  |       | j                  dk(  r|j                  dd       }d|dg}|r|j                  dd|d       | j                   d   |d|}t        | d	| j                  | j                  || j                  
      }	 |j                         }|d   }	||	D 
cg c]  }
d|
d   d   i c}
d}|S | j                   d   |d|}t        | d| j                  | j                  || j                  
      }	 |j                         }|d   }	||	D 
cg c]	  }
d|
d   i c}
d}|S c c}
w # t        $ r" t        d|j                         t        d      w xY wc c}
w # t        $ r" t        d|j                         t        d      w xY w)Nr   rd   system_promptuserrolecontentsystemr   )r   messagesz/v1/chat/completions)r   r   r5   r+   r;   r8   messagerr   r9   r<   r=   r   r:   /v1/completions)r,   rg   popappendrc   getinsertsend_hfvllm_request_v01_wrappedrk   r   r+   r5   rD   r
   r8   )r-   r:   r,   r   rn   rt   rE   rF   rG   rH   rJ   s              r   rK   zHFClientVLLM._generate   s   *DKK*6* iimmA		??f$"JJt<M!'F;<HH#OPW-$ G
 7%+,OOYYHN (+I6$KVWa9i)@ AW   W-  G 7%'OOYYHN (+I6$=HI6 3I  ;  X  N6F LMMN.  J  N6F LMMNs<   E7 %E26E7 F* F%+F* 2E7 7+F"%F* *+G)r8   http://localhost)rM   rN   rO   r   r&   rK   rP   rQ   s   @r   rb   rb   x   s    0G 0?Nr   rb   c                 .    t        j                  | fi |S rU   rV   rR   r   r   r,   s       r   send_hfvllm_request_v01r      r[   r   c                     t        | ||fi |S rU   r]   r   s       r   r|   r|      s    !#sD;F;;r   c                 .    t        j                  | fi |S rU   rV   r_   s     r   send_hfvllm_request_v00r      r[   r   c                 .    t        j                  | fi |S rU   rV   r_   s     r   send_hfvllm_chat_request_v00r      r[   r   c                        e Zd Zd Zd ZddZy)HFServerTGIc                 @   t         j                  j                  t         j                  j                  t        j                         d|            | _        t         j                  j                  | j
                        s t        j                  | j
                         y y )Nztext-generation-inference)ospathabspathjoingetcwdmodel_weights_direxistsmakedirs)r-   user_dirs     r   r&   zHFServerTGI.__init__   s]    !#biikKfhp1q!rww~~d445KK../ 6r   c                 $   t        j                  ddgt         j                  t         j                        }|j                         \  }}t	        |       |r|j                         j                         j                  d      }|dd  }|D ]  }t        j                  d|      }|s|j                  d      }t        j                  dd|g      j                         j                         }d| |v sit        j                  dd	|gd
        y y )Ndockerps)stdoutstderrr   r   z^([a-zA-Z0-9]+)r   z0.0.0.0:r$   F)check)
subprocessPopenPIPEcommunicater
   decodestripsplitresearchgroupcheck_outputrun)	r-   r   processr   _container_idscontainer_idmatchport_mappings	            r   close_serverzHFServerTGI.close_server   s    ""Hd#3JOOT^TcTcd'')	f"MMO11399$?M)!"-M - V		"4lC#(;;q>L#-#:#:Hfl;[#\#c#c#e#k#k#mL!$(L8"&,'GuUV r   Nc
                 V   | j                  |       |rt        j                  j                  |      }
t        j                  j	                  | j
                  |
      }t        j                  ||       t        j                  j                  t        j                  j                  | j
                        z   t        j                  j                  z   t        j                  j                  |      z   }d| d| d| j
                   dt        j                  j                  t        j                  j                  | j
                        z    d| d| d| d| d	| d
|	 }t        d|        t        j                  |dt        j                  t        j                  d      }d}g }	 |j                  j                         }|sn'|j!                  |j#                                d|v rd}nD|s-t        d       |D ]  }t        |        |j%                          |j'                          y )Nzdocker run --gpus z --shm-size 1g -p z:80 -v r4   z -e z@ ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id z --num-shard z --max-input-length z --max-total-tokens z --max-best-of zConnect Command: T)shellr   r   r8   F	Connectedz'Could not connect to server. Error log:)r   r   r   basenamer   r   shutilcopytreesepr
   r   r   r   STDOUTr   readlinery   r   	terminatewait)r-   r   
model_name
model_pathenv_variablegpus	num_shardmax_input_lengthmax_total_tokensmax_best_ofmodel_file_name	link_pathdocker_commanddocker_process	connectedoutputlines                    r   
run_serverzHFServerTGI.run_server   sc   $ gg..z:OT%;%;_MIOOJ	2rww'7'78N8N'OORTRYRYR]R]]`b`g`g`p`pq{`||J-dV3EdV7SWSiSiRjjklnlslslwlwz|  {B  {B  {K  {K  LP  Lb  Lb  {c  mc  ld  dh  iu  hv  vv  wA  vB  BO  PY  OZ  Zn  o  n@  @T  Ue  Tf  fu  vA  uB  C!.!123#)).Z__eoevev  ~B  C	!((113DMM$**,'d" 	  ;< d$$&r   )NNNallr   i  i   d   )rM   rN   rO   r&   r   r   r	   r   r   r   r      s    0
Vr   r   c                   l     e Zd Z fdZ ej
                  ej                  ede      dd       Z	 xZ
S )Togetherc                 N    t            |d       t        j                          _        t        j                  d       _        t        j                  d       _        | _	        d _
        t         fddD              rd _
        d}d	d
ddddd|vr|n|d   d| _        y )NTr   TOGETHER_API_BASETOGETHER_API_KEYFc              3   V   K   | ]   }|j                   j                         v  " y wrU   )r   lower).0keywordr-   s     r   	<genexpr>z$Together.__init__.<locals>.<genexpr>  s#     Qw$****,,Qs   &))instinstructz

---        i   r      r$   )r    r!   r"   top_krepetition_penaltyr#   r$   )r%   r&   rW   Sessionsessionr   getenvapi_basetokenr   use_inst_templateanyr,   )r-   r   r,   stop_defaultr.   s   `   r   r&   zTogether.__init__  s    u5'')		"56YY12

!&Q<PQQ%)D"  "#$*&$8LfVn	
 	
r   i  )max_time
on_backoffc           	         | j                    }i | j                  |}|j                  d      }|j                  d      }|j                  dd      }|j                  dd      }|j                  dd      }	|j                  d	d
      }
| j                  rd| dn|}|r/| j                    d}dddd|dg}| j                  |||||	|
|d}n| j                  |||||	|
|d}dd| j
                   i}	 | j                  j                  |||      5 }|j                         }|r:|d   j                  dg       d   j                  di       j                  dd      g}n)|d   j                  dg       d   j                  dd      g}||D cg c]  }d|i c}d}|cd d d        S c c}w # 1 sw Y   y xY w# t        $ r.}rt        d|        t        d |        t        d!      d }~ww xY w)"Nr$   r    r!      r"   gffffff?r   2   r   r   [INST][/INST]/chat/completionsrs   mYou are a helpful assistant. You must continue the user text directly without *any* additional interjections.rp   ro   )r   rt   r    r!   r"   r   r   r$   )r   r:   r    r!   r"   r   r   r$   AuthorizationBearer r+   r5   r   r;   r   ru   rr    r8   r9   z
resp_json:Failed to parse JSON response: r=   )r   r,   rz   r   r   r   r   rX   r5   rD   r
   )r-   r:   use_chat_apir,   r   r$   r    r!   r"   r   r   rt   bodyr+   resp	resp_jsonrH   rJ   rF   es                       r   rK   zTogether._generate-  sB    *DKK*6*zz&!jj/ZZc2


7C(

7B'#ZZ(<a@-1-C-C6&)]]O#45C!  /^  _F3H
 $*(&8	D  *(&8	D #gdjj\$:;	J""3d"C  t IIK	#,X#6#:#:9b#I!#L#P#PQZ\^#_#c#cdmoq#r"sK#,X#6#:#:9b#I!#L#P#PQWY[#\"]K&,{9[!61+9[\    :\     	J
9+./3A378HII		JsI   4F9 A;F-F(F-	F9 (F--F62F9 6F9 9	G0)G++G0F)rM   rN   rO   r&   backoffon_exceptionexpoERRORSr   rK   rP   rQ   s   @r   r   r     s;    
0 W	8J8Jr   r   c                   &     e Zd Z fdZddZ xZS )Anyscalec                     t         |   |d       t        j                         | _        t        j                  d      | _        t        j                  d      | _        || _	        ddd|| _
        y )NTr   ANYSCALE_API_BASEANYSCALE_API_KEYr   r   )r    r#   )r%   r&   rW   r   r   r   r   r   r   r   r,   )r-   r   r,   r.   s      r   r&   zAnyscale.__init__o  sg    u5'')		"56YY12


 
r   c                 ~   | j                    d}i | j                  |}|j                  d      }|j                  dd      }|r+| j                    d}dddd	|dg}| j                  |||d
}n| j                  d| d||d}dd| j                   i}		 g }
t        |j                  dd            D ]  }| j                  j                  ||	|      5 }|j                         }|rF|
j                  |j                  dg       d   j                  di       j                  dd      g       n5|
j                  |j                  dg       d   j                  dd      g       d d d         ||
D cg c]  }d|i c}d}|S # 1 sw Y   xY wc c}w # t        $ r}t        d|        t        d      d }~ww xY w)Nz/completionsr    r!   r   r   rs   r   rp   ro   )r   rt   r    r!   r   r   )r   r:   r    r!   r   r   r#   r   r   r;   r   ru   rr   r   r8   r9   r   r=   )r   r,   rz   r   r   ranger   rX   r5   extendrD   r
   )r-   r:   r   r,   r   r    r!   rt   r   r+   rH   ir   r   rJ   rF   r   s                    r   rK   zAnyscale._generate{  s   |,*DKK*6*jj/ZZc2
]]O#45C!  /^  _F3H
 $*(	D "6('2*(	D #gdjj\$:;	JK6::c1-. ^\\&&sG$&G ^4 $		I##**IMM)R,H,K,O,OPY[],^,b,bclnp,q+rs#**IMM)R,H,K,O,OPVXZ,[+\]^ ^^ #);5Wavqk5WXHO^ ^ 6X 	J3A378HII	Js=   >F BF#F 2F=F F		F 	F<F77F<r   rL   rQ   s   @r   r   r   n  s    

+Jr   r   c                   $     e Zd Z fdZd Z xZS )ChatModuleClientc                 f    t         |   |d       ddlm}m}  ||| |d            | _        y )NTr   r   )
ChatConfig
ChatModuleLM)conv_template)r   lib_pathchat_config)r%   r&   mlc_chatr  r  cm)r-   r   r   r  r  r.   s        r   r&   zChatModuleClient.__init__  s1    u53**SW:X
r   c                     | j                   j                  |      }	 d|ig}||d}|S # t        $ r" t        dj                         t        d      w xY w)N)r:   r8   r9   zFailed to parse output:zReceived invalid output)r  generaterD   r
   r8   )r-   r:   r,   r   rH   rF   s         r   rK   zChatModuleClient._generate  si    !! " 
	7"F+,K"([AHO 	7+X]];566	7s	   * +ArL   rQ   s   @r   r  r    s    

7r   r  c                   &     e Zd Zd fd	Zd Z xZS )HFClientSGLangc                 t    t         |   |d       | d| | _        ddi| _        dddd	d
dgd|| _        y )NTr   r4   r   r   r   r   r   r   r   r   )r    r!   r"   r#   r$   )r%   r&   r   r+   r,   )r-   r   r   r   r,   r.   s        r   r&   zHFClientSGLang.__init__  s[    u5U!D6?&(:;  6N
 
r   c                 `   i | j                   |}|j                  dd      |d|}t        | j                   d|| j                        }	 |j                         }|d   }||D cg c]	  }d|d   i c}d}|S c c}w # t        $ r" t        d	|j                         t        d
      w xY w)Nr   defaultrv   rw   )r5   r+   r;   r8   r9   r<   r=   )	r,   rz   send_hfsglang_request_v00r   r+   r5   rD   r
   r8   )r-   r:   r,   rE   rF   rG   rH   rJ   s           r   rK   zHFClientSGLang._generate  s    *DKK*6* ZZ3
 
 -xxj(LL
	J$MMOM'	2K 9DEAVQvY/EH O F  	J2HMMBHII	Js   B (A=6B =B +B-)r}   rL   rQ   s   @r   r  r    s    
Jr   r  c                 .    t        j                  | fi |S rU   rV   r_   s     r   r  r    r[   r   )"r   r@   r   r   r   typingr   r   rW   dsp.modules.cache_utilsr   r   dsp.modules.hfr   r   rD   r   r   r   cacherZ   r?   r`   rb   r   r|   r   r   r   r   r   r  r  r  r	   r   r   <module>r     s   	  	      D 0
MJ' MJ` 5'"( #( 5'*= += ( (XN7 XNt 5'"( #( 5'*< +< ( ( ( (. .`WJw WJt8Jw 8Jv7w 7.)JW )JX ( (r   