
    +#hoy                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ 	  G d	 d
e      Z G d de      Z G d de
j                        Z G d de
j                        Z G d de
j                        Z G d de
j                        Z G d de
j                        Z G d de      Zy)    N)defaultdict)Any)Evaluate)	Signature)signature_to_template)BootstrapFewShot)Teleprompterc                       e Zd ZdZ ej
                  d      Z ej                  d      Z ej                  d      Z	y)BasicGenerateInstructiona  You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.,The initial instructions before optimizationdesc0The improved instructions for the language modelUThe string at the end of the prompt, which will help the model start solving the taskN)
__name__
__module____qualname____doc__dspy
InputFieldbasic_instructionOutputFieldproposed_instruction proposed_prefix_for_output_field     \/var/www/html/sandstorm/venv/lib/python3.12/site-packages/dspy/teleprompt/mipro_optimizer.pyr   r   0   sD     W'-[\+4++1cd'7t'7'7d($r   r   c                       e Zd ZdZ ej
                  d      Z ej
                  d      Z ej                  d      Z	 ej                  d      Z
y),BasicGenerateInstructionWithDataObservationsa`  You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English.  I will also give you some ``observations`` I have made about the dataset and task. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.r   r   'Observations about the dataset and taskr   r   N)r   r   r   r   r   r   r   observationsr   r   r   r   r   r   r   r   :   sU     k'-[\"4??(QRL+4++1cd'7t'7'7d($r   r   c                       e Zd ZdZ ej
                  d      Z ej
                  ej                  d      Z	 ej                  d      Z ej                  d      Zy)	$BasicGenerateInstructionWithExamplesa  You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Specifically, I will also provide you with the current ``basic instruction`` that is being used for this task. I will also provide you with some ``examples`` of the expected inputs and outputs.

    Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.r   r   Example(s) of the taskformatr   r   r   N)r   r   r   r   r   r   r   dsppassages2textexamplesr   r   r   r   r   r   r#   r#   E   s_    M
 (-[\tc&7&7>VWH+4++1cd'7t'7'7d($r   r#   c                       e Zd ZdZ ej
                  d      Z ej
                  ej                  d      Z	 ej
                  d      Z
 ej                  d      Z ej                  d      Zy	)
7BasicGenerateInstructionWithExamplesAndDataObservationsa;  You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Specifically, I will give you some ``observations`` I have made about the dataset and task, along with some ``examples`` of the expected inputs and outputs. I will also provide you with the current ``basic instruction`` that is being used for this task.

    Your task is to propose a new improved instruction and prefix for the output field that will lead a good language model to perform the task well. Don't be afraid to be creative.r    r   r$   r%   r   r   r   N)r   r   r   r   r   r   r!   r'   r(   r)   r   r   r   r   r   r   r   r+   r+   T   sp    y #4??(QRLtc&7&7>VWH'-[\+4++1cd'7t'7'7d($r   r+   c                   \    e Zd ZdZ ej
                  d      Z ej                  d      Zy)ObservationSummarizerzGiven a series of observations I have made about my dataset, please summarize them into a brief 2-3 sentence summary which highlights only the most important details.z)Observations I have made about my datasetr   zXTwo to Three sentence summary of only the most significant highlights of my observationsN)	r   r   r   r   r   r   r!   r   summaryr   r   r   r-   r-   b   s.     q"4??(STLdgGr   r-   c                   \    e Zd ZdZ ej
                  d      Z ej                  d      Zy)DatasetDescriptoraU  Given several examples from a dataset please write observations about trends that hold for most or all of the samples. Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative#Sample data points from the datasetr   zCSomethings that holds true for most or all of the data you observedN)	r   r   r   r   r   r   r)   r   r!   r   r   r   r0   r0   k   s.    	P
 t$IJH#4##)noLr   r0   c                       e Zd ZdZ ej
                  d      Z ej
                  d      Z ej                  d      Z	y)&DatasetDescriptorWithPriorObservationsa  Given several examples from a dataset please write observations about trends that hold for most or all of the samples. I will also provide you with a few observations I have already made.  Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creativer1   r   z-Some prior observations I made about the datazjSomethings that holds true for most or all of the data you observed or COMPLETE if you have nothing to addN)
r   r   r   r   r   r   r)   prior_observationsr   r!   r   r   r   r3   r3   v   sB    	P t$IJH(.]^#4##yLr   r3   c                      e Zd Zddi dddddfdZd ZddZdd	Zd
 Zd Zd Z	de
j                  dededededeeef   fdZdddddde
j"                  dee
j&                     dedededeeef   de
j"                  fdZy)MIPRON
         ?FTc
                     || _         || _        || _        ||nt        j                  j
                  | _        ||nt        j                  j
                  | _        || _        || _	        || _
        |	| _        y )N)num_candidatesmetricinit_temperaturer   settingslmprompt_model
task_modelverbosetrack_statsteacher_settingsview_data_batch_size)
selfr;   r?   r@   rC   r:   r<   rA   rB   rD   s
             r   __init__zMIPRO.__init__   sk     - 0,8,DL$--JZJZ(2(>*DMMDTDT& 0$8!r   c                    t        |j                               D ]  \  }}| j                  rt        d|        | j                  r't        d| j	                  |      j
                          | j	                  |      j                  j                         ^ }}| j                  rt        d|j                  d           | j                  st        d        y )Nz
Predictor zi: zp: prefix
)		enumerate
predictorsrA   print_get_signatureinstructionsfieldsvaluesjson_schema_extra)rE   programi	predictor_
last_fields         r   _print_full_programzMIPRO._print_full_program   s    %g&8&8&:; 		LAy||
1#&'||D//	:GGHIJ!00;BBIIKNQ
||J88BCDE||d		r   c                 ^    | j                   rt        d| d       |j                  |       y )NzModel (z
) History:n)rA   rL   inspect_history)rE   modelrZ   s      r   _print_model_historyzMIPRO._print_model_history   s+    <<GE7*-."r   c                    t        t        |      | j                        } t        j                  t
        dd      |d| j                               }|d   }d}d}t        | j                  t        |      | j                        D ]  }t        t        |      || j                  z         } t        j                  t        dd      |||| j                               }	|dz  }t        |	d         dk\  r&|	d   d d j                         d	k(  r|dz  }|d
k\  r n||k\  r n
||	d   z  }  t        j                  t        dd      |      }
|
j                  S )N   r8   rZ   temperaturer   )r)   r!   )r4   r)      COMPLETE   )r!   )minlenrD   r   Predictr0   __repr__ranger3   upperr-   r.   )rE   trainsetmax_iterations	upper_limobservationr!   skips
iterationsboutputr.   s              r   _observe_datazMIPRO._observe_data   s_   Ht'@'@A	Kdll#4sKV^_`ajVkVtVtVvx">2
t00#h-AZAZ[ 	3ACM1t/H/H+HII_T\\"HA[^_#/"1Y/88:F !OJ6.)*a/F>4J2A4N4T4T4VZd4d
A:^+F>22L	3  L$,,4sKYefr   c                     g }|D ]P  }|j                   }|j                  }|j                  }|j                  |      }| | | }	|j	                  |	       R dj                  |      S )NrI   )name	separatorinput_variablegetappendjoin)
rE   rO   examplerr   fieldru   rv   rw   value	field_strs
             r   _create_example_stringzMIPRO._create_example_string   st     
	%E::DI"11N KK/E  &E73IMM)$
	% yy  r   c                 d    t        |d      r|j                  S t        |d      r|j                  S y Nextended_signature	signaturehasattrr   r   )rE   rT   s     r   rM   zMIPRO._get_signature   s2    923///Y,&&&r   c                 T    t        |d      r||_        y t        |d      r||_        y y r   r   )rE   rT   updated_signatures      r   _set_signaturezMIPRO._set_signature   s,    923+<I(Y,"3I -r   moduleN	view_dataview_examplesdemo_candidatesreturnc                 &
   i }t        t              }|rpd | _        t        j                  j                  | j                        5  | j                  |      j                  dd      j                  dd      | _        d d d        |ri }	|j                         D ]  }
i }|t        |
         }t        |      D ]  \  }}|dk7  r|D ]  }d|v s|d   s||vrg ||<   t        |
j                        j                  }t        | j!                  |
      j"                  j%                               }| j'                  ||      }||   j)                  |        ||	t        |
      <   g ||<   ||	t        |
      <     |j                         D ]B  }
d }d }| j!                  |
      j*                  }| j!                  |
      j                  j-                         ^ }}|j.                  d   }t        j                  j                  | j                        5  |r|rd	t        |
         vrt1        d	      d }t3        d| j4                        D ]  } t        j6                  t8        d| j:                  
      || j                  |	t        |
         |         }|s|}P|j<                  j>                  jA                  |j<                  j>                         |j<                  jB                  jA                  |j<                  jB                          nC|r= t        j6                  tD        |dz
  | j:                  
      || j                        }n|rd }t3        d| j4                        D ]  } t        j6                  tF        d| j:                  
      |	t        |
         |         }|s|}E|j<                  j>                  jA                  |j<                  j>                         |j<                  jB                  jA                  |j<                  jB                          n0 t        j6                  tH        |dz
  | j:                  
      |      }d d d        j<                  j>                  jK                  d|       |j<                  jB                  jK                  d|       |j<                  |t        |
      <   i |t        |
      <   E | jL                  r| jO                  | j                         ||fS # 1 sw Y   xY w# 1 sw Y   xY w)N)r>   zObservations: zSummary:r   	augmentedrH   r_   z)No examples found for the given predictorr`   )r   r!   r)   )r   r!   )r   r)   )r   )(r   dictr!   r   r=   contextr?   rs   replacerK   idrJ   r   r   rO   listrM   input_fieldskeysr   ry   rN   rP   rQ   
ValueErrorri   r:   rg   r+   r<   completionsr   extendr   r   r#   r   insertrA   r]   )rE   r   r   r   r   r   devset
candidatesevaluated_candidatesexample_setsrT   example_setall_sets_of_examplesexample_set_iset_of_examplesr{   fields_to_use_input_variable_namesexample_stringr   basic_prefixrU   rV   instructrS   new_instructs                             r   _generate_first_N_candidatesz"MIPRO._generate_first_N_candidates   s    
*40 $D&&$*;*;&< t$($6$6v$>$F$FXZ$[$c$cdnpr$s!t L#..0 B	 '6r)}'E$6?@T6U B2M?$)'6 RG*g5'+:N#0#CACK$>0EiFYFY0Z0a0a8<T=P=PQZ=[=h=h=m=m=o8p 5151L1L]\c1d +M : A A. QR 7BR	]357M26AR	]3B	B(  **, E	5I $L $ 3 3I > K K!00;BBIIKNQ
%77AL&&$*;*;&< 9R	] ;;()TUU#H"1d&9&9: (t||S(,(=(=(
 /@)-):):%1"Y-%@%C(  ('3H$00EELL , 8 8 M M %00QQXX , 8 8 Y Y!(  t||Da%$($9$9  ):HYHY	 [H ##H"1d&9&9: (t||@(,(=(=(
 /@%1"Y-%@%C(  ('3H$00EELL , 8 8 M M %00QQXX , 8 8 Y Y(  rt||,DA[_[p[pq*; Ho9x   55<<Q@QR  AAHHLY(0(<(<Jr)}%24 I/KE	5N <<%%d&7&78///Gt t>9 9s   7S:IT:TT	*   )seedr   r   requires_permission_to_runstudentrk   
num_trialsmax_bootstrapped_demosmax_labeled_demoseval_kwargsc       	         B	   ()*+ d}d}d}d}t        j                  |       t        |      |z  }d j                  t        |j	                               z  z   }t        j                  dj                  g d| | d| d	| d
| | t        |       | | d| | | | | d| | d| | d| | | d| | d| d| d| | d| | d| |  j                   | | d| | t        |j	                                | | d| | | | | d| d| | d| d| d| d| d| d| d|             }t        j                  d| d| d | d!| d"| d#| d$| d%| d| d&| d'      }t        |       t        j                  j                          d(}|
rDt        |       t        d)      j                         j                         }|d*k7  rt        d+       d,}|r||j                         }t!        d<| j"                  d-|}|d.k(  r
|d.k(  rd/}d/}n|}|}i }t%         j                        D ]a  }|d.k(  rN|j	                         D ]:  }t'        |      |vrg |t'        |      <   |t'        |         j)                  g        < W j*                  rt        d0| d1 j                  d/z
          t        j,                  |      }|d d  }|j/                  |       t1         j"                  || j2                  2      }|j5                  |j                         |3      }t7        |j	                         |j	                               D ]G  \  }} t'        |      |vrg |t'        |      <   |t'        |         j)                  | j8                         I d  j;                  | j                  ||	||      \  }!}"|d.k(  r|d.k(  rd }t=        d4      )d (d.+i *() *+fd5}# |#||!|||      }$t>        j@                  jC                  |6      }%t?        jD                  d7|%8      }&|&jG                  |$|9      }'( jH                  r*(_%        t        d:( d;       (S y )=Nz[93mz[94mz[1mz[0mr7   r   z            z,WARNING: Projected Language Model (LM) Callsz

            Please be advised that based on the parameters you have set, the maximum number of LM calls is projected as follows:

            z- Task Model: z examples in dev set * z
 trials * z# of LM calls in your programz = (z  * # of LM calls in your programz) task model callsz
            z-- Prompt Model: # data summarizer calls (max 10z) + z * z lm calls in program = z prompt model callsz

            zEstimated Cost Calculation:a  Total Cost = (Number of calls to task model * (Avg Input Token Length per Call * Task Model Price per Input Token + Avg Output Token Length per Call * Task Model Price per Output Token) 
                        + (Number of calls to prompt model * (Avg Input Token Length per Call * Task Prompt Price per Input Token + Avg Output Token Length per Call * Prompt Model Price per Output Token).a	  

            For a preliminary estimate of potential costs, we recommend you perform your own calculations based on the task
            and prompt models you intend to use. If the projected costs exceed your budget or expectations, you may consider:

            zt- Reducing the number of trials (`num_trials`), the size of the trainset, or the number of LM calls in your program.z4- Using a cheaper task model to optimize the prompt.zT            To proceed with the execution of this program, please confirm by typing z'y'z for yes or z'n'zg for no.

            If you would like to bypass this confirmation step in future executions, set the z`requires_permission_to_run`z	 flag to z`False`.zAwaiting your input...z	
        Tz Do you wish to continue? (y/n): yz Compilation aborted by the user.F)r   r;   r   r_   zCreating basic bootstrap: /)r;   r   r   rC   )r   rk   z-infc           
      .      	
f
d}|S )Nc           	      P  
 j                         }t        d"        i !"<   t        j                         |j                               D ]  \  }}t	        |         }rt	        |         }| j                  t	        |       dt        t        |                  }r0| j                  t	        |       dt        t                          }|!"   t	        |       d<   r!"   t	        |       d<   ||   }|j                  j                  d      j                         }	|j                  j                  d      j                         }
j                  |      j                  j                         ^ }}j                  |      j                  |	      j                  ||
      }j!                  ||       r   }s|_         j$                  rt        d       j$                  rj'                  |       |!"   d<   d}d	}t)        j*                  t               |z        }t        |      D ]  }||z  }t-        |d
z   |z  t                     } || } ||d      }j$                  rt        | d|        ||t        |      z  z  }|t-        |d
z   d	z  t                     z  }j$                  rt        d|        | j/                  ||       | j1                         st        d       |!"   d<   d!"   d<   "d
z  "t3        j4                          j$                  rt        d        j$                  rj7                  j8                  d
       }|!"   d<   d!"   d<   |kD  r||j                         "d
z  "|S )NzStarting trial #_predictor_instruction_predictor_demos")rH   zEvaling the following program:rR   r   d   r_   )r   display_tablezst split score: zcurr average score: zTrial pruned.scoreTprunedzFully evaled score: rY   F)deepcopyrL   ziprK   r   suggest_categoricalri   rf   r   stripr   rM   rO   r   with_instructionswith_updated_fieldsr   demosrA   rW   mathceilre   reportshould_pruneoptunaTrialPrunedr]   r@   )#trialcandidate_programp_oldp_newp_instruction_candidatesp_demo_candidatesinstruction_idx	demos_idxselected_candidateselected_instructionselected_prefixrU   rV   r   selected_demostotal_score
batch_sizenum_batchesrS   start_index	end_indexsplit_trainsetsplit_scorecurr_weighted_avg_scorer   baseline_programbest_program
best_scorer   evaluateinstruction_candidatesrE   rk   
trial_logs	trial_nums#                            r   	objectivez:MIPRO.compile.<locals>.create_objective.<locals>.objective  s   (8(A(A(C% ,YK89,.Jy)(+,<,G,G,IK\KgKgKi(j (9u3I"U)3T0*0?5	0J- +0*C*C!%yk)?@!#&>"?@+ +(-(A(A#%e9+-= > %c*;&< =)I Wf
9-E;Q.RS*T]Jy1RYK?O2PQ .Fo-V*/A/V/V/\/\]`/a/g/g/i,*<*]*]*c*cdg*h*n*n*p *.)<)<U)C)J)J)O)O)QJ //6../CD00O0T *
 ++E3DE +->y-IN +*8EKQ(9T ||>?||001BC7HJy))4 #$K!$J"&))CMJ,F"GK";/ 7&'*n$'Q*(<c(m$L	)1+i)H&./@gh&i<<!QC'7}"EF#{S5H'HH2=QUcMSVW_S`@a2a/<<!$89P8Q"RS%<a@ !--/!/2=TJy1':>BJy1(;%NI"("4"4"66+7. || 45L4MNO||11$//Q1G3E5LJy)'26;Jy)(3 z)%*
'8'A'A'CNI Lr   r   )r   r   r   r   rk   r   r   r   rE   r   r   s   ````` r   create_objectivez'MIPRO.compile.<locals>.create_objective  s    d! d!L ! r   )r   maximize)	directionsampler)n_trialsz
Returning z from continue_programr   )&randomr   rf   r:   rK   textwrapdedentrz   rL   sysstdoutflushinputr   lowerr   r   r;   ri   r   ry   rA   Randomshuffler   rC   compiler   r   r   floatr   samplers
TPESamplercreate_studyoptimizerB   r   ),rE   r   rk   r   r   r   r   r   r   r   r   YELLOWBLUEBOLDENDC*estimated_task_model_calls_wo_module_callsestimated_prompt_model_callsuser_messageuser_confirmation_messagerun
user_inputr   r   (max_bootstrapped_demos_for_candidate_gen#max_labeled_demos_for_candidate_genr   rS   module_prngshuffled_trainsettpr   candidate_pr   rU   r   objective_functionr   study_scorer   r   r   r   s,   `                                       @@@@r   r   zMIPRO.compile[  s.	    D58]Z5O2')D,?,?# C
 -
 (
$   (R (R , (RH(RF(RF(RGKf(RM(R
 H(R
 #(R
 $(&(R
 *.(R
 038}o(R
 ?CV(R
 EK8(R
 Lc(R
 dhbh(R
 jnhn(R
 pznz(R
 |@  {A(R
 BH  AI(R
 IS(R
 TX  SY(R
 Z^  Y_(R
 _|(R
 }A  |B(R
 CI  BJ(R
 JN(R
 OS  NT(R
 UY  TZ(R
 [E  ZF(R
 Ff(R
 gk  fl(R
 ms  lt(R
 tF(R
 GK  FL(R
L(R H(R B(R CG(R IMv(R NP(R QUv(R W]U](R ^b(R cgag(R imgm(R os  oB  oB  nC(R DH  CI(R JP  IQ(R QT(R UY  TZ(R [_  Z`(R ad  el  ew  ew  ey  az  `{(R |@  {A(R BH  AI(R I`(R ae  `f(R gk  fl(R mI  lJ(R KO  JP(R QW  PX(R Xk(R lp  kq(Rq(R H(R F(R 6(R 7;V(R<(R H(RM(R NR  MS(RS(R  H!(R  I!(R  JN  IO!(R O!(R" H#(R" I#(R" JN#(R S& %-OO 9UUYTZZ]^b]ccoptouuxy}x~ ^^d]e  fB  CG  BH  HQ  RX  QY  Ya  bf  ag gH*4& 1	5 %! 	l

%+,ABHHJPPRJS 89%%'FSxS{SH '!+0AQ0F;<8673;Q86G3 !O4../ P6$*$5$5$7 Ah<><>OBxL9'85<<R@A
 || :1#Qt?R?RST?T>UVW !--*C(0%KK 12)#{{/W*M)-)>)>	B )+

6??;LWh
(i% 25V5F5F5HJ[JfJfJh1i P-+h<><>OBxL9'85<<[=N=NOP/P: )-(I(I##)%"A &*/@A/E"& vJLIJg! g!T "2&:PRackmu!voo00d0;G''*gNE^^$6^LF'D,<,<*4'J|n,BCDr   )r_   )r7   )r   r   r   rF   rW   r]   rs   r   rM   r   r   Moduleintboolr   tupler   Programr   Examplestrr   r   r   r   r   r6   r6      s    9,
#
6!$4r0r0 r0 	r0
 r0 r0 
tTz	r0z #'yy t||$	y
 y !$y y #s(^y 
yr   r6   )r   r   r   r   collectionsr   typingr   r   r'   r   dspy.evaluate.evaluater   dspy.signaturesr   dspy.signatures.signaturer   dspy.telepromptr   dspy.teleprompt.telepromptr	   r   r   r#   r+   r-   r0   r3   r6   r   r   r   <module>r!     s      
  #   
  + % ; , 3<y 9 4>> dnn DNN p pT^^ OL Or   