
    +#h              
       >   d dl Z d dlmZmZ d dlZd dlZd dlmZm	Z	m
Z
mZmZmZ  G d de      Zd Zdee   defd	Zdee   d
edee   fdZdee   dee   defdZddZdedee   defdZdedefdZdedefdZefdee   deegef   deeegee   f   fdZy)    N)AnyCallable)EMF1DPR_normalizedotdict
has_answernormalize_textc                   F     e Zd ZU dZee   ed<    fdZd Zd Z	d Z
 xZS )Examplez0A primitive datatype for representing an exampledemosc                     t        |      dk  sJ t        | 	          |r$t        |      dk(  sJ | j                  |d           | j                  di | y )N   r    )lensuper__init__update)selfargskwargs	__class__s      W/var/www/html/sandstorm/venv/lib/python3.12/site-packages/dsp/primitives/demonstrate.pyr   zExample.__init__   sR    4yA~~t9>!>KKQ f    c                 6    t        di i t        |       |}|S )Nr   )r   dict)r   r   the_copys      r   copyzExample.copy   s#    65T
5f56r   c                     t        |      }t        | j                         D ci c]  \  }}||vs|| c}}      S c c}}w )z=Removes the provided keys from the example and returns a copy)setr   items)r   keyskvs       r   withoutzExample.without   s8    4yGA$1GHHGs   >
>
c           
          fd}| j                   D cg c]  } |j                  di  ||       }}| j                  |      S c c}w )zWReturns a copy of the example with the demos stage transformed by the provided functionc                 V    	  |       j                  d      S # t        $ r i cY S w xY w)N	augmented)r%   	Exception)examplefns    r   atzExample.demos_at.<locals>.at&   s1    '{**;77 	s    (()r   r   )r   r   )r   r+   r,   r*   r   s    `   r   demos_atzExample.demos_at#   sK    	 =AJJG,7,GGyyuy%% Hs   A)__name__
__module____qualname____doc__listr   __annotations__r   r   r%   r-   __classcell__)r   s   @r   r   r   
   s%    :9
I

&r   r   c                       d fd	}|S )zUReturns an Augment function that applies the provided transformations to the Examplesc                    g }g }| D ]o  }t        j                  |      }|t        |      |k\  rd }D ]  }| n
 ||      } |d|_        |j	                  |       Xd|_        |j	                  |       q |r||z   S |S )NTF)dspr   r   r(   append)	trainr#   
return_allrdemosademosr*   raw_exampleftransformationss	           r   
do_augmentzannotate.<locals>.do_augment3   s     	+G++g.K3v;!#3$ %?G*	% "$(!g&(-%k*#	+& F?"r   )NFr   )r?   r@   s   ` r   annotaterA   0   s    8 r   r9   r#   c                     t        j                  t        j                  j                        }| D cg c]  }t        j
                  |       }}|j                  |       |d| S c c}w )zSample k examples from train.N)randomRandomr7   settings
branch_idxr   shuffle)r9   r#   rngr*   shuffled_trains        r   samplerJ   R   sU    
--//
0C:?@wckk'*@N@KK"1 As   A&xreturnc                     | D cg c]h  }t         j                  t        |j                  dg       |j                  gz         t        |j                  dg       |j                  gz               s|j }}|S c c}w )zORemoves the example x from the train set by comparing the question and history.history)r    intersectiongetquestion)r9   rK   youtputs       r   all_butrT   [   sw    
 i$

|34i$

|34
 	
F  Ms   A-A6passagesanswersc                 ,    t        fd| D              S )z8Returns True if any of the passages contains the answer.c              3   6   K   | ]  }t        |        y w)N)passage_has_answers).0psgrV   s     r   	<genexpr>z passage_match.<locals>.<genexpr>l   s     ES"30Es   )any)rU   rV   s    `r   passage_matchr^   j   s    EHEEEr   c                 B    |dk\  rt        | |      S t        | |      |k\  S )N      ?)r   r   )
predictionrV   fracs      r   answer_matchrc   o   s*     s{*g&&j'"d**r   passagec           
      x    t        |D cg c]  }t        t        |             c}t        |             S c c}w )z0Returns True if the passage contains the answer.)tokenized_answerstext)r	   r   r
   )rd   rV   anss      r   rY   rY   y   s2    IPQ#=)<=QG$ Qs   7inp_examplec                 :    | j                  | j                        S )ze
    Extracts question as a field to vectorize with Vectorizer object. `question` field is used.
    text_to_vectorize)r   rQ   )ri   s    r   !cast_naive_get_only_question_textrm      s     k.B.BCCr   c                     | j                   j                         dz   | j                  j                         z   }| j                  |      S )z
    Extracts question and answer as fields to vectorize with Vectorizer object.
    `question` and `answer` fields are used. They will be concatenated with the word "Answer"
    between.
    z	 Answer: rk   )rQ   stripanswerr   )ri   rl   s     r   "cast_naive_get_question_and_answerrq      sK     	""${2[5G5G5M5M5OO  .?@@r   castc                    	 ddl m}  D cg c]
  } |       }}t        j                  j                  	 	|      j                  t        j                        } |d	|j                  d   t               d|j                  |       j                  |       dt        dt        dt        t           f 	fd}|S c c}w )
a  
    A function that vectorizes train data using `dsm.settings.vectorizer`, then build an ANN/KNN
    index to search similar questions among `train` samples.

    Args:
        train: a bunch of questions to put in index & search later
        cast: function that contructs text before vectorization. By default,
            it uses only question. Check `cast_naive_get_question_and_answer` for more details.
        n_probe: number of closest IVF-clusters to check for neighbours.
            Doesn't affect bruteforce-based search.
        knn_args: check `create_faiss_index` function for details on ANN/KNN arguments.
    Returns: function to search similar Examples from `train` in FAISS-index.
    r   )create_faiss_indexr   )emb_dim	n_objectsri   r#   rL   c                      
 |       g      }j                  ||      \  }}|d   D cg c]  }	|   	 }}|S c c}w )Nr   )search)ri   r#   inp_example_vector_nearest_samples_idxscur_idxtrain_sampledrr   indexr9   
vectorizers          r   inner_knn_searchzknn.<locals>.inner_knn_search   sU    'k):(;<"',,/A1"E7KA7NOGwOO Ps   =r   )dsp.utils.ann_utilsrt   r7   rE   r   astypenpfloat32shaper   r9   addr   intr2   )
r9   rr   knn_argsrt   cur_elemtrain_casted_to_vectorizeall_vectorsr   r~   r   s
   ``      @@r   knnr      s    $ 7@E FHh F F+.<<+B+BJ67>>rzzJK !!!$E
>FE 
KK	IIkg # $w-   # !Gs   C)r`   )rC   typingr   r   numpyr   r7   	dsp.utilsr   r   r   r   r	   r
   r   rA   r2   r   rJ   rT   strboolr^   rc   rY   rm   rq   r   r   r   r   <module>r      s       
 P P#&g #&LD$w- C 4= W g FDI FS	 Fd F
+ tCy T D7 Dw D	AG 	A 	A *K%=%
G9g%
&% wnd7m+,	%r   