
    +#h'                         d dl mZ 	 d dlZd dlmZ ddedeeef   fdZded	edefd
Z	dedededededefdZ
	 	 	 	 	 ddedededededededefdZy# e$ r	  ed      w xY w)    )TupleN)IndexzYou need to install FAISS library to perform ANN/KNN. Please check the official doc: https://github.com/facebookresearch/faiss/blob/main/INSTALL.mdmax_gpu_devicesreturnc                     t        j                         }|dkD  }| dkD  r|rt        ||       }||fS | dk(  r|r|}||fS d}d}||fS )a!  
    Determine which device we should use
    Args:
        max_gpu_devices: an integer value, define how many GPUs we'll use.
            -1 means all devices. 0 means there are no GPUs. Default is 0.

    Returns: number of devices and is it allowed to use CUDA device (True if yes)
    r      F)faissget_num_gpusmin)r   n_devices_totalis_gpunum_devicess       P/var/www/html/sandstorm/venv/lib/python3.12/site-packages/dsp/utils/ann_utils.pydetermine_devicesr      st     ((*Oq Fv/?;  
B	6%      emb_dim	dist_typec                     |j                         dk(  rt        j                  |       }|S |j                         dk(  rt        j                  |       }|S t	        d|       )Nipl2z*Wrong distance type for FAISS Flat Index: )lowerr
   IndexFlatIPIndexFlatL2
ValueError)r   r   indexs      r   _get_brute_indexr   #   se    D !!'* L 
	d	"!!'* L Ei[QRRr   	n_objectsin_list_dist_typecentroid_dist_typeencode_residualsc                    t        d|dz  z        }|j                         dk(  rt        j                  |       }n7|j                         dk(  rt        j                  |       }nt        d|       |j                         dk(  rt        j                  }n2|j                         dk(  rt        j                  }nt        d|       t        j                  || |t        j                  j                  ||      }|S )N   g      ?r   r   z)Wrong distance type for FAISS quantizer: z%Wrong distance type for FAISS index: )intr   r
   r   r   r   METRIC_INNER_PRODUCT	METRIC_L2IndexIVFScalarQuantizerScalarQuantizerQT_fp16)	r   r   r   r    r!   n_list	quannizercentroid_metricr   s	            r   _get_ivf_indexr-   .   s     i3&'(F D(%%g.			 	 	"d	*%%g.	DEVDWXYY!T)44		!	!	#t	+//@AS@TUVV))%%E Lr   n_probec           	         |dk  rt        | |      }nt        | ||||      }||_        t        |      \  }}	|	rDt	        j
                         }
d|
_        t	        j                  ||
t        t        |                  }|S )a6  
    Create IVF index (with IP or L2 dist), without adding data and training
    Args:
        emb_dim: size of each embedding
        n_objects: size of a trainset for index. Used to determine optimal type
            of index and its settings (will use bruteforce if `n_objects` is less than 20_000).
        n_probe: number of closest IVF-clusters to check for neighbours.
            Doesn't affect bruteforce-based search.
        max_gpu_devices: maximum amount of GPUs to use for ANN-index. 0 if run on CPU.
        encode_residuals: whether or not compute residuals. The residual vector is 
            the difference between a vector and the reconstruction that can be
            decoded from its representation in the index.
        in_list_dist_type: type of distance to calculate simmilarities within one IVF.
            Can be `IP` (for inner product) or `L2` distance. Case insensetive.
            If the index type is bruteforce (`n_objects` < 20_000), this variable will define
            the distane type for that bruteforce index. `centroid_dist_type` will be ignored.
        centroid_dist_type: type of distance to calculate simmilarities between a query 
            and cluster centroids. Can be `IP` (for inner product) or `L2` distance.
            Case insensetive.
    Returns: untrained FAISS-index
    i N  )r   r   )r   r   r   r    r!   T)
r   r-   nprober   r
   GpuMultipleClonerOptionsshardindex_cpu_to_gpus_listlistrange)r   r   r.   r   r!   r   r    r   r   r   cloner_optionss              r   create_faiss_indexr7   Q   s    < 6 !<MN/1-
 EL+O<K779#,,UND{I[D\]Lr   )r   )
   r   TL2r9   )typingr   r
   r   ImportErrorr$   boolr   strr   r-   r7    r   r   <module>r?      s   s 5d3C ,c c e       	 
    L !!"333 3 	3
 3 3 3 3W  
	I s   
A A#