
    *#h                     x    d Z ddlZddlmZ deeeef   defdZ G d de      Z	 G d	 d
e      Z
 G d d      Zy)a  
Hashing function for dataset keys using `hashlib.md5`

Requirements for the hash function:

- Provides a uniformly distributed hash from random space
- Adequately fast speed
- Working with multiple input types (in this case, `str`, `int` or `bytes`)
- Should be platform independent (generates same hash on different OS and systems)

The hashing function provides a unique 128-bit integer hash of the key provided.

The split name is being used here as the hash salt to avoid having same hashes
in different splits due to same keys
    N)Union	hash_datareturnc                     t        | t              r| S t        | t              r| j                  dd      } n't        | t              rt        |       } nt        |       | j                  d      S )z|
    Returns the input hash_data in its bytes form

    Args:
    hash_data: the hash salt/key to be converted to bytes
    \/zutf-8)
isinstancebytesstrreplaceintInvalidKeyErrorencode)r   s    M/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/keyhash.py	_as_bytesr   %   sc     )U#	Is	# %%dC0		Is	#	N	 i((G$$    c                   "     e Zd ZdZ fdZ xZS )r   z6Raises an error when given key is of invalid datatype.c                     d| _         d| dt        |       | _        d| _        t        |   | j                    | j                   | j                          y )Nz7
FAILURE TO GENERATE DATASET: Invalid key type detectedz
Found Key z	 of type z-
Keys should be either str, int or bytes type)prefixtypeerr_msgsuffixsuper__init__)selfr   	__class__s     r   r   zInvalidKeyError.__init__?   sP    P%i[	$y/9JKFDKK=t{{mDEr   __name__
__module____qualname____doc__r   __classcell__r   s   @r   r   r   <   s    @F Fr   r   c                   $     e Zd ZdZd fd	Z xZS )DuplicatedKeysErrorz(Raise an error when duplicate key found.c                 r   || _         || _        || _        d| _        t	        |      dk  rddj                  |       d| | _        n.ddj                  |d d        dt	        |      dz
   d| | _        |rd|z   nd	| _        t        | %  | j                   | j                   | j                          y )
Nz3Found multiple examples generated with the same key   z
The examples at index z, z have the key z... (z more) have the key 
 )
keyduplicate_key_indicesfix_msgr   lenjoinr   r   r   r   )r   r*   r+   r,   r   s       r   r   zDuplicatedKeysError.__init__I   s    %:"K$%+5dii@U6V5WWefiejkDL5dii@UVYWY@Z6[5\\abef{b|  @B  cB  bC  CW  X[  W\  ]DL(/dWnRDKK=t{{mDEr   )r)   r   r#   s   @r   r%   r%   F   s    2
F 
Fr   r%   c                   8    e Zd ZdZdefdZdeeeef   defdZ	y)	KeyHasherz,KeyHasher class for providing hash using md5	hash_saltc                 J    t        j                  t        |            | _        y )N)hashlibmd5r   
_split_md5)r   r1   s     r   r   zKeyHasher.__init__Y   s    !++i	&:;r   r*   r   c                     | j                   j                         }t        |      }|j                  |       t	        |j                         d      S )zReturns 128-bits unique hash of input key

        Args:
        key: the input key to be hashed (should be str, int or bytes)

        Returns: 128-bit int hash key   )r5   copyr   updater   	hexdigest)r   r*   r4   byte_keys       r   hashzKeyHasher.hash\   s>     oo""$S>

83==?B''r   N)
r   r   r    r!   r   r   r   r   r
   r<    r   r   r0   r0   V   s/    6<# <(c3o. (3 (r   r0   )r!   r3   typingr   r   r   r
   r   	Exceptionr   r%   r0   r=   r   r   <module>r@      sV   "   %sC/ %E %.Fi FF) F ( (r   