
    *#h                     x   d dl Z d dlZd dlmZ d dlmZ ddlmZ ddlm	Z	  e	e
      Z G d d	e j                        Z G d
 de      Z G d de      Z G d de      Z G d de      Zd%dee   defdZ G d de      Z G d de      Z G d de      Z G d de      Zdee   defdZd&d ed!ed"efd#Zd$ Zy)'    N)sha256)Optional   )config   )
get_loggerc                       e Zd ZdZdZdZdZy)VerificationModea  `Enum` that specifies which verification checks to run.

    The default mode is `BASIC_CHECKS`, which will perform only rudimentary checks to avoid slowdowns
    when generating/downloading a dataset for the first time.

    The verification modes:

    |                           | Verification checks                                                           |
    |---------------------------|------------------------------------------------------------------------------ |
    | `ALL_CHECKS`              | Split checks, uniqueness of the keys yielded in case of the GeneratorBuilder  |
    |                           | and the validity (number of files, checksums, etc.) of downloaded files       |
    | `BASIC_CHECKS` (default)  | Same as `ALL_CHECKS` but without checking downloaded files                    |
    | `NO_CHECKS`               | None                                                                          |

    
all_checksbasic_checks	no_checksN)__name__
__module____qualname____doc__
ALL_CHECKSBASIC_CHECKS	NO_CHECKS     V/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/utils/info_utils.pyr
   r
      s      J!LIr   r
   c                       e Zd ZdZy)ChecksumVerificationExceptionz>Exceptions during checksums verifications of downloaded files.Nr   r   r   r   r   r   r   r   r   #   s    Hr   r   c                       e Zd ZdZy)UnexpectedDownloadedFilez(Some downloaded files were not expected.Nr   r   r   r   r   r   '   s    2r   r   c                       e Zd ZdZy)ExpectedMoreDownloadedFilesz7Some files were supposed to be downloaded but were not.Nr   r   r   r   r   r   +       Ar   r   c                       e Zd ZdZy)NonMatchingChecksumErrorz?The downloaded file checksum don't match the expected checksum.Nr   r   r   r   r!   r!   /   s    Ir   r!   expected_checksumsrecorded_checksumsc                 &   | t         j                  d       y t        t        |       t        |      z
        dkD  r)t	        t        t        |       t        |      z
              t        t        |      t        |       z
        dkD  r)t        t        t        |      t        |       z
              | D cg c]  }| |   ||   k7  s| }}|d|z   nd}t        |      dkD  rt        d| d| d      t         j                  d|z          y c c}w )	NzUnable to verify checksums.r   z for  zChecksums didn't matchz:
zY
Set `verification_mode='no_checks'` to skip checksums verification and ignore this errorz&All the checksums matched successfully)loggerinfolensetr   strr   r!   )r"   r#   verification_nameurlbad_urlsfor_verification_names         r   verify_checksumsr/   3   s!   !12
3!"S);%<<=A)#c2D.EL^H_._*`aa
3!"S);%<<=A&s3/A+BSI[E\+\']^^1h5G5LPbcfPg5ghHh;L;XG&77^`
8}q&$%:$;3j gg
 	

 KK8;PPQ is   5DDc                       e Zd ZdZy)SplitsVerificationExceptionz%Exceptions during splis verificationsNr   r   r   r   r1   r1   F   s    /r   r1   c                       e Zd ZdZy)UnexpectedSplitsz6The expected splits of the downloaded file is missing.Nr   r   r   r   r3   r3   J   s    @r   r3   c                       e Zd ZdZy)ExpectedMoreSplitsz!Some recorded splits are missing.Nr   r   r   r   r5   r5   N   s    +r   r5   c                       e Zd ZdZy)NonMatchingSplitsSizesErrorz7The splits sizes don't match the expected splits sizes.Nr   r   r   r   r7   r7   R   r   r   r7   expected_splitsrecorded_splitsc                 J   | t         j                  d       y t        t        |       t        |      z
        dkD  r)t	        t        t        |       t        |      z
              t        t        |      t        |       z
        dkD  r)t        t        t        |      t        |       z
              | D cg c],  }| |   j                  ||   j                  k7  r| |   ||   d. }}t        |      dkD  rt        t        |            t         j                  d       y c c}w )NzUnable to verify splits sizes.r   )expectedrecordedz$All the splits matched successfully.)	r&   r'   r(   r)   r5   r*   r3   num_examplesr7   )r8   r9   name
bad_splitss       r   verify_splitsr@   V   s	   45
3#o"667!; S%9C<P%P!QRR
3#o"667!;s3#7#o:N#NOPP $4 --1F1S1SS %T*8MNJ 
 :)#j/::
KK67s   51D pathrecord_checksumreturnc                    |rUt               }t        | d      5 t        fdd      D ]  }|j                  |        |j	                         }ddd       nd}t
        j                  j                  |       dS # 1 sw Y   +xY w)z7Compute the file size and the sha256 checksum of a filerbc                  &     j                  d      S )Ni   )read)fs   r   <lambda>z(get_size_checksum_dict.<locals>.<lambda>m   s    affWo r   r   N)	num_byteschecksum)r   openiterupdate	hexdigestosrA   getsize)rA   rB   mchunkrK   rH   s        @r   get_size_checksum_dictrT   h   s    H$ 	%5s;   {{}H	% 	%
 .HEE	% 	%s   6A==Bc                 N    | r#t         j                  r| t         j                  k  S y)zCheck if `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.

    Args:
        dataset_size (int): Dataset size in bytes.

    Returns:
        bool: Whether `dataset_size` is smaller than `config.IN_MEMORY_MAX_SIZE`.
    F)r   IN_MEMORY_MAX_SIZE)dataset_sizes    r   is_small_datasetrX   u   s#     11f7777r   )N)T)enumrP   hashlibr   typingr   r%   r   loggingr   r   r&   Enumr
   	Exceptionr   r   r   r!   dictr/   r1   r3   r5   r7   r@   r*   boolrT   rX   r   r   r   <module>ra      s     	     
H	tyy ,II I3< 3B"? BJ< JR$ RT R&0) 0A2 A,4 ,B"= B88D> 8D 8$
F 
Ft 
Ft 
Fr   