
    *#h[                        d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZmZmZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$  e"e%      Z& G d de'      Z( ed      d'd       Z) ed      d'd       Z* ed      d(de+de+dee   fd       Z, ed      d(de+de+dee   fd       Z-	 	 	 	 	 	 d)de+deeee	e+f      dee   deeee+f      deee+e$f      deee.e+f      fd Z/	 	 	 	 	 d*de+deee+e$f      dee   deeee+f      d!ee+   deeee	e+f      fd"Z0	 	 	 	 	 	 	 d+de+d#ee+   deee+ee+   e
e+ee+ee+   f   f   f      dee   deeee+f      deee+e$f      deee.e+f      d$efd%Z1	 	 	 	 	 	 	 d+de+d#ee+   deee+ee+   e
e+ee+ee+   f   f   f      dee   deeee+f      deee+e$f      deee.e+f      fd&Z2y),z List and inspect datasets.    N)PurePath)DictListMappingOptionalSequenceUnion   )DownloadConfig)DownloadMode)StreamingDownloadManager)DatasetInfo)dataset_module_factoryget_dataset_builder_classimport_main_classload_dataset_buildermetric_module_factory)
deprecated)relative_to_absolute_path)
get_logger)Versionc                       e Zd Zy)SplitsNotFoundErrorN)__name__
__module____qualname__     M/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/inspect.pyr   r   /   s    r   r   z,Use 'huggingface_hub.list_datasets' instead.c                     t        j                  |      }| s|D cg c]  }d|j                  vs| }}|s|D cg c]  }|j                   }}t        |      S c c}w c c}w )a]  List all the datasets scripts available on the Hugging Face Hub.

    Args:
        with_community_datasets (`bool`, *optional*, defaults to `True`):
            Include the community provided datasets.
        with_details (`bool`, *optional*, defaults to `False`):
            Return the full details on the datasets instead of only the short name.

    Example:

    ```py
    >>> from datasets import list_datasets
    >>> list_datasets()
    ['acronym_identification',
     'ade_corpus_v2',
     'adversarial_qa',
     'aeslc',
     'afrikaans_ner_corpus',
     'ag_news',
     ...
    ]
    ```
    )full/)huggingface_hublist_datasetsidlist)with_community_datasetswith_detailsdatasetsdatasets       r   r$   r$   3   sb    2 ,,,?H"+3Ms'**7LGMM.677GJJ77> N7s   AAA"ux   Use 'evaluate.list_evaluation_modules' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluatec                     t        j                         }| s|D cg c]  }d|j                  vs| }}|s|D cg c]  }|j                   }}|S c c}w c c}w )u  List all the metrics script available on the Hugging Face Hub.

    <Deprecated version="2.5.0">

    Use `evaluate.list_evaluation_modules` instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        with_community_metrics (:obj:`bool`, optional, default ``True``): Include the community provided metrics.
        with_details (:obj:`bool`, optional, default ``False``): Return the full details on the metrics instead of only the short name.

    Example:

    ```py
    >>> from datasets import list_metrics
    >>> list_metrics()
    ['accuracy',
     'bertscore',
     'bleu',
     'bleurt',
     'cer',
     'chrf',
     ...
    ]
    ```
    r"   )r#   list_metricsr%   )with_community_metricsr(   metricsmetrics       r   r,   r,   T   s[    > **,G!(/Hf3fii3G6HH+2369933N I3s   AAAz?Clone the dataset repository from the Hugging Face Hub instead.path
local_pathdownload_configc                 *   t        | fd|i|}t        |      }t        j                  |      }t        j
                  j                  |      }t	        j                  |      D ]  \  }}	}
t        j
                  j                  |t        j
                  j                  ||            }t	        j                  |d       |	D cg c]  }|j                  d      r| c}|	dd |
D ]T  }t        j                  t        j
                  j                  ||      t        j
                  j                  ||             V t        j                  ||        t        |      }t!        d|  d| d| d	t#        |      j%                          d
	       yc c}w )a  
    Allow inspection/modification of a dataset script by copying on local drive at local_path.

    Args:
        path (`str`): Path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name
                as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`.
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`.
        local_path (`str`):
            Path to the local folder to copy the dataset script to.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        **download_kwargs (additional keyword arguments):
            Optional arguments for [`DownloadConfig`] which will override
            the attributes of `download_config` if supplied.
    r2   Texist_ok.__Nz"The processing script for dataset  can be inspected at . The main class is in zP. You can modify this processing script and use it with `datasets.load_dataset("")`.)r   r   inspectgetsourcefileosr0   dirnamewalkjoinrelpathmakedirs
startswithshutilcopy2copystatr   printr   as_posix)r0   r1   r2   download_kwargsdataset_modulebuilder_clsmodule_source_pathmodule_source_dirpathdirpathdirnames	filenamesdst_dirpathr?   filenames                 r   inspect_datasetrT   {   sb   * ,De/eUdeN+N;K ..{;GGOO,>?(*0E(F .$9ggll:rwwwH]/^_
K$/ /7^7g>P>PQ\>]w^! 	_HLLgx8"'',,{T\:]^	_-. +:6J	
,TF2G
| T  56 7YYablYmYvYvYxXyy}	 _s   FFuz   Use 'evaluate.inspect_evaluation_module' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluatec                 B   t        | fd|i|}t        |j                  d      }t        j                  |      }t
        j                  j                  |      }t        j                  |      D ]  \  }}	}
t
        j                  j                  |t
        j                  j                  ||            }t        j                  |d       |	D cg c]  }|j                  d      r| c}|	dd |
D ]T  }t        j                  t
        j                  j                  ||      t
        j                  j                  ||             V t        j                  ||        t!        |      }t#        d|  d	| d
| dt%        |      j'                          d	       yc c}w )u  
    Allow inspection/modification of a metric script by copying it on local drive at local_path.

    <Deprecated version="2.5.0">

    Use `evaluate.inspect_evaluation_module` instead, from the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        path (``str``): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. ``'./dataset/squad'`` or ``'./dataset/squad/squad.py'``
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with ``datasets.list_datasets()``)
                e.g. ``'squad'``, ``'glue'`` or ``'openai/webtext'``
        local_path (``str``): path to the local folder to copy the datset script to.
        download_config (Optional ``datasets.DownloadConfig``): specific download configuration parameters.
        **download_kwargs (additional keyword arguments): optional attributes for DownloadConfig() which will override the attributes in download_config if supplied.
    r2   F)r*   Tr4   r6   Nz"The processing scripts for metric r9   r:   zP. You can modify this processing scripts and use it with `datasets.load_metric("r;   )r   r   module_pathr<   r=   r>   r0   r?   r@   rA   rB   rC   rD   rE   rF   rG   r   rH   r   rI   )r0   r1   r2   rJ   metric_module
metric_clsrM   rN   rO   rP   rQ   rR   r?   rS   s                 r   inspect_metricrY      sh   0 *$ccSbcM"=#<#<eLJ ..z:GGOO,>?(*0E(F .$9ggll:rwwwH]/^_
K$/.6^7g>P>PQ\>]w^! 	_HLLgx8"'',,{T\:]^	_-. +:6J	
,TF2G
| T  56 7YYablYmYvYvYxXyy}	 _s   F&F
data_filesdownload_moderevisiontokenc                     |dk7  r t        j                  d| dt               |}t        | |||||      }|D 	ci c]  }	|	t	        d| |	|||||d| c}	S c c}	w )a!
  Get the meta information about a dataset, returned as a dict mapping config name to DatasetInfoDict.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.

            <Deprecated version="2.14.0">

            `use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.

            </Deprecated>

        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_infos
    >>> get_dataset_infos('rotten_tomatoes')
    {'default': DatasetInfo(description="Movie Review Dataset.
This is a dataset of containing 5,331 positive and 5,331 negative processed
sentences from Rotten Tomatoes movie reviews...), ...}
    ```
    r   'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'token=
' instead.)r0   r\   r2   r[   rZ   r]   )r0   config_namerZ   r2   r[   r\   r]   r   )warningswarnFutureWarningget_dataset_config_namesget_dataset_config_info)
r0   rZ   r2   r[   r\   r]   use_auth_tokenconfig_kwargsconfig_namesra   s
             r   get_dataset_infosrj      s    n %==K<LJX	

 +'#L& (  	, 	
#!+'	
 	
 		
  s   Adynamic_modules_pathc           	         t        | f|||||d|}t        |t        j                  j	                  |             }t        |j                  j                               xs+ |j                  j                  d|j                  xs d      gS )a  Get the list of available config names for a particular dataset.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        dynamic_modules_path (`str`, defaults to `~/.cache/huggingface/modules/datasets_modules`):
            Optional path to the directory in which the dynamic modules are saved. It must have been initialized with `init_dynamic_modules`.
            By default the datasets and metrics are stored inside the `datasets_modules` module.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Example:

    ```py
    >>> from datasets import get_dataset_config_names
    >>> get_dataset_config_names("glue")
    ['cola',
     'sst2',
     'mrpc',
     'qqp',
     'stsb',
     'mnli',
     'mnli_mismatched',
     'mnli_matched',
     'qnli',
     'rte',
     'wnli',
     'ax']
    ```
    )r\   r2   r[   rk   rZ   )dataset_namera   default)r   r   r>   r0   basenamer&   builder_configskeysbuilder_kwargsgetDEFAULT_CONFIG_NAME)	r0   r\   r2   r[   rk   rZ   rJ   rK   rL   s	            r   re   re   '  s    p ,'#1 N ,NIYIYZ^I_`K++0023 %%))-9X9X9e\ef8 r   ra   returnc           
      (   |dk7  r t        j                  d| dt               |}t        | f||||||d|}	|	j                  }
|
j
                  |r|j                         n	t               }|||_        |	j                  t        |	j                  |             	 |	j                  t        |	j                  |            D ci c]  }|j                  |j                  | d c}|
_        |
S |
S c c}w # t        $ r}t        d      |d}~ww xY w)	a  Get the meta information (DatasetInfo) about a dataset for a particular config

    Args:
        path (``str``): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. ``'./dataset/squad'`` or ``'./dataset/squad/squad.py'``
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with ``datasets.list_datasets()``)
                e.g. ``'squad'``, ``'glue'`` or ``'openai/webtext'``
        config_name (:obj:`str`, optional): Defining the name of the dataset configuration.
        data_files (:obj:`str` or :obj:`Sequence` or :obj:`Mapping`, optional): Path(s) to source data file(s).
        download_config (:class:`~download.DownloadConfig`, optional): Specific download configuration parameters.
        download_mode (:class:`DownloadMode` or :obj:`str`, default ``REUSE_DATASET_IF_EXISTS``): Download/generate mode.
        revision (:class:`~utils.Version` or :obj:`str`, optional): Version of the dataset script to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.

            <Deprecated version="2.14.0">

            `use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.

            </Deprecated>

        **config_kwargs (additional keyword arguments): optional attributes for builder class which will override the attributes if supplied.

    r   r_   r`   )namerZ   r2   r[   r\   r]   N)	base_pathr2   )rw   rm   z<The split names could not be parsed from the dataset config.)rb   rc   rd   r   infosplitscopyr   r]   _check_manual_downloadr   rx   _split_generatorsrw   	Exceptionr   )r0   ra   rZ   r2   r[   r\   r]   rg   rh   builderry   split_generatorerrs                r   rf   rf   n  sC   R %==K<LJX	

 "	'#	 	G <<D{{4C/..0IY$)O!&&$w/@/@Rab	
	o (/'@'@,w7H7HZij(#  $$/C/CUY&ZZDK K4K  	o%&deknn	os*   (C7 !C2&C7 2C7 7	D DDc           
          |dk7  r t        j                  d| dt               |}t        | f||||||d|}	t	        |	j
                  j                               S )aW	  Get the list of available splits for a particular config and dataset.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        config_name (`str`, *optional*):
            Defining the name of the dataset configuration.
        data_files (`str` or `Sequence` or `Mapping`, *optional*):
            Path(s) to source data file(s).
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        revision ([`Version`] or `str`, *optional*):
            Version of the dataset script to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.

            <Deprecated version="2.14.0">

            `use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.

            </Deprecated>

        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_split_names
    >>> get_dataset_split_names('rotten_tomatoes')
    ['train', 'validation', 'test']
    ```
    r   r_   r`   )ra   rZ   r2   r[   r\   r]   )rb   rc   rd   rf   r&   rz   rq   )
r0   ra   rZ   r2   r[   r\   r]   rg   rh   ry   s
             r   get_dataset_split_namesr     s~    p %==K<LJX	

 "	'#	 	D   "##r   )TF)N)NNNNNr   )NNNNN)NNNNNNr   )3__doc__r<   r>   rE   rb   pathlibr   typingr   r   r   r   r   r	   r#   download.download_configr   download.download_managerr   #download.streaming_download_managerr   ry   r   loadr   r   r   r   r   utils.deprecation_utilsr   utils.file_utilsr   utils.loggingr   utils.versionr   r   logger
ValueErrorr   r$   r,   strrT   rY   boolrj   re   rf   r   r   r   r   <module>r      s    "  	    A A  4 3 I   0 7 % " 
H		* 	 :; <@ ~!!H MN&# &3 &.AY & O&R  A& &# &@X &&V 48048<.2(,S
StT3/0S n-S E,"345	S
 uS'\*+S E$)$%Sp /3048<*.37D
DuS'\*+D n-D E,"345	D
 #3-D tT3/0DR "&_c048<.2(,L
L#L sHSM73c8TW=FX@Y;Y3ZZ[\L n-	L
 E,"345L uS'\*+L E$)$%L Lb "&_c048<.2(,J$
J$#J$ sHSM73c8TW=FX@Y;Y3ZZ[\J$ n-	J$
 E,"345J$ uS'\*+J$ E$)$%J$r   