
    *#h(              	          d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlZd dlmZ ddlmZ ddlmZ d	d
lmZ  ee      Z G d dej.                        Zdede
e	e   ef   fdZ ed       G d de             Z G d deeeeef   f         Zi dg dg dg dg dg dg dg dg dg dg dg d g d!g d"g d#g d$g d%g i d&g d'g d(g d)g d*g d+g d,g d-g d.g d/g d0g d1g d2g d3g d4g d5g d6g g g g d7Zed8k(  rmd d9lm Z   e d:;      Z!e!jE                  d<       e!jG                         Z$ ee$jJ                        Z%ejM                  e%      Z' e(e'       e'jS                  e%       yy)=    N)Counter)Path)AnyClassVarDictOptionalTupleUnion)DatasetCardData   )METADATA_CONFIGS_FIELD)
get_logger   )
deprecatedc                   &     e Zd Zd Zd fd	Z xZS )_NoDuplicateSafeLoaderc                 4   |j                   D cg c]  \  }}| j                  |    }}}|D cg c]  }t        |t              rt	        |      n|! }}t        |      }|D cg c]  }||   dkD  s| }}|rt        d|       y c c}}w c c}w c c}w )Nr   zGot duplicate yaml keys: )valueconstructed_objects
isinstancelisttupler   	TypeError)selfnodekey_node_keyskeycounterduplicate_keyss           T/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/utils/metadata.py(_check_no_duplicates_on_constructed_nodez?_NoDuplicateSafeLoader._check_no_duplicates_on_constructed_node   s    FJjjQ{x((2QQHLMjd3c
<MM$-)0E#GCL14D#EE77GHII 	 RMEs   B
$B%B3Bc                 L    t         |   ||      }| j                  |       |S )N)deep)superconstruct_mappingr#   )r   r   r%   mapping	__class__s       r"   r'   z(_NoDuplicateSafeLoader.construct_mapping   s*    '+Dt+<55d;    )F)__name__
__module____qualname__r#   r'   __classcell__)r)   s   @r"   r   r      s    J r*   r   readme_contentreturnc                    t        | j                               }|rS|d   dk(  rKd|dd  v rD|dd  j                  d      dz   }dj                  |d|       }|dj                  ||dz   d        fS d dj                  |      fS )Nr   z---r   
)r   
splitlinesindexjoin)r/   full_contentsep_idx	yamlblocks       r"   _split_yaml_from_readmer9       s    1134LQ50Ul12>N5Nqr"((/!3IIl1W56	$))L1$?@@@<(((r*   z.Use `huggingface_hub.DatasetCardData` instead.c                       e Zd ZdhZedeeef   dd fd       ZdefdZ	dde
e   defdZed	edd fd
       ZdefdZy)DatasetMetadatatrain_eval_indexpathr0   c                     t        |d      5 }t        |j                               \  }}ddd       | j                  |      S  |        S # 1 sw Y   #xY w)aS  Loads and validates the dataset metadata from its dataset card (README.md)

        Args:
            path (:obj:`Path`): Path to the dataset card (its README.md file)

        Returns:
            :class:`DatasetMetadata`: The dataset's metadata

        Raises:
            :obj:`TypeError`: If the dataset's metadata is invalid
        utf-8encodingN)openr9   readfrom_yaml_string)clsr=   readme_fileyaml_stringr   s        r"   from_readmezDatasetMetadata.from_readme/   s^     $) 	I[4[5E5E5GHNK	I"''445L	I 	Is   AAc                    |j                         r't        |d      5 }|j                         }d d d        nd }| j                        }t        |dd      5 }|j	                  |       d d d        y # 1 sw Y   CxY w# 1 sw Y   y xY w)Nr?   r@   w)existsrB   rC   
_to_readmewrite)r   r=   rF   r/   updated_readme_contents        r"   	to_readmezDatasetMetadata.to_readmeC   s    ;;=dW- 4!,!1!1!34 4 "N!%!@$g. 	6+45	6 	64 4
	6 	6s   A4B 4A= B	Nr/   c                     |)t        |      \  }}d| j                         z   dz   |z   }|S d| j                         z   dz   }|S )Nz---
)r9   to_yaml_string)r   r/   r   contentr6   s        r"   rL   zDatasetMetadata._to_readmeM   s[    %0@JAw"T%8%8%::WDwNL  #T%8%8%::WDLr*   stringc                    t        j                  |t              xs i }|j                         D ci c]8  \  }}|j	                  dd      | j
                  v r|j	                  dd      n||: }}} | di |S c c}}w )a'  Loads and validates the dataset metadata from a YAML string

        Args:
            string (:obj:`str`): The YAML string

        Returns:
            :class:`DatasetMetadata`: The dataset's metadata

        Raises:
            :obj:`TypeError`: If the dataset's metadata is invalid
        )Loader-r    )yamlloadr   itemsreplace_FIELDS_WITH_DASHES)rE   rS   metadata_dictr   r   s        r"   rD   z DatasetMetadata.from_yaml_stringU   s     		&1GHNB
 ,113
U '*kk#s&;s?V?V&VS[[c"\_bgg
 
 #]##	
s   =A;c           
          t        j                  | j                         D ci c](  \  }}|| j                  v r|j	                  dd      n||* c}}ddd      j                  d      S c c}}w )Nr   rV   FTr?   )	sort_keysallow_unicoderA   )rX   	safe_dumprZ   r\   r[   decode)r   r   r   s      r"   rQ   zDatasetMetadata.to_yaml_stringk   sp    ~~ #'**,C +.1I1I*IS#&sUZZ 
 &/	s   -A+
)N)r+   r,   r-   r\   classmethodr
   r   strrH   rO   r   rL   rD   rQ   rW   r*   r"   r;   r;   *   s     ..uT3Y/ 4E  &6d 6# #  $c $.? $ $*	 	r*   r;   c                   x    e Zd ZU dZeZee   ed<   e	de
fd       Zededd fd       Zdeddfd	Zdee   fd
Zy)MetadataConfigsz5Should be in format {config_name: {**config_params}}.
FIELD_NAMEmetadata_configc                    | j                  d      }|t        j                  d| d      }t        |t        t
        f      st        |      t        |t              rq|D ]k  }t        |t
        t        f      rIt        |t              s*t        |      dk(  r*d|v r&t        |j                  d      t
        t        f      rbt        |       y y y )N
data_filesz
                Expected data_files in YAML to be either a string or a list of strings
                or a list of dicts with two keys: 'split' and 'path', but got a  
                Examples of data_files in YAML:

                   data_files: data.csv

                   data_files: data/*.png

                   data_files:
                    - part0/*
                    - part1/*

                   data_files:
                    - split: train
                      path: train/*
                    - split: test
                      path: test/*

                   data_files:
                    - split: train
                      path:
                      - train/part1/*
                      - train/part2/*
                    - split: test
                      path: test/*
                r   splitr=   )	gettextwrapdedentr   r   rd   
ValueErrordictlen)rh   yaml_data_filesyaml_error_messageyaml_data_files_items       r"   $_raise_if_data_files_field_not_validz4MetadataConfigs._raise_if_data_files_field_not_valid|   s    )--l;&!)OO^N_ `": oc{; !344/40,; 
=(&';c4[I%&:DA 45: '+? ? *+?+C+CF+KcSW[ Y ));<<
= 1A 'r*   dataset_card_datar0   c                    |j                  | j                        r|| j                     }t        |t              st	        d| j                   d| d      |D ]&  }d|vrt	        d| d      | j                  |       (  | |D ci c]0  }|d   |j                         D ci c]  \  }}|dk7  s|| c}}2 c}}}      S  |        S c c}}w c c}}}w )Nz	Expected z to be a list, but got ''config_namezUEach config must include `config_name` field with a string name of a config, but got z. )rl   rg   r   r   ro   ru   rZ   )rE   rv   metadata_configsrh   configparamr   s          r"   from_dataset_card_dataz&MetadataConfigs.from_dataset_card_data   s     00@.5 9S^^,<<TUeTffg!hii#3 J 7$##2"327  88IJ  #3  =)V\\^+v\UE_dhu_uE5L+vv  u	 ,ws   
C&C4C9CCNc                    | r| j                         D ]  }| j                  |        | j                  |      }t        t	        i || j                                     }|j                         D ]  \  }}|j                  dd         |j                         D cg c]  \  }}d|i| c}}|| j                  <   y y c c}}w )Nry   )valuesru   r}   rp   sortedrZ   poprg   )r   rv   rh   current_metadata_configstotal_metadata_configsry   config_metadatas          r"   to_dataset_card_dataz$MetadataConfigs.to_dataset_card_data   s    #';;= K99/JK'+'B'BCT'U$%)&1U4L1UPT1U1[1[1]*^%_"0F0L0L0N 9,_##M489 5K4P4P4R20K ??2doo. 2s   C c                     d }| j                         D ]2  \  }}|dk(  s|j                  d      s||}"t        d| d| d       |S )Ndefaultz&Dataset has several default configs: 'z' and 'z'.)rZ   rl   ro   )r   default_config_namery   rh   s       r"   get_default_config_namez'MetadataConfigs.get_default_config_name   sp    ",0JJL 	(Ki'?+>+>y+I&.*5'$@AT@UU\]h\iikl 	 #"r*   )r+   r,   r-   __doc__r   rg   r   rd   __annotations__staticmethodrp   ru   rc   r   r}   r   r   r   rW   r*   r"   rf   rf   w   ss    ? 6J6-=d -= -=^  K\  (o $ 
## 
#r*   rf   zimage-classificationtranslationzimage-segmentationz	fill-maskzautomatic-speech-recognitionztoken-classificationzsentence-similarityzaudio-classificationzquestion-answeringsummarizationzzero-shot-classificationztable-to-textzfeature-extractionotherzmultiple-choiceztext-classificationztext-to-imageztext2text-generationzzero-shot-image-classificationztabular-classificationztabular-regressionzimage-to-imageztabular-to-textzunconditional-image-generationztext-retrievalztext-to-speechzobject-detectionzaudio-to-audioztext-generationconversationalztable-question-answeringzvisual-question-answeringzimage-to-textzreinforcement-learning)zvoice-activity-detectionztime-series-forecastingzdocument-question-answering__main__)ArgumentParserz5Validate the yaml metadata block of a README.md file.)usagereadme_filepath)*rm   collectionsr   pathlibr   typingr   r   r   r   r	   r
   rX   huggingface_hubr   r{   r   utils.loggingr   deprecation_utilsr   r+   logger
SafeLoaderr   rd   r9   rp   r;   rf   known_task_idsargparser   apadd_argument
parse_argsargsr   rH   dataset_metadataprintrO   rW   r*   r"   <module>r      s      > >  + + & ) 
H	T__ )C )E(3-:L4M ) <=Id I >IXa#d3S#X./ a#N&B&2& "& 	&
 #B& B& 2& B& "& R& & R& "& R& r&  2!&" R#&$ B%&& %b'&( b)&* "+&, b-&. r/&0 %b1&2 b3&4 b5&6 7&8 b9&: r;&< b=&> ?&@  A&B RC&D bE&F !#!#%K&R z'	U	VBOO%&==?D4//0O&22?C	
/ r*   