
    *#h                     ~    d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 dZ
dZg dZg dZd	efd
Z G d de      Zy)    N)ArgumentParser	Namespace)BaseDatasetsCLICommand)
get_loggerz><<<<<<< This should probably be modified because it mentions: z=======
>>>>>>>
)TextEncoderConfigByteTextEncoderSubwordTextEncoderencoder_configmaybe_build_from_corpus
manual_dir))z
tfds\.coredatasets)ztf\.io\.gfile\.GFileopen)ztf\.([\w\d]+)zdatasets.Value('\1'))ztfds\.features\.Text\(\)zdatasets.Value('string'))ztfds\.features\.Text\(zdatasets.Value('string'),)z+features\s*=\s*tfds.features.FeaturesDict\(zfeatures=datasets.Features()ztfds\.features\.FeaturesDict\(zdict()zThe TensorFlow Datasets AuthorszDThe TensorFlow Datasets Authors and the HuggingFace Datasets Authors)ztfds\.z	datasets.)zdl_manager\.manual_dirzself.config.data_dir)zself\.builder_configzself.configargsc                 B    t        | j                  | j                        S )zz
    Factory function used to convert a model TF 1.0 checkpoint in a PyTorch checkpoint.

    Returns: ConvertCommand
    )ConvertCommand	tfds_pathdatasets_directory)r   s    V/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/commands/convert.pyconvert_command_factoryr   *   s     $..$*A*ABB    c                   8    e Zd Zedefd       ZdedefdZd Zy)r   parserc                     | j                  dd      }|j                  dt        dd       |j                  dt        dd	       |j                  t        
       y)z
        Register this command to argparse so it's available for the datasets-cli

        Args:
            parser: Root parser to register command-specific arguments
        convertzHConvert a TensorFlow Datasets dataset to a HuggingFace Datasets dataset.)helpz--tfds_pathTzQPath to a TensorFlow Datasets folder to convert or a single tfds file to convert.)typerequiredr   z--datasets_directoryz(Path to the HuggingFace Datasets folder.)funcN)
add_parseradd_argumentstrset_defaultsr   )r   train_parsers     r   register_subcommandz"ConvertCommand.register_subcommand4   ss     (([ ) 
 	!!d	 	" 	
 	!!"tBl 	" 	
 	!!'>!?r   r   r   c                 @    t        d      | _        || _        || _        y )Nzdatasets-cli/converting)r   _logger
_tfds_path_datasets_directory)selfr   r   r   s       r   __init__zConvertCommand.__init__K   s    !";<##5 r   c                    t         j                  j                  | j                        r*t         j                  j	                  | j                        }n^t         j                  j                  | j                        r*t         j                  j                  | j                        }nt        d      t         j                  j	                  | j                        }| j                  j                  d| d|        g }g }i }t         j                  j                  | j                        rt        j                  |      }n*t         j                  j                  | j                        g}|D ]  }| j                  j                  d|        t         j                  j                  ||      }t         j                  j                  ||      }	t         j                  j                  |      rd|v sd|v sd|vr| j                  j                  d       t        |d	
      5 }
|
j                         }d d d        g }d}d}g }D ]  }|dv rdv rdv rdv rdndv rd#dv rdndv rj!                  dd      nt#        fdt$        D              rid}t'        t)        fdt$                    }|j+                  t,        t/        |      z   dz          |j+                         |j+                  t0               t2        D ]  \  }}t5        j6                  ||       dv r`t5        j8                  d      }|j;                  d |j=                  d      j?                  d      D               d |j=                  d      z   d!v sd"v sdv rt        d#jA                                d$v sd%v rd}|j+                          |sd&|v r|j!                  dd      }t         j                  j                  ||      }t         j                  j                  ||      }	t        jB                  |d'       | j                  j                  d(|        |jE                  |D ci c]  }|| c}       n|j+                  |	       |r|j+                  |	       t        |	d)d	
      5 }
|
jG                  |       d d d        | j                  j                  d*|	         |D ]n  }	 t         j                  j                  |      }||j!                  dd         }| j                  j                  d+| d|        tI        jJ                  ||       p |r'|D ]!  }| j                  jQ                  d.| d/       # y y # 1 sw Y   ]xY wc c}w # 1 sw Y   xY w# tL        $ r" | j                  jO                  d,| d-       Y w xY w)0NzA--tfds_path is neither a directory nor a file. Please check path.zConverting datasets from z to zLooking at file r*   _testz.pyzSkipping filezutf-8)encodingFz!import tensorflow.compat.v2 as tfz
@tfds.corezbuilder=selfz-import tensorflow_datasets.public_api as tfdszimport datasets
zimport tensorflow zfrom absl import loggingzfrom datasets import logging
	getLoggerr   c              3   &   K   | ]  }|v  
 y wN ).0
expressionout_lines     r   	<genexpr>z%ConvertCommand.run.<locals>.<genexpr>   s     OJx/Os   Tc                     | v S r1   r2   )er5   s    r   <lambda>z$ConvertCommand.run.<locals>.<lambda>   s    a8m r   
tensorflow_datasetsz/from\stensorflow_datasets.*import\s([^\.\r\n]+)c              3   <   K   | ]  }|j                           y wr1   )strip)r3   imps     r   r6   z%ConvertCommand.run.<locals>.<genexpr>   s     'Y		'Ys      ,zfrom . import ztf.ztfds.zError converting GeneratorBasedBuilderBeamBasedBuilderwmt)exist_okzAdding directory wzConverted in zMoving z#Cannot find destination folder for z. Please copy manually.z!You need to manually update file z4 to remove configurations using 'TextEncoderConfig'.))ospathisdirr'   abspathisfiledirname
ValueErrorr(   r&   infolistdirbasenamejoinr   	readlinesreplaceanyTO_HIGHLIGHTlistfilterappendHIGHLIGHT_MESSAGE_PREr!   HIGHLIGHT_MESSAGE_POST
TO_CONVERTresubmatchextendgroupsplitr=   makedirsupdate
writelinesshutilcopyKeyErrorerrorwarning)r)   abs_tfds_pathabs_datasets_pathutils_fileswith_manual_updateimports_to_builder_map
file_namesf_name
input_fileoutput_fileflines	out_lines
is_builderneeds_manual_updatetfds_importsline	to_removepatternreplacementr]   dir_name
output_dirr>   
utils_filedest_folder	file_pathr5   s                              @r   runzConvertCommand.runQ   sH   77==)GGOODOO<MWW^^DOO,GGOODOO<M`aaGGOOD,D,DE5m_DIZH[\]!#77==)M2J''**4??;<J  N	=FLL 09:mV<J'',,'8&AK77>>*-v1ETZIZ^ckq^q!!/2j73 &q& IJ"'L ++ 7(B!X-#x/DP2H(H4!H/8;?H H,'//\JHO,OO*.' $V,C\%R SI$$%:S^%Kd%RS$$X.$$%;<0: J,#%66';#IJ )H4HH%WYabE '''Yu{{1~?S?STW?X'YY/%++a.@H H$8(;?TX`?`$'89I8J%KLL*h6:LPX:X!%J  *W++Z Uf_!>>%4WW\\*;XF
 ggll:v>J6!!$5j\"BC&--,.W3sJ.WX "";/""))+6k39 (QY'(LLk];<]N	=` & 	nJn))*54V^^E25NO!!GK=ZL"IJJ4	n / 	$$7	{Bvw a& &v /X( (  n""%HTk#lmns1   $V4
V
8V"9A+V.V	"V+	.(WWN)	__name__
__module____qualname__staticmethodr   r$   r!   r*   r   r2   r   r   r   r   3   s7    @N @ @,6# 63 6rr   r   )rF   r[   rd   argparser   r   datasets.commandsr   datasets.utils.loggingr   rX   rY   rT   rZ   r   r   r2   r   r   <module>r      sS    	 	  . 4 - ]  
"C) CP+ Pr   