
    *#h                         d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d Z G d de      Zy)    N)ArgumentParser)Path)copyfile)List)config)DatasetBuilder)BaseDatasetsCLICommand)DownloadConfig)DownloadMode)dataset_module_factoryimport_main_class)VerificationModec                     t        | j                  | j                  | j                  | j                  | j
                  | j                  | j                  xs | j                  | j                  | j                  f	i |S N)RunBeamCommanddatasetname	cache_dirbeam_pipeline_optionsdata_dirall_configs	save_info
save_infosignore_verificationsforce_redownload)argskwargss     W/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/commands/run_beam.pyrun_beam_command_factoryr      si    		"")$//!!      c                   T    e Zd Zedefd       Zdedededededed	ed
edefdZd Z	y)r   parserc                    | j                  dd      }|j                  dt        d       |j                  dt        d d	       |j                  d
t        d d	       |j                  dt        dd	       |j                  dt        d d	       |j                  ddd       |j                  ddd       |j                  ddd       |j                  ddd       |j                  ddd       |j                  t               y )Nrun_beamz&Run a Beam dataset processing pipeline)helpr   zName of the dataset to download)typer%   z--namezDataset config name)r&   defaultr%   z--cache_dirz-Cache directory where the datasets are storedz--beam_pipeline_options zrBeam pipeline options, separated by commas. Example:: `--beam_pipeline_options=job_name=my-job,project=my-project`z
--data_dirz?Can be used to specify a manual directory to get the files fromz--all_configs
store_truezTest all dataset configurations)actionr%   z--save_infozSave the dataset infos filez--ignore_verificationsz0Run the test without checksums and splits checksz--force_redownloadzForce dataset redownloadz--save_infoszalias for save_info)func)
add_parseradd_argumentstrset_defaultsr   )r"   run_beam_parsers     r   register_subcommandz"RunBeamCommand.register_subcommand    s4    ++J=e+f$$YS?`$a$$XCLa$b$$@	 	% 	
 	$$% F	 	% 	
 	$$R	 	% 	
 	$$_\Pq$r$$]<Nk$l$$$\@r 	% 	
 	$$%9,Uo$p$$^LOd$e$$*B$Cr    r   r   r   r   r   r   r   r   r   c
                     || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        y r   )
_dataset_name
_cache_dir_beam_pipeline_options	_data_dir_all_configs_save_infos_ignore_verifications_force_redownload_config_kwargs)selfr   r   r   r   r   r   r   r   r   config_kwargss              r   __init__zRunBeamCommand.__init__A   sP      
#&;#!'%%9"!1+r    c                 ~   dd l }| j                  "| j                  rt        d       t	        d       | j
                  | j                  }}t        |      }t        |j                        }g }| j                  rb|j                  j                  j                  | j                  j                  d      D cg c]  }|sd|j                           c}      }nd }| j                  rt        |j                         dkD  rp|j                   D ]`  }	|j#                   ||	j$                  | j&                  |j(                  || j*                  |j,                  j/                  d                   b nT|j#                   |d|| j&                  || j*                  |j,                  j/                  d      d	| j0                         |D ]  }
|
j3                  | j4                  st6        j8                  nt6        j:                  t=        t>        j@                  
      | jB                  rtD        jF                  ntD        jH                  d       | jJ                  s|
jK                           t        d       | jJ                  r7tL        jN                  jQ                  |jS                         t>        jT                        }tW        |      j$                  dz   }tL        jN                  jQ                  ||      }tL        jN                  jY                  |      r tL        jN                  j[                  |      }n;tL        jN                  jY                  |      r|}nt        d|        t	        d       tL        jN                  jQ                  t>        jT                        }t]        ||       t        d|        y y c c}w )Nr   z?Both parameters `name` and `all_configs` can't be used at once.   ,z--)flags	base_path)config_namer   hashbeam_optionsr   rD   )rE   r   rG   r   rD   )r   F)download_modedownload_configverification_modetry_from_hf_gcszApache beam run successful.z.pyzDataset Infos file saved at  )/apache_beamr4   r8   printexitr3   r   r   module_pathr6   optionspipeline_optionsPipelineOptionssplitstriplenBUILDER_CONFIGSappendr   r7   rF   r5   builder_kwargsgetr<   download_and_preparer;   r   REUSE_CACHE_IF_EXISTSFORCE_REDOWNLOADr
   r   DOWNLOADED_DATASETS_PATHr:   r   	NO_CHECKS
ALL_CHECKSr9   ospathjoinget_imported_module_dirDATASETDICT_INFOS_FILENAMEr   isfiledirnamer   )r=   beamrb   rE   dataset_modulebuilder_clsbuildersoptrG   builder_configbuilderdataset_infos_pathr   combined_pathdataset_diruser_dataset_infos_paths                   r   runzRunBeamCommand.runY   s   "::!d&7&7STG MM4::k/5'(B(BC)+&&<<88HH595P5P5V5VWZ5[cc_bCIIK=)c I L  L[%@%@!AA!E"-"="= 
$2$7$7!%+00%1"&//"0"?"?"C"CK"P	
 OO  +!^^!-"oo,;;??L ))	   	&G((-- +@@!22 .9X9X Y-- #3"<"<%00 % ) 	 ##%	& 	+,
 !#k.Q.Q.SU[UvUv!w:??U*DGGLLt4Mww~~d# ggood3."45G4HIJQ ')ggll;@a@a&b#')@A01H0IJK# _ ds   5N:=N:N)
__name__
__module____qualname__staticmethodr   r1   r.   boolr?   rs   rL   r    r   r   r      s    DN D D@,, , 	,
  #, , , , #, ,0LLr    r   )ra   argparser   pathlibr   shutilr   typingr   datasetsr   datasets.builderr   datasets.commandsr	   !datasets.download.download_configr
   "datasets.download.download_managerr   datasets.loadr   r   datasets.utils.info_utilsr   r   r   rL   r    r   <module>r      s?    	 #     + 4 < ; C 6FL+ FLr    