
    *#h                         d dl Z d dlmZ d dlmZmZ d dlZd dlm	Z
 d dlZd dlmZ ej                  j                  j!                  e      Ze G d dej&                               Z G d dej*                        Zy)	    N)	dataclass)ListOptional)
table_castc                   b    e Zd ZU dZdZeed<   dZee	e
      ed<   dZeej                     ed<   y)ParquetConfigzBuilderConfig for Parquet.i'  
batch_sizeNcolumnsfeatures)__name__
__module____qualname____doc__r	   int__annotations__r
   r   r   strr   datasetsFeatures     f/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/packaged_modules/parquet/parquet.pyr   r      s7    $J#'GXd3i ',0Hhx(()0r   r   c                   Z    e Zd ZeZd Zd Zdej                  dej                  fdZ	d Z
y)Parquetc                 V    t        j                  | j                  j                        S )N)r   )r   DatasetInfoconfigr   )selfs    r   _infozParquet._info   s    ##T[[-A-ABBr   c                    | j                   j                  s"t        d| j                   j                         |j                  | j                   j                        }t	        |t
        t        t        f      re|}t	        |t
              r|g}|D cg c]  }|j                  |       }}t        j                  t        j                  j                  d|i      gS g }|j                         D ]X  \  }}t	        |t
              r|g}|D cg c]  }|j                  |       }}| j                  j                  t         j"                  j%                  |      D ]  }t'        |d      5 }t        j(                  j+                  t-        j.                  |            }| j                   j0                  Pt        j(                  |j                         D 	
ci c]!  \  }	}
|	| j                   j0                  v s|	|
# c}
}	      }|| j                  _        ddd        n |j3                  t        j                  |d|i             [ |S c c}w c c}w c c}
}	w # 1 sw Y   IxY w)z-We handle string, list and dicts in datafilesz=At least one data file must be specified, but got data_files=files)name
gen_kwargsNrb)r   
data_files
ValueErrordownload_and_extract
isinstancer   listtuple
iter_filesr   SplitGeneratorSplitTRAINitemsinfor   	itertoolschainfrom_iterableopenr   from_arrow_schemapqread_schemar
   append)r   
dl_managerr$   r    filesplits
split_namefr   colfeats              r   _split_generatorszParquet._split_generators   s   {{%%\]a]h]h]s]s\tuvv44T[[5K5KL
j3e"45E%%=BCTZ**40CEC++1E1ESZ\aRbcdd!+!1!1!3 	aJ%%=BCTZ**40CECyy!!)%OO99%@ DdD) 6Q#+#4#4#F#Fr~~VWGX#Y;;..:'/'8'8<DNN<L kysDPSW[WbWbWjWjPjd k(H .6		*6  MM(11zwX]N^_`!	a" ) D D !l	6 6s1   II,A+I I8I=III'pa_tablereturnc                     | j                   j                  *t        || j                   j                  j                        }|S N)r/   r   r   arrow_schema)r   r@   s     r   _cast_tablezParquet._cast_table>   s5    99) "(DII,>,>,K,KLHr   c              #     K   | j                   j                  | j                   j                  t        d | j                  j                  j
                  D              t        | j                   j                        k7  r:t        d| j                   j                   d| j                  j                   d      t        t        j                  j                  |            D ]  \  }}t        |d      5 }t        j                  |      }	 t        |j                  | j                   j                  | j                   j                              D ]?  \  }}t         j"                  j%                  |g      }| d| | j'                  |      f A 	 d d d         y # t        $ r-}	t(        j+                  d| d	t-        |	       d
|	         d }	~	ww xY w# 1 sw Y   xY ww)Nc              3   4   K   | ]  }|j                     y wrC   )r!   ).0fields     r   	<genexpr>z+Parquet._generate_tables.<locals>.<genexpr>G   s     NUejjNs   z)Tried to load parquet data with columns 'z' with mismatching features ''r#   )r	   r
   _zFailed to read file 'z' with error z: )r   r   r
   sortedr/   rD   r%   	enumerater0   r1   r2   r3   r5   ParquetFileiter_batchesr	   paTablefrom_batchesrE   loggererrortype)
r   r    file_idxr9   r<   parquet_file	batch_idxrecord_batchr@   es
             r   _generate_tableszParquet._generate_tablesE   s    ;;+0C0C0ONdii.@.@.M.MNNRXY]YdYdYlYlRmm ?@S@S?TTqrvr{r{  sE  sE  rF  FG  H  (	(E(Ee(LM 	NHddD! Q!~~a03<$11T[[=S=S]a]h]h]p]p1q4 T/	< $&88#8#8,#H "*
!I;79I9I(9SSST 	 " LL#8mDQRG9TVWXVY!Z[ s=   C1G!3G
BFG!	G%(GGGG	G!N)r   r   r   r   BUILDER_CONFIG_CLASSr   r?   rQ   rR   rE   r\   r   r   r   r   r      s4    (C@BHH  r   r   )r0   dataclassesr   typingr   r   pyarrowrQ   pyarrow.parquetparquetr5   r   datasets.tabler   utilslogging
get_loggerr   rT   BuilderConfigr   ArrowBasedBuilderr   r   r   r   <module>ri      sk     ! !    % 
			*	*8	4 1H** 1 1Ah(( Ar   