
    *#hd              
          d dl mZmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZ d dlZd dlZd dlZd dlmZ ddlmZ ddlmZ dd	lmZmZmZmZ dd
l m!Z! ddl"m#Z#  ed      Z$ ed      Z% ed      Z& ed      Z'de(de)fdZ*defdZ+de!dee,e-e(e.e
f   de!dejB                  fdZ/de!dee,e-e(e.e
f   dejB                  fdZ0dejb                  de)fdZ2 G d de	e%e&e'f         Z3dee.ee$   f   dee.e$f   fdZ4 G d de3ejB                  ejb                  ejB                  f         Z5 G d  d!e3e6e7e6f         Z8 G d" d#e3e6ejr                  e6f         Z: G d$ d%e3ejv                  ejx                  ejv                  f         Z= G d& d'      Z> G d( d)      Z? G d* d+e      Z@ G d, d-e@      ZA G d. d/e@      ZB G d0 d1e	e%e&e'f         ZC G d2 d3eCe%e&e'f         ZD G d4 d5eCejB                  ejb                  ejB                  f         ZE G d6 d7eCee7ef         ZF G d8 d9eCejv                  ejx                  ejv                  f         ZG G d: d;eCe6e&e6f         ZHde.d<ee.   ddfd=ZIdee,e-e(e
f   d>e,ddfd?ZJdee,e-e(e.e
f   de.fd@ZK	 dEde!dee,e-e(e.e
f   dee!   dejB                  fdAZL	 	 dFde!dee,e-e(e.e
f   dBeCdCee7   fdDZMy)G    )MappingMutableMapping)partial)	AnyCallableDictGenericIterableListOptionalTypeVarUnionN)version   )config)Features)_ArrayXDExtensionType_is_zero_copy_onlydecode_nested_examplepandas_types_mapper)Table)no_op_if_value_is_nullT	RowFormatColumnFormatBatchFormatkeyreturnc                 V    | j                   dk(  xr | j                  | j                  k\  S )N   )stepstopstartr   s    [/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/formatting/formatting.py_is_range_contiguousr&   (   s"    88q=2SXX22    c                 8    t        d|  dt        |        d      )NzWrong key type: 'z' of type 'z6'. Expected one of int, slice, range, str or Iterable.)	TypeErrortyper$   s    r%   _raise_bad_key_typer+   ,   s&    

C5DI;6lm r'   tableindicesc           
         t        |t              rK|j                  ||j                  z  d      j	                  d      d   j                         }t        | |      S t        |t              r!t        |j                  |j                         }t        |t              rt        |      rw|j                  dk\  rht        | |j                  |j                  |j                  |j                  z
        j	                  d      D cg c]  }|j                          c}      S 	 t        |t              r;| j                  |g      } t        | |j	                  d      j                               S t        |t               rJt        | |D cg c]4  }|j                  |d      j	                  d      d   j                         6 c}      S t#        |       yc c}w c c}w )aE  
    Query a pyarrow Table to extract the subtable that correspond to the given key.
    The :obj:`indices` parameter corresponds to the indices mapping in case we cant to take into
    account a shuffling or an indices selection for example.
    The indices table must contain one column named "indices" of type uint64.
    r    r   N)
isinstanceint
fast_slicenum_rowscolumnas_py_query_tablesliceranger-   r&   r#   r"   strselect	to_pylistr
   r+   )r,   r   r-   is       r%   !_query_table_with_indices_mappingr<   2   s|    #s  w'7'7!7;BB1EaHNNPE3''#uS[[!1!123#u$a7+=+=ciiTWT]T]I]+^+e+efg+hia	i  #scU#E7>>!#4#>#>#@AA#x E[^#_VWG$6$6q!$<$C$CA$Fq$I$O$O$Q#_`` j $`s   G
9G
c                 *   t        |t              r| j                  || j                  z  d      S t        |t              r!t        |j                  | j                         }t        |t
              rNt        |      rB|j                  dk\  r3| j                  |j                  |j                  |j                  z
        S 	 t        |t              r9| j                  j                  | j                  D cg c]
  }||k7  s	| c}      S t        |t              rlt        j                   |t        j"                        }t%        |      dk(  r| j                  j	                  dd      S | j'                  || j                  z        S t)        |       yc c}w )zY
    Query a pyarrow Table to extract the subtable that correspond to the given key.
    r    r   N)r/   r0   r1   r2   r6   r7   r-   r&   r#   r"   r8   r,   dropcolumn_namesr
   npfromiterint64lenfast_gatherr+   )r,   r   r3   s      r%   r5   r5   P   s-    #senn 4a88#uS[[01#u$a##CIIsxx#))/CDD#s{{e6H6H ZFFVYM Z[[#x kk#rxx(s8q=;;$$Q**  u~~!566 ![s   2
F=Fpa_arrayc                      | j                   dkD  S Nr   )
null_count)rE   s    r%   _is_array_with_nullsrI   i   s    ""r'   c                   |    e Zd ZdZdej
                  defdZdej
                  defdZ	dej
                  de
fdZy)BaseArrowExtractorz
    Arrow extractor are used to extract data from pyarrow tables.
    It makes it possible to extract rows, columns and batches.
    These three extractions types have to be implemented.
    pa_tabler   c                     t         NNotImplementedErrorselfrL   s     r%   extract_rowzBaseArrowExtractor.extract_rowt       !!r'   c                     t         rN   rO   rQ   s     r%   extract_columnz!BaseArrowExtractor.extract_columnw   rT   r'   c                     t         rN   rO   rQ   s     r%   extract_batchz BaseArrowExtractor.extract_batchz   rT   r'   N)__name__
__module____qualname____doc__par   r   rS   r   rV   r   rX    r'   r%   rK   rK   m   sL    "BHH " ""rxx "L ""bhh "; "r'   rK   py_dictc                 \    | j                         D ci c]  \  }}||d    c}}S c c}}w )z:Return the first element of a batch (dict) as a row (dict)r   )items)r_   r   arrays      r%   _unnestrc   ~   s(    ,3MMO<jc5CqM<<<s   (c                       e Zd Zdej                  dej                  fdZdej                  dej                  fdZdej                  dej                  fdZy)SimpleArrowExtractorrL   r   c                     |S rN   r^   rQ   s     r%   rS   z SimpleArrowExtractor.extract_row       r'   c                 $    |j                  d      S rG   )r3   rQ   s     r%   rV   z#SimpleArrowExtractor.extract_column   s    q!!r'   c                     |S rN   r^   rQ   s     r%   rX   z"SimpleArrowExtractor.extract_batch   rg   r'   N)	rY   rZ   r[   r]   r   rS   ArrayrV   rX   r^   r'   r%   re   re      sS    BHH  "rxx "BHH "bhh 288 r'   re   c                   x    e Zd Zdej                  defdZdej                  defdZdej                  defdZ	y)PythonArrowExtractorrL   r   c                 4    t        |j                               S rN   )rc   	to_pydictrQ   s     r%   rS   z PythonArrowExtractor.extract_row   s    x))+,,r'   c                 @    |j                  d      j                         S rG   )r3   r:   rQ   s     r%   rV   z#PythonArrowExtractor.extract_column   s    q!++--r'   c                 "    |j                         S rN   )rn   rQ   s     r%   rX   z"PythonArrowExtractor.extract_batch   s    !!##r'   N)
rY   rZ   r[   r]   r   dictrS   listrV   rX   r^   r'   r%   rl   rl      sG    -BHH - -.rxx .D .$bhh $4 $r'   rl   c                       e Zd Zd Zdej
                  defdZdej
                  dej                  fdZ
dej
                  defdZdej                  dej                  fdZy	)
NumpyArrowExtractorc                     || _         y rN   )np_array_kwargs)rR   rv   s     r%   __init__zNumpyArrowExtractor.__init__   s
    .r'   rL   r   c                 6    t        | j                  |            S rN   )rc   rX   rQ   s     r%   rS   zNumpyArrowExtractor.extract_row   s    t))(344r'   c                 D    | j                  ||j                  d            S rG   )_arrow_array_to_numpyr?   rQ   s     r%   rV   z"NumpyArrowExtractor.extract_column   s#    ))(83H3H3K*LMMr'   c                 f    |j                   D ci c]  }|| j                  ||          c}S c c}w rN   )r?   rz   )rR   rL   cols      r%   rX   z!NumpyArrowExtractor.extract_batch   s0    JRJ_J_`3T//>>```s   .rE   c                    t        |t        j                        rt        |j                  t              rUt        |j                  j                  d      }|j                  D cg c]  }|j                  |      D ]  }|  c}}nt        |j                        xr t        d |j                  D              }|j                  D cg c]  }|j                  |      D ]  }|  c}}nt        |j                  t              r4t        |j                  j                  d      }|j                  |      nCt        |j                        xr t        |       }|j                  |      j                         t              dkD  r0t        fdD              rt        j                  dt               S t        j                  d	      S c c}}w c c}}w )
NT)unnest)zero_copy_onlyc              3   4   K   | ]  }t        |         y wrN   )rI   ).0chunks     r%   	<genexpr>z<NumpyArrowExtractor._arrow_array_to_numpy.<locals>.<genexpr>   s       K8=,U33Ks   r   c              3   
  K   | ]z  }t        |t        j                        xr1 |j                  t        k(  xs |j
                  d    j
                  k7  xs' t        |t              xr t        j                  |       | yw)r   N)r/   r@   ndarraydtypeobjectshapefloatisnan)r   xrb   s     r%   r   z<NumpyArrowExtractor._arrow_array_to_numpy.<locals>.<genexpr>   sp        Arzz*_60A0^QWWPUVWPXP^P^E^ :q%(8RXXa[:s   B BF)copyr   r   )r/   r]   ChunkedArrayr*   r   r   storage_dtypechunksto_numpyallrI   tolistrC   anyr@   rb   r   )rR   rE   r   r   rowrb   s        @r%   rz   z)NumpyArrowExtractor._arrow_array_to_numpy   s   h0(--)>?!3HMM4O4OX\!]%-__!^l@m9<C "4HMM!B "s KAIK H &.__!^l@m9<C (--)>?!3HMM4O4OX\!]&//~/N!3HMM!B!iK_`hKiGi&//~/NUUWu:>   
 xxE@@xxE**3s   & G GN)rY   rZ   r[   rw   r]   r   rq   rS   r@   r   rV   rX   rj   rz   r^   r'   r%   rt   rt      sq    /5BHH 5 5Nrxx NBJJ Nabhh a4 a+bhh +2:: +r'   rt   c                       e Zd Zdej                  dej                  fdZdej                  dej                  fdZ	dej                  dej                  fdZ
y)PandasArrowExtractorrL   r   c                 N    |j                  d      j                  t              S )Nr    )lengthtypes_mapper)r6   	to_pandasr   rQ   s     r%   rS   z PandasArrowExtractor.extract_row   s"    ~~Q~'11?R1SSr'   c                 n    |j                  dg      j                  t              |j                  d      S )Nr   r   )r9   r   r   r?   rQ   s     r%   rV   z#PandasArrowExtractor.extract_column   s4    s#--;N-OPXPePefgPhiir'   c                 .    |j                  t              S )Nr   )r   r   rQ   s     r%   rX   z"PandasArrowExtractor.extract_batch   s    !!/B!CCr'   N)rY   rZ   r[   r]   r   pd	DataFramerS   SeriesrV   rX   r^   r'   r%   r   r      s\    TBHH T Tjrxx jBII jDbhh D2<< Dr'   r   c                   R    e Zd Zdee   fdZdedefdZdede	defdZ
d	edefd
Zy)PythonFeaturesDecoderfeaturesc                     || _         y rN   r   rR   r   s     r%   rw   zPythonFeaturesDecoder.__init__   	     r'   r   r   c                 T    | j                   r| j                   j                  |      S |S rN   )r   decode_example)rR   r   s     r%   
decode_rowz PythonFeaturesDecoder.decode_row   s"    48MMt}}++C0JsJr'   r3   column_namec                 V    | j                   r| j                   j                  ||      S |S rN   )r   decode_column)rR   r3   r   s      r%   r   z#PythonFeaturesDecoder.decode_column   s%    CG==t}}**6;?\V\\r'   batchc                 T    | j                   r| j                   j                  |      S |S rN   )r   decode_batchrR   r   s     r%   r   z"PythonFeaturesDecoder.decode_batch   s"    48MMt}}))%0LuLr'   N)rY   rZ   r[   r   r   rw   rq   r   rr   r8   r   r   r^   r'   r%   r   r      s]    !(!3 !Kd Kt K]D ]s ]t ]M$ M4 Mr'   r   c                       e Zd Zdee   fdZdej                  dej                  fdZdej                  de
dej                  fdZd	ej                  dej                  fd
Zy)PandasFeaturesDecoderr   c                     || _         y rN   r   r   s     r%   rw   zPandasFeaturesDecoder.__init__   r   r'   r   r   c                 D   | j                   r^| j                   j                         D ci c]8  \  }}| j                   j                  |   r|t        t	        t
        |            : c}}ni }|r+|j                  |      |t        |j                               <   |S c c}}w rN   )	r   ra   _column_requires_decodingr   r   r   	transformrr   keys)rR   r   r   featuredecodes        r%   r   z PandasFeaturesDecoder.decode_row   s     }} -1MM,?,?,A(K==::;G 3G<QSZ4[\\  	 '*}}V'<CV[[]#$
s   =Br3   r   c                     | j                   rM|| j                   v r?| j                   j                  |   r&t        t        t        | j                   |               nd }|r|j                  |      }|S rN   )r   r   r   r   r   r   )rR   r3   r   r   s       r%   r   z#PandasFeaturesDecoder.decode_column   sf     }}!=$--BiBijuBv #7+@$--P[B\#]^ 	
 %%f-Fr'   r   c                 $    | j                  |      S rN   )r   r   s     r%   r   z"PandasFeaturesDecoder.decode_batch   s    u%%r'   N)rY   rZ   r[   r   r   rw   r   r   r   r   r8   r   r   r^   r'   r%   r   r      sk    !(!3 !bll r|| BII C BII &",, &2<< &r'   r   c                       e Zd ZdZdej
                  ddfdZd Zd Zd Z	dd
Z
d Zd Zd Zej                   ej"                  d      k\  r	d Zd Zd Zd Zd Zedd       Zd Zd Zy	)LazyDictzeA dictionary backed by Arrow data. The values are formatted on-the-fly when accessing the dictionary.rL   	formatter	Formatterc                     || _         || _        |j                  D ci c]  }|d  c}| _        t	        | j                  j                               | _        y c c}w rN   )rL   r   r?   datasetr   keys_to_format)rR   rL   r   r   s       r%   rw   zLazyDict.__init__  sJ     "*2*?*?@3S$Y@	!$)).."23 As   
Ac                 ,    t        | j                        S rN   )rC   r   rR   s    r%   __len__zLazyDict.__len__
  s    499~r'   c                     | j                   |   }|| j                  v r;| j                  |      }|| j                   |<   | j                  j                  |       |S rN   )r   r   formatremoverR   r   values      r%   __getitem__zLazyDict.__getitem__  sQ    		#$%%%KK$E"DIIcN&&s+r'   c                 t    || j                   v r| j                   j                  |       || j                  |<   y rN   r   r   r   r   s      r%   __setitem__zLazyDict.__setitem__  s1    $%%%&&s+		#r'   Nc                 p    || j                   v r| j                   j                  |       | j                  |= y rN   r   rR   r   s     r%   __delitem__zLazyDict.__delitem__  s/    $%%%&&s+IIcNr'   c                 ,    t        | j                        S rN   )iterr   r   s    r%   __iter__zLazyDict.__iter__  s    DIIr'   c                     || j                   v S rN   )r   r   s     r%   __contains__zLazyDict.__contains__"  s    diir'   c                 L    | j                          t        | j                        S rN   )_format_allreprr   r   s    r%   __repr__zLazyDict.__repr__%  s    DIIr'   z3.9c                    t        |t              r}| j                         }|j                         }|j                          |xj                  |j
                  j                         z  c_        |j
                  |j
                  z  |_        |S t        |t              rI| j                         }|xj                  |j                         z  c_        |j
                  |z  |_        |S t        S rN   	r/   r   r   r   r   r   r   rq   NotImplementedrR   otherinsts      r%   __or__zLazyDict.__or__,  s    %*yy{

!!###uzz'88# II

2	%&yy{##uzz|3# II-	!!r'   c                    t        |t              r}| j                         }|j                         }|j                          |xj                  |j
                  j                         z  c_        |j
                  |j
                  z  |_        |S t        |t              rI| j                         }|xj                  |j                         z  c_        ||j
                  z  |_        |S t        S rN   r   r   s      r%   __ror__zLazyDict.__ror__;  s    %*yy{

!!###uzz'88#!JJ2	%&yy{##uzz|3#!DII-	!!r'   c                 r   t        |t              rn|j                         }|j                          | xj                  |j
                  j                         z  c_        | xj
                  |j
                  z  c_        | S | xj                  |j                         z  c_        | xj
                  |z  c_        | S rN   )r/   r   r   r   r   r   r   )rR   r   s     r%   __ior__zLazyDict.__ior__J  s    %*

!!###uzz'88#		UZZ'	 K ##uzz|3#		U"	Kr'   c                 B   | j                   j                  | j                         }|j                  j                  | j                         | j                  d   j	                         |j                  d<   | j                  d   j	                         |j                  d<   |S )Nr   r   )	__class____new____dict__updater   )rR   r   s     r%   __copy__zLazyDict.__copy__U  sw    ~~%%dnn5T]]+ $f 5 : : <f*.--8H*I*N*N*P&'r'   c                 ,    dd l }|j                  |       S rG   r   )rR   r   s     r%   r   zLazyDict.copy^  s    yyr'   c                     t         rN   rO   )clsiterabler   s      r%   fromkeyszLazyDict.fromkeysc  s    !!r'   c                     t         rN   rO   r   s     r%   r   zLazyDict.formatg  rT   r'   c                     | j                   D ]   }| j                  |      | j                  |<   " | j                   j                          y rN   )r   r   r   clearr   s     r%   r   zLazyDict._format_allj  s?    && 	.C![[-DIIcN	.!!#r'   )r   NrN   )rY   rZ   r[   r\   r]   r   rw   r   r   r   r   r   r   r   r   
PY_VERSIONr   parser   r   r   r   r   classmethodr   r   r   r^   r'   r%   r   r      s    o4 4k 4

  MGMM%00	"	"		
 " ""$r'   r   c                       e Zd Zd Zy)LazyRowc                 r    | j                   j                  | j                  j                  |g            d   S rG   r   format_columnrL   r9   r   s     r%   r   zLazyRow.formatq  s-    ~~++DMM,@,@#,GHKKr'   NrY   rZ   r[   r   r^   r'   r%   r   r   p  s    Lr'   r   c                       e Zd Zd Zy)	LazyBatchc                 l    | j                   j                  | j                  j                  |g            S rN   r   r   s     r%   r   zLazyBatch.formatv  s(    ~~++DMM,@,@#,GHHr'   Nr   r^   r'   r%   r  r  u  s    Ir'   r  c            	           e Zd ZdZeZeZeZ	e
Zddee   fdZdej                   dedeeeef   fdZdej                   defd	Zdej                   defd
Zdej                   defdZy)r   z
    A formatter is an object that extracts and formats data from pyarrow tables.
    It defines the formatting for rows, columns and batches.
    Nr   c                 z    || _         t        | j                         | _        t        | j                         | _        y rN   )r   r   python_features_decoderr   pandas_features_decoderr   s     r%   rw   zFormatter.__init__  s+     '<T]]'K$'<T]]'K$r'   rL   
query_typer   c                     |dk(  r| j                  |      S |dk(  r| j                  |      S |dk(  r| j                  |      S y Nr   r3   r   )
format_rowr   format_batch)rR   rL   r  s      r%   __call__zFormatter.__call__  sP    ??8,,8#%%h//7"$$X.. #r'   c                     t         rN   rO   rQ   s     r%   r
  zFormatter.format_row  rT   r'   c                     t         rN   rO   rQ   s     r%   r   zFormatter.format_column  rT   r'   c                     t         rN   rO   rQ   s     r%   r  zFormatter.format_batch  rT   r'   rN   )rY   rZ   r[   r\   re   simple_arrow_extractorrl   python_arrow_extractorrt   numpy_arrow_extractorr   pandas_arrow_extractorr   r   rw   r]   r   r8   r   r   r   r   r  r
  r   r  r^   r'   r%   r   r   z  s    
 21/1L(!3 L
/ /s /uYP\^iEi?j /"288 "	 ""bhh "< ""RXX "+ "r'   r   c                       e Zd ZdefdZy)TensorFormatterdata_structc                     t         rN   rO   )rR   r  s     r%   recursive_tensorizez#TensorFormatter.recursive_tensorize  rT   r'   N)rY   rZ   r[   rq   r  r^   r'   r%   r  r    s    "t "r'   r  c                       e Zd Zdej                  dej                  fdZdej                  dej                  fdZdej                  dej                  fdZy)ArrowFormatterrL   r   c                 @    | j                         j                  |      S rN   )r  rS   rQ   s     r%   r
  zArrowFormatter.format_row  s    **,88BBr'   c                 @    | j                         j                  |      S rN   )r  rV   rQ   s     r%   r   zArrowFormatter.format_column  s    **,;;HEEr'   c                 @    | j                         j                  |      S rN   )r  rX   rQ   s     r%   r  zArrowFormatter.format_batch  s    **,::8DDr'   N)	rY   rZ   r[   r]   r   r
  rj   r   r  r^   r'   r%   r  r    s\    C288 C CFbhh F288 FERXX E"(( Er'   r  c                        e Zd Zd fd	Zdej
                  defdZdej
                  defdZ	dej
                  defdZ
 xZS )PythonFormatterc                 2    t         |   |       || _        y rN   )superrw   lazy)rR   r   r"  r   s      r%   rw   zPythonFormatter.__init__  s    "	r'   rL   r   c                     | j                   rt        ||       S | j                         j                  |      }| j                  j                  |      }|S rN   )r"  r   r  rS   r  r   rR   rL   r   s      r%   r
  zPythonFormatter.format_row  sJ    998T**))+77A**55c:
r'   c                     | j                         j                  |      }| j                  j                  ||j                  d         }|S rG   )r  rV   r  r   r?   rR   rL   r3   s      r%   r   zPythonFormatter.format_column  D    ,,.==hG--;;FHDYDYZ[D\]r'   c                     | j                   rt        ||       S | j                         j                  |      }| j                  j                  |      }|S rN   )r"  r  r  rX   r  r   rR   rL   r   s      r%   r  zPythonFormatter.format_batch  sJ    99Xt,,++-;;HE,,99%@r'   NF)rY   rZ   r[   rw   r]   r   r   r
  rr   r   r  __classcell__r   s   @r%   r  r    sM    288  bhh 4 
RXX ' r'   r  c                       e Zd Zdej                  dej                  fdZdej                  dej                  fdZ	dej                  dej                  fdZ
y)PandasFormatterrL   r   c                 z    | j                         j                  |      }| j                  j                  |      }|S rN   )r  rS   r  r   r$  s      r%   r
  zPandasFormatter.format_row  s6    ))+77A**55c:
r'   c                     | j                         j                  |      }| j                  j                  ||j                  d         }|S rG   )r  rV   r  r   r?   r&  s      r%   r   zPandasFormatter.format_column  r'  r'   c                 z    | j                         j                  |      }| j                  j                  |      }|S rN   )r  rX   r  r   r$  s      r%   r  zPandasFormatter.format_batch  s6    ))+99(C**77<
r'   N)rY   rZ   r[   r]   r   r   r   r
  r   r   r  r^   r'   r%   r.  r.    sS    288  
bhh 299 
RXX ",, r'   r.  c                        e Zd ZdZd	deegef   f fdZdej                  defdZ	dej                  de
fdZdej                  defdZ xZS )
CustomFormattera  
    A user-defined custom formatter function defined by a ``transform``.
    The transform must take as input a batch of data extracted for an arrow table using the python extractor,
    and return a batch.
    If the output batch is not a dict, then output_all_columns won't work.
    If the ouput batch has several fields, then querying a single column won't work since we don't know which field
    to return.
    r   c                 4    t         |   |       || _        y )Nr   )r!  rw   r   )rR   r   r   kwargsr   s       r%   rw   zCustomFormatter.__init__  s    (+"r'   rL   r   c                 |    | j                  |      }	 t        |      S # t        $ r}t        d|       |d }~ww xY w)Nz]Custom formatting function must return a dict of sequences to be able to pick a row, but got )r  rc   	Exceptionr)   rR   rL   formatted_batchexcs       r%   r
  zCustomFormatter.format_row  sU    ++H5	?++ 	op  pA  B	s   
 	;6;c                 B   | j                  |      }t        |d      rBt        |j                               dkD  r4t	        dt        |j                                d      t	        d|       	 ||j                  d      S # t        $ r}t	        d|       |d }~ww xY w)Nr   r    zTried to query a column but the custom formatting function returns too many columns. Only one column was expected but got columns .zPCustom formatting function must return a dict to be able to pick a row, but got r   )r  hasattrrC   r   r)   rr   r?   r7  r8  s       r%   r   zCustomFormatter.format_column  s    ++H5?F+?'')*Q.DDHI]I]I_D`Caabd 
 bcrbst 	"8#8#8#;<< 	bcrbst	s   /B 	B
BBc                     | j                         j                  |      }| j                  j                  |      }| j	                  |      S rN   )r  rX   r  r   r   r)  s      r%   r  zCustomFormatter.format_batch   s@    ++-;;HE,,99%@~~e$$r'   rN   )rY   rZ   r[   r\   r   rq   rw   r]   r   r
  r   r   r  r+  r,  s   @r%   r3  r3    sd    #(D64<"8 #288  bhh < &%RXX %$ %r'   r3  columnsc                 .    | |vrt        d|  d|       y )NzColumn z5 not in the dataset. Current columns in the dataset: )KeyError)r   r?  s     r%   _check_valid_column_keyrB    s*    
'%Z[bZcdee r'   sizec                    t        | t              r$| dk  r| |z   dk  s| |k\  rt        d|  d|       y t        | t              ry t        | t              r<t        |       dkD  r-t        t        |       |       t        t        |       |       y y t        | t              rNt        |       dkD  r?t        t        t        |             |       t        t        t        |             |       y y t        |        y )Nr   zInvalid key: z is out of bounds for size )rC  )r/   r0   
IndexErrorr6   r7   rC   _check_valid_index_keymaxminr
   r+   )r   rC  s     r%   rF  rF    s    #s!Gd
QC4K}SE1LTFSTT	C		C	s8a<"3s8$7"3s8$7  
C	"s8a<"3s3x=t<"3s3x=t<  	C r'   c                     t        | t              ryt        | t              ryt        | t        t        t
        f      ryt        |        y r	  )r/   r0   r8   r6   r7   r
   r+   r$   s    r%   key_to_query_typerJ    s8    #s	C		C%1	2r'   c                 <   t        |t        t        t        t        t
        f      st        |       t        |t              rt        || j                         n&||j                  n| j                  }t        ||       |t        | |      }|S t        | ||      }|S )a1  
    Query a Table to extract the subtable that correspond to the given key.

    Args:
        table (``datasets.table.Table``): The input Table to query from
        key (``Union[int, slice, range, str, Iterable]``): The key can be of different types:
            - an integer i: the subtable containing only the i-th row
            - a slice [i:j:k]: the subtable containing the rows that correspond to this slice
            - a range(i, j, k): the subtable containing the rows that correspond to this range
            - a string c: the subtable containing all the rows but only the column c
            - an iterable l: the subtable that is the concatenation of all the i-th rows for all i in the iterable
        indices (Optional ``datasets.table.Table``): If not None, it is used to re-map the given key to the table rows.
            The indices table must contain one column named "indices" of type uint64.
            This is used in case of shuffling or rows selection.


    Returns:
        ``pyarrow.Table``: the result of the query on the input table
    )r-   )r/   r0   r6   r7   r8   r
   r+   rB  r?   r2   rF  r5   r<   )r,   r   r-   rC  pa_subtables        r%   query_tablerM  (  s    2 cCsH=>C #sU%7%78#*#6wENNsD)"5#.  8sGTr'   r   format_columnsc                    t        | t              r| j                  }n| }t        |      }t	        |j
                        }
 |||      S |dk(  r|v r	 |||      S  |||      S |j                  fd|j                  D              } |||      }	|r_t        |	t              rA|j                  fd|j                  D              }
 ||
|      }|	j                  |       |	S t        d|	       |	S )a  
    Format a Table depending on the key that was used and a Formatter object.

    Args:
        table (``datasets.table.Table``): The input Table to format
        key (``Union[int, slice, range, str, Iterable]``): Depending on the key that was used, the formatter formats
            the table as either a row, a column or a batch.
        formatter (``datasets.formatting.formatting.Formatter``): Any subclass of a Formatter such as
            PythonFormatter, NumpyFormatter, etc.
        format_columns (:obj:`List[str]`, optional): if not None, it defines the columns that will be formatted using the
            given formatter. Other columns are discarded (unless ``output_all_columns`` is True)
        output_all_columns (:obj:`bool`, defaults to False). If True, the formatted output is completed using the columns
            that are not in the ``format_columns`` list. For these columns, the PythonFormatter is used.


    Returns:
        A row, column or batch formatted object defined by the Formatter:
        - the PythonFormatter returns a dictionary for a row or a batch, and a list for a column.
        - the NumpyFormatter returns a dictionary for a row or a batch, and a np.array for a column.
        - the PandasFormatter returns a pd.DataFrame for a row or a batch, and a pd.Series for a column.
        - the TorchFormatter returns a dictionary for a row or a batch, and a torch.Tensor for a column.
        - the TFFormatter returns a dictionary for a row or a batch, and a tf.Tensor for a column.
    r   )r  r3   c              3   ,   K   | ]  }|vs|  y wrN   r^   r   r|   rN  s     r%   r   zformat_table.<locals>.<genexpr>|  s     *m3SV^lSl3*m   	c              3   ,   K   | ]  }|v s|  y wrN   r^   rQ  s     r%   r   zformat_table.<locals>.<genexpr>  s      @C><QC@rR  z\Custom formatting function must return a dict to work with output_all_columns=True, but got )r/   r   r,   rJ  r  r   r>   r?   r   r   r)   )r,   r   r   rN  output_all_columnsrL   r  python_formatterpa_table_to_formatformatted_outputpa_table_with_remaining_columnsremaining_columns_dicts      `        r%   format_tablerZ  P  s&   < %;;"3'J&	0B0BCj99	x	. Xz22#HDD%]]*m(:O:O*mm$%7JO*N;2:-- @#+#8#8@ 3/ *::Yfp)q& ''(>?
    r  tD  sE  F   r'   rN   r*  )Ncollections.abcr   r   	functoolsr   typingr   r   r   r	   r
   r   r   r   r   numpyr@   pandasr   pyarrowr]   	packagingr    r   r   r   features.featuresr   r   r   r   r,   r   utils.py_utilsr   r   r   r   r   r7   boolr&   r+   r0   r6   r8   r<   r5   rj   rI   rK   rc   re   rq   rr   rl   r   rt   r   r   r   r   r   r   r   r  r   r  r  r  r.  r3  rB  rF  rJ  rM  rZ  r^   r'   r%   <module>rf     s   4  Z Y Y       u u  3 CLK 	~&m$3e 3 3S S%X=>INXX< E#ueS(*J$K PRPXPX 2#288 # #"L+!EF ""=T#tAw,' =DaL =
-bhh"((.JK $-dD$.>? $++,T2::t-CD ++\D-bllBIIr||.ST DM M& &@m$~ m$`Lh L
I I
"	<<= "D"i	< DE "
EYrxx288;< Eiw 67 2ibii EF "-%ilD 89 -%`f ftCy fT f
!c5%&A B !# !RV !&5eUC!AB s   $%%	sE5#x/	0% e_% XX	%X &*9 9 	sE5#x/	09  9  TN	9 r'   