
    *#hs$                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dl	mZ
 d dlZd dlmZ d dlmZ ej                   j"                  j%                  e      Ze G d dej*                               Z G d d	ej.                        Zy)
    N)	dataclass)Optional)
table_cast)readlinec                       e Zd ZU dZdZeej                     ed<   dZ	e
ed<   dZee
   ed<   dZee
   ed<   dZeed	<   dZee   ed
<   dZeed<   dZee   ed<   y)
JsonConfigzBuilderConfig for JSON.Nfeaturesutf-8encodingencoding_errorsfieldTuse_threads
block_sizei   	chunksizenewlines_in_values)__name__
__module____qualname____doc__r	   r   datasetsFeatures__annotations__r   strr   r   r   boolr   intr   r        `/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/packaged_modules/json/json.pyr   r      sq    !,0Hhx(()0Hc%)OXc])E8C=K $J$Is)--r   r   c                   Z    e Zd ZeZd Zd Zdej                  dej                  fdZ	d Z
y)Jsonc                    | j                   j                  :t        j                  d       | j                   j                  | j                   _        | j                   j
                  durt        j                  d       | j                   j                  t        d      t        j                  | j                   j                        S )NzTThe JSON loader parameter `block_size` is deprecated. Please use `chunksize` insteadTzZThe JSON loader parameter `use_threads` is deprecated and doesn't have any effect anymore.zEThe JSON loader parameter `newlines_in_values` is no longer supported)r	   )configr   loggerwarningr   r   r   
ValueErrorr   DatasetInfor	   )selfs    r   _infoz
Json._info#   s    ;;!!-NNqr$(KK$:$:DKK!;;""$.NNl ;;))5dee##T[[-A-ABBr   c                    | j                   j                  s"t        d| j                   j                         |j                  | j                   j                        }t	        |t
        t        t        f      re|}t	        |t
              r|g}|D cg c]  }|j                  |       }}t        j                  t        j                  j                  d|i      gS g }|j                         D ]^  \  }}t	        |t
              r|g}|D cg c]  }|j                  |       }}|j                  t        j                  |d|i             ` |S c c}w c c}w )z-We handle string, list and dicts in datafilesz=At least one data file must be specified, but got data_files=files)name
gen_kwargs)r"   
data_filesr%   download_and_extract
isinstancer   listtuple
iter_filesr   SplitGeneratorSplitTRAINitemsappend)r'   
dl_managerr-   r*   filesplits
split_names          r   _split_generatorszJson._split_generators/   s;   {{%%\]a]h]h]s]s\tuvv44T[[5K5KL
j3e"45E%%=BCTZ**40CEC++1E1ESZ\aRbcdd!+!1!1!3 	aJ%%=BCTZ**40CECMM(11zwX]N^_`		a
  D Ds   EEpa_tablereturnc           
         | j                   j                  t        | j                   j                        t        |j                        z
  D ]o  }| j                   j                  j                  j                  |      j                  }|j                  |t        j                  d gt        |      z  |            }q t        || j                   j                  j                        }|S )N)type)r"   r	   setcolumn_namesarrow_schemar   r@   append_columnpaarraylenr   )r'   r=   column_namer@   s       r   _cast_tablezJson._cast_tableB   s    ;;+"4;;#7#783x?T?T;UU l{{++88>>{KPP#11+rxxQTU]Q^H^ei?jkl
 "(DKK,@,@,M,MNHr   c              #     K   t        t        j                  j                  |            D ]  \  }}| j                  j
                  t        || j                  j                  | j                  j                        5 }t        j                  |      }d d d        | j                  j
                     }t        |t        t        f      rc t               j                  |D cg c]  }|j!                          c} }|D ci c]"  }||D cg c]  }|j#                  |       c}$ }	}}n|}	t$        j&                  j)                  |	      }
|| j+                  |
      f <t        |d      5 }d}t-        | j                  j.                  dz  d      }| j                  j                  | j                  j                  nd}	 |j1                  | j                  j.                        }|sn	 ||j3                         z  }| j                  j                  dk7  r6|j;                  | j                  j                  |      j=                  d      }	 	 	 t?        j@                  t7        jB                  |      t?        jD                  |	      
      }
	 ||f| j+                  |
      f |dz  }d d d         y # 1 sw Y   +xY wc c}w c c}w c c}}w # t4        t6        j8                  f$ r |t3        |      z  }Y w xY w# t$        jF                  t$        jH                  f$ rp}t        |t$        jF                        rdtK        |      vs|tM        |      kD  r tN        jQ                  dtM        |       d| d|dz   d       |dz  }Y d }~nd }~ww xY wL# t$        jF                  $ r}	 t        || j                  j                  | j                  j                        5 }t        j                  |      }d d d        n# 1 sw Y   nxY wn@# t        jR                  $ r* tN        jU                  d| dtW        |       d|        |w xY wt        t              r		  t               j                  |D cg c]  }|j!                          nc c}w c} }|D ci c](  }||D cg c]  }|j#                  |       nc c}w c}* nc c}}w }	}}t$        j&                  j)                  |	      }
nX# t$        jF                  t4        f$ r<}tN        jU                  d| dtW        |       d|        tY        d| d      d d }~ww xY w|| j+                  |
      f Y d }~tN        jU                  d| dtW        |       d|        tY        d| dtK        t        |j!                                      d      d d }~ww xY w# 1 sw Y   xY ww)N)r   errorsrbr       i @  strictr
   )rK   )r   )read_options
straddlingz	Batch of z* bytes couldn't be parsed with block_size=z. Retrying with block_size=   .zFailed to read file 'z' with error z: z-Not able to read records in the JSON file at z. You should probably indicate the field of the JSON file containing your records. This JSON file contain the following fields: zX. Select the correct one and provide it as `field='XXX'` to the dataset loading method.    )-	enumerate	itertoolschainfrom_iterabler"   r   openr   r   jsonloadr/   r0   r1   rA   unionkeysgetrE   Tablefrom_pydictrI   maxr   readr   AttributeErrorioUnsupportedOperationdecodeencodepaj	read_jsonBytesIOReadOptionsArrowInvalidArrowNotImplementedErrorr   rG   r#   debugJSONDecodeErrorerrorr@   r%   )r'   r*   file_idxr9   fdatasetrowr\   colmappingr=   	batch_idxr   r   batches                   r   _generate_tableszJson._generate_tablesM   sn    '	(E(Ee(LM ]	'NHd{{  ,$)=)=dkkFaFab +fg"iilG+ "$++"3"34 ge}5&35;;w(G(GHDQUV#sW$EcSWWS\$EEVGV%G88//8 0 0 ::: $% I' !I "%T[[%:%:b%@(!KJ7;{{7R7R7^33dl $  !t{{'<'< =$!1!QZZ\1E  ;;//7:$)LL1E1EoL$^$e$efm$nE0,"&!8/2}}(*

5(9cmHn0&H %*\  (3T5E5Eh5OOO!Q	C I' I')]	'+ + )H$EV. !/0G0GH 1!Xa[0E1 )+9T9T'U !8(21boo(F,8A,F+5E
+B(- )/.7E
|Cmnxmy  zU  V`  cd  Vd  Ue  ef  -g)* )3a
!8 #'(  " ,(%)$(4;;3G3GPTP[P[PkPk&" !;%&.2iilG!; !; !; $(#7#7 ( &/DTF-X\]^X_W``bcdbe-f g&'(  *'48!x+635;;w8W8W8W+XDae.fZ]sW4UcSWWS\4U4U/U.f.fG.f/1xx/C/CG/LH(*'H !x$*LL3Hm\`ab\c[ddfghfi1j$k*47deidjjk5l*msw$w!x '/0@0@0J&J J % &/DTF-X\]^X_W``bcdbe-f g&0&STXSY ZTTWX\]d]i]i]kXlTmSn o}%~'"
 (,!,-,SI' I'sV  A=W!?J>AW!$K
;	W!
KK&K+AW!3A=W1KAWN!>L	W2W!>K	W!KW!'LWLW	#N,A&NN!NN!!W66P,P		PPPW=Q	W*S?RS?(
S2S
S)S?>W?U	7U	U	W,W2AWWWW	W!N)r   r   r   r   BUILDER_CONFIG_CLASSr(   r<   rE   r^   rI   ry   r   r   r   r    r        s4    %
C&	BHH 	 	^'r   r    )rc   rU   rY   dataclassesr   typingr   pyarrowrE   pyarrow.jsonrg   r   datasets.tabler   datasets.utils.file_utilsr   utilslogging
get_loggerr   r#   BuilderConfigr   ArrowBasedBuilderr    r   r   r   <module>r      st    	   !     % . 
			*	*8	4 
.'' 
. 
.K'8%% K'r   