
    XIg&#                        d dl mZmZmZmZmZmZ d dlZerddlm	Z	 d dl
mZ ddlmZ ddlmZmZmZ dd	lmZ ddlmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ d dlZddl m!Z!m"Z" ddl# dee   defdZ$ G d d      Z%dgZ&y)    )OptionalListAnyUnionIterableTYPE_CHECKINGN   Catalog)	DataFrame   ContributionsAcceptedError)
StructType
AtomicTypeDataType)	SparkConf)RuntimeConfigDataFrameReader)SparkContextUDFRegistrationDataStreamReader)PySparkTypeErrorPySparkValueError)*dataschemac           	          ddl m} g }| D ]Z  }t        ||D cg c]  }|j                   c}      D cg c]  \  }} |||j                         }}}|j                  |       \ |S c c}w c c}}w )Nr   )Value)duckdbr"   zipdataTypeduckdb_typeappend)	r   r    r"   new_datarowyxdtypenew_rows	            b/var/www/html/answerous/venv/lib/python3.12/site-packages/duckdb/experimental/spark/sql/session.py_combine_data_and_schemar/   %   sr    H !?B3]cHdXYHd?ef81e5E--.ff ! O Iefs
   A*A/c                      e Zd ZdefdZdeee   df   defdZ	dddefdZ
	 	 	 d%dedee   f   d	eeeee   f      d
ee   dedef
dZd&dZ	 	 	 d'dedee   dedee   ddf
dZdededefdZd(dZdedefdZd&dZed)d       Zedefd       Zedefd       Zedefd       Z edefd       Z!edefd        Z"ede#fd!       Z$edefd"       Z% G d# d$      Z& e&       Z'y)*SparkSessioncontextc                 h    |j                   | _        || _        t        | j                        | _        y N)
connectionconn_contextr   _confselfr2   s     r.   __init__zSparkSession.__init__0   s&    &&	"499-
    r   PandasDataFramereturnc                    	 dd l }d}|rrt        |j                        r\dt	        j
                          }| j                  j                  ||       t        | j                  j                  d| d      |       S d }t        |t              st        |      } ||       dt        fd	} ||      }d
 } ||      }	| j                  j                  ||	      }
t        |
|       S # t        $ r d}Y w xY w)Nr   TFpyspark_pandas_df_zselect * from ""c           
          t        |       dk  ry t        | d         }t        | dd        D ]B  \  }}t        |      }||k(  rt        dd| d|dz    t        |      t        |      d       y )Nr	   r   LENGTH_SHOULD_BE_THE_SAMEr   )arg1arg2arg1_lengtharg2_lengtherror_classmessage_parameters)len	enumerater   str)tuplesexpected_lengthiitemactual_lengths        r.   verify_tuple_integrityz>SparkSession._create_dataframe.<locals>.verify_tuple_integrity@   s    6{a!&)nO$VABZ0 4 #D	"m3& ;"&qc
"&qse'*?';'*='9	( 	r<   r>   c           
          d }t        | d         }t        |       D cg c]  \  }} ||d||z  z          }}}dj                  |      }d| d}|S c c}}w )Nc                     t        |       }t        |      D cg c]
  }d||z     }}ddj                  |      z   dz   }|S c c}w )N$(, ))rK   rangejoin)r)   start_param_idxparameter_countr+   
parameterss        r.   construct_values_listzVSparkSession._create_dataframe.<locals>.construct_query.<locals>.construct_values_listW   sT    "%c(?D_?UV!!O"3!45V
V 499Z#883>
!! Ws   Ar   r	   rX   z'
                select * from (values z)
            )rK   rL   r[   )rN   r_   row_sizerP   r+   values_listquerys          r.   construct_queryz7SparkSession._create_dataframe.<locals>.construct_queryV   su    " 6!9~HT]^dTefDAq0AX4FGfKf))K0K''2m 4E L gs   Ac                 L    g }| D ]  }|j                  t        |              |S r4   )extendlist)rN   r^   r)   s      r.   construct_parametersz<SparkSession._create_dataframe.<locals>.construct_parametersh   s.    J -!!$s),-r<   )params)pandasImportError
isinstancer   uuiduuid1r6   registersqlrf   rM   )r:   r   ri   
has_pandasunique_namerS   rc   rb   rg   r^   rels              r.   _create_dataframezSparkSession._create_dataframe5   s    	J *T6+;+;<.tzz|n=KII{D1TYY]]_[M+KLdSS	$ $%:Dt$	s 	   %	 *$/
iimmE*m5d##q  	J	s   C( (C65C6c                 l    | j                  |      }|r |j                  | }|r |j                  | }|S r4   )rs   _cast_typestoDF)r:   r   typesnamesdfs        r.   _createDataFrameFromPandasz'SparkSession._createDataFrameFromPandass   s>    ##D) 'B%B	r<   Nr    samplingRatioverifySchemac                 p   |rt         |st         d }d }t        |t              rt        dddi      |r&t        |t              r|j                         \  }}n|}	 dd l}d}|r)t        |j                        r| j                  |||      S d}	|s|rd}	t        d |D              g}|rt        |t              rt        ||      }| j                  |      }
|	r)|
j                  }|j                  d	      }t        ||       }
|r |
j                  | }
|r |
j                  | }
|
S # t        $ r d}Y w xY w)
NSHOULD_NOT_DATAFRAMEarg_namer   rH   r   TFc              3       K   | ]  }d   y wr4    ).0_s     r.   	<genexpr>z/SparkSession.createDataFrame.<locals>.<genexpr>   s     .1$.s   z1=0)NotImplementedErrorrk   r   r   r   extract_types_and_namesri   rj   rz   tupler/   rs   relationfilterru   rv   )r:   r   r    r{   r|   rw   rx   ri   rp   is_emptyry   rr   s               r.   createDataFramezSparkSession.createDataFrame~   sR    %%%%dI&"2$.#7 
 &*-%==?u	J
 *T6+;+;<224FF H.../Dj4+D&9D##D)++C**U#C3%B 'B%B	?  	J	s   D' 'D54D5c                 ,    t        | j                        S r4   )r1   r7   r:   s    r.   
newSessionzSparkSession.newSession   s    DMM**r<   startendstepnumPartitionsr   c                 r    |rt         ||}d}t        | j                  j                  d|||g      |       S )Nr   rZ   )r^   )r   r   r6   table_function)r:   r   r   r   r   s        r.   rZ   zSparkSession.range   sF     ,,;CE11'ucSWFX1YZ^__r<   sqlQuerykwargsc                 `    |rt         | j                  j                  |      }t        ||       S r4   )r   r6   ro   r   )r:   r   r   r   s       r.   ro   zSparkSession.sql   s*    %%99==*4((r<   c                 8    | j                   j                          y r4   )r7   stopr   s    r.   r   zSparkSession.stop   s    r<   	tableNamec                 P    | j                   j                  |      }t        ||       S r4   )r6   tabler   )r:   r   r   s      r.   r   zSparkSession.table   s!    99??9-4((r<   c                     | S r4   r   r   s    r.   getActiveSessionzSparkSession.getActiveSession   s    r<   c                 X    t        | d      sddlm}  ||       | _        | j                  S )N_catalogr   r
   )hasattr%duckdb.experimental.spark.sql.catalogr   r   )r:   r   s     r.   catalogzSparkSession.catalog   s$    tZ(E#DMDM}}r<   c                     | j                   S r4   )r8   r   s    r.   confzSparkSession.conf   s    zzr<   c                     t        |       S r4   r   r   s    r.   readzSparkSession.read       t$$r<   c                     t        |       S r4   r   r   s    r.   
readStreamzSparkSession.readStream   s    %%r<   c                     | j                   S r4   )r7   r   s    r.   sparkContextzSparkSession.sparkContext   s    }}r<   c                     t         r4   r   r   s    r.   streamszSparkSession.streams   s    ((r<   c                     t        |       S r4   r   r   s    r.   udfzSparkSession.udf   r   r<   c                      y)Nz1.0.0r   r   s    r.   versionzSparkSession.version   s    r<   c            	           e Zd Zd Zdedd fdZdedd fdZdedd fdZddZ	 dd
e	e   de	e
   de	e   dd fdZddZy	)SparkSession.Builderc                      y r4   r   r   s    r.   r;   zSparkSession.Builder.__init__  s    r<   namer>   c                     | S r4   r   r:   r   s     r.   masterzSparkSession.Builder.master      Kr<   c                     | S r4   r   r   s     r.   appNamezSparkSession.Builder.appName  r   r<   urlc                     | S r4   r   )r:   r   s     r.   remotezSparkSession.Builder.remote  r   r<   c                 .    t        d      }t        |      S )N__ignored__)r   r1   r9   s     r.   getOrCreatez SparkSession.Builder.getOrCreate  s    "=1G((r<   Nkeyvaluer   c                     | S r4   r   )r:   r   r   r   s       r.   configzSparkSession.Builder.config  s	     Kr<   c                     | S r4   r   r   s    r.   enableHiveSupportz&SparkSession.Builder.enableHiveSupport  r   r<   r>   r1   )NNN)r>   r   )__name__
__module____qualname__r;   rM   r   r   r   r   r   r   r   r   r   r   r<   r.   Builderr     s    		s 	'= 		 	(> 		c 	&< 		)
 gk	}	4<SM	PXYbPc	#	
	r<   r   )NNTr   )Nr	   N)r>   N)r>   r   )(r   r   r   r   r;   r   r   r   r   rs   rz   r   r   r   rM   floatboolr   r   intrZ   ro   r   r   r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r   builderr   r<   r.   r1   r1   /   s   . .
<$eHSM;L,L&M <$R[ <$|	/@ 	S\ 	 :>)-!=%x}45= z49456=  	=
 = 
=~+ "'+`` c]` 	`
  }` 
` )C )3 )9 ))s )y )   m   %o % % &, & & l   ) ) ) %_ % %    : iGr<   r1   )'typingr   r   r   r   r   r   rl   r   r   pandas.core.framer   r=   	exceptionr   rw   r   r   r   r   r   	dataframer   
readwriterr   r2   r   r   r   	streamingr   r#   errorsr   r   errors.error_classesr/   r1   __all__r   r<   r.   <module>r      sp    F F  > 2 3 3     ' "   ' 
 %8C= * r rj 
r<   