
    +#hU                     b    d dl Z d dlmZ d dlmZmZmZ d dlmZ d dl	Z	d dl
mZ  G d de      Zy)    N)Mapping)ListTupleUnion)load_dataset)Datasetc                      e Zd Zd Zddddedee   dee   deeeee	j                     f   ee	j                     f   fd	Zdd
edee   dee   dee	j                     fdZdd
edee   dee   dee	j                     fdZdd
edee   dee   dee	j                     fdZdee	j                     dedee	j                     fdZ	 	 	 ddee	j                     deeef   deeef   dedeeee	j                     f   f
dZy)
DataLoaderc                      y )N )selfs    U/var/www/html/sandstorm/venv/lib/python3.12/site-packages/dspy/datasets/dataloader.py__init__zDataLoader.__init__   s        r   N)
input_keysfieldsdataset_namer   r   returnc                Z   |rt        |t              st        d      t        |t              st        d      t        |g|i |}t        |t              r7t        |d   t              r$t        |d         D ci c]  \  }}|||    }}}	 i }	|j                         D ]  }|rK||   D 
cg c]6  }
 t        j                  |D ci c]  }||
|   
 c}      j                  | 8 c}}
|	|<   P||   D 
cg c]D  }
 t        j                  |
j                         D ci c]  }||
|   
 c}      j                  | F c}}
|	|<    |	S c c}}w c c}w c c}}
w c c}w c c}}
w # t        $ r |rS|D 
cg c]<  }
 t        j                  |D ci c]  }||
|   
 nc c}w c}      j                  | > nc c}}
w c}}
cY S |D 
cg c]J  }
 t        j                  |
j                         D ci c]  }||
|   
 nc c}w c}      j                  | L nc c}}
w c}}
cY S w xY w)Nz:Invalid fields provided. Please provide a tuple of fields.zBInvalid input keys provided. Please provide a tuple of input keys.split)
isinstancetuple
ValueErrorr   list	enumeratekeysdspyExamplewith_inputsAttributeError)r   r   r   r   argskwargsdatasetidx
split_namereturned_splitrowfields               r   from_huggingfacezDataLoader.from_huggingface   sF    *VU3YZZ*e,abb|=d=f=gt$F7OT)JFOPVW^P_F`a?3
z'#,.aGa	~N%lln d
 LS  T^  L_  2`  EH2r$,,^d?eUZc%j@P?e2f2r2rt~2  2`N:. PW  Xb  Pc  2d  IL2v$,,^a^f^f^h?iUZc%j@P?i2j2v2v  yC  3D  2dN:.	d "! b @f  2`?i  2d  	~qxyjmXF%K5eCJ&6%K%KLXXZdeyyyu|}nq\CHHJ%O5eCJ&6%O%OP\\^hi}}}		~s   <EE* /EEE*E* :'E$!E.E$
E* EE* E$$E* *H*:F<F! F<;H*H*'H4HHH*)H*	file_pathc                     t        d|      d   }|st        |j                        }|D cg c]6  } t        j                  |D ci c]  }|||   
 c}      j
                  | 8 c}}S c c}w c c}}w )Ncsv
data_filestrainr   r   featuresr   r   r   r   r*   r   r   r#   r'   r(   s          r   from_csvzDataLoader.from_csv1   sj    u;GD'**+FipqbePFC5eCJ.CDPPR\]qqCq   A1A,A1,A1c                     t        d|      d   }|st        |j                        }|D cg c]6  } t        j                  |D ci c]  }|||   
 c}      j
                  | 8 c}}S c c}w c c}}w )Njsonr-   r/   r0   r2   s          r   	from_jsonzDataLoader.from_json9   sj    v)<WE'**+FipqbePFC5eCJ.CDPPR\]qqCqr4   c                     t        d|      d   }|st        |j                        }|D cg c]8  }t        j                  |D ci c]  }|||   
 c}      j                  |      : c}}S c c}w c c}}w )Nparquetr-   r/   r0   r2   s          r   from_parquetzDataLoader.from_parquetA   sg    yY?H'**+FipqbeVDEeSZ/DEQQR\]qqDqs   A3A.A3.A3r#   nc                     t        |t              st        dt        |       d      t	        j
                  ||g|i |S )Nz!Invalid dataset provided of type z$. Please provide a list of examples.)r   r   r   typerandomsample)r   r#   r;   r!   r"   s        r   r?   zDataLoader.sampleI   sC     '4(@gOstuu}}Wa9$9&99r   
train_size	test_sizerandom_statec                 N   |t        j                  |       |j                         }t        j                  |       |6t	        |t
              r&d|cxk  rdk  rn nt        t        |      |z        }n |t	        |t              r|}nt        d      |pt	        |t
              r&d|cxk  rdk  rn nt        t        |      |z        }nt	        |t              r|}nt        d      ||z   t        |      kD  rt        d      t        |      |z
  }|d | }||||z    }	||	dS )Nr      zEInvalid train_size. Please provide a float between 0 and 1 or an int.zDInvalid test_size. Please provide a float between 0 and 1 or an int.zAtrain_size + test_size cannot exceed the total number of samples.)r/   test)	r>   seedcopyshuffler   floatintlenr   )
r   r#   r@   rA   rB   dataset_shuffled	train_endtest_endtrain_datasettest_datasets
             r   train_test_splitzDataLoader.train_test_splitU   s#    #KK%"<<>'(!jU&CZI[Z[I[C 01J>?I#
:s(C"Idee )U+Y1B1Bs#34y@AIs+$ !ghh8#c*:&;; !dee+,y8H()4'	)h2FG&==r   )Nr   )g      ?NN)__name__
__module____qualname__r   strr   r   r   r   r   r   r)   r3   r7   r:   rJ   r?   rI   rQ   r   r   r   r
   r
      s    "$! ~ ~ #J	 ~
 c
 ~ 
wsD../dll1CC	D ~Dr rd3i rERUJ r`deieqeq`r rr# rtCy rUSVZ raefjfrfras rrc r49 rQVWZQ[ reijnjvjvew r
:dll#
: 
: 
dll	
: )-'+ #>dll##> #u*%#> e$	#>
 #> 
d4<<((	)#>r   r
   )r>   collections.abcr   typingr   r   r   datasetsr   r   dspy.datasets.datasetr   r
   r   r   r   <module>rZ      s(     # % % !  )m> m>r   