
    *#h                         d dl Z d dlmZmZ d dlmZ ddlmZmZ  ej                  e
      Z G d d      Zed        Zd	 Zd
 Zee j                   defd              Zy)    N)PoolRLock)tqdm   )experimentalloggingc                       e Zd ZdZy)ParallelBackendConfigN)__name__
__module____qualname__backend_name     W/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/parallel/parallel.pyr
   r
      s    Lr   r
   c           	      f    t         j                  t        | ||||||      S t        | ||||||      S )a  
    **Experimental.** Apply a function to iterable elements in parallel, where the implementation uses either
    multiprocessing.Pool or joblib for parallelization.

    Args:
        function (`Callable[[Any], Any]`): Function to be applied to `iterable`.
        iterable (`list`, `tuple` or `np.ndarray`): Iterable elements to apply function to.
        num_proc (`int`): Number of processes (if no backend specified) or jobs (using joblib).
        types (`tuple`): Additional types (besides `dict` values) to apply `function` recursively to their elements.
        disable_tqdm (`bool`): Whether to disable the tqdm progressbar.
        desc (`str`): Prefix for the tqdm progressbar.
        single_map_nested_func (`Callable`): Map function that applies `function` to an element from `iterable`.
            Takes a tuple of function, data_struct, types, rank, disable_tqdm, desc as input, where data_struct is an
            element of `iterable`, and `rank` is used for progress bar.
    )r
   r   _map_with_multiprocessing_pool_map_with_joblib)functioniterablenum_proctypesdisable_tqdmdescsingle_map_nested_funcs          r   parallel_mapr      sG    " ))1-h%tE[
 	
 Hh%tUkllr   c                    |t        |      k  r|n
t        |      }g }t        |      D ]Y  }t        |      |z  }	t        |      |z  }
|	|z  t        ||
      z   }||	z   ||
k  rdndz   }|j                  | ||| ||||f       [ t        |      t	        d |D              k7  r*t        dt        |       dt	        d |D                     t        j                  d| dt        |       d	|D cg c]  }t        |d          c}        d
\  }}|st               ft        j                  }}t        |||      5 }|j                  ||      }d d d        t        j                  d| d       D cg c]  }|D ]  }|  }}}t        j                  dt        |       d       |S c c}w # 1 sw Y   cxY wc c}}w )N   r   c              3   8   K   | ]  }t        |d            ywr   Nlen.0is     r   	<genexpr>z1_map_with_multiprocessing_pool.<locals>.<genexpr>3   s     :!C!I:   zHError dividing inputs iterable among processes. Total number of objects z
, length: c              3   8   K   | ]  }t        |d            ywr    r!   r#   s     r   r&   z1_map_with_multiprocessing_pool.<locals>.<genexpr>7   s     93qt99r'   z	Spawning z processes for z objects in slices of )NN)initargsinitializerz	Finished z
 processesz	Unpacked z objects)r"   rangeminappendsum
ValueErrorloggerinfor   r   set_lockr   map)r   r   r   r   r   r   r   
split_kwdsindexdivmodstartendr%   r)   r*   poolmappedproc_resobjs                       r   r   r   )   s   #s8}4x#h-HJx ](mx'(mh&ec%o-ck%#+Q158XeC%8%VZ[\] 8}:z:::''*8}o 69j99:<
 	
 KK
H:_S]O;QfpRqabSVWXYZW[S\RqQrs 'Hk!&
DMM+	h{	C >t0*=>
KK)H:Z01"(=hH=Sc=c=F=
KK)CK=12M Sr
> > >s   0F.>F37F?3F<c                      dd l j                  t        j                  |      5   j	                          fd|D              cd d d        S # 1 sw Y   y xY w)Nr   )n_jobsc           	   3   X   K   | ]!  } j                        |d dd f       # y w)NT)delayed)r$   r=   r   joblibr   r   s     r   r&   z#_map_with_joblib.<locals>.<genexpr>O   s7      !
ad2FNN12Hc5$PTVZ3[\!
s   '*)rB   parallel_backendr
   r   Parallel)r   r   r   r   r   r   r   rB   s   `  `  `@r   r   r   I   sU     		 	 !6!C!CH	 	U 
 v  !
hp!
 

 
 
s   "AAr   c              #      K   | t         _        | dk(  rddlm}  |        	 d dt         _        y# dt         _        w xY ww)a  
    **Experimental.**  Configures the parallel backend for parallelized dataset loading, which uses the parallelization
    implemented by joblib.

    Args:
        backend_name (str): Name of backend for parallelization implementation, has to be supported by joblib.

     Example usage:
     ```py
     with parallel_backend('spark'):
       dataset = load_dataset(..., num_proc=2)
     ```
    sparkr   )register_sparkN)r
   r   joblibsparkrG   )r   rG   s     r   rC   rC   T   s<       *6&w.
2-1*T*s   A1 A>A)
contextlibmultiprocessingr   r   	tqdm.autor   utilsr   r   
get_loggerr   r0   r
   r   r   r   contextmanagerstrrC   r   r   r   <module>rP      sy     '  ) 
		H	%  m m0@
 23 2  2r   