
    *#h                        d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	l	mZmZ dd
lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z# ddl$m%Z% ddl&Z&ddl'Z'ddl(Z'ddl)Z*ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ddl/m3Z3 	 ddl4Z5ddl4m6Z6m7Z7  e3jr                  e:      Z;ejx                  Z=d Z>de#e?e@f   de?fdZAd ZBde@de@dee@e@f   fdZCd ZDed        ZEedVde?fd       ZFd  ZGd! ZHd" ZId# ZJ G d$ d%eK      ZL G d& d'eM      ZNd( ZO	 	 	 	 	 	 	 	 	 dWd)eegef   d*ed+ePd,ePd-ePd.ePd/ee?   d0e?d1eeQ   d2ePd3ee@   defd4ZR G d5 d6      ZSdXd7ZTd8e@de!e@ee@   f   fd9ZUd:e@de!e@e@e@e@f   fd;ZV G d< d=e&j                        ZWd> ZXed?        ZYd@ ZZdA Z[e0j                   e,j                  dB      k  r e[e      dC        Z^n_e0j                  j                  ddD  e,j                  dB      j                   e,j                  dE      j                  fv r e[e      dF        Z`e0j                   e,j                  dG      k  r e[e      dH        Zane0j                  j                  ddD  e,j                  dG      j                  k(  r e[e      dI        Zan_e0j                  j                  ddD  e,j                  dB      j                   e,j                  dE      j                  fv r e[e      dJ        ZadK Zb e"dL      ZcdMej                  dNedOeec   f   dPeKde?fdQZedRe#ej                  j                  e'j                  j                  f   de e?   fdSZhdRe#ej                  j                  e'j                  j                  f   dNedOeec   f   dTeeK   deec   fdUZiy# e8$ r
 dxZ5xZ7Z6Y w xY w)Yz)Some python utils function and classes.

    N)contextmanager)fieldsis_dataclass)BytesIO)Manager)Empty)
disk_usage)CodeTypeFunctionType)
AnyCallableDictIterableListOptionalSetTupleTypeVarUnion)urlparse)version)tqdm   )config)parallel_map   )logging)FinalLiteralc                 ~    | syg d}t        |       } |D ]  \  }}| |z  }|dk\  s|dd| c S  t        |        dS )a6  Returns a human readable size string.

    If size_in_bytes is None, then returns "Unknown size".

    For example `size_str(1.5 * datasets.units.GiB) == "1.50 GiB"`.

    Args:
        size_in_bytes: `int` or `None`, the size, in bytes, that we want to
            format as a human-readable size string.
    zUnknown size))PiB           )TiB        )GiB   @)MiB   )KiB   g      ?z.2f z bytes)floatint)size_in_bytes
_NAME_LISTname
size_bytesvalues        T/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/utils/py_utils.pysize_strr4   E   sf     aJ-(M& )j
*C<C[$(() - !((    sizereturnc                    t        | t              r| S | j                         j                  d      rt        | dd       dz  S | j                         j                  d      rt        | dd       dz  S | j                         j                  d      rt        | dd       dz  S | j                         j                  d	      rt        | dd       d
z  S | j                         j                  d      rt        | dd       dz  S | j                         j                  d      r)t        | dd       dz  }| j                  d      r|dz  S |S | j                         j                  d      r)t        | dd       dz  }| j                  d      r|dz  S |S | j                         j                  d      r)t        | dd       dz  }| j                  d      r|dz  S |S | j                         j                  d      r)t        | dd       dz  }| j                  d      r|dz  S |S | j                         j                  d      r)t        | dd       dz  }| j                  d      r|dz  S |S t	        d|  d      )a)  
    Converts a size expressed as a string with digits an unit (like `"50MB"`) to an integer (in bytes).

    Args:
        size (`int` or `str`): The size to convert. Will be directly returned if an `int`.

    Example:

    ```py
    >>> convert_file_size_to_int("1MiB")
    1048576
    ```
    PIBNr"   TIBr$   GIBr&   MIBr(   KIBr*   PBl     I5 b   TBl    J)GBi ʚ;MBi@B KBi  z`size=zM` is not in a valid format. Use an integer followed by the unit, e.g., '5GB'.)
isinstancer-   upperendswith
ValueError)r6   int_sizes     r3   convert_file_size_to_intrL   ]   s?    $zz|U#49~''zz|U#49~''zz|U#49~''zz|U#49~''zz|U#49~''zz|T"tCRy>V, $c 2x1}@@zz|T"tCRy>V, $c 2x1}@@zz|T"tCRy>U+ $c 2x1}@@zz|T"tCRy>U+ $c 2x1}@@zz|T"tCRy>U+ $c 2x1}@@
vdV#pq
rrr5   c                    | j                  dd      j                  dd      j                  dd      j                  dd      j                  d	d
      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      j                  d
      j                  dd      S )N\z\\.z\.*z.*+z\+z///(z\()z\)|z\|^z\^$z\$?)replacerstrip)patterns    r3   glob_pattern_to_regexr\      s     	e$	e		d		e		s		e		e		e		e		e			c	r5   stringr[   c                    t        j                  dd|      }t        j                  ||       }|t        d|  d|       t	        |j                               }t        j                  d|      }t        t        ||            }|S )a  Un-format a string using a python f-string pattern.
    From https://stackoverflow.com/a/36838374

    Example::

        >>> p = 'hello, my name is {name} and I am a {age} year old {what}'
        >>> s = p.format(name='cody', age=18, what='quarterback')
        >>> s
        'hello, my name is cody and I am a 18 year old quarterback'
        >>> string_to_dict(s, p)
        {'age': '18', 'name': 'cody', 'what': 'quarterback'}

    Args:
        string (str): input string
        pattern (str): pattern formatted like a python f-string

    Returns:
        Dict[str, str]: dictionary of variable -> value, retrieved from the input using the pattern

    Raises:
        ValueError: if the string doesn't match the pattern
    z{(.+?)}z(?P<_\1>.+)zString z doesn't match the pattern )	resubsearchrJ   listgroupsfindalldictzip)r]   r[   regexresultvalueskeys_dicts          r3   string_to_dictrl      sz    . FF:~w7EYYuf%F~76(*EgYOPP&--/"F::j'*DT6"#ELr5   c                 t    d fdt        | t              s |       st        |  d       |       S )zbConvert an object to its dictionary representation recursively.

    <Added version="2.4.0"/>
    c                 >    t        |       xr t        | t               S N)r   rG   typeobjs    r3   _is_dataclass_instancez&asdict.<locals>._is_dataclass_instance   s    C >C)>%>>r5   c                     |       rwi }t        |       D ]e  } t        | |j                              }|j                  r,||j                  k7  s|j
                  j                  dd      sW|||j                  <   g |S t        | t              r.t        | d      r" t        |       | D cg c]
  } |       c} S t        | t        t        f      r t        |       fd| D              S t        | t              r0| j                         D ci c]  \  }} |       |       c}}S t        j                  |       S c c}w c c}}w )N$include_in_asdict_even_if_is_defaultF_fieldsc              3   .   K   | ]  } |        y wro    ).0v_asdict_inners     r3   	<genexpr>z0asdict.<locals>._asdict_inner.<locals>.<genexpr>   s     ;!]1-;s   )r   getattrr0   initdefaultmetadatagetrG   tuplehasattrrp   rb   re   itemscopydeepcopy)rr   rh   fr2   rz   kr{   rs   s         r3   r{   zasdict.<locals>._asdict_inner   s   !#&FC[ +%gc166&:;vv!))!3qzz~~Flns7t%*F166N+ MU#Y(?49=A}Q/=>>dE]+ 49;s;;;T"CF99;O41aM!$mA&66OO==%% > Ps   ,EE	z is not a dict or a dataclass)rG   re   	TypeError)rr   r{   rs   s    @@r3   asdictr      s?    ?&* c4 )?)D3%<=>>r5   c              #      K   t        | |d      }t        | ||       	 d t        | ||       y# t        | ||       w xY ww)z%Temporarily assign obj.attr to value.N)r}   setattr)rr   attrr2   originals       r3   temporary_assignmentr      sB      sD$'HCu%T8$T8$s   A0 A?Aseedc              #     K   t         j                  j                         }t         j                  j                  |        |rt        j
                  rddl}|j                  j                         }|j                  j                  |        |j                  j                         r5|j                  j                         }|j                  j                  |        |rt        j                  rddl}ddlm} |j                  j#                         }	|j                  j$                  j'                  |       }
|j                  j)                  |
       |j+                         st-        d      |j!                         }|j.                  }t1        |d      }|r|j2                  }|j5                  |        	 d t         j                  j7                  |       |r`t        j
                  rPj                  j9                         |j                  j                         r|j                  j;                         |rJt        j                  r9j                  j)                  	       _        r|_        yt=        |d       yyy# t         j                  j7                  |       |r`t        j
                  rPj                  j9                         |j                  j                         r|j                  j;                         |rJt        j                  r9j                  j)                  	       _        r|_        w t=        |d       w w w xY ww)zUTemporarily set the random seed. This works for python numpy, pytorch and tensorflow.r   N)contextzBSetting random seed for TensorFlow is only available in eager mode_rng)nprandom	get_stater   r   TORCH_AVAILABLEtorchget_rng_statemanual_seedcudais_availableget_rng_state_allmanual_seed_allTF_AVAILABLE
tensorflowtensorflow.pythonr   get_global_generator	Generator	from_seedset_global_generatorexecuting_eagerlyrJ   _seedr   r   _set_global_seed	set_stateset_rng_stateset_rng_state_alldelattr)r   set_pytorchset_tensorflownp_stater   torch_statetorch_cuda_statestftfpycontexttf_statetemp_gen
tf_contexttf_seedtf_rng_initializedtf_rngs                  r3   	temp_seedr      sg     yy""$HIINN4v--ll002  &::""$ %

 < < >JJ&&t,&--<9911399&&006
		&&x0##%abb ((*
""$Z8__F##D),
		H%611LL&&{3zz&&(

,,->?f11II**84&J!"(

F+ 2> 			H%611LL&&{3zz&&(

,,->?f11II**84&J!"(

F+ 2>s    F0M3J 7CMCMMc              #   b   K   t               }| D ]  }||vs|j                  |       |  yw)z=Iterate over iterable and return only unique values in order.N)setadd)ri   seenr2   s      r3   unique_valuesr   %  s4     5D HHUOKs   //c                       fd}|S )z4If the value is None, return None, else call `func`.c                     |  |       S d S ro   rx   )r2   funcs    r3   wrapperz'no_op_if_value_is_null.<locals>.wrapper1  s    #/tE{9T9r5   rx   )r   r   s   ` r3   no_op_if_value_is_nullr   .  s    : Nr5   c                 :    t        |       D ]  \  }}|	||fc S  y)zwReturn the index and the value of the first non-null value in the iterable. If all values are None, return -1 as index.)N)	enumerate)iterableir2   s      r3   first_non_null_valuer   7  s.    h' 5e8O r5   c               '   ~   K   t        t        j                  |        D ]  t        fd| D              f  yw)z9Iterate over items of dictionaries grouped by their keys.c              3   (   K   | ]	  }|     y wro   rx   )ry   dkeys     r3   r|   zzip_dict.<locals>.<genexpr>C  s     /A3/s   N)r   	itertoolschainr   )dictsr   s    @r3   zip_dictr   ?  s:     Y__e45 05/////0s   :=c                   6     e Zd ZdZ fdZ fdZ fdZ xZS )NonMutableDictzDict where keys can only be added but not modified.

    Will raise an error if the user try to overwrite one key. The error message
    can be customized during construction. It will be formatted using {key} for
    the overwritten key.
    c                 l    |j                  dd      | _        |rt        d      t        |   |i | y )N	error_msgz$Try to overwrite existing key: {key}z1NonMutableDict cannot be initialized with kwargs.)pop
_error_msgrJ   super__init__)selfargskwargs	__class__s      r3   r   zNonMutableDict.__init__N  s<     **2
 PQQ$)&)r5   c                 v    || v r%t        | j                  j                  |            t        |   ||      S )Nr   )rJ   r   formatr   __setitem__)r   r   r2   r   s      r3   r   zNonMutableDict.__setitem__W  s9    $;T__333<==w"3..r5   c                      t         fd|D              r:t         j                  j                  t	               t	        |      z              t
           |      S )Nc              3   &   K   | ]  }|v  
 y wro   rx   )ry   r   r   s     r3   r|   z(NonMutableDict.update.<locals>.<genexpr>]  s     (QqDy(s   r   )anyrJ   r   r   r   r   update)r   otherr   s   ` r3   r   zNonMutableDict.update\  sJ    (%((T__33D	CJ8N3OPPw~e$$r5   )__name__
__module____qualname____doc__r   r   r   __classcell__)r   s   @r3   r   r   F  s    */
% %r5   r   c                       e Zd ZdZddZy)classpropertyz5Descriptor to be used as decorator for @classmethods.Nc                 D     | j                   j                  d |             S ro   )fget__get__)r   rr   objtypes      r3   r   zclassproperty.__get__e  s    /tyy  w/11r5   ro   )r   r   r   r   r   rx   r5   r3   r   r   b  s
    ?2r5   r   c                    | \  }}}}}}t        |t              st        ||      s ||      S |9t        j                         t        j                  k  rt        j
                          |0|s.t        d t        j                  D              rt        ddd       t        |t              r|j                         n|}|||dz   nddz   t        |      z   n|}t        j                  |||d|	      5 }	t        |t              r.|	D 
ci c]  \  }
}|
t        |||dddf       c}}
cddd       S |	D cg c]  }t        |||dddf       }}t        |t              r|cddd       S t        |t              rt        |      cddd       S t        j                   |      cddd       S c c}}
w c c}w # 1 sw Y   yxY w)
zEApply a function recursively to each element of a nested data struct.Nc              3   8   K   | ]  }d |j                   v   yw)notebookN)r   )ry   tqdm_clss     r3   r|   z%_single_map_nested.<locals>.<genexpr>v  s     4rYaZ8CTCT5T4rs   r+    T)endflush#rr   )disablepositionunitdesc)rG   re   r   get_verbosityWARNINGset_verbosity_warningr   r   __mro__printr   str_single_map_nestedrb   r   r   array)r   functiondata_structtypesrankdisable_tqdmr   pbar_iterable	pbar_descpbarr   rz   mappeds                r3   r  r  i  s   =A:Hk5$d k4(K1O$$ G113gooE%%' 4reieqeq4r1rcr& ,6k4+HK%%'kMNRN^t/R3>TJdhI	m\Du[d	e 
(imk4(^bcVZVWYZA)8QtT4*PQQc
( 
( [__UV((AudD$)OP_F_+t,
( 
( K/V}
( 
( xx'
( 
(c_	
( 
(s<   3F8	F-%F82F86F3F8+F8F8-F88Gr  r  	dict_onlymap_list	map_tuple	map_numpynum_procparallel_min_lengthr	  r  r   c                    |ag }|sR|r |j                   t               |r |j                   t               |r  |j                   t        j                         t        |      }t        |t              st        ||      s | |      S |	xs t        j                          }	t        |t              rt        |j                               n|}|d}|dk7  r|dk  st        |      |k  r5t        j                  ||	|
      D cg c]  }t        | ||dddf       }}nNt        j                         5  t        j                  ddt                t#        | ||||	|
t              }ddd       t        |t              r#t        t%        |j'                                     S t        |t              rS t        |t              rt              S t        j(                        S c c}w # 1 sw Y   xY w)	a  Apply a function recursively to each element of a nested data struct.

    Use multiprocessing if num_proc > 1 and the length of data_struct is greater than or equal to
    `parallel_min_length`.

    <Changed version="2.5.0">

    Before version 2.5.0, multiprocessing was not used if `num_proc` was greater than or equal to ``len(iterable)``.

    Now, if `num_proc` is greater than or equal to ``len(iterable)``, `num_proc` is set to ``len(iterable)`` and
    multiprocessing is used.

    </Changed>

    Args:
        function (`Callable`): Function to be applied to `data_struct`.
        data_struct (`Any`): Data structure to apply `function` to.
        dict_only (`bool`, default `False`): Whether only apply `function` recursively to `dict` values in
            `data_struct`.
        map_list (`bool`, default `True`): Whether also apply `function` recursively to `list` elements (besides `dict`
            values).
        map_tuple (`bool`, default `False`): Whether also apply `function` recursively to `tuple` elements (besides
            `dict` values).
        map_numpy (`bool, default `False`): Whether also apply `function` recursively to `numpy.array` elements (besides
            `dict` values).
        num_proc (`int`, *optional*): Number of processes.
        parallel_min_length (`int`, default `2`): Minimum length of `data_struct` required for parallel
            processing.
            <Added version="2.5.0"/>
        types (`tuple`, *optional*): Additional types (besides `dict` values) to apply `function` recursively to their
            elements.
        disable_tqdm (`bool`, default `True`): Whether to disable the tqdm progressbar.
        desc (`str`, *optional*): Prefix for the tqdm progressbar.

    Returns:
        `Any`
    Nr   r   )r   r   TignorezL.* is experimental and might be subject to breaking changes in the future\.$)messagecategory)appendrb   r   r   ndarrayrG   re   r   is_progress_bar_enabledri   lenr   r  warningscatch_warningsfilterwarningsUserWarningr   rf   rj   r  )r  r  r  r  r  r  r  r  r	  r  r   r   rr   r  s                 r3   
map_nestedr"    s   d }T"U#RZZ(e k4(K1O$$Hw'F'F'H#HL-7T-JtK&&()P[H2~(a-3x=;N+N ||HlN
 #udD$GH
 

 $$& 	o##g$
 "(Hh|UY[mnF	o +t$C((*F344k4(MU+= 88F##+

	o 	os   G 12G%%G.c                       e Zd ZddZddZy)NestedDataStructureNc                 &    ||| _         y g | _         y ro   )data)r   r&  s     r3   r   zNestedDataStructure.__init__  s     ,D	"	r5   c                     ||n| j                   }t        |t              r(| j                  t	        |j                                     S t        |t        t        f      r(|D cg c]  }| j                  |      D ]  }|  c}}S |gS c c}}w ro   )r&  rG   re   flattenrb   ri   r   )r   r&  item	flatteneds       r3   r(  zNestedDataStructure.flatten  sw    'tTYYdD!<<T[[] 344tUm,*.S$T@RS9ISISS6M Ts   $B
ro   )r   r   r   r   r(  rx   r5   r3   r$  r$    s    5r5   r$  c                     	 t        t        j                  j                  |            j                  }| |k  S # t
        $ r Y yw xY w)NT)r	   ospathabspathfreeOSError)needed_bytes	directory
free_bytess      r3   has_sufficient_disk_spacer4    sE    	 :;@@
 *$$  s   29 	AAurl_pathc                 v   t        |       }d}|j                  dv r|j                  dk(  rd| v r6| j                  d      st	        d|  d      | j                  dd      } | |fS |j                  d	d }d
|v r|j                  d
      n|df\  }}|j                  d      \  }}d| d| d| d} | d| }| |fS )zMConvert a link to a file on a github repo in a link to the raw github object.N)httphttpss3z
github.comblobz.pyzExternal import from github at z) should point to a file ending with '.py'rawr   z/tree/masterrR   zhttps://github.com/z	/archive/z.zip-)r   schemenetlocrI   rJ   rY   r-  split)r5  parsedsub_directorygithub_path	repo_infobranch
repo_owner	repo_names           r3   _convert_github_urlrH    s    hFM}}//FMM\4QX$$U+ #B8*Lu!vww''6H ]"" !++ab/K?G;?V 1 1( ;]hjr\sIv$-OOC$8!J	,ZL)IfXUYZH(k6(3M]""r5   	file_pathc                 N  	 g }t        | d      5 }|j                  |j                                ddd       t        j	                  d|  d       g }d}|D ]  }t        j                  d|      }t        |      dk(  r| }|r.t        j                  d	|t
        j                  
      		)t        j                  d|t
        j                  
      			j                  d      rt        	fd|D              r	j                  d      rD	j                  d      }t        |      \  }}|j                  d	j                  d      ||f       	j                  d      s|j                  d	j                  d      	j                  d      df       B	j                  d      r7	j                  d      }|j                  d	j                  d      |df       |j                  d	j                  d      	j                  d      df        |S # 1 sw Y   xY w)a  Find whether we should import or clone additional files for a given processing script.
        And list the import.

    We allow:
    - library dependencies,
    - local dependencies and
    - external dependencies whose url is specified with a comment starting from "# From:' followed by the raw url to a file, an archive or a github repository.
        external dependencies will be downloaded (and extracted if needed in the dataset folder).
        We also add an `__init__.py` to each sub-folder of a downloaded folder so the user can import from them in the script.

    Note that only direct import in the dataset processing script will be handled
    We don't recursively explore the additional import to download further files.

    Example::

        import tensorflow
        import .c4_utils
        import .clicr.dataset-code.build_json_dataset  # From: https://raw.githubusercontent.com/clips/clicr/master/dataset-code/build_json_dataset
    zutf-8)encodingNz	Checking z for additional imports.Fz[\s\S]*?"""[\s\S]*?r   z=^import\s+(\.?)([^\s\.]+)[^#\r\n]*(?:#\s+From:\s+)?([^\r\n]*))flagszQ^from\s+(\.?)([^\s\.]+)(?:[^\s]*)\s+import\s+[^#\r\n]*(?:#\s+From:\s+)?([^\r\n]*)c              3   L   K   | ]  }|d    j                  d      k(    yw)r   r   N)group)ry   impmatchs     r3   r|   zget_imports.<locals>.<genexpr>D  s"     ?3q6U[[^+?s   !$   externalr   internallibrary)openextend	readlinesloggerdebugr_   rd   r  rP  	MULTILINErN  r   rH  r  )
rI  linesr   importsis_in_docstringlinedocstr_start_matchr5  rB  rP  s
            @r3   get_importsr`    s   ( E	i'	* $aQ[[]#$ LL9YK'?@A9;GO )RZZ(>E!"a' #21O Y[_gigsgst=HHdllE
 };;q>?w??{{1~ ;;q>*=h*G'-
EKKNHmTUQ
EKKNEKKNDQR{{1~ ;;q>	5;;q>8TJK	5;;q>5;;q>4PQS)RV Nc$ $s    HH$c                       e Zd ZdZej
                  j                  ej                  j                  j                               ZddZ
d Zy)PicklerzHSame Pickler as the one from dill, but improved for notebooks and shellsc                   	
 t        |      }|t        j                  vrwt        j                  t        j                  d      k  rd n^t        j                  j                  d d t        j                  d      j                  t        j                  d      j                  fv rd |j                  |j                  fdk(  r	 dd l
	t        |      	fd       }n|j                  |j                  fd	k(  r	 dd lt        |      fd
       }n|j                  |j                  fdk(  r	 dd lt        |      fd       }nP|j                  j                  d      r5t!        d |j"                  D              r	 dd l
t        |      
fd       }t&        j                  j)                  | ||       y # t        $ r Y .w xY w# t        $ r Y =w xY w# t        $ r Y Lw xY w# t        $ r Y [w xY w)N0.3.6c                 V    t         j                  j                  j                  |       y ro   )dill_dillloginfopicklermsgs     r3   dill_logzPickler.save.<locals>.dill_loge  s    JJNN'',r5   rQ  0.3.7c                 X    t         j                  j                  j                  | |       y ro   )rf  rg  rX  tracerj  s     r3   rm  zPickler.save.<locals>.dill_logj  s    JJ%%++GS9r5   )_regexPatternr   c                      | d|        |j                   |j                  f}| j                  j                  ||        | d       y )NzRe: rq   z# Re)r[   rL  save_reducecompile)rk  rr   r   rm  rg   s      r3   _save_regexz!Pickler.save.<locals>._save_regexq  sP     D,7KKII   ++EMM4S+I &1r5   )r   Tensorc                     fd} | d|        |j                         j                         j                         f}| j                  |||        | d       y )Nc                 &    j                  |       S ro   )
from_numpy)np_arrayr   s    r3   _create_tensorz:Pickler.save.<locals>._save_tensor.<locals>._create_tensor  s    #(#3#3H#==r5   zTo: rq   z# To)detachcpunumpyrt  )rk  rr   r|  r   rm  r   s       r3   _save_tensorz"Pickler.save.<locals>._save_tensor  s\    > !D,7 #

 0 0 2 8 8 :<++NDc+J &1r5   )ztiktoken.coreEncodingc                      | d|        |j                   |j                  |j                  |j                  f}| j	                  j
                  ||        | d       y )NzEnc: rq   z# Enc)r0   _pat_str_mergeable_ranks_special_tokensrt  r  )rk  rr   r   rm  tiktokens      r3   _save_encodingz$Pickler.save.<locals>._save_encoding  s]     E#-8 ##,,8L8LcNaNab++H,=,=t+M '2r5   z
spacy.langc              3   R   K   | ]  }|j                   |j                  fd k(   ! yw))zspacy.languageLanguageN)r   r   )ry   clss     r3   r|   zPickler.save.<locals>.<genexpr>  s*      FUX.2PPFs   %'c                     fd} | d|        |j                   |j                         f}| j                  |||        | d       y )Nc                     j                   j                  | d   d         }|j                  |       }|j                  |      S )Nnlplang)utilget_lang_classfrom_config
from_bytes)r   
bytes_datalang_clsr  spacys       r3   _create_langz6Pickler.save.<locals>._save_lang.<locals>._create_lang  s?    ',zz'@'@vAV'WH"*"6"6v">C#&>>*#==r5   zSp: rq   z# Sp)r   to_bytesrt  )rk  rr   r  r   rm  r  s       r3   
_save_langz Pickler.save.<locals>._save_lang  sP    >
 !D,7 #

CLLN;++L$C+H &1r5   )save_persistent_id)rp   rb  dispatchr   DILL_VERSIONr   parsereleaser   r   rg   pklregisterImportErrorr   r  
startswithr   r  r  rf  save)r   rr   r  obj_typerv  r  r  r  rm  rg   r  r  r   s           @@@@@r3   r  zPickler.save_  s   97+++""W]]7%;;- $$,,Ra0W]]75K5S5SU\UbUbcjUkUsUs4tt: ##X%6%67;PP  * + %%x'8'89=PP  *	 +	 %%x'8'89=ZZ# * + $$//=# F\d\l\lF C  *
 +
 	$8JKq # $ #  # * # sH   F> =G 2G G+ >	G
	G
	GG	G('G(+	G76G7c                 j    t        |      t        k7  r!t        j                  j	                  | |       y y ro   )rp   r  rf  rb  memoize)r   rr   s     r3   r  zPickler.memoize  s'    9LL  s+ r5   N)T)r   r   r   r   rf  rg  MetaCatchingDictrb  r  r   r  r  rx   r5   r3   rb  rb  Z  s9    Rzz**4<<+@+@+E+E+GHHULn,r5   rb  c                 <    t        |d      j                  |        y)zpickle an object to a fileT)recurseN)rb  dumprr   files     r3   r  r    s    D$$$S)
r5   c              #   >  K   	 dt        |       j                  D cg c]  }|j                   c}v rAt        | d      r5t	        | j
                  t              rt        | di       5  d  d d d        y d  y c c}w # 1 sw Y   y xY w# t        $ r d  Y y w xY ww)NPreTrainedTokenizerBasecache)	rp   r  r   r   rG   r  re   r   r  )rr   
base_classs     r3   _no_cache_fieldsr    s     %PTUXPYPaPa)b***=*=)bbW%399d+%c7B7    *c 
  sh   BB
 A97B
 &A>+B
 3B4B
 8B9B
 >BB
 BB
 
BBBBc                     t               }t        |       5  t        | |       ddd       |j                         S # 1 sw Y   |j                         S xY w)zpickle an object to a stringN)StringIOr  r  getvaluer  s     r3   dumpsr    sB    :D	#	 S$==?==?s	   ;Ac                       fd}|S )Nc                 .    | t         j                  <   | S ro   )rb  r  )r   ts    r3   proxyzpklregister.<locals>.proxy  s    "r5   rx   )r  r  s   ` r3   r  r    s     Lr5   rd  c                 Z   t         j                  j                  j                  d|        |j                  j                  d      st        |j                  j                  t        j                  j                              dkD  rE|j                  j                  t        j                  j                        d   j                  d      s|j                  dk(  rdn(t        j                  j                  |j                        }d}t         j                  j                  r@t        |d      r|j                  |j                   |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  ||j2                  |j4                  |j6                  f}n|j                  |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  ||j2                  |j4                  |j6                  f}n|j                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  ||j2                  |j4                  |j6                  f}| j9                  t:        ||	       t         j                  j                  j                  d
       y)z
        From dill._dill.save_code
        This is a modified version that removes the origin (filename + line no.)
        of functions created in notebooks or shells for example.
        zCo: <r   r@   
ipykernel_<lambda>r   co_posonlyargcountrq   # CoN)rf  rg  rh  ri  co_filenamer  r  r@  r,  r-  sepco_namebasenamePY3r   co_argcountr  co_kwonlyargcount
co_nlocalsco_stacksizeco_flagsco_code	co_constsco_namesco_varnames	co_lnotabco_freevarsco_cellvarsrt  r
   rk  rr   r  co_firstlinenor   s        r3   
_save_coder    sE    	

d3%L)& ))#.COO))"''++67!;OO))"''++6r:EElS{{j(  !!#//2 	 ::>>s01OO**))NN$$LLKKMMLLOOKK"MMOOOO!( OO))NN$$LLKKMMLLOOKK"MMOOOO&   D  	Hd4

F#r5   rQ  rn  c                 
   t         j                  j                  j                  | d|       |j                  j                  d      st        |j                  j                  t        j                  j                              dkD  rE|j                  j                  t        j                  j                        d   j                  d      s|j                  dk(  rdn(t        j                  j                  |j                        }d}t        |d      r|j                  |j                  |j                   |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  |j2                  ||j4                  |j6                  |j8                  |j:                  |j<                  |j>                  f}nt        |d	      r|j                  |j                  |j                   |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  |j2                  ||j4                  |j:                  |j<                  |j>                  f}nt        |d
      r|j                  |j                  |j                   |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  ||j4                  |j<                  |j>                  f}n>t        |d      r|j                  |j                   |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  ||j                  |j<                  |j>                  f}n|j                  |j"                  |j$                  |j&                  |j(                  |j*                  |j,                  |j.                  |j0                  ||j                  ||j                  |j<                  |j>                  f}| jA                  t         j                  jB                  ||       t         j                  j                  j                  | d       y )NzCo: %sr  r   r@   r  r  r   co_endlinetableco_exceptiontableco_linetabler  rq   r  )"rf  rg  rX  rp  r  r  r  r@  r,  r-  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  co_qualnamer  r  co_columntabler  r  r  rt  _create_coder  s        r3   	save_coder  F  s   

37, ))#.COO))"''++67!;OO))"''++6r:EElS{{j(  !!#//2 	  3)*&&%%    ##""%%+D. S-.&&%%    %%'D* S.)&&%%    #D& S./&&%%  !D( %%  D$ 	DJJ33TsC

0r5   z0.3.5c           
      R   t         j                  j                  |      snt         j                  j                  j	                  d|        t        | dd      r}t         j                  j                  } ||dd      }t        |      t         j                  j                  v ret         j                  j                  r|j                  n|j                  }n2t         j                  j                  r|j                  n|j                  }t        t        |j                                     }t        | dd      }t        | dd      }t        |      t         j                  j                  v xr |du}t!        t         j                  j                        |ft         j                  j                  t        |      <   t         j                  j                  rdt        |j"                  d	d
      v xr |du}|rd| _        |rd| _        t        |dd      }| j)                  t         j                  j*                  |j"                  ||j,                  |j.                  |j0                  |j2                  |f|       ndt        |j4                  d	d
      v xr |duxr t        | dd      }|rd| _        |rd| _        | j)                  t         j                  j*                  |j4                  ||j6                  |j8                  |j:                  |j2                  f|       |r|| _        |r|| _        t         j                  j<                  r|s|s
|s|s|s|s|r| j?                          t         j                  j                  j	                  d       yt         j                  j                  j	                  d|        t        |dt        |dd            }	t         j                  j@                  jC                  | ||	       t         j                  j                  j	                  d       y)z
        From dill._dill.save_function
        This is a modified version that make globs deterministic since the order of
        the keys in the output dictionary of globalvars can change.
        zF1: _recurseFTr  builtin_byrefNr   r  rx   __kwdefaults__rq   # F1zF2: r   r   r0   # F2)"rf  rg  _locate_functionrh  ri  r}   detect
globalvarsidstackr  __globals__func_globalsre   sortedr   r  __code__r  r  rt  _create_functionr   __defaults____closure____dict__	func_code	func_namefunc_defaultsfunc_closureOLDER
clear_memoStockPicklersave_global)
rk  rr   r  globsr  r  _memo_superfkwdefaultsr0   s
             r3   save_functionr    s1    zz**3/JJNN$se-w
E2![[33
"3dCc7djj.../3zz~~COO3CSCSE+/::>>s?O?O ./EWh5Fw
D9HW

 0 00Lxt7KE(+DJJ,<,<(=s(BDJJRW%zz~~!WS\\:r%JJdQW_cQc%)GN',G$%c+;TB##JJ//\\5#,,8H8H#//[^[g[gitu $  z2 FF <t+<U; 
 %)GN',G$##JJ//]]E3==#:K:KSM]M]_b_k_kl $ 
 !'#+ 

  FufUW_""$JJNN' 		 JJNN$se-3Z0NODJJ##//4/HJJNN'r5   c           	      x   t         j                  j                  ||       sft         j                  j                  j	                  d|z         t        | dd       }t        | dd       }t        | dd       }t        | dt         j                  j                        }g }|r ddlm}  ||dd	      }d
|j                  i}	nt         j                  j                  r|j                  n|j                  }|r(||j                  u rt        | d|      j                  }|}	nV|F|j                  :t        t         j                  j                  |j                  d      dd       |u r|}	nd
|j                  i}	|	|u }
t        t!        |	j#                                     }	|
r|	}n$|"t        t!        |j#                                     }||	|urt         j                  j                  r'|j%                         D ch c]  }t'        |       }}n&|j)                         D ch c]  }t'        |       }}|D ]8  }||v s||   j+                  t         j                  j,                  |	|ff        n. |j+                  t         j                  j,                  |	|ff       t         j                  j                  r|j.                  }i }dD ]  }t        ||d       }||||<    |j0                  |j2                  k7  r|j0                  |d<   d
|	vs|j                  |	d
   k7  r|j                  |d<   |j                  }t5        |      t        ur||d<   d }|r||f}t         j                  j7                  | t         j                  j8                  |j:                  |	|j2                  |j<                  |f|f||       n|j>                  }|j@                  $|j+                  tB        |d|j@                  ff       d
|	vs|j                  |	d
   k7  r$|j+                  tB        |d|j                  ff       |j                  r$|j+                  tB        |d|j                  ff       t         j                  j7                  | t         j                  j8                  |jD                  |	|jF                  |jH                  |ff||       |rtK        tM        |j%                               d       }|r||rz|D ]u  }tB        |d|ff}	 |jO                  |        | jR                  |  t         j                  j                  r| jU                  tW        dd             e| jU                  d       w t         j                  j                  j	                  d       y t         j                  j                  j	                  d|z         t        |dt        |d
d             }t         j                  jX                  j[                  | ||       t         j                  j                  j	                  d       y c c}w c c}w # tP        $ r Y Rw xY w)NF1: %sr  	_postproc_main_modified_original_mainr   r  Tr  r   _mainr  r   r  __annotations__r   r   rr   postproc_listr   cell_contents0UTF-8r  F2: %sr  r  ).rf  rg  r  rh  ri  r}   __builtin__dill.detectr  r   r  r  r  r  _import_modulere   r  r   ri   r  
itervaluesr  	_setitemsr  r   r   rp   _save_with_postprocr  r  r  r  r   r   r  r  r  nextiterremoverJ   rt  writebytesr  r  )rk  rr   r  r  r   r  r  r  
globs_copyr  globs_is_globs_copygglob_idsstack_elementclosure
state_dict	fattrnamefattrstatetopmost_postproccellpossible_postprocr0   s                          r3   r  r  &  sn   zz**38JJNN3/w
D9Hd;I$W.>EN$W.>

@V@VWNM2'T4H
 $S^^404

S__CDTDT
 "jN4K4K&K!('>!J!S!SJ&E *2

 9 9#..$ OQ[]abfpp&E'8E #(:"5./E""
'!&)9)9);"<=
 %%z*A ::>>/9/@/@/BC!1CHC/9/D/D/FG!1GHG%. VM$0!-0779M9MPUWaOb8cdV
 "(($***>*>
@S)TUzz~~//
!Q 6I#CD9E(05
9-6 ##s||3141A1AJ~.U*cnnj@Q.Q/2~~J|,;d*-2Jz* E!:-E

..

33ucllC<L<LgV
 "/ / 	 **;;*!(('CCKK3P)QRU*cnnj@Q.Q!(('Cs~~3V)WX<<!(('CS\\3R)ST

..ZZ003==%X[XiXikr2st"/	 /  #'Y-=-=-?(@$#G / ' /-4t_c6R,S)%,334EF
 ,++->?::>>#MM%W*=>#MM#./ JJNN' 		 JJNN3/3Z0NODJJ##//4/HJJNN'a  DGz  * %$%s   1X"X'5X,,	X98X9c           	      h   t         j                  j                  ||       st        |j                        t
        urt        |dd       }|$t         j                  j                  j                  }t         j                  j                  |d      }d}	 t         j                  j                  ||j                        \  }}t        |dd       |u rd}|rpt         j                  j                  j                  | d|       | j                  t        df|       t         j                  j                  j                  | d       y t         j                  j                  j                  | d	|       t        | d
d       }t        | dd       }t        | dd       }	t        | dt         j                  j                        }
g }|r ddlm}  ||dd      }d|j$                  i}n|j&                  }|	r(||
j(                  u rt        | d|
      j(                  }|}nV|F|j$                  :t        t         j                  j                  |j$                  d      dd       |u r|}nd|j$                  i}||u }t+        t-        |j/                                     }|r|}n$|"t+        t-        |j/                                     }|||ur|j1                         D ch c]  }t3        |       }}|D ]8  }||v s||   j5                  t         j                  j6                  ||ff        n. |j5                  t         j                  j6                  ||ff       |j8                  }i }dD ]  }t        ||d       }||||<    |j                  |j                  k7  r|j                  |d<   d|vs|j$                  |d   k7  r|j$                  |d<   |j(                  }t        |      t*        ur||d<   d }|r||f}t         j                  j;                  | t         j                  j<                  |j                  ||j                  |j>                  |f|f||       |rutA        tC        |j1                               d       }|rP|rN|D ]I  }tD        |d|ff}	 |jG                  |        | j                  |  | jK                  tM        dd             K t         j                  j                  j                  | d       y t         j                  j                  j                  | d|       t        |dt        |dd             }t         j                  jN                  jQ                  | ||       t         j                  j                  j                  | d       y # t        $ r Y w xY wc c}w # tH        $ r Y 2w xY w)Nr   T)safeF__func__zF3: %srq   z# F3r  r  r  r   r  r   r  r  r   r  r  r  r   r  r  r	  r
  r  r  r  r  ))rf  rg  r  rp   r  r
   r}   r  r   r  _getattributer   AttributeErrorrX  rp  rt  r  r  r   r  r  re   r  r   ri   r  r  r  r  r  r  r  r  r  r   r  rJ   r  r  r  r  )rk  rr   module_namemodule_pypy_builtinfound_r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r   r!  r"  r#  r0   s                               r3   r  r    s9   zz**38CLL!1 &c<>&"&**"8"8"A"AK22;T2J %#zz77@P@PQHE1uj$73>(, !JJ%%++GXsC''%1D#'NJJ%%++GV<JJ##GXs;w
D9Hd;I$W.>EN$W.>

@V@VWNM2'T4H
 $S^^4 __
 "jN4K4K&K!('>!J!S!SJ&E *2

 9 9#..$ OQ[]abfpp&E'8E #(:"5./E""
'!&)9)9);"<=
 %%z*A ,6+<+<+>?aBqE??%. VM$0!-0779M9MPUWaOb8cdV
 "(($***>*>
@S)TUooGJM 2	Y5$,1Jy)2 3<</-0-=-=
>*&#..E*<M*M+.>>
<(LLEE{$&).
:&z)JJ**,,s||UCLLRURbRbdk.lnst+	 +  #'Y-=-=-?(@$#G / ' 
;-4t_c6R,S)%,334EF
 ,++->?eC&9:
; JJ##GV4 		 JJ##GXs;3Z0NODJJ##//4/HJJ##GV4s & D @R  * %$%s*   >V VV$	VV$	V10V1c                     t        j                  | j                  | j                  | j                  | j
                  | j                        }| j                  |_        |S ro   )r	  r   r  r  r   r  r  r  )r   rh   s     r3   copyfuncr/  @  sI    t/?/?PTPaPacgcscstF //FMr5   Yqueuer   .r   c                 \    t         |di |      D ]  \  }} | j                  |        S )Nrx   )r   put)r1  r   r   r   rh   s        r3   _write_generator_to_queuer4  I  s3    t~f~. 	6		&Hr5   poolc                 T    | j                   D ch c]  }|j                   c}S c c}w ro   )_poolpid)r5  r   s     r3   _get_pool_pidr9  O  s    ::&aAEE&&&s   %kwargs_iterablec          
   #     K   t        |       }d}t        | t        j                  j                        rt
        nt        j
                  } |       5 }|j                         }|D cg c]  }| j                  t        |||f       }	}	 	 	  |j                  d       t        |       |k7  rd}t        d      2c c}w # t        $ r( t        d |	D              r |j                         rY nY Qw xY w	 |sU|	D 
cg c]  }
|
j                  d       nc c}
w c}
 n.# |s'|	D 
cg c]  }
|
j                  d       nc c}
w c}
 w w xY wd d d        y # 1 sw Y   y xY ww)NFTg?)timeoutc              3   <   K   | ]  }|j                           y wro   )ready)ry   async_results     r3   r|   z%iflatmap_unordered.<locals>.<genexpr>f  s     RL<--/Rs   zkOne of the subprocesses has abruptly died during map operation.To debug the error, disable multiprocessing.)r9  rG   multiprocessingr5  Poolr   multiprocessQueueapply_asyncr4  r   r   allemptyRuntimeError)r5  r   r:  initial_pool_pidpool_changedmanager_clsmanagerr1  r   async_resultsr?  s              r3   iflatmap_unorderedrM  S  sh     %T*L'o.B.B.G.GH'lNbNbK	 S']l
SYD6f8MN
 
	S#%))D11 !&*::#'L&G  	
  RMRRWbW\WbWbWd  DQRL!!$!/RR  DQRL!!$!/RR  )S S Ss   AE"E& B<E	DC D<E,C2-D1C22D6E<D
EE
&E 
?E

E	E"EE")FF)	FTFFNr   NTN)rO   )jr   r   	functoolsr   multiprocessing.poolr@  r,  r1  r_   r	  r  
contextlibr   dataclassesr   r   ior   r  r   r   shutilr	   r
   r   typingr   r   r   r   r   r   r   r   r   r   urllib.parser   rf  rB  multiprocess.poolr  r   	packagingr   	tqdm.autor   r   r   parallelr   r   typing_extensions_typing_extensionsr   r   r  
get_loggerr   rX  	lru_cacher  r4   r-   r  rL   r\   rl   r   r   r   r   r   r   r   re   r   propertyr   r  boolr   r"  r$  r4  rH  r`  rb  r  r  r  r  r  r  r  r  r  r  r/  r0  rC  r4  r5  rA  r9  rM  rx   r5   r3   <module>r`     s        	  	   % , " #   ( \ \ \ !        # 020
 
		H	% 

)0)s5c? )ss )sX&3  c3h B$N % % 2,C 2, 2,j0%T %82H 2(F " !\$ucz"\$\$ \$ 	\$
 \$ \$ sm\$ \$ E?\$ \$ 3-\$ 	\$~ %## #%Xc]0B*C #&F3 F5c3);#< FR_,dll _,D    
w//\ \|   !$w)?)G)GW^I_IgIg(hhO Od 
w//F FP   !$g(>(F(FFJ JX   !$w)?)G)GW^I_IgIg(hhG GT CLU[[ hqkAQ8R \` eh 'o22779J9J9O9OOP 'UXY\U] 'S
$$))<+<+<+A+AA
BS
3#
$S d^	S
 a[S{(  0+///50s   "N: :O	O	