
    *#h                         d dl mZmZ d dlmZmZmZmZmZm	Z	m
Z
 d dlZerddlmZ e G d d             Ze G d d	             Zy)
    )	dataclassfield)TYPE_CHECKINGAnyClassVarDictListOptionalUnionN   )FeatureTypec                       e Zd ZU dZee   ed<   dZee   ed<   dZ	e
e   ed<   dZe
e   ed<    ed dd	      Zeed
<   d Zdedeedf   f   fdZy)Translationa  `FeatureConnector` for translations with fixed languages per example.
    Here for compatiblity with tfds.

    Args:
        languages (`dict`):
            A dictionary for each example mapping string language codes to string translations.

    Example:

    ```python
    >>> # At construction time:
    >>> datasets.features.Translation(languages=['en', 'fr', 'de'])
    >>> # During data generation:
    >>> yield {
    ...         'en': 'the cat',
    ...         'fr': 'le chat',
    ...         'de': 'die katze'
    ... }
    ```
    	languagesNiddictdtypepa_typeFdefaultinitrepr_typec                     t        j                  t        | j                        D ci c]  }|t        j                          c}      S c c}w N)pastructsortedr   string)selflangs     Z/var/www/html/sandstorm/venv/lib/python3.12/site-packages/datasets/features/translation.py__call__zTranslation.__call__)   s3    yyt~~8NO$		+OPPOs   A
returnr   c                 l    ddl m} t        | j                        D ci c]  }| |d       c}S c c}w )z2Flatten the Translation feature into a dictionary.r   )Valuer   )featuresr&   r   r   )r    r&   ks      r"   flattenzTranslation.flatten,   s+    #,24>>,BCq5?"CCCs   1)__name__
__module____qualname____doc__r	   str__annotations__r   r
   r   r   r   r   r   r   r#   r   r   r)        r"   r   r      sx    * CyB!E8C=!!GXc]!}5uEE3EQD}d33E.FFG Dr1   r   c                       e Zd ZU dZdZee   ed<   dZee	   ed<   dZ
ee   ed<   dZee   ed<   dZee   ed<    ed d	d	
      Zeed<   d Zd Zd Zdedeedf   f   fdZy)TranslationVariableLanguagesa$  `FeatureConnector` for translations with variable languages per example.
    Here for compatiblity with tfds.

    Args:
        languages (`dict`):
            A dictionary for each example mapping string language codes to one or more string translations.
            The languages present may vary from example to example.

    Returns:
        - `language` or `translation` (variable-length 1D `tf.Tensor` of `tf.string`):
            Language codes sorted in ascending order or plain text translations, sorted to align with language codes.

    Example:

    ```python
    >>> # At construction time:
    >>> datasets.features.TranslationVariableLanguages(languages=['en', 'fr', 'de'])
    >>> # During data generation:
    >>> yield {
    ...         'en': 'the cat',
    ...         'fr': ['le chat', 'la chatte,']
    ...         'de': 'die katze'
    ... }
    >>> # Tensor returned :
    >>> {
    ...         'language': ['en', 'de', 'fr', 'fr'],
    ...         'translation': ['the cat', 'die katze', 'la chatte', 'le chat'],
    ... }
    ```
    Nr   num_languagesr   r   r   r   Fr   r   c                     | j                   rt        t        | j                               nd | _         | j                   rt        | j                         | _        y d | _        y r   )r   r   setlenr4   r    s    r"   __post_init__z*TranslationVariableLanguages.__post_init__\   s=    8<DNN 34D48NNS0r1   c                     t        j                  t        j                  t        j                               t        j                  t        j                               d      S )Nlanguagetranslation)r   r   list_r   r8   s    r"   r#   z%TranslationVariableLanguages.__call__`   s8    yybhhryy{&;BHHUWU^U^U`Labccr1   c           
         t        | j                        }| j                  rSt        |      |z
  rEt        ddj                  t	        t        |      |z
               ddj                  |       d      g }|j                         D ]J  \  }}t        |t              r|j                  ||f       *|j                  |D cg c]  }||f c}       L t        t	        |       \  }}||dS c c}w )NzSome languages in example (z, z) are not in valid set (z).r;   )r6   r   
ValueErrorjoinr   items
isinstancer.   appendextendzip)	r    translation_dictlang_settranslation_tuplesr!   textelr   translationss	            r"   encode_examplez+TranslationVariableLanguages.encode_examplec   s   t~~&>>c"23h>-diisCS?TW_?_8`.a-bbz{  |E  |E  FN  |O  {P  PR  S   *002 	GJD$$$"))4,7"))*E"D":*EF		G #&v.@'A"B	<%lCC +Fs    C-
r$   r   c                 L    ddl m}m}  | |d             | |d            dS )zCFlatten the TranslationVariableLanguages feature into a dictionary.r   )Sequencer&   r   r;   )r'   rO   r&   )r    rO   r&   s      r"   r)   z$TranslationVariableLanguages.flattenx   s)    - !x1#E(O4
 	
r1   )r*   r+   r,   r-   r   r
   r	   r/   r4   intr   r.   r   r   r   r   r   r   r9   r#   rM   r   r   r)   r0   r1   r"   r3   r3   3   s    > !%Ix~$#'M8C='B!E8C=!!GXc]!=EPUVE3VMdD*
}d33E.FFG 
r1   r3   )dataclassesr   r   typingr   r   r   r   r	   r
   r   pyarrowr   r'   r   r   r3   r0   r1   r"   <module>rT      sU    ( L L L  % $D $D $DN K
 K
 K
r1   