
    Ig9              
       L   d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	 ddlm
Z
mZmZmZmZmZmZ ddlZddlZddlmZ ddlmZmZmZ  e       rddlmZ dd	lmZmZmZmZm Z  dd
l!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.  e.j^                  e0      Z1 e* e	e2      jf                        Z4ddddZ5ejl                  dk\  rejn                  Z7nejn                  Z7 G d ded      Z8 G d ded      Z9 G d ded      Z: G d ded      Z; G d ded      Z< G d de8e9e:e;e<d      Z= G d de$      Z>d  Z? e)e>j                        e>_@        e>j                  j                   8e>j                  j                   j                  d!d"d#$      e>j                  _         yy)%z8
Processing saving/loading class for common processors.
    N)Path)AnyDictListOptionalTuple	TypedDictUnion   )custom_object_save)ChannelDimensionis_valid_imageis_vision_available)PILImageResampling)PaddingStrategyPreTokenizedInputPreTrainedTokenizerBase	TextInputTruncationStrategy)CHAT_TEMPLATE_NAMEPROCESSOR_NAMEPushToHubMixin
TensorTypeadd_model_info_to_auto_map"add_model_info_to_custom_pipelinescached_file	copy_funcdirect_transformers_importdownload_urlis_offline_modeis_remote_urlloggingr   FeatureExtractionMixinImageProcessingMixin)AutoTokenizerAutoFeatureExtractorAutoImageProcessor)      c                      e Zd ZU dZeeeeee   ee   f      e	d<   eeeee   ee   f   e	d<   eeeeee   ee   f      e	d<   ee
   e	d<   ee
eef   e	d<   ee
eef   e	d<   ee   e	d<   ee   e	d	<   ee
   e	d
<   ee   e	d<   ee
   e	d<   ee
   e	d<   ee
   e	d<   ee
   e	d<   ee
   e	d<   ee
   e	d<   ee
   e	d<   ee   e	d<   y)
TextKwargsa  
    Keyword arguments for text processing. For extended documentation, check out tokenization_utils_base methods and
    docstrings associated.

    Attributes:
        add_special_tokens (`bool`, *optional*)
            Whether or not to add special tokens when encoding the sequences.
        padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*)
            Activates and controls padding.
        truncation (`bool`, `str` or [`~tokenization_utils_base.TruncationStrategy`], *optional*):
            Activates and controls truncation.
        max_length (`int`, *optional*):
            Controls the maximum length to use by one of the truncation/padding parameters.
        stride (`int`, *optional*):
            If set, the overflowing tokens will contain some tokens from the end of the truncated sequence.
        is_split_into_words (`bool`, *optional*):
            Whether or not the input is already pre-tokenized.
        pad_to_multiple_of (`int`, *optional*):
            If set, will pad the sequence to a multiple of the provided value.
        return_token_type_ids (`bool`, *optional*):
            Whether to return token type IDs.
        return_attention_mask (`bool`, *optional*):
            Whether to return the attention mask.
        return_overflowing_tokens (`bool`, *optional*):
            Whether or not to return overflowing token sequences.
        return_special_tokens_mask (`bool`, *optional*):
            Whether or not to return special tokens mask information.
        return_offsets_mapping (`bool`, *optional*):
            Whether or not to return `(char_start, char_end)` for each token.
        return_length (`bool`, *optional*):
            Whether or not to return the lengths of the encoded inputs.
        verbose (`bool`, *optional*):
            Whether or not to print more information and warnings.
        padding_side (`str`, *optional*):
            The side on which padding will be applied.
    	text_pairtext_targettext_pair_targetadd_special_tokenspadding
truncation
max_lengthstrideis_split_into_wordspad_to_multiple_ofreturn_token_type_idsreturn_attention_maskreturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_lengthverbosepadding_sideN)__name__
__module____qualname____doc__r   r
   r   r   r   __annotations__boolstrr   r   int     Z/var/www/html/answerous/venv/lib/python3.12/site-packages/transformers/processing_utils.pyr+   r+   Q   s   #J i):DOTRcMddeffy"3T)_dK\F]]^^uY0A4	?TXYjTk%klmm &4o-..dC!3344SM!$' %#D>)#D>)'~- (.$TN*D>!d^3-rG   r+   F)totalc                   T   e Zd ZU dZee   ed<   eeee	f      ed<   ee	   ed<   eeee	f      ed<   ee
de	f      ed<   ee   ed<   ee   ed	<   ee   ed
<   ee
eee   f      ed<   ee
eee   f      ed<   ee   ed<   eeee	f      ed<   ee   ed<   ee   ed<   ee
eef      ed<   y)ImagesKwargsaW  
    Keyword arguments for image processing. For extended documentation, check the appropriate ImageProcessor
    class methods and docstrings.

    Attributes:
        do_resize (`bool`, *optional*):
            Whether to resize the image.
        size (`Dict[str, int]`, *optional*):
            Resize the shorter side of the input to `size["shortest_edge"]`.
        size_divisor (`int`, *optional*):
            The size by which to make sure both the height and width can be divided.
        crop_size (`Dict[str, int]`, *optional*):
            Desired output size when applying center-cropping.
        resample (`PILImageResampling`, *optional*):
            Resampling filter to use if resizing the image.
        do_rescale (`bool`, *optional*):
            Whether to rescale the image by the specified scale `rescale_factor`.
        rescale_factor (`int` or `float`, *optional*):
            Scale factor to use if rescaling the image.
        do_normalize (`bool`, *optional*):
            Whether to normalize the image.
        image_mean (`float` or `List[float]`, *optional*):
            Mean to use if normalizing the image.
        image_std (`float` or `List[float]`, *optional*):
            Standard deviation to use if normalizing the image.
        do_pad (`bool`, *optional*):
            Whether to pad the image to the `(max_height, max_width)` of the images in the batch.
        pad_size (`Dict[str, int]`, *optional*):
            The size `{"height": int, "width" int}` to pad the images to.
        do_center_crop (`bool`, *optional*):
            Whether to center crop the image.
        data_format (`ChannelDimension` or `str`, *optional*):
            The channel dimension format for the output image.
        input_data_format (`ChannelDimension` or `str`, *optional*):
            The channel dimension format for the input image.
    	do_resizesizesize_divisor	crop_sizer   resample
do_rescalerescale_factordo_normalize
image_mean	image_stddo_padpad_sizedo_center_cropdata_formatinput_data_formatN)r>   r?   r@   rA   r   rC   rB   r   rD   rE   r
   floatr   r   rF   rG   rH   rK   rK      s    #J ~
4S>
""3-S#X''u136788UO#4. ud5k1233eT%[0122TNtCH~&&TN"*++c+;&; <==rG   rK   c                      e Zd ZU dZee   ed<   eeee	f      ed<   ee	   ed<   ed   ed<   ee   ed<   ee
   ed<   ee   ed	<   eee
ee
   f      ed
<   eee
ee
   f      ed<   ee   ed<   ee   ed<   ee   ed<   eeeef      ed<   y)VideosKwargsa  
    Keyword arguments for video processing.

    Attributes:
        do_resize (`bool`):
            Whether to resize the image.
        size (`Dict[str, int]`, *optional*):
            Resize the shorter side of the input to `size["shortest_edge"]`.
        size_divisor (`int`, *optional*):
            The size by which to make sure both the height and width can be divided.
        resample (`PILImageResampling`, *optional*):
            Resampling filter to use if resizing the image.
        do_rescale (`bool`, *optional*):
            Whether to rescale the image by the specified scale `rescale_factor`.
        rescale_factor (`int` or `float`, *optional*):
            Scale factor to use if rescaling the image.
        do_normalize (`bool`, *optional*):
            Whether to normalize the image.
        image_mean (`float` or `List[float]`, *optional*):
            Mean to use if normalizing the image.
        image_std (`float` or `List[float]`, *optional*):
            Standard deviation to use if normalizing the image.
        do_pad (`bool`, *optional*):
            Whether to pad the image to the `(max_height, max_width)` of the images in the batch.
        do_center_crop (`bool`, *optional*):
            Whether to center crop the image.
        data_format (`ChannelDimension` or `str`, *optional*):
            The channel dimension format for the output image.
        input_data_format (`ChannelDimension` or `str`, *optional*):
            The channel dimension format for the input image.
    rL   rM   rN   r   rP   rQ   rR   rS   rT   rU   rV   rX   rY   rZ   N)r>   r?   r@   rA   r   rC   rB   r   rD   rE   r[   r
   r   r   rF   rG   rH   r]   r]      s    @ ~
4S>
""3-+,,UO#4. ud5k1233eT%[0122TNTN"*++c+;&; <==rG   r]   c                       e Zd ZU dZee   ed<   eedee	   ed   eee	      f      ed<   eee
eef      ed<   ee   ed<   ee
   ed<   ee   ed<   ee
   ed	<   y
)AudioKwargsa  
    Keyword arguments for audio processing.

    Attributes:
        sampling_rate (`int`, *optional*):
            The sampling rate at which the `raw_speech` input was sampled.
        raw_speech (`np.ndarray`, `List[float]`, `List[np.ndarray]`, `List[List[float]]`):
            The sequence or batch of sequences to be padded. Each sequence can be a numpy array, a list of float
            values, a list of numpy arrays or a list of list of float values. Must be mono channel audio, not
            stereo, i.e. single float per timestep.
        padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*):
            Select a strategy to pad the returned sequences (according to the model's padding side and padding
            index) among:

            - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
                sequence if provided).
            - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
                acceptable input length for the model if that argument is not provided.
            - `False` or `'do_not_pad'`
        max_length (`int`, *optional*):
            Maximum length of the returned list and optionally padding length (see above).
        truncation (`bool`, *optional*):
            Activates truncation to cut input sequences longer than *max_length* to *max_length*.
        pad_to_multiple_of (`int`, *optional*):
            If set, will pad the sequence to a multiple of the provided value.
        return_attention_mask (`bool`, *optional*):
            Whether or not [`~ASTFeatureExtractor.__call__`] should return `attention_mask`.
    sampling_ratez
np.ndarray
raw_speechr0   r2   r1   r5   r7   N)r>   r?   r@   rA   r   rE   rB   r
   r   r[   rC   rD   r   rF   rG   rH   r_   r_      s    : C= |T%[$|:LdSWX]S^N__`aaeD#6788 %#D>)rG   r_   c                   (    e Zd ZU eeeef      ed<   y)CommonKwargsreturn_tensorsN)r>   r?   r@   r   r
   rD   r   rB   rF   rG   rH   rc   rc     s    U3
?344rG   rc   c                       e Zd ZU dZi ej
                  Zeed<   i ej
                  Zeed<   i e	j
                  Z
e	ed<   i ej
                  Zeed<   i ej
                  Zeed<   y)ProcessingKwargsa'  
    Base class for kwargs passing to processors.
    A model should have its own `ModelProcessorKwargs` class that inherits from `ProcessingKwargs` to provide:
        1) Additional typed keys and that this model requires to process inputs.
        2) Default values for existing keys under a `_defaults` attribute.
    New keys have to be defined as follows to ensure type hinting is done correctly.

    ```python
    # adding a new image kwarg for this model
    class ModelImagesKwargs(ImagesKwargs, total=False):
        new_image_kwarg: Optional[bool]

    class ModelProcessorKwargs(ProcessingKwargs, total=False):
        images_kwargs: ModelImagesKwargs
        _defaults = {
            "images_kwargs: {
                "new_image_kwarg": False,
            }
            "text_kwargs": {
                "padding": "max_length",
            },
        }

    ```

    For Python 3.8 compatibility, when inheriting from this class and overriding one of the kwargs,
    you need to manually update the __annotations__ dictionary. This can be done as follows:

    ```python
    class CustomProcessorKwargs(ProcessingKwargs, total=False):
        images_kwargs: CustomImagesKwargs

    CustomProcessorKwargs.__annotations__["images_kwargs"] = CustomImagesKwargs  # python 3.8 compatibility
    ```python

    common_kwargstext_kwargsimages_kwargsvideos_kwargsaudio_kwargsN)r>   r?   r@   rA   rc   rB   rg   r+   rh   rK   ri   r]   rj   r_   rk   rF   rG   rH   rf   rf     s    #J#

&
&#M< 

$
$K #

&
&#M< #

&
&#M< !

%
%!L+ rG   rf   c                   b   e Zd ZU dZddgZdgZg Zee   e	d<   dZ
dZdZg Zee   e	d<   d Zd	eeef   fd
Zd	efdZdeeej*                  f   fdZd Zd&defdZedeeej*                  f   d	eeeef   eeef   f   fd       Zedeeef   fd       Z	 d'dedee   d	eeef   fdZ e	 	 	 	 	 d(deeej*                  f   deeeej*                  f      dededeeeef      defd       Z!ed)d       Z"ed        Z#e$d         Z%e&d!        Z'd" Z(	 	 d*d#eeeeef         dee   d$ed	efd%Z)y)+ProcessorMixinza
    This is a mixin used to provide saving/loading functionality for all processor classes.
    feature_extractor	tokenizerchat_templateoptional_call_argsNvalid_kwargsc           
      L   | j                   D ]  }t        | ||j                  |d              ! |D ]  }|| j                  vst	        d| d       t        || j                        D ]  \  }}||v rt	        d| d      |||<    t        |      t        | j                        k7  rJt        dt        | j                         ddj                  | j                         dt        |       d      |j                         D ]  \  }}t        | | d	      }t        j                  ||      }t        |t              rt        d
 |D              }nt        t        |      }t        ||      s(t	        dt!        |      j"                   d| d| d      t        | ||        y )NzUnexpected keyword argument .z!Got multiple values for argument zThis processor requires z arguments: , z. Got z arguments instead._classc              3   B   K   | ]  }|t        t        |        y wNgetattrtransformers_module.0ns     rH   	<genexpr>z*ProcessorMixin.__init__.<locals>.<genexpr>~  s     $j\]\iW-@!%D$js   zReceived a z for argument z, but a z was expected.)optional_attributessetattrpop
attributes	TypeErrorziplen
ValueErrorjoinitemsrz   AUTO_TO_BASE_CLASS_MAPPINGget
isinstancetupler{   typer>   )	selfargskwargsoptional_attributekeyargattribute_name
class_nameproper_classs	            rH   __init__zProcessorMixin.__init__c  s    #'":": 	TD,fjj9KT.RS	T  	GC$//)">se1 EFF	G $'tT__#= 	-C'"CNCSST UVV),~&		- v;#doo..*3t+?*@TYYW[WfWfMgLhhnt9+02  $*<<> 	/NC .)9'@AJ377
JOJ*e,$$jj$jj&':JGc<0!$s)"4"4!5^NCSS[\f[gguv  D.#.	/rG   returnc                 j   t        j                  | j                        }t        j                  | j
                        }|j                  }|D cg c]  }|| j                  j                  vs| }}|dgz  }|j                         D ci c]  \  }}||v s|| }}}| j                  j                  |d<   d|v r|d= d|v r|d= d|v r|d= d|v r|d= |j                         D ci c]1  \  }}t        |t              s|j                  j                  dk(  s||3 }}}|S c c}w c c}}w c c}}w )z
        Serializes this instance to a Python dictionary.

        Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this processor instance.
        auto_mapprocessor_classro   image_processorrn   rp   BeamSearchDecoderCTC)copydeepcopy__dict__inspect	signaturer   
parameters	__class__r   r   r>   r   r   )r   outputsigattrs_to_savexkvs          rH   to_dictzProcessorMixin.to_dict  sE    t}}- .$1XqQdnn>W>W5WXX*%#)<<>H41aQ-5G!Q$HH$(NN$;$; !& {#&()&(*+f$'
 
1q.1Q[[5I5IMc5c qD
 
 1 Y I
s   D$-D$D)D))6D/c                 X    | j                         }t        j                  |dd      dz   S )z
        Serializes this instance to a JSON string.

        Returns:
            `str`: String containing all the attributes that make up this feature_extractor instance in JSON format.
           Tindent	sort_keys
)r   jsondumps)r   
dictionarys     rH   to_json_stringzProcessorMixin.to_json_string  s'     \\^
zz*Q$?$FFrG   json_file_pathc                     t        |dd      5 }|j                  | j                                ddd       y# 1 sw Y   yxY w)z
        Save this instance to a JSON file.

        Args:
            json_file_path (`str` or `os.PathLike`):
                Path to the JSON file in which this processor instance's parameters will be saved.
        wutf-8encodingN)openwriter   )r   r   writers      rH   to_json_filezProcessorMixin.to_json_file  s<     .#8 	0FLL,,./	0 	0 	0s	    8Ac                     | j                   D cg c]  }d| dt        t        | |              }}dj                  |      }| j                  j
                   d| d| j                          S c c}w )Nz- z: r   z:
z

)r   reprrz   r   r   r>   r   )r   nameattributes_reprs      rH   __repr__zProcessorMixin.__repr__  sv    PTP_P_`RvRWT4-@(A'BC``))O4..))*#o->d4CVCVCXBYZZ as   "A0push_to_hubc           	         |j                  dd      }|<t        j                  dt               |j	                  dd      t        d      ||d<   t        j                  |d       |rr|j                  dd      }|j                  d	|j                  t        j                  j                        d
         } | j                  |fi |}| j                  |      }| j                  m| j                  D cg c]  }t        | |       }	}|	D 
cg c]   }
t!        |
t"              r|
j$                  n|
" }}
|j'                  |        t)        | ||       | j                  D ]P  }t        | |      }t+        |d      r%|j-                  | j.                  j0                         |j3                  |       R | j                  ;| j                  D ],  }t        | |      }t!        |t"              s |j$                  d= . t        j                  j5                  |t6              }t        j                  j5                  |t8              }| j;                         }| j<                  gt?        j@                  d| j<                  idd      dz   }tC        |dd      5 }|jE                  |       ddd       tF        jI                  d|        tK        |jM                               dhk7  r)| jO                  |       tF        jI                  d|        |r%| jQ                  ||j	                  d             tK        |jM                               dhk(  rg S |gS c c}w c c}
w # 1 sw Y   xY w)a  
        Saves the attributes of this processor (feature extractor, tokenizer...) in the specified directory so that it
        can be reloaded using the [`~ProcessorMixin.from_pretrained`] method.

        <Tip>

        This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizerBase.save_pretrained`]. Please refer to the docstrings of the
        methods above for more information.

        </Tip>

        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
                be created if it does not exist).
            push_to_hub (`bool`, *optional*, defaults to `False`):
                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
                namespace).
            kwargs (`Dict[str, Any]`, *optional*):
                Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.tokenV`token` and `use_auth_token` are both specified. Please set only the argument `token`.T)exist_okcommit_messagerepo_id)config_set_processor_classr   rp   r   r   r   r   r   r   zchat template saved in r   zprocessor saved in )r   r   ))r   warningswarnFutureWarningr   r   osmakedirssplitpathsep_create_repo_get_files_timestamps_auto_classr   rz   r   r   init_kwargsappendr   hasattrr   r   r>   save_pretrainedr   r   r   r   rp   r   r   r   r   loggerinfosetkeysr   _upload_modified_files)r   save_directoryr   r   r   r   r   files_timestampsr   attrsaconfigs	attributeoutput_processor_fileoutput_chat_template_fileprocessor_dictchat_template_json_stringr   s                     rH   r   zProcessorMixin.save_pretrained  s-   0  $4d;%MM E zz'4(4 l  -F7O
NT2#ZZ(8$?NjjN,@,@,Mb,QRG'd'':6:G#99.I 'IMY~WT>2YEYafg\]A7N)OUVVgGgNN4 t^GD"oo 	6Nn5I y"89..t~~/F/FG%%n5	6 '"&// :#D.9	i)@A!--j9: !#^^ L$&GGLLAS$T! )

OT-?-?@VZ[^bb & /wG 86678KK12K1LMN ~""$%*;)<<34KK-.C-DEF'' -jj) (  ~""$%*;)<<I%&&k Zg>8 8s   6M%MMM(pretrained_model_name_or_pathc                    |j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  d	d      }	|j                  d
d      }
|j                  dd      }|j                  dd      }d|d}|||d<   t               r|st        j                  d       d}t	        |      }t
        j                  j                  |      }t
        j                  j                  |      rDt
        j                  j                  |t              }t
        j                  j                  |d      }t
        j                  j                  |      r|}d}d}nVt        |      r|}t        |      }d}n;t        }t        }	 t        ||||||||||	|
d      }t        ||||||||||	|
d      }d}|Dt#        |dd      5 }|j%                         }ddd       t'        j(                        d   }||d<   |i |fS 	 t#        |dd      5 }|j%                         }ddd       t'        j(                        }|rt        j                  d|        nt        j                  d d |        d|v r|d   t        j-                  d!       |s,d"|v rt/        |d"   |      |d"<   d#|v rt1        |d#   |      |d#<   ||fS # t        $ r  t         $ r t        d| d| dt         d      w xY w# 1 sw Y   xY w# 1 sw Y   xY w# t&        j*                  $ r t        d| d      w xY w)$a  
        From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a
        processor of type [`~processing_utils.ProcessingMixin`] using `from_args_and_dict`.

        Parameters:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.
            subfolder (`str`, *optional*, defaults to `""`):
                In case the relevant files are located inside a subfolder of the model repo on huggingface.co, you can
                specify the folder name here.

        Returns:
            `Tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the processor object.
        	cache_dirNforce_downloadFresume_downloadproxiesr   local_files_onlyrevision	subfolder _from_pipeline
_from_auto	processor)	file_typefrom_auto_classusing_pipelinez+Offline mode: forcing local_files_only=TrueTzchat_template.json)
r   r   r   r   r   r   
user_agentr   r   %_raise_exceptions_for_missing_entrieszCan't load processor for 'z'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'z2' is the correct path to a directory containing a z filerr   r   rp   z"It looks like the config file at 'z' is not a valid JSON file.zloading configuration file z from cache at zChat templates should be in a 'chat_template.json' file but found key='chat_template' in the processor's config. Make sure to move your template to its own file.r   custom_pipelines)r   r    r   r   rD   r   r   isdirr   r   isfiler!   r   r   r   EnvironmentError	Exceptionr   readr   loadsJSONDecodeErrorwarning_oncer   r   )clsr   r   r   r   r   r   r   r   r   r   from_pipeliner   r   is_localprocessor_filechat_template_fileresolved_processor_fileresolved_chat_template_filerp   readertextr   s                          rH   get_processor_dictz!ProcessorMixin.get_processor_dict2  s   $ JJ{D1	$4e< **%6=**Y-

7D)!::&8%@::j$/JJ{B/	

#3T: **\59#.?S
$+8J'(%5KKEF#(+,I(J%77==!>?77==67WW\\*GXN!#.KMa!b77>>78&C#*.'H89:N&23P&Q#*.'+N!3-*51"'#1#$3%5)%':?+'$ /:1&'#1#$3%5)%':?/+8 &213I %V{{}% JJt,_=M&3F?# #*v:		-sWE %{{}%!ZZ-N KK56M5NOPKK5n5E_UlTmnon,1P1\^
 ^+-G":.0M.z* "^35W"#568U612 v%%y $   &01N0O P99V8W X//=.>eE % %% % ## 	"45L4MMhi 	s<   ?.K> ?L-<M 
L:M >,L*-L7:M?M #M)r   c                    |j                         }|j                  dd      }|j                  dd      }d|v r|d= d|v r|d= | j                  || j                        } | |i |}|t	        |d|       t        |j                               D ]+  }t        ||      st	        |||j                  |             - |j                  |       t        j                  d|        |r||fS |S )	a  
        Instantiates a type of [`~processing_utils.ProcessingMixin`] from a Python dictionary of parameters.

        Args:
            processor_dict (`Dict[str, Any]`):
                Dictionary that will be used to instantiate the processor object. Such a dictionary can be
                retrieved from a pretrained checkpoint by leveraging the
                [`~processing_utils.ProcessingMixin.to_dict`] method.
            kwargs (`Dict[str, Any]`):
                Additional parameters from which to initialize the processor object.

        Returns:
            [`~processing_utils.ProcessingMixin`]: The processor object instantiated from those
            parameters.
        return_unused_kwargsFrp   Nr   r   )processor_configrr   z
Processor )r   r   validate_init_kwargsrr   r   r   r   r   updater   r   )	r  r   r   r   r  rp   unused_kwargsr   r   s	            rH   from_args_and_dictz!ProcessorMixin.from_args_and_dict  s   " (,,.%zz*@%H

?D9 .01'z*00._b_o_o0p00	$I> v{{}% 	9Cy#&	3

38	9 	m$j,-f$$rG   ModelProcessorKwargstokenizer_init_kwargsc           	      @   i i i i i d}i i i i i dt               }D ]  }|j                  j                  |i       j                         |<   |j                  |   j                  j                         D ]@  }||v st        | j                  |      rt        | j                  |      n||   }||   |<   B  |j                         t        |      t        |      z
  }	|D ]  }|j                  |   j                  j                         D ]n  }||v r0||   j                  |d      }
|
dk7  r/||	v r+t        d| d| d      ||v r|j                  |d      }
nd}
|
dk7  sV|
||   |<   |j                  |       p  t        fd|D              rT|j                         D ]@  \  }}|v s|j                         D ]#  \  }}||vs|||   |<   |j                  |       % B n|D ]  }||vs||   |d   |<    |D ]  }||   j                  |d           |S )a  
        Method to merge dictionaries of kwargs cleanly separated by modality within a Processor instance.
        The order of operations is as follows:
            1) kwargs passed as before have highest priority to preserve BC.
                ```python
                high_priority_kwargs = {"crop_size" = {"height": 222, "width": 222}, "padding" = "max_length"}
                processor(..., **high_priority_kwargs)
                ```
            2) kwargs passed as modality-specific kwargs have second priority. This is the recommended API.
                ```python
                processor(..., text_kwargs={"padding": "max_length"}, images_kwargs={"crop_size": {"height": 222, "width": 222}}})
                ```
            3) kwargs passed during instantiation of a modality processor have fourth priority.
                ```python
                tokenizer = tokenizer_class(..., {"padding": "max_length"})
                image_processor = image_processor_class(...)
                processor(tokenizer, image_processor) # will pass max_length unless overriden by kwargs at call
                ```
            4) defaults kwargs specified at processor level have lowest priority.
                ```python
                class MyProcessingKwargs(ProcessingKwargs, CommonKwargs, TextKwargs, ImagesKwargs, total=False):
                    _defaults = {
                        "text_kwargs": {
                            "padding": "max_length",
                            "max_length": 64,
                        },
                    }
                ```
        Args:
            ModelProcessorKwargs (`ProcessingKwargs`):
                Typed dictionary of kwargs specifically required by the model passed.
            tokenizer_init_kwargs (`Dict`, *optional*):
                Dictionary of kwargs the tokenizer was instantiated with and need to take precedence over defaults.

        Returns:
            output_kwargs (`Dict`):
                Dictionary of per-modality kwargs to be passed to each modality-specific processor.

        )rh   ri   rk   rj   rg   	__empty__zKeyword argument z+ was passed two times:
in a dictionary for z and as a **kwarg.c              3   &   K   | ]  }|v  
 y wrx   rF   )r}   r   default_kwargss     rH   r   z/ProcessorMixin._merge_kwargs.<locals>.<genexpr>b  s     7sn$7s   rg   )r   	_defaultsr   r   rB   r   r   ro   rz   r  r   r   addanyr   )r   r  r  r   output_kwargs	used_keysmodalitymodality_keyvaluenon_modality_kwargskwarg_valuesubdictsubkeysubvaluer   r"  s                  @rH   _merge_kwargszProcessorMixin._merge_kwargs  s   ^ 
 
 E	 ' 	CH';'E'E'I'I(TV'W'\'\'^N8$ 4 D DX N ^ ^ c c e C#88 #4>><@  =2<@ 
 >CN8,\:C	C 	^, "&kC,>>% 	0H 4 D DX N ^ ^ c c e 0v%"("2"6"6|["QK"k1lFY6Y(/~ >33;*<NP  "V+ #)**\;"GK"-K+-<GM(+L9MM,/%0	0, 777%+\\^ 2!'~-,3MMO 2(!2>FM(3F;%MM&122  Fi':@+M/237F
 & 	KH(#**=+IJ	KrG   r   r   r   r   r   c                 ,   ||d<   ||d<   ||d<   ||d<   |j                  dd      }|)t        j                  dt               |t	        d      |}|||d	<    | j
                  |fi |}	 | j                  |fi |\  }
} | j                  |	|
fi |S )
a[  
        Instantiate a processor associated with a pretrained model.

        <Tip>

        This class method is simply calling the feature extractor
        [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`], image processor
        [`~image_processing_utils.ImageProcessingMixin`] and the tokenizer
        [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`] methods. Please refer to the docstrings of the
        methods above for more information.

        </Tip>

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            **kwargs
                Additional keyword arguments passed along to both
                [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and
                [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`].
        r   r   r   r   r   Nr   r   r   )r   r   r   r   r   _get_arguments_from_pretrainedr  r  )r  r   r   r   r   r   r   r   r   r   r   s              rH   from_pretrainedzProcessorMixin.from_pretrainedu  s    N ({#1 %5!"%z$4d;%MM E   l  #E#F7O1s112OZSYZ!7!7!78U!`Y_!`%s%%dNEfEErG   c                     t        |t              s|j                  }ddlmc m} t        ||      st        | d      || _        y)a  
        Register this class with a given auto class. This should only be used for custom feature extractors as the ones
        in the library are already mapped with `AutoProcessor`.

        <Tip warning={true}>

        This API is experimental and may have some slight breaking changes in the next releases.

        </Tip>

        Args:
            auto_class (`str` or `type`, *optional*, defaults to `"AutoProcessor"`):
                The auto class to register this new feature extractor with.
        r   Nz is not a valid auto class.)	r   rD   r>   transformers.models.automodelsautor   r   r   )r  
auto_classauto_modules      rH   register_for_auto_classz&ProcessorMixin.register_for_auto_class  sC      *c*#,,J66{J/
|+FGHH$rG   c                 <   g }| j                   D ]  }t        | | d      }t        |t              r7t        d |D              }|j	                  dd      }|r|d   |d   }n|d   }nt        t
        |      }|j                   |j                  |fi |        |S )Nrv   c              3   D   K   | ]  }|t        t        |      nd   y wrx   ry   r|   s     rH   r   z@ProcessorMixin._get_arguments_from_pretrained.<locals>.<genexpr>  s#     o_`1=(;Q ?VZ Zos    use_fastTr   r   )r   rz   r   r   r   r{   r   r3  )	r  r   r   r   r   r   classesr=  attribute_classs	            rH   r2  z-ProcessorMixin._get_arguments_from_pretrained  s    !nn 	bN (8&?@J*e,odnoo!::j$7
 6&-ajO&-ajO")*=z"JKK7778U`Y_`a	b rG   c                 N    t        | | j                  d         }t        |dd       S )Nr   model_input_names)rz   r   )r   first_attributes     rH   rA  z ProcessorMixin.model_input_names  s'    !$(:;(;TBBrG   c                     | j                         }i }t        |      t        |      z
  }|r=dj                  |      }t        j	                  d| d       |D ci c]  }|| |   
 }}|S c c}w )Nru   zISome kwargs in processor config are unused and will not have any effect: z. )r   r   r   r   warning)r  rr   kwargs_from_configr  unused_keysunused_key_strr   s          rH   r  z#ProcessorMixin.validate_init_kwargs  s    -224,-L0AA!YY{3NNN[\j[kkmn >IIQ 0 33IMI Js   A+c           
      x   t        |      rt        j                  d       t        |      t        | j                        kD  rJt	        dt        | j                         ddj                  | j                         dt        |       d      t        || j                        D ci c]  \  }}||
 c}}S c c}}w )a  
        Matches optional positional arguments to their corresponding names in `optional_call_args`
        in the processor class in the order they are passed to the processor call.

        Note that this should only be used in the `__call__` method of the processors with special
        arguments. Special arguments are arguments that aren't `text`, `images`, `audio`, nor `videos`
        but also aren't passed to the tokenizer, image processor, etc. Examples of such processors are:
            - `CLIPSegProcessor`
            - `LayoutLMv2Processor`
            - `OwlViTProcessor`

        Also note that passing by position to the processor call is now deprecated and will be disallowed
        in future versions. We only have this for backward compatibility.

        Example:
            Suppose that the processor class has `optional_call_args = ["arg_name_1", "arg_name_2"]`.
            And we define the call method as:
            ```python
            def __call__(
                self,
                text: str,
                images: Optional[ImageInput] = None,
                *arg,
                audio=None,
                videos=None,
            )
            ```

            Then, if we call the processor as:
            ```python
            images = [...]
            processor("What is common in these images?", images, arg_value_1, arg_value_2)
            ```

            Then, this method will return:
            ```python
            {
                "arg_name_1": arg_value_1,
                "arg_name_2": arg_value_2,
            }
            ```
            which we could then pass as kwargs to `self._merge_kwargs`
        zPassing positional arguments to the processor call is now deprecated and will be disallowed in v4.47. Please pass all arguments as keyword arguments.zExpected *at most* zK optional positional arguments in processor callwhich will be matched with  z+ in the order they are passed.However, got z positional arguments instead.Please pass all arguments as keyword arguments instead (e.g. `processor(arg_name_1=..., arg_name_2=...))`.)r   r   r   rq   r   r   r   )r   r   	arg_valuearg_names       rH   'prepare_and_validate_optional_call_argsz6ProcessorMixin.prepare_and_validate_optional_call_args  s    X t9MMB t9s42233%c$*A*A&B%C D..1hht7N7N.O-P Q  #D	{ +}}  @C4I`I`?ab(;	8)#bbbs   %B6conversationtokenizec                     |$| j                   | j                   }nt        d       | j                  j                  |f||d|S )a  
        Similar to the `apply_chat_template` method on tokenizers, this method applies a Jinja template to input
        conversations to turn them into a single tokenizable string.

        Args:
            conversation (`List[Dict, str, str]`):
                The conversation to format.
            chat_template (`Optional[str]`, *optional*):
                The Jinja template to use for formatting the conversation. If not provided, the tokenizer's
                chat template is used.
            tokenize (`bool`, *optional*, defaults to `False`):
                Whether to tokenize the output or not.
            **kwargs:
                Additional keyword arguments
        zNo chat template is set for this processor. Please either set the `chat_template` attribute, or provide a chat template as an argument. See https://huggingface.co/docs/transformers/main/en/chat_templating for more information.)rp   rN  )rp   r   ro   apply_chat_template)r   rM  rp   rN  r   s        rH   rP  z"ProcessorMixin.apply_chat_template-  sd    .  !!- $ 2 2 m 
 2t~~11
(5
LR
 	
rG   )Frx   )NFFNmain)AutoProcessor)NF)*r>   r?   r@   rA   r   r   rq   r   rD   rB   feature_extractor_classtokenizer_classr   rr   r   r   r   r   r   r
   r   PathLiker   r   rC   r   classmethodr   r  r  rf   r   r0  r3  r:  r2  propertyrA  staticmethodr  rL  rP  rF   rG   rH   rm   rm   T  s    &{3J*+$&S	&"OK L$s) $/L&c3h &P	G 	G	05bkk1A+B 	0[
d'4 d'L V&,1#r{{2B,CV&	tCH~tCH~-	.V& V&p +d38n + +` 15z.z  (~z
 
c4izx  8<$!&,0=F',S"++-='>=F E#r{{"234=F 	=F
 =F c4i()=F =F =F~ % %2  " C C 
 
8cz (,	"
Dc3h01"
  }"
 	"
 
"
rG   rm   c                    	 dt         fd		fdfdd } ||       } |       } ||      } |      }|r|r| |fS | |s||s|r|rt        j                  d       || fS t        d      )a  
    For backward compatibility: reverse the order of `images` and `text` inputs if they are swapped.
    This method should only be called for processors where `images` and `text` have been swapped for uniformization purposes.
    Note that this method assumes that two `None` inputs are valid inputs. If this is not the case, it should be handled
    in the processor's `__call__` method before calling this method.
    r   c                 H    t        | t              xr | j                  d      S )Nhttp)r   rD   
startswith)vals    rH   is_urlz1_validate_images_text_input_order.<locals>.is_urlZ  s    #s#>v(>>rG   c                 ~    t        | t        t        f      r| D ]  } |      r y yt        |       s	 |       syy)NFT)r   listr   r   )imgsimg$_is_valid_images_input_for_processorr^  s     rH   rc  zO_validate_images_text_input_order.<locals>._is_valid_images_input_for_processor]  sF    dT5M* !;C@ !  !&&,rG   c                     t        | t              ryt        | t        t        f      rt	        |       dk(  ry| D ]  } |      c S  y)NTr   F)r   rD   r`  r   r   )tt_s"_is_valid_text_input_for_processors     rH   rg  zM_validate_images_text_input_order.<locals>._is_valid_text_input_for_processorh  sI    aD%=)1v{ ?9#>>?rG   c                      ||       xs | d u S rx   rF   )input	validators     rH   	_is_validz4_validate_images_text_input_order.<locals>._is_validu  s    05D=0rG   zYou may have used the wrong order for inputs. `images` should be passed before `text`. The `images` and `text` inputs will be swapped. This behavior will be deprecated in transformers v4.47.zGInvalid input type. Check that `images` and/or `text` are valid inputs.)rC   r   r  r   )
imagesr  rk  images_is_validimages_is_texttext_is_validtext_is_imagesrc  rg  r^  s
          @@@rH   !_validate_images_text_input_orderrq  R  s    ?t ?	1  (LMO7?Nd$FGM9$?N=t| 	>t|Tbguv	
 V|
^
__rG   r   rR  zprocessor files)objectobject_classobject_files)BrA   r   r   r   r   systypingr   pathlibr   r   r   r   r   r   r	   r
   numpynptyping_extensionsdynamic_module_utilsr   image_utilsr   r   r   r   tokenization_utils_baser   r   r   r   r   utilsr   r   r   r   r   r   r   r   r   r   r    r!   r"   
get_loggerr>   r   __file__parentr{   r   version_infoUnpackr+   rK   r]   r_   rc   rf   rm   rq  r   formatrF   rG   rH   <module>r     s      	 
    E E E   4 N N /    " 
		H	% 1h1F1FG  /40  w]]F%%F7 % 7 t4>9E 4>n->9E ->`$*)5 $*N59E 54z<{L`e 4n{
^ {
|7`t '~'A'AB %%1)7)C)C)K)K)R)RGX *S *N& 2rG   