
    Ig*                         d Z ddlZddlmZmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZmZmZmZmZ dd	lmZmZ d
dlmZ  ej0                  e      Z G d de      Zy)zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
    N)ListOptionalUnion   )BatchFeature)
VideoInput)ProcessorMixin)
AddedTokenBatchEncodingPaddingStrategyPreTokenizedInput	TextInputTruncationStrategy)
TensorTypelogging   )AutoTokenizerc            $       :    e Zd ZdZg dZdgZdZdZdZd fd	Z		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dde
deeeee   ee   f   d	ed
eeeef   deeeef   dee   dedee   dee   dededededededeeeef      def"dZd Zd Zed        Z fdZe fd       Z xZS ) InstructBlipVideoProcessora  
    Constructs an InstructBLIPVideo processor which wraps a InstructBLIP image processor and a LLaMa/T5 tokenizer into a single
    processor.

    [`InstructBlipVideoProcessor`] offers all the functionalities of [`InstructBlipVideoImageProcessor`] and [`AutoTokenizer`]. See the
    docstring of [`~InstructBlipVideoProcessor.__call__`] and [`~InstructBlipVideoProcessor.decode`] for more information.

    Args:
        image_processor (`InstructBlipVideoImageProcessor`):
            An instance of [`InstructBlipVideoImageProcessor`]. The image processor is a required input.
        tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
        qformer_tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
        num_query_tokens (`int`, *optional*):
            Number of tokens used by the Qformer as queries, should be same as in model's config.
    )image_processor	tokenizerqformer_tokenizernum_query_tokensInstructBlipVideoImageProcessorr   c                     t        ddd      | _        |j                  | j                  gd       || _        t        |   |||       y )Nz<video>FT)
normalizedspecial)special_tokens)r
   video_token
add_tokensr   super__init__)selfr   r   r   r   kwargs	__class__s         /var/www/html/answerous/venv/lib/python3.12/site-packages/transformers/models/instructblipvideo/processing_instructblipvideo.pyr"   z#InstructBlipVideoProcessor.__init__A   sJ    %iE4Pd../E 0)5FG    imagestextadd_special_tokenspadding
truncation
max_lengthstridepad_to_multiple_ofreturn_attention_maskreturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverbosereturn_tensorsreturnc                 t   ||t        d      t               }|ot        |t              r|g}n.t        |t              st        |d   t              st        d       | j
                  d||||||||	|
|||||dd|}| j                  ~||i }| j                  j                  | j                  z  dz  }| j                  |gt        |      z  dd      }|D ]-  }t        ||   ||         D cg c]
  \  }}||z    c}}||<   / n|}|t        j                  d	       t        ||
      }|j                  |        | j                  d||||||||	|
||||||d|}|j!                  d      |d<   |j!                  d      |d<   |$| j#                  ||      }|j                  |       |S c c}}w )a%  
        This method uses [`InstructBlipVideoImageProcessor.__call__`] method to prepare image(s) or video(s) for the model, and
        [`BertTokenizerFast.__call__`] to prepare text for the model.

        Please refer to the docstring of the above two methods for more information.
        Nz3You have to specify at least one of images or text.r   zAInvalid input text. Please provide a string, or a list of strings)r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7      F)r*   r7   aK  Expanding inputs for video tokens in InstructBLIPVideo should be done in processing. Please follow instruction here (https://gist.github.com/zucchini-nlp/65f22892b054dc0d68228af56fbeaac2) to update your InstructBLIPVideo model. Using processors without these attributes in the config is deprecated and will throw an error in v4.47.)tensor_type	input_idsqformer_input_idsattention_maskqformer_attention_mask)r7    )
ValueErrorr   
isinstancestrlistr   r   r   contentlenziploggerwarning_oncer   updater   popr   )r#   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r$   encoding_text_encodingtext_encodingvideo_tokensvideo_token_encodingkimg_encodingtxt_encodingqformer_text_encodingimage_encodings                               r&   __call__z#InstructBlipVideoProcessor.__call__G   sE   4 >dlRSS>$$vd+JtAw4L !dee+T^^ #5%%#5&;*C+E'=&;+#  !N* $$0V5G "$$,,t/D/DDqH  (,~~!NSY.5Y] (6 ($ ( A ;>>RST>UWefgWh:i(6L, %|3(M!$ !/%''B *-^TMOOM*$:D$:$: %#5%%#5&;*C+E'=&;+-%  !%!$ -B,E,Ek,RH()1F1J1JK[1\H-.!11&1XNOON+S(s   2F4c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r   batch_decoder#   argsr$   s      r&   rX   z'InstructBlipVideoProcessor.batch_decode   s     
 +t~~**D;F;;r'   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r   decoderY   s      r&   r\   z!InstructBlipVideoProcessor.decode   s     
 %t~~$$d5f55r'   c                     | j                   j                  }| j                  j                  }t        t        j                  ||z               S N)r   model_input_namesr   rD   dictfromkeys)r#   tokenizer_input_namesimage_processor_input_namess      r&   r_   z,InstructBlipVideoProcessor.model_input_names   sA     !% @ @&*&:&:&L&L#DMM"7:U"UVWWr'   c                    t         j                  j                  |      rt        d| d      t        j                  |d       t         j                  j                  |d      }| j                  j                  |       d| j                  v }|r| j                  j                  d       t        |   |fi |}|r| xj                  dgz  c_        |S )NzProvided path (z#) should be a directory, not a fileT)exist_okr   )ospathisfilerA   makedirsjoinr   save_pretrained
attributesremover!   )r#   save_directoryr$   qformer_tokenizer_pathqformer_presentoutputsr%   s         r&   rk   z*InstructBlipVideoProcessor.save_pretrained   s    77>>.)~.>>abcc
NT2!#n>Q!R../EF .@OO""#67').CFCOO 344Or'   c                     t        |   |fi |}t        |t              r|d   }t	        j                  |d      }||_        |S )Nr   r   )	subfolder)r!   from_pretrainedrB   tupler   r   )clspretrained_model_name_or_pathr$   	processorr   r%   s        r&   rt   z*InstructBlipVideoProcessor.from_pretrained   sP    G+,ITVT	 i'!!I)99:Wcvw&7	#r'   r^   )NNTFNNr   NNFFFFFTN) __name__
__module____qualname____doc__rl   valid_kwargsimage_processor_classtokenizer_classqformer_tokenizer_classr"   r   r   r   r   r   boolrC   r   r   r   intr   r   rV   rX   r\   propertyr_   rk   classmethodrt   __classcell__)r%   s   @r&   r   r   (   s   $ GJ&'L=%O-H "^b#'5:;?$(,004*/+0',&+#;?#ll I0$y/4HYCZZ[l !	l
 tS/12l $%778l SMl l %SMl  (~l $(l %)l !%l  $l l  !l" !sJ!78#l& 
'l^<6 X X&  r'   r   )r|   rf   typingr   r   r   image_processing_utilsr   image_utilsr   processing_utilsr	   tokenization_utils_baser
   r   r   r   r   r   utilsr   r   autor   
get_loggerry   rH   r   r@   r'   r&   <module>r      sR    
 ( ( 2 % .  )   
		H	%A Ar'   