
    :Qg                        d dl Z d dlZd dlmZmZmZmZmZmZm	Z	m
Z
 d dlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ  ej<                  e      Z  ed      Z! ed      Z" G d d      Z#y)    N)BinaryIO	ContainerDictIteratorListOptionalTupleAny)Rect   )settings)PDFDocumentPDFTextExtractionNotAllowedPDFNoPageLabels)	PDFParser)
dict_value)PDFObjectNotFound)	int_value)
list_value)resolve1)LITPagePagesc                       e Zd ZdZdedededee   ddf
dZdefd	Z	h d
Z
ededed    fd       Ze	 	 	 	 	 ddedeee      dededededed    fd       Zy)PDFPageak  An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes:
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
      annots: the page annotations.
      beads: a chain that represents natural reading order.
      label: the page's label (typically, the logical page number).
    docpageidattrslabelreturnNc                 X   || _         || _        t        |      | _        || _        t        | j                  j                  d            | _        t        | j                  j                  dt                           | _	        | j                  d   D cg c]  }t        |       }}t        |      | _
        d| j                  v rt        | j                  d         | _        n| j                  | _        t        | j                  j                  dd            dz   dz  | _        | j                  j                  d      | _        | j                  j                  d	      | _        d
| j                  v rt        | j                  d
         }ng }t!        |t"              s|g}|| _        yc c}w )zInitialize a page object.

        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
        label: page label string.
        LastModified	ResourcesMediaBoxCropBoxRotater   ih  AnnotsBContentsN)r   r   r   r   r   r   getlastmoddict	resourcesmediaboxcropboxr   rotateannotsbeads
isinstancelistcontents)selfr   r   r   r   mediabox_parammediabox_paramsr5   s           M/var/www/html/answerous/venv/lib/python3.12/site-packages/pdfminer/pdfpage.py__init__zPDFPage.__init__-   sQ    &



~ >?/7JJNN;/0
 <@::j;Q&
)7H^$&
 &
 '7

"!)$**Y*?!@DL==DL !!<=CsJjjnnX.ZZ^^C(
#

: 67HH(D) zH&.#&
s   F'c                 N    dj                  | j                  | j                        S )Nz(<PDFPage: Resources={!r}, MediaBox={!r}>)formatr-   r.   )r6   s    r9   __repr__zPDFPage.__repr__R   s"    9@@NNDMM
 	
    >   r&   r%   r$   r#   documentc              #      	K   dt         dt        t        t         f   dt        t        t
        t        t         t        t         t         f   f   f      f 	fd		 j                         }d}dj                  v rB 	j                  d   j                        }|D ]  \  }}  ||t        |             d} |suj                  D ]f  }|j                         D ]Q  }	 j                  |      }t        |t               r-|j#                  d      t$        u r  ||t        |             S h y # t        $ r t        j                  d       }Y w xY w# t&        $ r Y w xY ww)	Nobjparentr    c              3   z  K   t        | t              r+| }t        	j                  |            j	                         }n%| j
                  }t        |       j	                         }|j                         D ]  \  }}|j                  v s||vs|||<     |j                  d      }|!t        j                  s|j                  d      }|t        u rBd|v r>t        j                  d|d          t        |d         D ]  } 
||      E d {     y |t        u rt        j                  d|       ||f y y 7 -w)NTypetypeKidszPages: Kids=%rzPage: %r)r3   intr   getobjcopyobjiditemsINHERITABLE_ATTRSr*   r   STRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)rA   rB   rJ   treekv	tree_typecclsr?   searchs           r9   rX   z$PDFPage.create_pages.<locals>.search[   s#     #s#!(//%"89>>@ 		!#++- ,,.  A---!4-DG  (I  HHV,	M)fn		*DL9#DL1 /A%a../l*		*d+dm# + /s   BD;	D;A=D;D9.D;Fr   TrD   )objectr   strr   r	   rG   get_page_labelsr   	itertoolsrepeatcatalognextxrefs
get_objidsrH   r3   r,   r*   rQ   r   )
rW   r?   page_labelspagesobjectsrJ   rR   xrefrA   rX   s
   ``       @r9   create_pageszPDFPage.create_pagesY   sk    	$	$!%c6k!2	$eCfd66>.B&B!CCDE	$6	13;3K3K3MK h&&&X--g68H8HIG!( (E4k1BCC   !__. E&ooe4%c40SWWV_5T"%hsD<M"NN	 	'  	1#**40K	1" - sP   AE)D6 ,A6E)#AE1E)6EE)EE)	E&#E)%E&&E)fppagenosmaxpagespasswordcachingcheck_extractablec              #   "  K   t        |      }t        |||      }|j                  s,|rd|z  }	t        |	      d|z  }
t        j                  |
       t        | j                  |            D ]  \  }}|r||vr| |s||dz   k  s y  y w)N)rj   rk   z"Text extraction is not allowed: %rzThe PDF %r contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this caser   )r   r   is_extractabler   rO   warning	enumeraterf   )rW   rg   rh   ri   rj   rk   rl   parserr   	error_msgwarning_msgpagenopages                r9   	get_pageszPDFPage.get_pages   s      2&8WE !! @2E	1)<<A DF	F  K('(8(8(=> 	NVTF'1JH
2	 	s   A?BBB)Nr    TF)__name__
__module____qualname____doc__r   rY   r   rZ   r:   r=   rL   classmethodr   rf   r   r   rG   boolrv    r>   r9   r   r      s    *#/#/(.#/7=#/FNsm#/	#/J
# 

 G1K 1HY4G 1 1f  -1"'## )C.)# 	#
 # #  # 
)	# #r>   r   )$r\   loggingtypingr   r   r   r   r   r   r	   r
   pdfminer.utilsr   rw   r   pdfdocumentr   r   r   	pdfparserr   pdftypesr   pdfexceptionsr   r   r   r   psparserr   	getLoggerrx   rO   rQ   rN   r   r~   r>   r9   <module>r      sg      R R R   R R     ,     g! 6{GZ Zr>   