
    :Qg                     v    d dl Z d dlZd dlmZmZ ddlmZ ddlmZm	Z	 ddl
mZmZmZmZmZmZ  G d de      Zy)	    N)PdfFileReaderPdfFileWriter   )	TableList)StreamLattice)TemporaryDirectoryget_page_layoutget_text_objectsget_rotationis_urldownload_urlc                   2    e Zd ZdZd	dZd Zd Zddi fdZy)

PDFHandlera  Handles all operations like temp directory creation, splitting
    file into single page PDFs, parsing each PDF and then removing the
    temp directory.

    Parameters
    ----------
    filepath : str
        Filepath or URL of the PDF file.
    pages : str, optional (default: '1')
        Comma-separated page numbers.
        Example: '1,3,4' or '1,4-end' or 'all'.
    password : str, optional (default: None)
        Password for decryption.

    Nc                 b   t        |      rt        |      }|| _        |j                         j	                  d      st        d      |d| _        n=|| _        t        j                  d   dk  r | j                  j                  d      | _        | j                  | j                  |      | _        y )N.pdfzFile format not supported r      ascii)r   r   filepathlowerendswithNotImplementedErrorpasswordsysversion_infoencode
_get_pagespages)selfr   r   r   s       M/var/www/html/answerous/venv/lib/python3.12/site-packages/camelot/handlers.py__init__zPDFHandler.__init__%   s    (#H-H ~~((0%&ABBDM$DM"Q& $ 4 4W =__T]]E:
    c                    g }|dk(  r|j                  ddd       nt        |d      }t        |d      }|j                  r|j	                  | j
                         |dk(  r#|j                  d|j                         d       n|j                  d      D ]|  }d	|v rP|j                  d	      \  }}|d
k(  r|j                         }|j                  t        |      t        |      d       W|j                  t        |      t        |      d       ~ |j                          g }	|D ]&  }
|	j                  t        |
d   |
d
   dz                ( t        t        |	            S )a~  Converts pages string to list of ints.

        Parameters
        ----------
        filepath : str
            Filepath or URL of the PDF file.
        pages : str, optional (default: '1')
            Comma-separated page numbers.
            Example: '1,3,4' or '1,4-end' or 'all'.

        Returns
        -------
        P : list
            List of int page numbers.

        1r   )startendrbFstrictall,-r'   r&   )appendopenr   isEncrypteddecryptr   getNumPagessplitintcloseextendrangesortedset)r    r   r   page_numbersinstreaminfilerabPps              r!   r   zPDFHandler._get_pages4   sA   " C<!A 67Hd+H"8E:F!!t}}-~##a8J8J8L$MNS) NAax wws|1: & 2 2 4A$++c!fSV,LM$++c!fSV,LMN NN 	6AHHU1W:qx!|45	6c!f~r#   c                    t        |d      5 }t        |d      }|j                  r|j                  | j                         t
        j                  j                  |d| d      }t
        j                  j                  |      \  }}|j                  |dz
        }	t               }
|
j                  |	       t        |d      5 }|
j                  |       ddd       t        |      \  }}t        |d	
      }t        |d
      }t        |d
      }t        |||      }|dk7  r	dj                  |j!                  dd      d|g      }t        j"                  ||       t        |d      }t        |d      }|j                  r|j                  | j                         t               }
|j                  d      }	|dk(  r|	j%                  d       n|dk(  r|	j'                  d       |
j                  |	       t        |d      5 }|
j                  |       ddd       |j)                          ddd       y# 1 sw Y   dxY w# 1 sw Y   /xY w# 1 sw Y   yxY w)a  Saves specified page from PDF into a temporary directory.

        Parameters
        ----------
        filepath : str
            Filepath or URL of the PDF file.
        page : int
            Page number.
        temp : str
            Tmp directory.

        r(   Fr)   page-r   r   wbNchar)ltypehorizontal_textvertical_textr   pagerA   _rotatedr   anticlockwiseZ   	clockwise)r/   r   r0   r1   r   ospathjoinsplitextgetPager   addPagewriter
   r   r   replacerenamerotateClockwiserotateCounterClockwiser5   )r    r   rI   tempfileobjr<   fpathfrootfextrA   outfileflayoutdimcharsrG   rH   rotation	fpath_newr;   s                       r!   
_save_pagezPDFHandler._save_page^   s    (D! !	!W"759F!!t}}-GGLLtfD'9:E''**51KE4tax(A#oGOOAeT" !aa !)%0KFC$V6:E.v=NOO,V?KM#E?MJH2~GGU]]63%?T$RS			%+	40&x>%%NN4==1'/NN1%.%%b),,,R0"%& %!MM!$% C!	! !	!! !,% %?!	! !	!s=   B6IH7D/III7I	<II		IIlatticeFc           
         g }t               5 }| j                  D ]  }| j                  | j                  ||       ! | j                  D cg c]&  }t        j
                  j                  |d| d      ( }}|dk(  rt        di |n
t        di |}	|D ]'  }|	j                  |||      }
|j                  |
       ) 	 ddd       t        t        |            S c c}w # 1 sw Y   "xY w)a  Extracts tables by calling parser.get_tables on all single
        page PDFs.

        Parameters
        ----------
        flavor : str (default: 'lattice')
            The parsing method to use ('lattice' or 'stream').
            Lattice is used by default.
        suppress_stdout : str (default: False)
            Suppress logs and warnings.
        layout_kwargs : dict, optional (default: {})
            A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
        kwargs : dict
            See camelot.read_pdf kwargs.

        Returns
        -------
        tables : camelot.core.TableList
            List of tables found in PDF.

        rC   r   rf   )suppress_stdoutlayout_kwargsN )r	   r   re   r   rN   rO   rP   r   r   extract_tablesr6   r   r8   )r    flavorrh   ri   kwargstablestempdirrA   r   parserts              r!   parsezPDFHandler.parse   s    0 ! 		!WZZ ;q':;EIZZPRWW\\'U1#T?;PEP*0I*=W&v&6CSFCSF !))m *  a 	!		! (( Q		! 		!s   =C 
+C5A	C C  C))r%   N)__name__
__module____qualname____doc__r"   r   re   rr   rj   r#   r!   r   r      s&     ;(T.!b R#)r#   r   )rN   r   PyPDF2r   r   corer   parsersr   r   utilsr	   r
   r   r   r   r   objectr   rj   r#   r!   <module>r|      s.    
 
 /  $ ]) ])r#   