# pip install tika
from tika import parser

# Parse the PDF with tika; the result is indexed like a dict and the
# extracted text is read from its 'content' entry.
raw = parser.from_file('yourfile.pdf')
print(raw['content'])
6
#!pip install tabula-py
import tabula

# Read all table data found on pages 1 and 2 of the PDF.
df = tabula.read_pdf("sample.pdf", pages=[1, 2])
# Select the entry at index 1 of the result (the second table, assuming
# read_pdf returned a list of tables). As a bare expression this is only
# displayed when it is the last line of a notebook cell; in a plain script
# it produces no output.
df[1]

# Alternative: write the extracted tables straight to a CSV file.
#tabula.convert_into("sample.pdf", "sample.csv", output_format="csv")
import PyPDF2

# NOTE(review): PdfFileReader / numPages / getPage / extractText are the
# pre-3.0 PyPDF2 names; PyPDF2 3.0 replaced them with PdfReader,
# len(reader.pages), reader.pages[n] and extract_text(). Confirm which
# PyPDF2 version is installed before running.

# Open in binary mode; the with-block closes the file even if parsing fails.
with open(r"F:\pdf.pdf", 'rb') as pdfFileObject:
    pdfReader = PyPDF2.PdfFileReader(pdfFileObject)

    print(" No. Of Pages :", pdfReader.numPages)

    # Fetch the page at index 0 and print the text extracted from it.
    pageObject = pdfReader.getPage(0)

    print(pageObject.extractText())
# pip install PyPDF2
import PyPDF2

# NOTE: the file handle is left open so pdfReader can keep being used after
# this point; call pdfFileObject.close() once finished with the reader.
pdfFileObject = open(r"F:\fileName.pdf", 'rb')
pdfReader = PyPDF2.PdfFileReader(pdfFileObject)  # Creating reader obj
print(" No. Of Pages :", pdfReader.numPages)  # To know no. of pages