def readTablePdf(fileName="demoData.pdf"):
    """Run an interactive console menu for extracting tables from a PDF.

    The menu is printed repeatedly until the user enters ``0``. Each option
    calls tabula on ``fileName`` and either prints the result or writes it
    to an output file whose name is read from the console.

    Input that is not an integer, or an integer that is not on the menu, is
    reported and the menu is shown again instead of ending the session.

    Args:
        fileName: Path of the PDF to read. Defaults to ``"demoData.pdf"``.

    Returns:
        None. Any exception raised while handling an option (or while
        reading from the console) is caught, printed, and ends the menu.
    """
    try:
        while True:
            print(
                "\n 1: Print DataFrame\n 2: Print JSON\n 3: Print Single Table\n 4: Print Multiple Tables "
            )
            print(
                " 5: Save to JSON File\n 6: Save to CSV File\n 7: Tabula Info\n 0: EXIT"
            )
            try:
                choice = int(input("Choose one of the options from above: "))
            except ValueError:
                # A typo must not be reported as a file error or end the menu.
                print("Invalid choice: please enter a number between 0 and 7.")
                continue

            if choice == 0:
                break
            elif choice == 1:
                # Simply reads the PDF and prints what tabula returns.
                df = tabula.read_pdf(fileName)
                print("DATA: \n", df)
            elif choice == 2:
                # Reads the PDF and returns the data in JSON format.
                df = tabula.read_pdf(fileName, output_format="json")
                print("JSON DATA: \n", df)
            elif choice == 3:
                # Same call as option 1; a ``pages`` argument could be passed
                # to retrieve the table from specific pages.
                df = tabula.read_pdf(fileName)
                print("SINGLE TABLE DATA: \n", df)
            elif choice == 4:
                # Returns the data of all tables on all pages.
                df = tabula.read_pdf(fileName, multiple_tables=True, pages="all")
                print("ALL TABLES DATA: \n", df)
            elif choice == 5:
                # Saves the complete file data in JSON format into an
                # external file of the given name.
                output = input("ENTER OUTPUT FILE NAME")
                tabula.convert_into(
                    fileName,
                    output,
                    output_format="json",
                    multiple_tables=True,
                    pages="all",
                )
            elif choice == 6:
                # Saves the complete file data in CSV format into an
                # external file of the given name.
                output = input("ENTER OUTPUT FILE NAME")
                tabula.convert_into(
                    fileName,
                    output,
                    output_format="csv",
                    multiple_tables=True,
                    pages="all",
                )
            elif choice == 7:
                # Tabula information.
                print("TABULA INFORMATION:\n", tabula.environment_info())
            else:
                # Only 0 is advertised as EXIT; other numbers are mistakes.
                print("Invalid choice: please enter a number between 0 and 7.")
    except Exception as ex:
        print("Exception in opening file: ", ex)
def test_environment_info(self):
    """Check that ``tabula.environment_info()`` returns ``None``."""
    # assertIsNone tests identity with None and reports a clearer failure
    # than assertEqual(..., None).
    self.assertIsNone(tabula.environment_info())
import tabula

# Check the environment via tabula-py; this shows the Python version,
# the Java version, and the OS environment.
tabula.environment_info()

# Raw strings: these Windows paths contain backslash sequences such as "\S"
# and "\A" that are not valid escapes in an ordinary string literal.
pdf_path = r"G:\Shared drives\Agrigate\Data Analytics and Data Science\Research\Industry specs and knowledge\Fresh-Food-Trade-SA-2020.pdf"
csv_path = r"G:\Shared drives\Agrigate\Data Analytics and Data Science\Research\Industry specs and knowledge\Fresh-Food-Trade-SA-2020.csv"

# Read the PDF (all pages) and write it out as CSV.
tabula.convert_into(
    pdf_path,
    csv_path,
    pages="all",
    output_format="csv",
    stream=True,
)
#
# runtime: python37
# handlers:
# - url: /.*
#   secure: always
#   script: auto
# NOTE(review): the lines above look like commented-out deployment
# configuration kept for reference — confirm whether they are still needed.

# Standard library
import datetime
import json
import re
from io import BytesIO

# Third-party
import pandas as pd
import pdfplumber
import pygsheets
import requests
import tabula
from bs4 import BeautifulSoup
from flask import Flask, render_template, request
from pandas import DataFrame

# Print tabula's environment report at import time, then the call's return value.
test = tabula.environment_info()
print("TEST")
print(test)

# Authorize with the service-account file and open the first worksheet of
# the "COVID19_japan" spreadsheet.
client = pygsheets.authorize(service_file="credentials.json")
sheet = client.open("COVID19_japan")
wks = sheet[0]