-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
130 lines (112 loc) · 4.33 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
import pandas as pd
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1
from tkinter import *
from tkinter import filedialog
def _decode_field_name(field, encoding):
    """Return the form field's 'T' entry decoded to ``str``."""
    raw_name = field.get('T')
    try:
        return str(raw_name, encoding)
    except (TypeError, UnicodeDecodeError):
        # Fallback kept from the original code, whose comment attributes it
        # to a diameter sign in the field name: retry without the last
        # eight elements of the raw name.
        return str(raw_name[:-8], encoding)


def _selected_value(name, options, selected):
    """Map the field's 'V' entry onto its 'Opt' list.

    Returns ``selected`` unchanged when the field has no options or when no
    option matches, so a value from a previously processed field can never
    leak into this one.
    """
    if options is None:
        return selected
    value = selected
    # resolve1 is a no-op on objects that are not indirect references.
    for option in resolve1(options):
        option = resolve1(option)
        if name == 'Beobachter':
            # This field stores a flat option list, not [export, display] pairs.
            if option == selected:
                value = option
        elif option[0] == selected:
            value = option[1]
    return value


def _stringify(value, encoding):
    """Convert a raw PDF value to the text that is written to the CSV."""
    if isinstance(value, bytes):
        try:
            return str(value, encoding)
        except UnicodeDecodeError:
            # Best effort, as in the original: keep the undecodable bytes.
            return value
    rendered = str(value)
    if rendered.startswith('/'):
        # Drop the first two characters and the last one of the rendering
        # (e.g. "/'Yes'" becomes "Yes").
        return rendered[2:-1]
    return rendered


def _read_form_fields(pdf_path, encoding):
    """Return ``(names, values)`` for every AcroForm field of one PDF file."""
    names = []
    values = []
    with open(pdf_path, 'rb') as pdf_file:
        doc = PDFDocument(PDFParser(pdf_file))
        fields = resolve1(doc.catalog['AcroForm'])['Fields']
        for field_ref in fields:
            field = resolve1(field_ref)
            name = _decode_field_name(field, encoding)
            raw_value = _selected_value(name, field.get('Opt'), field.get('V'))
            names.append(name)
            values.append(_stringify(raw_value, encoding))
    return names, values


def create_csv(folder_path):
    """Export the form fields of every ``.pdf`` in *folder_path* to a CSV.

    One row is written per PDF file, with the file name in the first column
    ``file`` followed by one column per form field. The CSV is saved as
    ``folder_path + '.csv'``. Progress is reported through the module-level
    ``text_count`` variable and the ``root`` window is destroyed once the
    CSV has been written.

    If the folder contains no ``.pdf`` files, the progress text is set to
    "0 von 0" and the function returns without writing a CSV or closing the
    window.

    Raises:
        KeyError: if a PDF has no 'AcroForm' entry in its catalog.
    """
    pdf_files = [f for f in os.listdir(folder_path) if f.endswith('.pdf')]
    encoding = 'iso-8859-1'
    files_count = len(pdf_files)

    if not pdf_files:
        # Previously this case crashed on an unbound DataFrame.
        text_count.set(f'0 von {files_count}')
        root.update()
        return

    frames = []
    for done, pdf_file_name in enumerate(pdf_files, start=1):
        pdf_path = os.path.join(folder_path, pdf_file_name)
        names, values = _read_form_fields(pdf_path, encoding)
        frame = pd.DataFrame([values], columns=names)
        frame['file'] = [pdf_file_name]
        frames.append(frame)
        text_count.set(f'{done} von {files_count}')
        # Let Tk redraw so the counter is visible during the export.
        root.update()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(frames, sort=False)
    df = df.replace({'None': '-'})
    df = df.fillna('-')
    # Move the file name to the first column.
    df.insert(0, 'file', df.pop('file'))
    df.to_csv(folder_path + '.csv', index=False)
    root.destroy()
def get_folder_path():
    """Ask the user for a folder and export its PDF forms to a CSV.

    Opens a directory chooser starting in the current working directory.
    If the dialog is cancelled nothing happens; otherwise the status text is
    updated and ``create_csv`` is run on the chosen folder.
    """
    # 'initialdir' is the documented option name; the original passed the
    # abbreviated form 'initial'.
    folder_selected = filedialog.askdirectory(initialdir=os.getcwd())
    # A cancelled dialog yields an empty result. Truthiness also covers a
    # non-string empty value (e.g. an empty tuple), which the previous
    # comparison against '' would have passed on to create_csv.
    if not folder_selected:
        return
    new_text = 'csv file is built. \n This can take a moment.'
    text.set(new_text)
    # Redraw now so the message is visible before the export starts.
    root.update()
    create_csv(folder_selected)
def main():
    """Build the PTC_Exporter window and run the Tk event loop.

    Publishes ``root``, ``text`` and ``text_count`` as module globals so the
    button callback and the export routine can update the window.
    """
    global root, text, text_count

    root = Tk()
    root.geometry('400x300')
    root.title('PTC_Exporter')

    # Optional logo: shown only if Logo.png lies in the working directory.
    logo_path = os.path.join(os.getcwd(), 'Logo.png')
    if os.path.isfile(logo_path):
        logo_label = Label(root, compound='top')
        # Keep the image referenced on the widget.
        logo_label.logo_png = PhotoImage(file=logo_path)
        logo_label['image'] = logo_label.logo_png
        logo_label.pack()

    choose_button = Button(root, text='Choose folder', command=get_folder_path)
    choose_button.pack()

    # Status line.
    text = StringVar()
    text.set('Choose folder with button')
    Label(root, textvariable=text).pack()

    # Progress counter, empty until an export is running.
    text_count = StringVar()
    text_count.set('')
    Label(root, textvariable=text_count).pack()

    # Static usage hint.
    info_text = StringVar()
    info_text.set(
        'Info: This tool exports data from pdf forms in a csv file. \n'
        'Click on the "Choose folder" and choose a folder which contains \n'
        'the pdf form files. You can find the generated csv file afterwards \n'
        'in the folder where the chosen folder is named after the folder name. \n'
        'The csv file can afterwards be imported in an excel file for further use.'
    )
    info_label = Label(root, textvariable=info_text)
    info_label.place(relx=0.5, rely=0.8, anchor='center')
    info_label.pack()

    root.mainloop()
# Start the GUI only when this file is executed as a script.
if __name__ == "__main__":
    main()