# minefields.py
#
# Reads the PDF named on the command line and prints an HTML page containing
# one absolutely positioned <div> per leaf form field, placed from the
# field's Rect entry and labelled with the field's name.
from argparse import ArgumentParser
import pickle
import sys
import pprint
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1, PDFObjRef
def gettext(field):
    """Return the field's name (its ``"T"`` entry) as text.

    Every ``"[0]"`` occurrence is removed from the name.

    Args:
        field: Mapping for one form field; only its ``"T"`` entry is read.

    Returns:
        The cleaned name, or ``''`` when the field has no ``"T"`` entry.
    """
    raw = field.get("T")
    if raw is None:
        return ''

    if isinstance(raw, bytes):
        text = None
        # Only treat the bytes as UTF-16 when a byte-order mark says so.
        # Decoding unmarked bytes as UTF-16 does not reliably fail: an
        # even-length ASCII name decodes "successfully" into unrelated
        # characters instead of reaching the plain-text fallback.
        if raw[:2] in (b'\xfe\xff', b'\xff\xfe'):
            try:
                text = raw.decode('utf-16')
            except UnicodeDecodeError:
                # Malformed UTF-16 payload: keep the best-effort fallback.
                text = None
        if text is None:
            # latin-1 maps every byte to a code point, so this cannot raise.
            text = raw.decode('latin-1')
        raw = text

    return raw.replace("[0]", '')
def load_form(filename):
    """Walk the form fields of a PDF file with ``load_fields``.

    Args:
        filename: Path of the PDF file to open.

    Returns:
        A list holding one ``load_fields`` result per top-level form field,
        or an empty list when the document catalog has no ``AcroForm`` entry
        or the form lists no fields.
    """
    with open(filename, 'rb') as file:
        parser = PDFParser(file)
        doc = PDFDocument(parser)

        acroform = resolve1(doc.catalog.get('AcroForm'))
        if not acroform:
            # A PDF without an interactive form simply has nothing to walk.
            return []

        # resolve1 is applied in case 'Fields' is stored as an indirect
        # reference rather than as an inline array.
        top_level = resolve1(acroform.get('Fields')) or []

        # The walk stays inside the ``with`` block so the file is still open
        # while field objects are being resolved.
        return [load_fields(resolve1(entry)) for entry in top_level]
def load_fields(field):
    """Recursively walk a form field, printing a ``<div>`` for each leaf.

    A field with a non-empty ``Kids`` entry is treated as a container: its
    kids are resolved and visited in order, unless the container's name
    contains ``'Page2'``, in which case the whole subtree is skipped. Any
    other field is treated as a leaf, and one absolutely positioned ``<div>``
    computed from its ``Rect`` entry is written to standard output.

    Args:
        field: Resolved field dictionary.

    Returns:
        A list of the kids' results for a container; ``None`` for a leaf,
        for a skipped subtree, and for a leaf without a usable ``Rect``.
    """
    kids = field.get('Kids')
    if kids:
        if 'Page2' in gettext(field):
            return None
        return [load_fields(resolve1(kid)) for kid in kids]

    rect = resolve1(field.get('Rect'))
    if not rect or len(rect) < 4:
        # Without four coordinates there is nothing to position.
        return None
    x0, y0, x1, y1 = rect[:4]

    # The name comes from the PDF, so escape it before embedding it in HTML.
    label = (gettext(field)
             .replace('&', '&amp;')
             .replace('<', '&lt;')
             .replace('>', '&gt;'))

    # NOTE(review): the vertical position is derived from a fixed 1200 and the
    # rectangle's second coordinate; confirm this matches the intended page
    # size and edge before relying on the overlay's exact placement.
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("<div style='background-color:green;position:absolute;left:%spx;top:%spx;width:%spx;height:%spx;'>%s</div>" % (
        x0 * 2,
        (1200 - y0) * 2,
        (x1 - x0) * 2 - 3,
        (y1 - y0) * 2 - 3,
        label,
    ))
    return None
def main():
    """Print an HTML page of field overlays for the PDF in ``sys.argv[1]``.

    Writes the page header, lets ``load_form`` walk the PDF, then writes the
    page footer. Exits with a usage message on standard error when no PDF
    path was given on the command line.
    """
    if len(sys.argv) < 2:
        # sys.exit with a string reports it on stderr and exits non-zero.
        sys.exit("usage: %s FILE.pdf" % sys.argv[0])

    # A single parenthesised argument prints identically on Python 2 and 3.
    print('''<html>
<style>
div{border:solid 1px black;opacity:.5}
</style>
<body>''')
    # Called for its output only; the returned structure is not used here.
    load_form(sys.argv[1])
    print("</body></html>")
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()