-
Notifications
You must be signed in to change notification settings - Fork 0
/
exifMunch3.py
81 lines (75 loc) · 3.1 KB
/
exifMunch3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os, re
from iptcinfo import IPTCInfo
import csv
# Scan every sub-folder of the current directory for pictures, read each
# picture's IPTC caption, pull the car (year + make & model) and the
# description out of the caption's HTML-ish markup, and write one row per
# picture to forGregg.csv.
#
# TODO: add the remaining caption fields (location, source, ...) as CSV
# columns; see allthedata.txt.

# Lower-case file-name suffixes that are treated as pictures.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.bmp')

# Captures the text between "<b>Car:</b><br>" and the next "<p>".
CAR_PATTERN = re.compile(r'<b>Car:</b><br>(.*?)<p>')

DESCRIPTION_LABEL = "Description:"
# Distance from the start of the label to the start of the description text.
# The original hard-coded 20, which equals len("Description:</b><br>");
# presumably that is the markup following the label -- confirm against
# real captions.
DESCRIPTION_OFFSET = 20
LOCATION_MARKER = "<p><b>Location:"


def is_image(name):
    """Return True if the file name *name* ends with a known picture suffix.

    The comparison is case-insensitive.  (The original tested for the
    substring anywhere in the name, so e.g. "jpg_notes.txt" was treated as
    a picture.)
    """
    return str(name).lower().endswith(IMAGE_EXTENSIONS)


def parse_caption(caption):
    """Split an IPTC caption into ``(make_and_model, year, description)``.

    Line breaks are removed from *caption* before parsing.

    * ``make_and_model`` is ``None`` when no (non-empty) car entry is found.
      When the car text starts with four digits, those digits become
      ``year`` and the text after the following separator character becomes
      ``make_and_model``; otherwise ``year`` is ``''`` and the whole car
      text is the make & model.
    * ``description`` is the text after the "Description:" label up to the
      location marker (or to the end of the caption when there is none),
      or ``''`` when the caption has no description label.
    """
    text = ''.join(caption.splitlines())

    # search(), not match(): the car entry need not open the caption.
    found = CAR_PATTERN.search(text)
    car = found.group(1) if found else ''

    year = ''
    model = car if car else None
    leading = car[:4]
    if len(leading) == 4 and leading.isdigit():
        year = leading
        model = car[5:]  # skip the year and the one separator after it

    description = ''
    label_at = text.find(DESCRIPTION_LABEL)
    # str.find() returns -1 for "not found" and 0 for "found at the start",
    # so the result must be compared with -1 rather than used as a boolean.
    if label_at != -1:
        begin = label_at + DESCRIPTION_OFFSET
        location_at = text.find(LOCATION_MARKER)
        if location_at != -1:
            description = text[begin:location_at]
        else:
            description = text[begin:]

    return model, year, description


def read_caption(path):
    """Return the IPTC caption of the picture at *path*, or ``None``.

    ``None`` is returned, after printing a short diagnostic, when the file
    cannot be opened, IPTCInfo cannot read it, or the caption lookup fails;
    it is returned silently when the IPTC object or the caption is empty.
    """
    try:
        # Pictures are binary data; text mode can corrupt or fail the read.
        photo = open(path, 'rb')
    except (IOError, OSError) as err:
        print(err)
        return None
    with photo:
        try:
            info = IPTCInfo(photo)
        except Exception:
            # Kept broad, like the original bare except: the exception types
            # raised by the iptcinfo package are not visible from this file.
            print("IPTC Error (not present)")
            return None
        if not info:
            return None
        try:
            caption = info.getData()['caption/abstract']
        except Exception:
            print("IPTC, but no caption")
            return None
        return caption or None


def main():
    """Write forGregg.csv for all pictures found one folder level down.

    Prints a running picture count, then a summary line with the number of
    pictures seen and the number that produced no CSV row, then the number
    of distinct make & model values written.
    """
    pictures = 0
    errs = 0
    cars = set()
    # NOTE: text mode 'w' is kept from the original; the csv documentation
    # recommends 'wb' on Python 2 and newline='' on Python 3.
    with open('forGregg.csv', 'w') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(['uri', 'make & model', 'year', 'description'])
        for folder in os.listdir('.'):
            try:
                names = os.listdir(folder)
            except OSError as err:
                # Plain files in the current directory end up here.
                print(err)
                continue
            for pic in names:
                if not is_image(pic):
                    continue
                pictures += 1
                uri = folder + '/' + pic
                caption = read_caption(uri)
                if caption is None:
                    errs += 1
                else:
                    model, year, desc = parse_caption(caption)
                    if model is None:
                        print("Could not determine model")
                        print("Unspecified error with " + pic
                              + " (probably in the metadata)")
                        errs += 1
                    else:
                        csv_out.writerow([uri, model, year, desc])
                        cars.add(model)
                print(pictures)
    print(str(pictures) + " pictures processed, with " + str(errs) + " errors")
    print(len(cars))


if __name__ == "__main__":
    main()