-
Notifications
You must be signed in to change notification settings - Fork 0
/
obj_reference_gender.py
executable file
·152 lines (119 loc) · 4.82 KB
/
obj_reference_gender.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/python
'''
This tool is used for the analysis of object references
with respect to the gender of the driver and the copilot
'''
import argparse
import os
import read_write_annotation_files as rw
import annotation_schema
import sys
import metadata
read_complex = rw.read_annotation_file
read_simple = rw.read_simple_annotation_file
write_complex = rw.write_annotation_file
write_simple = rw.write_simple_annotation_file
def _read_run_list(path):
    """Return the run ids listed one per line in the file at *path*.

    Surrounding whitespace is stripped and blank lines are skipped, so a
    trailing empty line does not turn into an empty run id.
    """
    with open(path, 'r') as run_list_file:
        stripped = (line.strip() for line in run_list_file)
        return [run_id for run_id in stripped if run_id]


def _new_count_table(objList, gestureList, genderList):
    """Return a zeroed nested dict indexed as [object][gesture][gender]."""
    return {
        objName: {
            gesture: {gen: 0 for gen in genderList}
            for gesture in gestureList
        }
        for objName in objList
    }


def _split_reference_label(objLabel):
    """Split a label such as 'X (Gesture)' into ('X', '(Gesture)').

    Everything from the first '(' onwards is the gesture part; a label
    without '(' yields an empty gesture string.
    """
    name, paren, rest = objLabel.partition('(')
    return name.strip(), paren + rest


def _print_count_table(counts, objList, gestureList, genderList,
                       indent=False):
    """Print one tab-separated row per (gender, object, gesture) count.

    With indent=True every row starts with an empty field, i.e. a leading
    tab, which visually nests the rows under a preceding header line.
    """
    for gen in genderList:
        for objName in objList:
            for gesture in gestureList:
                row = [objName, gesture, gen,
                       str(counts[objName][gesture][gen])]
                if indent:
                    row.insert(0, '')
                # Single-argument print(...) behaves the same under the
                # Python 2 print statement and the Python 3 function.
                print('\t'.join(row))


def main():
    """Count object references per object, gesture and speaker gender.

    Reads the run list, then for every run reads its metadata (driver and
    copilot gender) and its object-reference annotations. Each annotation
    is counted once per distinct speaker among its words, under that
    speaker's gender. Per-run and overall tables are printed to stdout.
    """
    parser = argparse.ArgumentParser(description='''
        Script to perform an analysis on the object references
        vs gender of the speakers.
        ''')
    parser.add_argument('data',
                        metavar='<data directory>',
                        help='''
                        directory where all the annotations
                        are stored
                        ''')
    parser.add_argument('list',
                        metavar='<run list>',
                        help='''
                        list of all the runs for which
                        analysis is to be performed
                        ''')
    # --config is still accepted for command-line compatibility, but
    # nothing in this script reads its value.
    parser.add_argument('--config',
                        type=str,
                        default='',
                        help='''
                        file where other optional configurations
                        are stored
                        ''')
    args = vars(parser.parse_args())
    dataDir = args['data']
    runList = _read_run_list(args['list'])

    # Object names are the schema label names with the trailing
    # ' (...)' gesture suffix removed, de-duplicated and sorted.
    objList = sorted(set(
        objLabel.name.split(' (')[0]
        for objLabel in annotation_schema.objectReferenceComplexLabels))
    print(objList)
    genderList = ['female', 'male']
    gestureList = ['(Gesture)', '(No Gesture)']

    analysisCount = _new_count_table(objList, gestureList, genderList)
    totalCount = 0
    errCount = 0

    for runId in runList:
        # Fresh per-run table for every run.
        runAnalysisCount = _new_count_table(objList, gestureList, genderList)

        run = os.path.join(dataDir, runId)
        objectReferenceId = os.path.join(run, 'object-reference.xml')
        metadataId = os.path.join(run, 'metadata.xml')

        metadataObj = metadata.metadata()
        metadataObj.Read(metadataId)
        copilot_gender = metadataObj.get_copilot_gender()
        driver_gender = metadataObj.get_driver_gender()
        gender = {'driver': driver_gender, 'copilot': copilot_gender}
        print('%s driver: %s copilot: %s'
              % (runId, driver_gender, copilot_gender))

        # Only the annotations are used; the words and notes that
        # read_complex also returns are ignored.
        _, objAnnotations, _ = read_complex(objectReferenceId)
        for objAnnotation in objAnnotations:
            objLabel = objAnnotation.label
            label, gesture = _split_reference_label(objLabel)

            # Each distinct speaker of the annotation contributes one count.
            # A speaker other than 'driver'/'copilot' raises KeyError here,
            # exactly as before.
            speakers = sorted(set(word.speaker for word in objAnnotation.words))
            genList = [gender[spk] for spk in speakers]

            for gen in genList:
                try:
                    analysisCount[label][gesture][gen] += 1
                except KeyError:
                    # Label, gesture or gender is not part of the table.
                    errCount += 1
                    # Only .label and .words are used elsewhere, so fall
                    # back to the label if the annotation has no .name.
                    errName = getattr(objAnnotation, 'name', objLabel)
                    print('%s : error for: %s' % (runId, errName))
                else:
                    # Both tables share the same keys, so this cannot fail
                    # once the cumulative update succeeded.
                    runAnalysisCount[label][gesture][gen] += 1
                    totalCount += 1

        # The per-run counts were maintained but never printed; the
        # indented table under each run header showed the cumulative
        # counts instead. Report the per-run counts here.
        # NOTE(review): confirm per-run figures are what is wanted here.
        _print_count_table(runAnalysisCount, objList, gestureList,
                           genderList, indent=True)

    # Python 2's print statement writes no separating space after an item
    # that ends in a tab, so the number follows the tab directly.
    print('total\t%s' % totalCount)
    print('errors\t%s' % errCount)
    _print_count_table(analysisCount, objList, gestureList, genderList)


if __name__ == '__main__':
    main()