-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
219 lines (148 loc) · 6.25 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import csv
import xlsxwriter
from os import listdir, mkdir
from os.path import isfile, join, exists
from pylab import np, plt
class Cell:
    """Wrap the text of a single cell of a raw data row and classify it.

    The predicates below only inspect the stored text; none of them
    modifies it.
    """

    def __init__(self, content):
        # Raw text of the cell, stored unchanged.
        self.content = content

    def is_date(self):
        """Return True when the text contains both a "/" and a ":"."""
        return "/" in self.content and ":" in self.content

    def is_trial_number(self):
        """Return True when the text starts with the letter 't'.

        An empty cell is not a trial number: ``startswith`` is used rather
        than indexing the first character so that an empty string yields
        False instead of raising ``IndexError``.
        """
        return self.content.startswith('t')

    def is_empty(self):
        """Return True when the cell holds no text at all."""
        return not self.content

    def is_not_a_star(self):
        """Return True unless the text is exactly "*"."""
        return self.content != "*"
def extract_data(file_path):
    """Parse a tab-separated raw data file into a dict of equal-length columns.

    Rows are processed in file order:

    * a row whose first cell is empty or looks like a date is skipped;
    * a row whose first cell starts with 't' is a trial row: its second cell
      is converted to ``int`` ("*" counts as 0) and appended to the column
      opened by the most recent key row;
    * any other row is a key row: it opens a new, empty column named after
      its first cell (re-using a name restarts that column).

    Once the file is read, a "trial" column holding ``0 .. n_max - 1`` is
    added (``n_max`` being the longest column length), columns without any
    value are dropped, and shorter columns are right-padded with zeros.

    :param file_path: path of the tab-separated file to read.
    :return: dict mapping column name to a list of ints; an empty dict when
        no value could be extracted.
    """
    data = {}
    current_key = None

    # newline='' leaves line-ending handling to the csv module, as its
    # documentation recommends for files handed to csv.reader.
    with open(file_path, 'rt', newline='') as file:
        for row in csv.reader(file, delimiter='\t'):
            first_cell = Cell(row[0] if len(row) >= 1 else '')
            second_cell = Cell(row[1] if len(row) >= 2 else '')

            if first_cell.is_empty() or first_cell.is_date():
                print("Row '{}' will be ignored (either the first cell is empty or it is a date).".format(row))
                continue

            if not first_cell.is_trial_number():
                # Key row: create a new entry in the dictionary containing data.
                current_key = first_cell.content
                data[current_key] = []
                print("A key has been extracted from row '{}'.".format(row))
                continue

            if current_key is None:
                # Trial row met before any key row: there is no column to
                # attach the value to. Reported like any other bad row.
                print("Row '{}' will be ignored. It raised an exception ('{}')".format(
                    row, "I did not succeed finding the key."))
                continue

            try:
                value = int(second_cell.content) if second_cell.is_not_a_star() else 0
            except ValueError as e:
                # Only the int() conversion can fail here; anything else
                # would be a programming error and should not be hidden.
                print("Row '{}' will be ignored. It raised an exception ('{}')".format(row, e))
                continue

            data[current_key].append(value)
            print("A value has been extracted from row '{}'.".format(row))

    # Add a column for trial number. 'default=0' covers a file in which no
    # key row was found (max() of an empty sequence would raise otherwise).
    n_max = max((len(values) for values in data.values()), default=0)
    data["trial"] = list(range(n_max))

    # Remove keys without any value.
    data = {key: values for key, values in data.items() if values}

    # Complete missing values with zeros so every column has n_max entries.
    for values in data.values():
        values.extend([0] * (n_max - len(values)))

    return data
def write_a_new_file(data, file_path):
    """Dump a dict of columns into a single-sheet xlsx workbook.

    Each key of *data* becomes a column: the key is written in the first
    row and the associated values in the rows below it. Columns are laid
    out in sorted key order, except that a "trial" key, when present, always
    takes the first column.

    :param data: dict mapping column name to an iterable of values.
    :param file_path: path of the xlsx file to create.
    """
    book = xlsxwriter.Workbook(file_path)
    sheet = book.add_worksheet()

    # Sorted headers, with "trial" pulled to the front when it exists.
    headers = sorted(name for name in data.keys() if name != "trial")
    if "trial" in data:
        headers.insert(0, "trial")

    # One column per header: header in row 0, values from row 1 downwards.
    for col_idx, header in enumerate(headers):
        sheet.write(0, col_idx, header)
        for row_idx, value in enumerate(data[header], start=1):
            sheet.write(row_idx, col_idx, value)

    book.close()

    print("Xlsx file '{}' created with success.\n".format(file_path))
def short_analysis(data, analysis_file_path, fig_root_name):
    """Compute 'mt' for the two RT indices, save the results and plot them.

    For each index ``i`` in (1, 2) the columns "RT i" and "RT-MT i" of
    *data* are read, the entries where either of them is zero are dropped,
    and ``mt = rt_mt - rt`` is computed on what remains. For each index a
    scatter plot (mt vs rt) and two histograms (mt, rt) are saved as pdf
    files whose names start with *fig_root_name*.

    The filtered "RTi" and computed "MTi" columns of BOTH indices are then
    written to a single workbook. (Writing inside the loop, with a fresh
    dict per index, left only the columns of the last index in the file.)

    :param data: dict holding at least the keys "RT 1", "RT-MT 1", "RT 2"
        and "RT-MT 2"; a missing key raises ``KeyError``.
    :param analysis_file_path: path of the xlsx file receiving the results.
    :param fig_root_name: common prefix of the generated figure files.
    """
    results = {}

    # Suppose there are two idx for rt
    for rt_idx in (1, 2):

        # Convert the columns to arrays for easier manipulation
        rt = np.asarray(data["RT {}".format(rt_idx)])
        rt_mt = np.asarray(data["RT-MT {}".format(rt_idx)])

        # Keep only the entries where both 'rt' and 'rt_mt' differ from zero
        keep = (rt != 0) & (rt_mt != 0)
        rt = rt[keep]
        rt_mt = rt_mt[keep]

        # Compute 'mt'
        mt = rt_mt - rt

        print("Short analysis.")
        print("'mt {}' is: \n".format(rt_idx), mt)

        # Remember the columns of this index; the file is written once,
        # after the loop, so that no index overwrites another.
        results["RT{}".format(rt_idx)] = rt
        results["MT{}".format(rt_idx)] = mt

        # Do some plots
        plt.scatter(mt, rt)
        plt.xlabel("mt")
        plt.ylabel("rt")
        plt.savefig("{}_scatter_rt{}.pdf".format(fig_root_name, rt_idx))
        plt.close()

        for values, label in ((mt, "mt"), (rt, "rt")):
            plt.hist(values)
            plt.xlabel(label)
            plt.savefig("{}_hist_{}{}.pdf".format(fig_root_name, label, rt_idx))
            plt.close()

    # Save everything in a new 'xlsx' file
    write_a_new_file(file_path=analysis_file_path, data=results)
def create_folder(folder_path):
    """Make sure the directory *folder_path* exists.

    Missing parent directories are created as well, and an already existing
    directory is left untouched. Letting ``makedirs`` deal with the
    "already there" case avoids the window between a separate existence
    check and the creation call.

    :param folder_path: path of the directory to create.
    :raises FileExistsError: if *folder_path* exists but is not a directory.
    """
    # Local import: 'makedirs' is not among the file-level imports.
    from os import makedirs

    makedirs(folder_path, exist_ok=True)
def main():
    """Convert and analyse every raw data file found in the 'data' folder.

    The output folders ('figures', 'new_data', 'analysis_results') are
    created if needed. Each regular file of 'data' whose extension is
    '.xls' or '.csv' (case-insensitive) is then parsed, re-written as
    'new_data/new_<name>.xlsx', and analysed; analysis results go to
    'analysis_results/analysis_<name>.xlsx' and figures to
    'figures/fig_<name>...'. Any other file is reported and skipped.

    '<name>' is the file name without its final extension, so 'a.b.csv'
    yields 'a.b' and cannot collide with the outputs of 'a.c.csv'.
    """
    # Local import: 'splitext' is not among the file-level imports.
    from os.path import splitext

    # Path of the folder where your raw data are
    data_folder = "data"

    # Paths of the folder where the outputs of this script will go
    new_data_folder = "new_data"
    figure_folder = "figures"
    analysis_folder = "analysis_results"

    # Create the 'outputs' folders
    for folder in (figure_folder, new_data_folder, analysis_folder):
        create_folder(folder)

    # List data files; sorted because listdir() returns them in arbitrary
    # order and a stable order makes the console output reproducible.
    file_names = sorted(
        name for name in listdir(data_folder) if isfile(join(data_folder, name))
    )

    for file_name in file_names:

        # splitext() only splits off the last extension and returns an empty
        # one for names without a dot, unlike a split on every ".".
        stem, extension = splitext(file_name)

        if extension.lower() not in (".xls", ".csv"):
            print("I will ignore '{}' for conversion.".format(file_name))
            continue

        print("I will convert '{}'.\n".format(file_name))

        complete_file_path = join(data_folder, file_name)
        new_file_path = "{}/new_{}.xlsx".format(new_data_folder, stem)
        analysis_file_path = "{}/analysis_{}.xlsx".format(analysis_folder, stem)
        fig_root_name = "{}/fig_{}".format(figure_folder, stem)

        data = extract_data(file_path=complete_file_path)
        write_a_new_file(data=data, file_path=new_file_path)
        short_analysis(data=data, analysis_file_path=analysis_file_path, fig_root_name=fig_root_name)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()