-
Notifications
You must be signed in to change notification settings - Fork 0
/
start.py
297 lines (176 loc) · 8.86 KB
/
start.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 21 11:29:20 2018
This file contains the main code for setting up the data analysis. The main loop aligns the 3 excel files in order to
send the only the required data for analysis.
ver 0.01
@author: PKellicker
"""
import pandas as pd
import numpy as np
import os
import re
import argparse
from filter import find_curr_recipe
from parse_data import parse_data
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
# searches for the correct line to start so headers are correct. Used when number of lines to skip is dynamic
def skip_to(fle, line, folder, **kwargs):
os.chdir(folder)
if os.stat(fle).st_size == 0:
raise ValueError("File is empty")
with open(fle) as f:
pos = 0
cur_line = f.readline()
while not cur_line.startswith(line):
pos = f.tell()
cur_line = f.readline()
f.seek(pos)
return pd.read_csv(f, **kwargs)
# ********************************************************************* #
def run(args):
### Setup ###
# define outside cwd dir
base = os.getcwd() #+ "\\CDT Test"
csv_folder = base + "\\" + args.data #"+\\Use 1" #
#csv_folder = "C:\\Users\\pkellicker\\Desktop\\Coffee Code\\Git Code\\Excel\\Use 2" # C:\Users\pkellicker\Desktop\Coffee Code\Code
# base = "C:\\Users\\pkellicker\\Desktop\\Coffee Code\\Git Code\\Excel"
os.chdir(base)
# import static files
cesar_name = args.kpi #= "CP300 & CF90 Cup temp-volume.xlsm" #
recipe_name = args.recipe #= "CP300 EB2.xlsx" #
cesar = pd.read_excel(cesar_name)
recipe = pd.read_excel(recipe_name, skiprows = 1)
# ********************************************************************* #
# * * * work cesar to correspond w/ csv * * * #
cesar_csv = cesar.filter(items = ['CSV Filename']) # Create df of cesar with csv filename only *** does skip Tea line
cesar_csv = cesar_csv.dropna() # Clean up df
# find length based on this column - should be more accurate
cesar_length = cesar.filter(items = ["Pre Test Chamber Weight grams"])
cesar_length = cesar_length.dropna()
file_count = cesar_length.shape[0] # get number of csv files
names = cesar_csv.values # I want a string array of the names so I can feed them into the fn skip_to
# ********************************************************************* #
# * * * work recipe to correspond w/ csv * * * #
tea = 0 # flag for when we hit tea
# create array of dates -> field in which Tea brew start is noted
cesar_date = cesar.filter(items = ['Date'])
cesar_date = cesar_date.dropna()
dates = cesar_date.values
# find number of date entries to avoid index error
num_dates = cesar_date.shape[0]
# ********************************************************************* #
count = 0
array = np.zeros((file_count, 6))
end_time = args.end
# Load workbook for storing data & find correct columns' index
wb = load_workbook(base + "\\" + cesar_name, read_only = False, keep_vba = True)
ws = wb['Data']
# find column number
avg_bot_col = cesar.columns.get_loc("Avg Bot") + 1
avg_mid_col = avg_bot_col -1
avg_top_col = avg_bot_col -2
time_zero_col = cesar.columns.get_loc("Time Zero Temp") + 1
#convert column number to letter
avg_bot_col_let = get_column_letter(avg_bot_col)
avg_mid_col_let = get_column_letter(avg_mid_col)
avg_top_col_let = get_column_letter(avg_top_col)
#time_zero_col_let = get_column_letter(time_zero_col)
### Loop ###
for i in range(0,file_count):
# check for when tea starts - also for incrementing since one line in xlsx is for *** careful it needs to find tea i.e. start b4 tea line
if i < num_dates:
if tea == 0:
if dates[i] == "EB2 TEA BREWS" :
tea = 1
file_temp = names[i] + ".csv" # add the .csv extension to file name
file_temp = str(file_temp)[2:-2] # remove first 2 and last 2 chars -> ['']
if os.path.isfile(csv_folder + "\\" + file_temp) == False: # check to see if file exists
continue
data_curr = skip_to(file_temp, "Number", csv_folder) # current csv file working with
data_table = skip_to(file_temp, "CH", csv_folder)
#data_f = data_table.iloc[0:10, 0:2] # get top of csv file -> channel table
#data_table.head(8).to_excel("csv_Headers.xlsx")
# create filter for current row, create dataframe of current row
cesar_rowi = cesar['Test Number'] == i + 1 + tea
cesar_curr = cesar[cesar_rowi] # cesar_curr = row corresponding to current csv file *** need to step by 1 when -> tea
recipe_curr_1 = find_curr_recipe(cesar_curr, recipe, tea)
recipe_curr = recipe_curr_1[0]
brew_type = recipe_curr_1[1]
data = parse_data(recipe_cur = recipe_curr, data_cur = data_curr, data_f = data_table, cesar_cur = cesar_curr, end_time = end_time, top = args.top, mid = args.mid, bot = args.bot)
array[i][0] = data[0] # bot
array[i][1] = data[1] # mid
array[i][2] = data[2] # top
array[i][3] = data[3] # one
array[i][4] = data[4] # two
#store data
#os.chdir("C:\\Users\\pkellicker\\Desktop\\Coffee Code\\Git Code\\Excel")
ws.cell(row = i+2, column = avg_bot_col).value = array[i][0]
ws.cell(row = i+2, column = avg_mid_col).value = array[i][1]
ws.cell(row = i+2, column = avg_top_col).value = array[i][2]
#ws.cell(row = i+2, column = avg_top_col-2).value = array[i][4]
#ws.cell(row = i+2, column = avg_top_col-1).value = array[i][5]
# input average formula into column "Time Zero Temp"
string_1 = "=ROUND(AVERAGE(" + avg_bot_col_let + str(i+2) + "," + avg_mid_col_let + str(i+2) + "," + avg_top_col_let + str(i+2) + "),1)" # ********************changed could be bug
ws.cell(row = i+2, column = time_zero_col).value = string_1
wb.save(base + "\\" + cesar_name)
def main():
parser = argparse.ArgumentParser(description = "Analyze coffee test data. Need KPI, recipe, and csv files. Recipe and KPI files, and Data folder should be placed in same location as .exe. Be sure to use " " when specifying names")
required = parser.add_argument_group("Required Arguments")
required.add_argument("-kpi", help = "enter full name of KPI file", type = str, required = True)
required.add_argument("-recipe", help = "enter full name of recipe file", type = str, required = True)
required.add_argument("-data", help = "enter full name of folder containing the .csv files", type = str, required = True)
optional = parser.add_argument_group("Other Optional Arguments")
optional.add_argument("-v", action = 'store_false', help="Run analysis based on voltage pattern. Default is to analyze by end_time", dest = "end", default = True)
optional.add_argument("-top", help="enter name of top channel title", type = str, default =None)
optional.add_argument("-mid", help="enter name of mid channel title", type = str, default =None)
optional.add_argument("-bot", help="enter name of bot channel title", type = str, default =None)
parser.set_defaults(func=run)
args = parser.parse_args()
args.func(args)
if __name__ == "__main__":
main()
# * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * #
# tried to find Tea with regex
#cesar_date.to_excel("KPI Dates1.xlsx")
#fh = open("KPI Dates1.xlsx").read()
#match = re.search("EB2 TEA BREWS", fh)
'''
#cols = ['CH','Signal name']
#data = data_table[cols]
data_f = data_table.iloc[0:10, 0:2]
data = data_table.iloc[0:10, 1:2]
data_0 = data.values
data_1 = str(data_0)
data_2 = data_1.split(" ")
data_3 = pd.Series(data_2)
data_filter = data_3.str.contains("BOT")
data_bot = data_f[data_filter]
data_filter = data_3.str.contains("MID")
data_mid = data_f[data_filter]
data_filter = data_3.str.contains("TOP")
data_top = data_f[data_filter]
'''
'''
# find number of files in dir
file_count = count_files(csv_folder)
#count = 10
'''
'''
# count number of raw data files -> *could just count rows in KPI file*
def count_files(folder):
numfiles = len([f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f)) and f[0] != '.'])
return numfiles
'''
'''
# loop through data files
for i in range(1,file_count+1):
file_temp = ("CP300_SER" + str(i) + ".csv")
data_temp = skip_to(file_temp, "Number", csv_folder)
# create filter for current row, create dataframe of current row
cesar_rowi = cesar['Test Number'] == i
cesar_curr = cesar[cesar_rowi]
'''
# cwd after loop is 'files' not 'Code'
#print(os.getcwd())