import csv
import math
import sys
from json import dumps

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np

import csvToJson

# obsIdxs, stringToSecs, filterData, getStep, and the p_i module alias are
# assumed to be defined or imported elsewhere in this repo.


def parseFile(file):
    data = csvToJson.parseCsv(file)
    obs = np.array(data['data'])
    cleanData = []
    for i in range(len(obs)):
        row = obs[i, obsIdxs]
        # Skip rows with a null ("---") in the obsIdxs columns or in the
        # time column (index 3, checked separately because time is always
        # included by default).
        if "---" in row or "---" in obs[i][3]:
            continue
        cleanData.append([stringToSecs(obs[i][3])] + [float(elem) for elem in row])
    return np.array(cleanData)
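# Minimal usage sketch (not part of the original module); "workout.csv" is
# a hypothetical log file path.
def exampleParseFile():
    obs = parseFile("workout.csv")
    # Column 0 holds elapsed seconds; the remaining columns are the
    # obsIdxs observation columns cast to float.
    print(obs.shape)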
def returnIntervals(self):
    # CLI layout: argv[1] = csv path, argv[2] = comma-separated gaps
    # ("MM:SS" for time, integers otherwise), argv[3] = comma-separated
    # interval types (strokes|distance|time), argv[4] = comma-separated
    # top-N counts, argv[5] = optional threshold (default 0.1).
    path = sys.argv[1]
    gap = sys.argv[2]
    intervalIdx = sys.argv[3]
    topN = sys.argv[4]
    threshold = sys.argv[5] if len(sys.argv) == 6 else "0.1"
    # combineProduceIntervals parses these raw strings itself (including
    # the time-gap conversion), so forward them unparsed.
    return self.combineProduceIntervals(path, topN, gap, intervalIdx, threshold)
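# Hypothetical shell invocation matching the argv layout above (the script
# name is assumed):
#
#   python produce_intervals.py workout.csv 02:00,500 time,distance 4,6 0.1
#
# i.e. two-minute time intervals and 500-unit distance intervals, with
# top-N counts of 4 and 6 and a threshold of 0.1.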
def getVariances(files):
    # Compute the variance of each file's power column (index 13) and
    # return [file, variance] pairs sorted from highest to lowest.
    variances = []
    for file in files:
        to_format_data = p_i.GetIntervals()
        data = csvToJson.parseCsv(file)
        to_format_data.reformatArray(data)
        data["data"][13] = to_format_data.parseColumn(data["data"][13], lambda x: int(float(x)), int)
        data["data"][9] = to_format_data.parseColumn(data["data"][9], lambda x: int(float(x)), int)
        data["data"][1] = to_format_data.parseColumn(data["data"][1], float, float)
        data["data"][3] = to_format_data.parseElapsedTime(data["data"][3])
        variances.append([file, np.var(data["data"][13])])
    return sorted(variances, key=lambda item: item[1], reverse=True)
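# Usage sketch (hypothetical paths): rank logs by power variance, with the
# most variable (most interval-like) workout first.
def exampleGetVariances():
    for filename, variance in getVariances(["workout1.csv", "workout2.csv"]):
        print(filename, variance)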
def view(self, file, ind_type):
    # Plot the power column (13) against the chosen independent variable:
    # elapsed time (3), distance (1), stroke count (9), or stroke rate (8);
    # "speed" plots column 5 divided by power against elapsed time.
    format_data = p_i.GetIntervals()
    data = csvToJson.parseCsv(file)
    format_data.reformatArray(data)
    data["data"][13] = format_data.parseColumn(data["data"][13], lambda x: int(float(x)), int)
    data["data"][9] = format_data.parseColumn(data["data"][9], lambda x: int(float(x)), int)
    data["data"][8] = format_data.parseColumn(data["data"][8], lambda x: int(float(x)), int)
    data["data"][5] = format_data.parseColumn(data["data"][5], float, float)
    data["data"][1] = format_data.parseColumn(data["data"][1], float, float)
    data["data"][3] = format_data.parseElapsedTimePlot(data["data"][3])
    # Cut off the last ~9% of samples for the speed plot.
    frac = math.floor(len(data["data"][1]) * 0.91)
    if ind_type == "time":
        plt.plot(data["data"][3], data["data"][13], '-b')
        plt.gca().fmt_xdata = mdates.DateFormatter("%H:%M:%S.%f")
        plt.gca().get_xaxis().set_major_formatter(mdates.DateFormatter("%M:%S"))
    elif ind_type == "distance":
        plt.plot(data["data"][1], data["data"][13], '-b')
    elif ind_type == "strokes":
        plt.plot(data["data"][9], data["data"][13], '-b')
    elif ind_type == "rate":
        plt.scatter(data["data"][8], data["data"][13])
    elif ind_type == "speed":
        plt.plot(data["data"][3][:frac], (data["data"][5] / data["data"][13])[:frac])
    plt.show()
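# Usage sketch: render each supported view for one hypothetical file;
# `intervals` stands in for an instance of the class defining view.
def exampleView(intervals):
    for ind_type in ("time", "distance", "strokes", "rate", "speed"):
        intervals.view("workout.csv", ind_type)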
def saveToCsv(filenames):
    files = []
    for filename in filenames:
        data = csvToJson.parseCsv(filename)
        data["filename"] = filename
        files.append(data)
    csvToJson.joinData(files)
    fileToWrite = files[0]
    # Copy the first file's header verbatim, up to and including the
    # "Per-Stroke Data:" marker and the three lines after it.
    csvData = []
    with open(fileToWrite["filename"], newline='') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            csvData.append(row)
            if len(row) > 0 and row[0] == "Per-Stroke Data:":
                break
        for i in range(3):
            csvData.append(next(reader))
    # Write the copied header followed by the joined per-stroke rows to
    # "<original name> new.csv".
    with open(fileToWrite["filename"][:-4] + " new" + ".csv", 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        for row in csvData:
            csvwriter.writerow(row)
        for row in fileToWrite["data"]:
            csvwriter.writerow(row)
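# Usage sketch (hypothetical paths): join two logs; the combined file is
# written as "piece1 new.csv" next to the first input.
def exampleSaveToCsv():
    saveToCsv(["piece1.csv", "piece2.csv"])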
def combineProduceIntervals(self, data, topN, gap, intervalIdx, threshold="0.1"):
    # Arguments arrive as raw CLI strings (see returnIntervals) and are
    # parsed here.
    topN = [int(arg) for arg in topN.split(",")]
    data = csvToJson.parseCsv(data)
    gap = gap.split(",")
    intervalIdxs = {"strokes": 9, "distance": 1, "time": 3}
    intervalIdx = [intervalIdxs[arg] for arg in intervalIdx.split(",")]
    for i in range(len(intervalIdx)):
        if intervalIdx[i] == 3:
            # Time gaps are given as "MM:SS"; pad to "HH:MM:SS.d" before
            # converting to seconds.
            gap[i] = csvToJson.elapsedTimeToSec("00:" + gap[i] + ".0")
        else:
            gap[i] = int(gap[i])
    threshold = float(threshold)

    self.reformatArray(data)
    data["data"][13] = self.parseColumn(data["data"][13], int)
    data["data"][9] = self.parseColumn(data["data"][9], int)
    data["data"][1] = self.parseColumn(data["data"][1], float)
    data["data"][3] = self.parseElapsedTime(data["data"][3])

    # TODO: there may be cases where a "---" power value gets zeroed,
    # which ruins the variance. Also look into an alternative to variance
    # that tolerates one or two outliers without skewing the result.
    power = data["data"][13]

    # Smooth the power trace, then find the indices where it steps up
    # (interval starts) and steps down (interval ends).
    filtPower = filterData(power)
    rises = getStep(1, filtPower, 0.15)
    falls = getStep(-1, filtPower, 0.2)
    steps = np.sort(np.hstack((rises, falls)))
    steps[steps >= len(power)] = len(power) - 1

    # For each requested interval type, collect candidate groupings and
    # their orderings, then pick the best overall schedule across types.
    groupings = [[]] * len(intervalIdx)
    sorted_orderings = []
    for i in range(len(groupings)):
        cur_sorted_orderings, cur_groupings = self.getSortedOrderings(
            falls, rises, intervalIdx[i], gap[i], data, filtPower, threshold, topN[i], i)
        groupings[i] = cur_groupings
        sorted_orderings += cur_sorted_orderings

    groupsToUse = []
    sorted_on_finish, opt = self.getBestSchedule(sorted_orderings)
    for i in range(len(opt[1])):
        interval = sorted_on_finish[opt[1][i]]
        grouping_idx = interval[3]
        groupsToUse += groupings[grouping_idx][interval[2]].tolist()
    return dumps(groupsToUse)
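# Usage sketch: the return value is a JSON string of row-index groupings
# for the chosen intervals, so decode it before use. The argument strings
# follow the CLI layout documented in returnIntervals; `intervals` stands
# in for an instance of the enclosing class.
def exampleCombineProduceIntervals(intervals):
    from json import loads
    groups = loads(intervals.combineProduceIntervals(
        "workout.csv", "4", "02:00", "time", "0.1"))
    print(len(groups), "interval groupings")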