def cv_collect(rootdir):
    """Collect CV objects from matching txt files under *rootdir*.

    Walks *rootdir* for files matching ``cv(.*?)txt``, converts each to a
    CV object via ``txt_to_CV``, then writes a human-readable index to
    ``list_cv.txt`` and pickles the whole collection to ``data_cv.pickle``.

    Args:
        rootdir: a ``pathlib.Path`` to the directory tree to scan.

    Returns:
        list: the collected CV objects, in walk order.
    """
    cv_list = []
    for filename, subdir in walker(rootdir, re.compile('cv(.*?)txt')):
        # filename[:-4] strips the '.txt' extension to form the CV's name.
        cv_list.append(
            txt_to_CV(Path(subdir).joinpath(filename), filename[:-4]))

    # Opening with mode 'w' already truncates, so no explicit truncate()
    # is needed (the original called f.truncate() redundantly).
    list_path = rootdir.joinpath('list_cv.txt')
    with open(list_path, 'w') as f:
        for i, cv in enumerate(cv_list):
            f.write(str(i) + ' ' + cv.info)
            f.write('\n')
    print()
    # BUG FIX: the message previously said "...cv_list.txt" (wrong filename,
    # and with no path separator) while the file written is list_cv.txt.
    print('List of CV has been saved to ' + str(list_path))

    pickle_path = rootdir.joinpath('data_cv.pickle')
    with open(pickle_path, 'wb') as f:
        pickle.dump(cv_list, f, pickle.HIGHEST_PROTOCOL)
    print()
    # BUG FIX: the message previously said "...cv_data.pickle" while the
    # file written is data_cv.pickle.
    print("CV information has been dumped to " + str(pickle_path))
    return cv_list
def get_dataset(files_dir, x_range, num_classes, y_starts_from):
    """
    Get data from csv files and return numpy arrays for training.

    Reads every file matching ``training<label>_<n>.csv`` under
    *files_dir*; column 1 of each row becomes a feature value and
    column 0 its coordinate. The class label is parsed out of the
    filename and one-hot encoded.

    Args:
        files_dir: directory to walk for training csv files.
        x_range: iterable of row indices to read from each csv.
        num_classes: number of classes for one-hot encoding.
        y_starts_from: 0 if filename labels are 0..num_classes-1,
            otherwise labels are assumed to be 1..num_classes.

    Returns:
        tuple: (X, Y_one_hot, coordinates) as numpy arrays of shape
        (m, n), (m, num_classes), (m, n) respectively.
    """
    Y = []
    X = []
    coordinates = []
    # BUG FIX: patterns are now raw strings (bare '\d' is an invalid escape
    # in a normal string) and the literal '.' before 'csv' is escaped so it
    # no longer matches an arbitrary character. Compiled once, outside the
    # loop, rather than re-parsed per file.
    label_pattern = re.compile(r'training(\d+)_.*?\.csv')
    for filename, subdir in walker(files_dir, re.compile(r'training(.*?)_\d+\.csv')):
        with open(subdir + '/' + filename, 'r') as f:
            reader = csv.reader(f)
            data_list = list(reader)[1:]  # Skip the header line
        try:
            x = [float(data_list[i][1]) for i in x_range]  # len(x_range)*1 list
            coordinate = [float(data_list[i][0]) for i in x_range]
        except IndexError:
            # File shorter than x_range expects — report and skip it.
            print("IndexError, please check " + subdir + '/' + filename)
            continue
        X.append(x)  # m*n list
        coordinates.append(coordinate)
        y = label_pattern.findall(filename)[-1]
        if y_starts_from == 0:
            Y.append(int(y))      # y is 0..num_classes-1
        else:
            Y.append(int(y) - 1)  # y is 1..num_classes
    Y_one_hot = to_categorical(Y, num_classes=num_classes)
    X_np = np.array(X)               # m*n numpy array
    Y_np = np.array(Y_one_hot)       # m*c numpy array
    coordinates_np = np.array(coordinates)
    return X_np, Y_np, coordinates_np
import os
import re
from pathlib import Path

import pandas as pd

from dir_walker import walker

# BUG FIX: the original read `aimed_path - ""`, a subtraction that raises
# NameError at runtime; an assignment was clearly intended.
aimed_path = ""
saved_filename = ""  # save to excel by default

working_path = Path(aimed_path)
for i, (filename, subdir) in enumerate(walker(working_path, pattern=re.compile('.*?.txt'))):
    print("reading " + filename)
    file_path = Path(subdir).joinpath(filename)
    with open(file_path, 'r') as fp:
        # The stock name sits at fixed columns 7:11 of the first line.
        head = fp.readline()
    stock_name = head[7:11]
    print(stock_name)
    # Skip first two lines and the last line
    data_csv = pd.read_csv(file_path, skiprows=2,
                           header=None, encoding='gbk')[:-1]
    # extract the first and the 5th column
    data_csv = data_csv.loc[:, [0, 4]]
    # Transpose so dates become columns; the first transposed row holds
    # the date header values.
    data_csv_t = data_csv.transpose()
    header_date = data_csv_t.iloc[0]
    data_csv_t = data_csv_t[1:]
    data_csv_t.columns = header_date
    data_csv_t['stock'] = filename[:-4]  # filename without '.txt'
    data_csv_t['name'] = stock_name
for line in f: time.append(float(line.split()[0])) potential.append(float(line.split()[1])) start_record = 0 for i, p in enumerate(potential): if start_record and (sum(potential[i:i + 50]) / 50 > 3e-3): time_last = time[i] - time[start_record] elif (sum(potential[i:i + 50]) / 50 > 3e-3): start_record = i elif start_record: break print('time: ' + str(time_last)) print('std: ' + str(np.std(potential[start_record + 50:i]))) return time[start_record + 50:i], potential[start_record + 50:i] def write_p(file, time, potential): with open(file, 'w') as f: # f.write('time/ms,voltage/V') # f.write('\n') for i, p in enumerate(potential): f.write(str(time[i])) f.write(',') f.write(str(p)) f.write('\n') for filename, subdir in walker(rootdir, re.compile('(.*?).txt')): print(filename) time, potential = read_p(subdir + '/' + filename) write_p(subdir + '/' + filename[:-4] + '.csv', time, potential)
# Extract required lines from given ascii files # and reclassify them by potential import os from dir_walker import walker # rootdir = 'G:/finaldesign/ENML/data/20180509' rootdir = "C:/code/ENML/data/20180523" save_dir = 'C:/code/ENML/data/20180523_extracted' if not os.path.exists(save_dir): os.makedirs(save_dir) # extract for filename, subdir in walker(rootdir): print("Opening " + filename) a = [] with open(subdir + '/' + filename, 'rb') as f: for line in f: try: a.append(line.decode()) except UnicodeDecodeError: a.append('\r\n') pass # if not a[75].split()[0] == '8000': # # judge whether the file is correct # print("File "+filename+' is not correct') # continue extracted_lines = a[76:-2] # save with open(save_dir + '/' + filename, 'w', newline='\n') as f:
# Used to compare data collected from 2 different stations. from dir_walker import walker import re import csv import matplotlib.pyplot as plt import random plt.style.use('ggplot') rootdir = 'C:/code/ENML/test/i_v' subdir1 = list(walker(rootdir, re.compile('(.*?).csv')))[0][1] label1 = subdir1[(len(rootdir) - len(subdir1)):] subdir2 = list(walker(rootdir, re.compile('(.*?).csv')))[-1][1] label2 = subdir2[(len(rootdir) - len(subdir2)):] x_range = range(7000, 7101) fig = plt.figure() i, j = (1, 1) num_file = 0 for filename, subdir in walker(subdir1, re.compile('(.*?).csv')): # if random.random() < 0.8: # continue print(subdir + '/' + filename) num_file = num_file + 1 print('#file: %d' % num_file) with open(subdir + '/' + filename, 'r') as f: reader = csv.reader(f) data_list = list(reader)[1:]