def seg_flag_cor_batch(path_in, path_out, names):
    filelist = base.get_files_csv(path_in)
    for file in filelist:
        print("processing file: {}".format(file))
        data = pd.read_csv(path_in + file)
        data = seg_flag_cor(data, names)
        data.to_csv(path_out + file, index=False)
def interpolate_bin_mean_batch(path_in, path_out, bin_size=5):
    filelist = base.get_files_csv(path_in)
    for file in filelist:
        print("processing file: {}".format(file))
        data = pd.read_csv(path_in + file)
        data = interpolate_bin_mean(data, bin_size)
        data.to_csv(path_out + file, index=False)
def del_dup_batch(path_in, path_out):
    filelist = base.get_files_csv(path_in)
    for file in filelist:
        print("processing file: {}".format(file))
        data = pd.read_csv(path_in + file)
        data_new = del_dup(data)
        data_new.to_csv(path_out + file, index=False)
def sm_mean_1m_batch(path_in, path_out):
    filelist = base.get_files_csv(path_in)
    for file in filelist:
        print("-------- processing file: {}".format(file))
        data = pd.read_csv(path_in + file)
        data = sm_mean_1m(data)
        data.to_csv(path_out + file, index=False)
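# Example of chaining the batch cleaning steps above. This is a sketch only:
# the directory names ("data_raw/", "data_dedup/", ...) are hypothetical, the
# output directories are assumed to exist, and the exact step order depends
# on the pipeline.
#
# del_dup_batch("data_raw/", "data_dedup/")
# seg_flag_cor_batch("data_dedup/", "data_segcor/", var_name.name_HW1 + var_name.name_HW2)
# interpolate_bin_mean_batch("data_segcor/", "data_binned/", bin_size=5)
# sm_mean_1m_batch("data_binned/", "data_smoothed/")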
def cal_rolling_path(path_in, path_out, func, win_rate):
    filelist = base.get_files_csv(path_in)
    for file in filelist:
        print("calculate for file {}".format(file))
        cal_rolling_stats(file_in=path_in + file, file_out=path_out + file,
                          func=func, win_rate=win_rate)
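# cal_rolling_stats is defined elsewhere in this project; the helper below is
# only an illustrative sketch of what such a per-file rolling computation
# might look like with pandas, with the window size taken as a fraction
# (win_rate) of the record count. It is not the project's implementation.
def _rolling_stats_sketch(file_in, file_out, func, win_rate):
    data = pd.read_csv(file_in)
    window = max(1, int(len(data) * win_rate))       # window as a fraction of the rows
    numeric = data.select_dtypes(include="number")   # only roll over numeric columns
    rolled = numeric.rolling(window, min_periods=1).apply(func, raw=True)
    rolled.to_csv(file_out, index=False)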
def plot_box_ZX_HW_batch(path):
    filelist = base.get_files_csv(path)
    name = var_name.name_HW1 + var_name.name_HW2
    for file in filelist:
        plt.figure(figsize=(13, 2))
        file_path = path + file
        data = pd.read_csv(file_path)
        # DataFrame.as_matrix() was removed in pandas 1.0; use to_numpy() instead
        plt.boxplot(data[name].to_numpy())
        plt.xticks(np.arange(1, 1 + len(name)), name)
        plt.title(file)
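# Note: the function above only builds one boxplot figure per file; rendering
# (or saving) is left to the caller, e.g. (hypothetical path):
#
# plot_box_ZX_HW_batch("data_smoothed/")
# plt.show()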
def parse_time_batch(path_in, path_out):
    filelist = base.get_files_csv(path_in)
    for file in filelist:
        print("process file: {}".format(file))
        data = pd.read_csv(path_in + file)
        data['BTSJ_I'] = parse_time(list(data['BTSJ']))
        # move the parsed time column right after the original 'BTSJ' column
        names = list(data.columns)
        names.remove('BTSJ_I')
        names.insert(1, 'BTSJ_I')
        data = data[names]
        data.to_csv(path_out + file, index=False)
def parse_temp_batch(path_in, path_out):
    filelist = base.get_files_csv(path_in)
    for file in filelist:
        print("processing file {}".format(file))
        data = pd.read_csv(path_in + file)
        if len(data) < 1000:  # discard datasets with too few records
            print("!!!!!! skipping file {}: too few records to parse".format(file))
            continue
        data = parse_temperature(data)
        data = parse_temp_HW(data)
        data.to_csv(path_out + file, index=False)
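# Example of running the two parsing passes back to back (hypothetical
# directories; note the 1000-record threshold above drops short files):
#
# parse_time_batch("data_smoothed/", "data_time/")
# parse_temp_batch("data_time/", "data_parsed/")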
def plot_stats_ZX_HW_batch(path, func):
    plt.figure(figsize=(15, 5))
    name = var_name.name_HW1 + var_name.name_HW2
    filelist = base.get_files_csv(path)
    for file in filelist:
        file_path = path + file
        data = pd.read_csv(file_path)
        stds = data[name].apply(func)
        plt.plot(stds.values, 'o-', label=file)
        plt.xticks(np.arange(stds.count()), stds.index)
    plt.legend(loc='best', prop={'size': 8})
    plt.title("std of ZX_HW")
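# Example (hypothetical path): plot the per-column standard deviation of the
# HW variables for every file in one figure.
#
# plot_stats_ZX_HW_batch("data_parsed/", np.std)
# plt.show()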
def check_bug_batch(path_in):
    filelist = base.get_files_csv(path_in)
    f_log = open(path_in + "check_bug_log.txt", "w")
    stdout_origin = sys.stdout
    sys.stdout = f_log
    for file in filelist:
        print("--------------- file: {}".format(file))
        check_bug(path_in + file)
        print("---------------\n\n")
    # restore stdout before closing the log file
    sys.stdout = stdout_origin
    f_log.close()
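# The manual sys.stdout swapping above (also used in check_seg_flag_batch and
# cluster_batch below) can be written more safely with
# contextlib.redirect_stdout, which restores stdout even if a check raises.
# A sketch, not a drop-in replacement:
def _check_bug_batch_sketch(path_in):
    from contextlib import redirect_stdout
    filelist = base.get_files_csv(path_in)
    with open(path_in + "check_bug_log.txt", "w") as f_log, redirect_stdout(f_log):
        for file in filelist:
            print("--------------- file: {}".format(file))
            check_bug(path_in + file)
            print("---------------\n\n")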
def check_seg_flag_batch(path, log_dir):
    filelist = base.get_files_csv(path)
    stdout_origin = sys.stdout
    f_log = open(log_dir, "w")
    sys.stdout = f_log
    for file in filelist:
        print("**************** file:{} ***************".format(file))
        check_seg_flag(path + file)
        print("****************************************\n")
    sys.stdout = stdout_origin
    f_log.close()
def cluster_batch(data_dir):
    filelist = base.get_files_csv(data_dir)
    stdout_original = sys.stdout
    log_file = open(data_dir + "dtw_cluster.txt", "w")
    sys.stdout = log_file
    for file in filelist:
        data = pd.read_csv(data_dir + file)
        del data['BTSJ']
        # data = (data - data.mean()) / (data.std())
        clust = cluster(data)
        print("-------------------------------------------------------")
        print("********process file: {}*********".format(file))
        print(clust)
    sys.stdout = stdout_original
    log_file.close()
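# Example (hypothetical directory): cluster every series file and collect the
# printed results in <data_dir>/dtw_cluster.txt.
#
# cluster_batch("data_parsed/")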
import numpy as np
import pandas as pd
import os

from preprocess import base
from ffx_learn import learn_ffx
from ffx_learn import call_ffx


def norm_2(x):
    return (x - np.mean(x)) / (x.max() - x.min())
    # return (x - x.mean()) / np.sqrt(x * x)


root_dir = os.getcwd()
data_dir = root_dir + "/data_0134/integrated_temp_sdmc_mean/"
filelist = base.get_files_csv(data_dir)
print(filelist)

file = filelist[0]
data = pd.read_csv(data_dir + file)
del data['BTSJ']
print(data.columns)

# data = data.apply(norm_2, axis=1)
# ax = data.plot()
# ax.set_title(file)
# # ax.set_ylim([-1, 1])
# plt.show()
# varnames = data.columns
# X = data.to_numpy()   # as_matrix() was removed in pandas 1.0
# y = np.zeros(X.shape[0])
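# Sketch of how the commented-out block above might be completed: build the
# design matrix X (one column per variable), pick one column as the target y,
# and split into train/test halves. 'SDMC_mean' is a placeholder target name,
# and learn_ffx / call_ffx are not called here because their signatures are
# defined elsewhere in ffx_learn.
#
# varnames = [c for c in data.columns if c != 'SDMC_mean']
# X = data[varnames].to_numpy()
# y = data['SDMC_mean'].to_numpy()
# split = len(data) // 2
# train_X, test_X = X[:split], X[split:]
# train_y, test_y = y[:split], y[split:]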