def build_all_corpora_multithread(path):
    """Build a corpus for every folder under *path* using a process pool.

    Parameters
    ----------
    path : str
        Root directory; each sub-folder returned by get_folders() is
        processed independently by build_corpus_for_folder().

    Fix notes: the original version re-ran build_corpus_for_folder serially
    over all folders AFTER the pool had already processed them, doing every
    folder's work twice; that redundant pass is removed.  The pool is now
    also closed and joined, so worker processes are no longer leaked.
    """
    from multiprocessing import Pool
    folders = sorted(get_folders(path))
    pool = Pool(7)  # worker count chosen ad hoc in the original; tune per host
    try:
        pool.map(build_corpus_for_folder, folders)
    finally:
        # Always release the worker processes, even if map() raises.
        pool.close()
        pool.join()
def build_all_corpora(path):
    """Sequentially build a corpus for each folder found under *path*.

    Serial counterpart of build_all_corpora_multithread(); delegates all
    per-folder work to build_corpus_for_folder().
    """
    for corpus_dir in get_folders(path):
        build_corpus_for_folder(corpus_dir)
def saxify_all(path, alphabet=(3, 21)):
    """Run SAX discretisation over every folder under *path*.

    *alphabet* is forwarded untouched to saxify_folder(); presumably a
    (min, max) alphabet-size range -- confirm against saxify_folder.
    NOTE(review): a later definition of saxify_all (default alphabet
    (20, 21)) shadows this one at import time.
    """
    targets = get_folders(path)
    for target in targets:
        saxify_folder(target, alphabet)
# NOTE(review): this chunk begins MID-FUNCTION -- the `def` header (presumably
# a create_dataset(csv_path, label)-style helper, given the near-identical
# tail later in this file) and the bindings of df/nrows/l/label are outside
# the visible source, so the code is left byte-identical.  The visible tail
# chops the 'datum' column into consecutive windows of l samples, skips
# near-constant windows (fewer than 5 distinct values), and returns a
# DataFrame with column 0 = label and columns 1..l = samples.  The script
# code fused onto the same line then splits each per-appliance CSV 50/50
# into train/test halves; `ids` is not defined anywhere in view (TODO
# confirm), `nrows/l` and `nrows/2` rely on Python 2 integer division, and
# the bare `pass` in the except clause silently drops unreadable CSVs.
data = df['datum'].values.tolist() ret = [] for i in range(nrows/l): if len(np.unique(data[i*l:(i+1)*l])) < 5: continue entry = {0: label} for j in range(l): entry[j+1] = data[i*l+j] ret.append(entry) adf = pd.DataFrame(ret) return adf from mylib import get_folders dirs = get_folders('/Users/daoyuan.li/Documents/Smart.Buildings/Dataset/DECC/popular_appliances/within_household/') for root in dirs: csvfs = ['%s.csv' % x for x in ids] train = [] test = [] for i in range(len(csvfs)): try: df = create_dataset(join(root, csvfs[i]), i) nrows = df.shape[0] train.append(df.iloc[:nrows/2, :]) test.append(df.iloc[nrows/2:, :]) except Exception as e: pass # print e
def generate_parameters(folder, alphabet_range, reset=False, pfile='params.txt'):
    """Emit one "<relative-folder> <alphabet-size> <class>" line per
    (alphabet size, class) combination into a shared parameter file.

    Parameters
    ----------
    folder : str
        Dataset folder; must contain train/saxified_10.csv with a 'label'
        column.  A long absolute prefix is stripped from it before writing.
    alphabet_range : (int, int)
        Inclusive range of SAX alphabet sizes to enumerate.
    reset : bool
        When True, truncate the parameter file instead of appending.
    pfile : str
        Parameter file name, created under the *global* `root` directory.

    Fix notes: pd.DataFrame.from_csv was removed in pandas 1.0;
    pd.read_csv with index_col=False is the drop-in replacement here
    (from_csv's parse_dates default only affected the index column, which
    index_col=False disables).
    """
    df = pd.read_csv(join(folder, 'train', 'saxified_10.csv'), index_col=False)
    klasses = sorted(df['label'].unique())
    mode = 'w' if reset else 'a'
    prefix = '/Users/daoyuan.li/Documents/Smart.Buildings/Dataset/DECC/popular_appliances/'
    # NOTE(review): the file is opened under the global `root`, not under
    # `folder` -- apparently deliberate: successive calls (see the loop
    # below, and the default append mode) accumulate every folder's lines
    # into one parameter file.
    with open(join(root, pfile), mode) as f:
        for klass in klasses:
            for size in range(alphabet_range[0], alphabet_range[1] + 1):
                f.write('%s %s %s\n' % (folder[len(prefix):], size, klass))


# Historical one-off invocations, kept for reference:
# generate_parameters('NewlyAddedDatasets/ElectricDevices/', (5, 10), reset=True, pfile='params.txt')
# generate_parameters('NewlyAddedDatasets/LargeKitchenAppliances', (3, 20), reset=True, pfile='params1.txt')
# generate_parameters('NewlyAddedDatasets/SmallKitchenAppliances', (3, 20), reset=False, pfile='params1.txt')
# generate_parameters('NewlyAddedDatasets/RefrigerationDevices', (3, 20), reset=False, pfile='params1.txt')
# generate_parameters('NewlyAddedDatasets/ECG5000/', (5, 10), reset=True, pfile='params.txt')
# generate_parameters('Pre_Summer_2015_Datasets/OSULeaf/', (5, 10), reset=True, pfile='params_leaf')
# generate_parameters('Pre_Summer_2015_Datasets/SwedishLeaf/', (5, 10), reset=True, pfile='params_leaf')

for folder in get_folders(root):
    generate_parameters(folder, (20, 20))
# NOTE(review): this chunk begins MID-FUNCTION and mid-`try` -- the `def`
# (evidently process_results(folder, resultsdir=...), per the call below),
# the enclosing `try:`, and the bindings of traincsv/a/clsssifiedf are
# outside the visible source, so the code is left byte-identical.  The
# visible tail loads the training CSV to recover the sorted class labels,
# classifies via get_dataframe()/predict(), writes predictions to the
# (typo-named) `clsssifiedf` path, and prints per-folder accuracy; the inner
# `except Exception` prints and swallows errors, and the driver loop's bare
# `except:` hides even KeyboardInterrupt -- TODO narrow both.  The imports
# fused onto the end of this line sit AFTER the code that needs them,
# betraying out-of-order notebook-cell concatenation; pd.DataFrame.from_csv
# was removed in pandas 1.0 (pd.read_csv is the replacement), and the
# Python 2 print statements date the code.
traindf = pd.DataFrame.from_csv(traincsv, index_col=False) klasses = sorted(traindf['label'].unique()) df = get_dataframe(folder, resultsdir, a, klasses) df = predict(df) df.to_csv(clsssifiedf, index=False) print folder, len(klasses), a, accuracy_score(df['label'].values.tolist(), df['predicted'].values.tolist()) # print classification_report(df['label'].values.tolist(), df['predicted'].values.tolist()) except Exception as e: print e pass print return root = '/Users/daoyuan.li/Documents/Smart.Buildings/Dataset/DECC/popular_appliances/combinations/' for folder in get_folders(root): try: process_results(folder, resultsdir='final_results_wl_2_to_20') except: print folder # In[ ]: from sklearn.metrics import accuracy_score from sklearn.metrics import classification_report from os.path import join import os import pandas as pd from mylib import get_folders
def saxify_all_multithread(path):
    """SAX-discretise folders under *path* using a process pool.

    NOTE(review): only the first three (sorted) folders are processed --
    the `[:3]` slice looks like a debugging leftover (the corpus-building
    twin of this function maps over ALL folders), but it is preserved here
    to avoid a behavior change; confirm before removing.

    Fix notes: the pool is now closed and joined, so worker processes are
    no longer leaked when the function returns or raises.
    """
    from multiprocessing import Pool
    folders = sorted(get_folders(path))[:3]
    pool = Pool(7)  # ad hoc worker count, matching build_all_corpora_multithread
    try:
        pool.map(saxify_folder, folders)
    finally:
        pool.close()
        pool.join()
def saxify_all(path, alphabet=(20, 21)):
    """Apply saxify_folder() to every folder under *path*.

    NOTE(review): this REDEFINES an earlier saxify_all that differs only in
    its default alphabet ((3, 21) there); whichever definition runs last
    wins -- consolidate the two and pick one default.
    """
    for entry in get_folders(path):
        saxify_folder(entry, alphabet)
# NOTE(review): this chunk begins MID-FUNCTION -- the `def` header
# (presumably create_dataset(csv_path, label), per the call below) and the
# bindings of df/l/label are outside the visible source, so the code is
# left byte-identical.  Same windowing logic as the earlier tail in this
# file but with a looser uniqueness threshold (3 distinct values vs 5) --
# TODO confirm the divergence is intentional.  `cols = range(l+1)` is
# computed and never used.  The fused script code iterates hard-coded
# appliance-pair CSVs under combinations/, splitting each 50/50 into
# train/test; `nrows/l` and `nrows/2` rely on Python 2 integer division,
# and the bare `pass` silently drops missing/unreadable CSVs.
nrows, _ = df.shape cols = range(l+1) data = df['datum'].values.tolist() ret = [] for i in range(nrows/l): if len(np.unique(data[i*l:(i+1)*l])) < 3: continue entry = {0: label} for j in range(l): entry[j+1] = data[i*l+j] ret.append(entry) adf = pd.DataFrame(ret) return adf dirs = get_folders('/Users/daoyuan.li/Documents/Smart.Buildings/Dataset/DECC/popular_appliances/combinations/') for root in dirs: csvfs = ["17_18.csv", "17_160.csv", "17_51.csv", "18_160.csv", "18_51.csv", "51_160.csv", "160_161.csv", "17_161.csv", "51_161.csv", "18_183.csv", "6_17.csv", "6_18.csv", "6_51.csv", "6_160.csv", "6_161.csv"] train = [] test = [] for i in range(len(csvfs)): try: df = create_dataset(join(root, csvfs[i]), i) nrows = df.shape[0] train.append(df.iloc[:nrows/2, :]) test.append(df.iloc[nrows/2:, :]) except Exception as e: pass # print e