def build_all_corpora_multithread(path, processes=7):
    """Build the corpus of every folder under *path* in a worker pool.

    Calls ``build_corpus_for_folder`` once per folder returned by
    ``get_folders(path)``, distributing the (sorted) folders over a
    ``multiprocessing.Pool``.

    The previous version ran the whole job a second time, serially, after
    the parallel pass had already finished, and never shut the pool down.
    Both are fixed here: each folder is processed exactly once and the
    worker processes are always released.

    Args:
        path: directory handed to ``get_folders``.
        processes: number of worker processes (defaults to the former
            hard-coded value of 7).
    """
    from multiprocessing import Pool

    # Resolve the folder list before spawning workers so that a failure in
    # get_folders() cannot leave an orphaned pool behind.
    folders = sorted(get_folders(path))
    pool = Pool(processes)
    try:
        pool.map(build_corpus_for_folder, folders)
    finally:
        # close()/join() instead of a ``with`` block: Pool is not a context
        # manager on Python 2, which this file targets.
        pool.close()
        pool.join()
    return
def build_all_corpora(path):
    """Serially run ``build_corpus_for_folder`` on each folder under *path*.

    The folders are whatever ``get_folders(path)`` yields, visited in the
    order it yields them. Returns ``None``.
    """
    subfolders = get_folders(path)
    for subfolder in subfolders:
        build_corpus_for_folder(subfolder)
def saxify_all(path, alphabet=(3, 21)):
    """Call ``saxify_folder(folder, alphabet)`` for every folder under *path*.

    Args:
        path: directory handed to ``get_folders``.
        alphabet: passed through unchanged to ``saxify_folder``.
            NOTE(review): presumably a (min, max) alphabet-size pair --
            confirm against ``saxify_folder``.

    Returns:
        None.
    """
    for folder in get_folders(path):
        saxify_folder(folder, alphabet)
    return
    # ------------------------------------------------------------------
    # NOTE(review): everything below is unreachable -- it follows the bare
    # ``return`` above. It also reads ``df``, ``nrows``, ``l`` and ``label``,
    # none of which are defined in this function. It looks like the body of
    # a different function whose ``def`` line was lost (possibly
    # ``create_dataset``, which is called elsewhere in this file) -- confirm
    # and restore the header or delete the fragment.
    #
    # What the fragment does: cut the 'datum' column into consecutive
    # windows of length ``l`` and turn each kept window into one row.
    # ------------------------------------------------------------------
    data = df['datum'].values.tolist()
    ret = []
    # ``nrows/l`` is used as an integer window count; that relies on
    # Python 2 integer division.
    for i in range(nrows/l):
        # Skip windows that contain fewer than 5 distinct values.
        if len(np.unique(data[i*l:(i+1)*l])) < 5:
            continue
        # Column 0 holds the label; columns 1..l hold the window's values.
        entry = {0: label}
        for j in range(l):
            entry[j+1] = data[i*l+j]
        ret.append(entry)
    adf = pd.DataFrame(ret)
    return adf


from mylib import get_folders

dirs = get_folders('/Users/daoyuan.li/Documents/Smart.Buildings/Dataset/DECC/popular_appliances/within_household/')

# For every household folder, load one dataset per id in ``ids`` (defined
# elsewhere) and split it in half by row order: first half -> train,
# second half -> test. The position of the id in ``ids`` is passed to
# create_dataset as its second argument.
# NOTE(review): ``train`` and ``test`` are rebuilt for each ``root`` and
# nothing in this span consumes them -- the code that used them appears to
# be missing from this fragment.
for root in dirs:
    csvfs = ['%s.csv' % x for x in ids]
    train = []
    test = []

    for i, csvf in enumerate(csvfs):
        try:
            df = create_dataset(join(root, csvf), i)
            nrows = df.shape[0]
            # Floor division keeps the split point an int. With ``/`` the
            # bound becomes a float under true division, ``iloc`` raises,
            # and the handler below would silently drop every dataset.
            half = nrows // 2
            train.append(df.iloc[:half, :])
            test.append(df.iloc[half:, :])
        except Exception:
            # Deliberate best effort: a file that cannot be loaded or split
            # is skipped without any report.
            pass
def generate_parameters(folder,
                        alphabet_range,
                        reset=False,
                        pfile='params.txt'):
    """Write one "<folder> <alphabet> <class>" line per class and alphabet size.

    Reads ``<folder>/train/saxified_10.csv``, collects the sorted unique
    values of its ``label`` column, and for each label writes one line for
    every alphabet size in the inclusive range
    ``alphabet_range[0] .. alphabet_range[1]``.

    The output file is ``join(root, pfile)``. ``root`` is a module-level
    global, not a parameter, so it must be bound before this is called.

    Args:
        folder: dataset folder. The fixed dataset-root prefix is stripped
            from it by length (not by matching) before it is written, so
            folders outside that root produce a truncated name.
        alphabet_range: ``(low, high)`` pair, both ends inclusive.
        reset: when true the parameter file is truncated first; otherwise
            lines are appended.
        pfile: name of the parameter file inside ``root``.
    """
    prefix = '/Users/daoyuan.li/Documents/Smart.Buildings/Dataset/DECC/popular_appliances/'

    # pd.DataFrame.from_csv is deprecated and was removed in pandas 1.0;
    # read_csv with the same index_col=False loads the same columns.
    df = pd.read_csv(join(folder, 'train', 'saxified_10.csv'),
                     index_col=False)
    klasses = sorted(df['label'].unique())

    mode = 'w' if reset else 'a'
    relative_folder = folder[len(prefix):]
    low, high = alphabet_range[0], alphabet_range[1]

    with open(join(root, pfile), mode) as f:
        for k in klasses:
            for a in range(low, high + 1):
                f.write('%s %s %s\n' % (relative_folder, a, k))
    return


# generate_parameters('NewlyAddedDatasets/ElectricDevices/', (5, 10), reset=True, pfile='params.txt')
# generate_parameters('NewlyAddedDatasets/LargeKitchenAppliances', (3, 20), reset=True, pfile='params1.txt')
# generate_parameters('NewlyAddedDatasets/SmallKitchenAppliances', (3, 20), reset=False, pfile='params1.txt')
# generate_parameters('NewlyAddedDatasets/RefrigerationDevices', (3, 20), reset=False, pfile='params1.txt')

# generate_parameters('NewlyAddedDatasets/ECG5000/', (5, 10), reset=True, pfile='params.txt')
# generate_parameters('Pre_Summer_2015_Datasets/OSULeaf/', (5, 10), reset=True, pfile='params_leaf')
# generate_parameters('Pre_Summer_2015_Datasets/SwedishLeaf/', (5, 10), reset=True, pfile='params_leaf')

# Append parameter lines for every folder under ``root`` using the single
# alphabet size 20 (generate_parameters treats the range as inclusive on
# both ends, so (20, 20) yields exactly one size). ``reset`` is left at its
# default, so existing lines in the parameter file are kept.
# NOTE(review): ``root`` must already be bound when this runs; in this file
# it is assigned further down and also rebound by ``for root in dirs`` loops,
# so the result depends on cell execution order -- confirm.
for folder in get_folders(root):
    generate_parameters(folder, (20, 20))
            traindf = pd.DataFrame.from_csv(traincsv, index_col=False)
            klasses = sorted(traindf['label'].unique())
            df = get_dataframe(folder, resultsdir, a, klasses)
            df = predict(df)
            df.to_csv(clsssifiedf, index=False)
            print folder, len(klasses), a, accuracy_score(df['label'].values.tolist(), df['predicted'].values.tolist())
#             print classification_report(df['label'].values.tolist(), df['predicted'].values.tolist())
        except Exception as e:
            print e
            pass
    print
    return


root = '/Users/daoyuan.li/Documents/Smart.Buildings/Dataset/DECC/popular_appliances/combinations/'
# Run process_results on every folder under ``root``. A folder that fails is
# reported by printing its path and the loop moves on to the next one.
for folder in get_folders(root):
    try:
        process_results(folder, resultsdir='final_results_wl_2_to_20')
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # Ctrl-C (KeyboardInterrupt) and SystemExit still stop the run.
        # print(folder) emits the same text as ``print folder`` on Python 2
        # and also parses on Python 3.
        print(folder)


# In[ ]:

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from os.path import join
import os
import pandas as pd

from mylib import get_folders
def build_all_corpora(path):
    """Build the corpus for each folder found under *path*, one at a time.

    Iterates over ``get_folders(path)`` in the order given and hands every
    folder to ``build_corpus_for_folder``. Nothing is returned.
    """
    for corpus_dir in get_folders(path):
        build_corpus_for_folder(corpus_dir)
    return None
def saxify_all_multithread(path, processes=7):
    """Run ``saxify_folder`` over folders under *path* in a worker pool.

    Each folder is passed to ``saxify_folder`` as its only argument, so any
    further parameters of ``saxify_folder`` take their defaults.

    The pool is now always closed and joined; previously the worker
    processes were left running after the call returned.

    Args:
        path: directory handed to ``get_folders``.
        processes: number of worker processes (defaults to the former
            hard-coded value of 7).
    """
    from multiprocessing import Pool

    # NOTE(review): only the first three folders (in sorted order) are
    # processed. This looks like a leftover debugging limit -- confirm
    # before relying on this function for a full run.
    folders = sorted(get_folders(path))[:3]

    pool = Pool(processes)
    try:
        pool.map(saxify_folder, folders)
    finally:
        # close()/join() instead of a ``with`` block: Pool is not a context
        # manager on Python 2, which this file targets.
        pool.close()
        pool.join()
    return
def saxify_all(path, alphabet=(20, 21)):
    """Call ``saxify_folder(folder, alphabet)`` for every folder under *path*.

    Args:
        path: directory handed to ``get_folders``.
        alphabet: passed through unchanged to ``saxify_folder``.
            NOTE(review): presumably a (min, max) alphabet-size pair --
            confirm against ``saxify_folder``.

    Returns:
        None.
    """
    for folder in get_folders(path):
        saxify_folder(folder, alphabet)
    return
    # ------------------------------------------------------------------
    # NOTE(review): everything below is unreachable -- it follows the bare
    # ``return`` above. It also reads ``df``, ``l`` and ``label``, none of
    # which are defined in this function. It looks like the body of a
    # different function whose ``def`` line was lost (possibly
    # ``create_dataset``, which is called elsewhere in this file) -- confirm
    # and restore the header or delete the fragment.
    #
    # What the fragment does: cut the 'datum' column into consecutive
    # windows of length ``l`` and turn each kept window into one row.
    # ------------------------------------------------------------------
    nrows, _ = df.shape
    # ``cols`` is assigned but never read in the visible code.
    cols = range(l+1)
    data = df['datum'].values.tolist()
    ret = []
    # ``nrows/l`` is used as an integer window count; that relies on
    # Python 2 integer division.
    for i in range(nrows/l):
        # Skip windows that contain fewer than 3 distinct values.
        if len(np.unique(data[i*l:(i+1)*l])) < 3:
            continue
        # Column 0 holds the label; columns 1..l hold the window's values.
        entry = {0: label}
        for j in range(l):
            entry[j+1] = data[i*l+j]
        ret.append(entry)
    adf = pd.DataFrame(ret)
    return adf


dirs = get_folders('/Users/daoyuan.li/Documents/Smart.Buildings/Dataset/DECC/popular_appliances/combinations/')

# For every combinations folder, load each of the fixed CSV files below and
# split it in half by row order: first half -> train, second half -> test.
# The position of the file in ``csvfs`` is passed to create_dataset as its
# second argument.
# NOTE(review): ``train`` and ``test`` are rebuilt for each ``root`` and
# nothing in this span consumes them -- the code that used them appears to
# be missing from this fragment.
for root in dirs:
    csvfs = ["17_18.csv", "17_160.csv", "17_51.csv", "18_160.csv", "18_51.csv", "51_160.csv", "160_161.csv", "17_161.csv", "51_161.csv", "18_183.csv", "6_17.csv", "6_18.csv", "6_51.csv", "6_160.csv", "6_161.csv"]
    train = []
    test = []

    for i, csvf in enumerate(csvfs):
        try:
            df = create_dataset(join(root, csvf), i)
            nrows = df.shape[0]
            # Floor division keeps the split point an int. With ``/`` the
            # bound becomes a float under true division, ``iloc`` raises,
            # and the handler below would silently drop every dataset.
            half = nrows // 2
            train.append(df.iloc[:half, :])
            test.append(df.iloc[half:, :])
        except Exception:
            # Deliberate best effort: a file that cannot be loaded or split
            # is skipped without any report.
            pass