def main(tseries_fpath, train_fpath, test_fpath, assign_fpath, out_folder):
    """Save ``fit`` outputs for the test series at a few fixed prefix lengths.

    For each ``num_pts`` in (1, 25, 50, 75) this calls
    ``fit(Xtrain, y_train, Xtest, num_pts)`` and writes the result with
    ``np.savetxt`` to ``<out_folder>/probs-<num_pts>-pts.dat``.

    Args:
        tseries_fpath: first argument given to ``ioutil.load_series``.
        train_fpath: second argument to ``ioutil.load_series`` for the
            training series.
        test_fpath: second argument to ``ioutil.load_series`` for the test
            series.
        assign_fpath: text file with the training labels, parsed as integers.
        out_folder: directory receiving the output files; it is not created
            here.
    """
    Xtrain = ioutil.load_series(tseries_fpath, train_fpath)
    Xtest = ioutil.load_series(tseries_fpath, test_fpath)
    y_train = np.genfromtxt(assign_fpath, dtype='i')

    # The original also computed an unused ``max_pts = Xtest.shape[1]``;
    # only these fixed checkpoints are ever evaluated.
    for num_pts in [1, 25, 50, 75]:
        probs = fit(Xtrain, y_train, Xtest, num_pts)
        probs_fpath = os.path.join(out_folder, 'probs-%d-pts.dat' % num_pts)
        np.savetxt(probs_fpath, probs)
def run_fold(folder, tseries_fpath, min_pts, thetas, out_folder):
    """Run ``pred`` for the training split of one fold and save the results.

    Paths used inside ``folder``: ``train.dat`` (selection mask),
    ``ksc/assign.dat`` (true labels) and ``probs`` (handed to ``pred``).

    Args:
        folder: fold directory containing the files listed above.
        tseries_fpath: time-series file; read through ``ioutil.load_series``
            and also parsed line by line here.
        min_pts: forwarded unchanged to ``pred`` and ``save_results``.
        thetas: forwarded unchanged to ``pred`` and ``save_results``.
        out_folder: output directory; created if missing.

    Raises:
        OSError: if ``out_folder`` cannot be created and does not already
            exist as a directory.
    """
    # Only "already exists" is tolerated. The previous bare ``except: pass``
    # also hid permission errors and even KeyboardInterrupt.
    try:
        os.makedirs(out_folder)
    except OSError:
        if not os.path.isdir(out_folder):
            raise

    train_fpath = os.path.join(folder, "train.dat")
    assign_fpath = os.path.join(folder, "ksc", "assign.dat")
    probs_folder = os.path.join(folder, "probs")

    X = ioutil.load_series(tseries_fpath, train_fpath)
    train_idx = np.loadtxt(train_fpath, dtype="bool")
    y_true = np.loadtxt(assign_fpath)

    num_series = X.shape[0]
    max_pts = X.shape[1]

    # NOTE(review): the original comment says "we prune the first 100 lines
    # of X" (presumably inside ioutil.load_series), which is why peak day and
    # total are recomputed from the raw file below -- confirm.
    peak_days = []
    sum_views = []
    with open(tseries_fpath) as tseries_file:
        for i, line in enumerate(tseries_file):
            if train_idx[i]:
                # The first whitespace-separated token is skipped; the rest
                # are parsed as integers.
                x = np.array([int(v) for v in line.split()[1:]])
                peak_days.append(x.argmax())
                sum_views.append(x.sum())

    peak_days = np.array(peak_days)
    sum_views = np.array(sum_views)

    y_pred, best_by, confs, all_confs = pred(probs_folder, num_series,
                                             max_pts, min_pts, thetas)
    save_results(X, peak_days, sum_views, min_pts, thetas, best_by,
                 all_confs, y_true, y_pred, confs, out_folder)
def run_fold(folder, tseries_fpath, min_pts, thetas, out_folder, gamma_max):
    """Run ``pred`` for the test split of one fold and save the results.

    Paths used inside ``folder``: ``test.dat`` (selection mask),
    ``ksc/test_assign.dat`` (true labels) and ``probs-test`` (handed to
    ``pred``).

    Args:
        folder: fold directory containing the files listed above.
        tseries_fpath: time-series file; read through ``ioutil.load_series``
            and also parsed line by line here.
        min_pts: forwarded unchanged to ``pred`` and ``save_results``.
        thetas: forwarded unchanged to ``pred`` and ``save_results``.
        out_folder: output directory; created if missing.
        gamma_max: passed to ``pred`` as its ``max_pts`` argument, without
            any conversion.

    Raises:
        OSError: if ``out_folder`` cannot be created and does not already
            exist as a directory.
    """
    # Only "already exists" is tolerated. The previous bare ``except: pass``
    # also hid permission errors and even KeyboardInterrupt.
    try:
        os.makedirs(out_folder)
    except OSError:
        if not os.path.isdir(out_folder):
            raise

    test_fpath = os.path.join(folder, 'test.dat')
    assign_fpath = os.path.join(folder, 'ksc', 'test_assign.dat')
    probs_folder = os.path.join(folder, 'probs-test')

    X = ioutil.load_series(tseries_fpath, test_fpath)
    test_idx = np.loadtxt(test_fpath, dtype='bool')
    y_true = np.loadtxt(assign_fpath)

    num_series = X.shape[0]
    max_pts = gamma_max

    # Peak position and total of every selected series, taken from the raw
    # file rather than from X.
    peak_days = []
    sum_views = []
    with open(tseries_fpath) as tseries_file:
        for i, line in enumerate(tseries_file):
            if test_idx[i]:
                # The first whitespace-separated token is skipped; the rest
                # are parsed as integers.
                x = np.array([int(v) for v in line.split()[1:]])
                peak_days.append(x.argmax())
                sum_views.append(x.sum())

    peak_days = np.array(peak_days)
    sum_views = np.array(sum_views)

    y_pred, best_by, confs, all_confs = pred(probs_folder, num_series,
                                             max_pts, min_pts, thetas)
    save_results(X, peak_days, sum_views, min_pts, thetas, best_by,
                 all_confs, y_true, y_pred, confs, out_folder)
def main(tseries_fpath, test_fpath, cents_fpath):
    """Print, one per line, the argmin index computed for each test series.

    Loads the series with ``ioutil.load_series`` and the centroids with
    ``np.loadtxt``, takes the first element returned by
    ``dist.dist_all(C, X, rolling=True)`` and prints its ``argmin`` along
    axis 0 (presumably the index of the closest centroid for each series --
    confirm against ``dist.dist_all``).

    Args:
        tseries_fpath: first argument given to ``ioutil.load_series``.
        test_fpath: second argument given to ``ioutil.load_series``.
        cents_fpath: text file with the centroids.
    """
    X = ioutil.load_series(tseries_fpath, test_fpath)
    C = np.loadtxt(cents_fpath)

    dist_cents = dist.dist_all(C, X, rolling=True)[0]
    y_true = dist_cents.argmin(axis=0)

    for t in y_true:
        # Call form with a single argument prints the same text on Python 2
        # and Python 3; the bare ``print t`` statement was Python 2 only.
        print(t)
def main(tseries_fpath, base_folder, k):
    """Run ``ksc.inc_ksc`` on the training series and store its four outputs.

    The series are loaded with ``ioutil.load_series`` using ``train.dat``
    from the parent of ``base_folder``. The four values returned by
    ``ksc.inc_ksc`` are written into ``base_folder`` as ``cents.dat``,
    ``assign.dat``, ``shift.dat`` and ``dists_cent.dat``.

    Args:
        tseries_fpath: first argument given to ``ioutil.load_series``.
        base_folder: directory receiving the four output files.
        k: value converted with ``int`` and passed to ``ksc.inc_ksc``.
    """
    num_clusters = int(k)

    train_idx_fpath = os.path.join(base_folder, "..", "train.dat")
    series = ioutil.load_series(tseries_fpath, train_idx_fpath)

    cent, assign, shift, dists_cent = ksc.inc_ksc(series, num_clusters)

    # (file name, data, savetxt format), written in this order.
    outputs = (
        ("cents.dat", cent, "%.5f"),
        ("assign.dat", assign, "%d"),
        ("shift.dat", shift, "%d"),
        ("dists_cent.dat", dists_cent, "%.5f"),
    )
    for fname, data, fmt in outputs:
        np.savetxt(os.path.join(base_folder, fname), data, fmt=fmt)
def main(tseries_fpath, base_folder, k):
    """Cluster the training series with ``ksc.inc_ksc`` and save the results.

    Reads the series via ``ioutil.load_series`` with the ``train.dat`` file
    located one level above ``base_folder``, then writes each of the four
    values returned by ``ksc.inc_ksc`` to its own text file inside
    ``base_folder``.

    Args:
        tseries_fpath: first argument given to ``ioutil.load_series``.
        base_folder: directory where the output files are written.
        k: converted with ``int`` before being handed to ``ksc.inc_ksc``.
    """
    n_clusters = int(k)
    parent_train = os.path.join(base_folder, '..', 'train.dat')
    data = ioutil.load_series(tseries_fpath, parent_train)

    cent, assign, shift, dists_cent = ksc.inc_ksc(data, n_clusters)

    def _save(fname, values, fmt):
        # Write one result array into base_folder.
        np.savetxt(os.path.join(base_folder, fname), values, fmt=fmt)

    _save('cents.dat', cent, '%.5f')
    _save('assign.dat', assign, '%d')
    _save('shift.dat', shift, '%d')
    _save('dists_cent.dat', dists_cent, '%.5f')
def main(tseries_fpath, centroids_fpath, test_fpath, assign_fpath, out_folder):
    """Save ``fit`` outputs for every prefix length of the test series.

    The centroids act as the training set, labelled ``0 .. n_centroids - 1``.
    For each ``num_pts`` from 1 to the number of columns of the test series,
    ``fit`` is called and its result saved to
    ``<out_folder>/probs-<num_pts>-pts.dat``.

    Args:
        tseries_fpath: first argument given to ``ioutil.load_series``.
        centroids_fpath: text file with the centroids.
        test_fpath: second argument given to ``ioutil.load_series``.
        assign_fpath: accepted but not used by this function.
        out_folder: directory receiving the output files; not created here.
    """
    centroids = np.genfromtxt(centroids_fpath)
    test_series = ioutil.load_series(tseries_fpath, test_fpath)

    # One label per centroid row.
    centroid_labels = np.arange(centroids.shape[0])
    longest_prefix = test_series.shape[1]

    for step in range(longest_prefix):
        num_pts = step + 1
        out_name = 'probs-%d-pts.dat' % num_pts
        probs = fit(centroids, centroid_labels, test_series, num_pts)
        np.savetxt(os.path.join(out_folder, out_name), probs)
def main(tseries_fpath, train_fpath, test_fpath, ytrain_fpath, ytest_fpath,
         out_folder):
    """Record the first prefix length at which each test series is classified.

    For every ``num_pts`` from 1 to the number of columns of the training
    series, ``fit`` is called and, per test series, the probability of its
    true class is stored. The first ``num_pts`` at which the prediction is
    correct with probability above chance (``1 / number of distinct training
    labels``) is remembered together with that probability.

    Files written to ``out_folder`` (which is not created here):
        ``class_summ-<num_pts>-pts.dat``: classification report per step.
        ``probs-<num_pts>-pts.dat``: probabilities returned by ``fit``.
        ``best-by.dat``: first successful ``num_pts`` per series, 0 if none.
        ``conf.dat``: true-class probability at that step.
        ``all-conf.dat``: true-class probability at every step.

    Args:
        tseries_fpath: first argument given to ``ioutil.load_series``.
        train_fpath: second argument to ``ioutil.load_series`` (training).
        test_fpath: second argument to ``ioutil.load_series`` (test).
        ytrain_fpath: text file with the training labels.
        ytest_fpath: text file with the true test labels.
        out_folder: directory receiving the output files.
    """
    Xtrain = ioutil.load_series(tseries_fpath, train_fpath)
    Xtest = ioutil.load_series(tseries_fpath, test_fpath)
    y_train = np.genfromtxt(ytrain_fpath)
    y_true = np.genfromtxt(ytest_fpath)

    # genfromtxt yields floats by default and NumPy rejects float indices,
    # so keep an integer copy just for column lookups into ``probs``.
    # ``y_true`` itself is left untouched for comparisons and the report.
    true_cols = y_true.astype(int)

    num_test = Xtest.shape[0]
    max_pts = Xtrain.shape[1]
    best_by = np.zeros(num_test)
    min_conf = np.zeros(num_test)
    all_probs = np.zeros(shape=(num_test, max_pts))

    # If confidence is equal to this, the classifier did nothing.
    lousy_conf = 1.0 / len(set(y_train))

    for num_pts in range(1, max_pts + 1):
        y_pred, probs = fit(Xtrain, y_train, Xtest, num_pts)

        for i in range(num_test):
            p_true = probs[i, true_cols[i]]
            # best_by == 0 means "not yet classified correctly".
            if (best_by[i] == 0 and y_pred[i] == y_true[i]
                    and p_true > lousy_conf):
                best_by[i] = num_pts
                min_conf[i] = p_true
            all_probs[i, num_pts - 1] = p_true

        summary_fpath = os.path.join(out_folder,
                                     'class_summ-%d-pts.dat' % num_pts)
        probs_fpath = os.path.join(out_folder, 'probs-%d-pts.dat' % num_pts)
        with open(summary_fpath, 'w') as summary_file:
            print(classification_report(y_true, y_pred), file=summary_file)
        np.savetxt(probs_fpath, probs)

    best_fpath = os.path.join(out_folder, 'best-by.dat')
    conf_fpath = os.path.join(out_folder, 'conf.dat')
    all_conf_fpath = os.path.join(out_folder, 'all-conf.dat')

    np.savetxt(best_fpath, best_by)
    np.savetxt(conf_fpath, min_conf)
    np.savetxt(all_conf_fpath, all_probs)
def main(tseries_fpath, train_fpath, test_fpath, ytrain_fpath, ytest_fpath,
         out_folder):
    """Record the first prefix length at which each test series is classified.

    For every ``num_pts`` from 1 to the number of columns of the training
    series, ``fit`` is called and, per test series, the probability of its
    true class is stored. The first ``num_pts`` at which the prediction is
    correct with probability above chance (``1 / number of distinct training
    labels``) is remembered together with that probability.

    Files written to ``out_folder`` (which is not created here):
        ``class_summ-<num_pts>-pts.dat``: classification report per step.
        ``probs-<num_pts>-pts.dat``: probabilities returned by ``fit``.
        ``best-by.dat``: first successful ``num_pts`` per series, 0 if none.
        ``conf.dat``: true-class probability at that step.
        ``all-conf.dat``: true-class probability at every step.

    Args:
        tseries_fpath: first argument given to ``ioutil.load_series``.
        train_fpath: second argument to ``ioutil.load_series`` (training).
        test_fpath: second argument to ``ioutil.load_series`` (test).
        ytrain_fpath: text file with the training labels.
        ytest_fpath: text file with the true test labels.
        out_folder: directory receiving the output files.
    """
    Xtrain = ioutil.load_series(tseries_fpath, train_fpath)
    Xtest = ioutil.load_series(tseries_fpath, test_fpath)
    y_train = np.genfromtxt(ytrain_fpath)
    y_true = np.genfromtxt(ytest_fpath)

    # genfromtxt yields floats by default and NumPy rejects float indices,
    # so keep an integer copy just for column lookups into ``probs``.
    # ``y_true`` itself is left untouched for comparisons and the report.
    true_cols = y_true.astype(int)

    num_test = Xtest.shape[0]
    max_pts = Xtrain.shape[1]
    best_by = np.zeros(num_test)
    min_conf = np.zeros(num_test)
    all_probs = np.zeros(shape=(num_test, max_pts))

    # If confidence is equal to this, the classifier did nothing.
    lousy_conf = 1.0 / len(set(y_train))

    for num_pts in range(1, max_pts + 1):
        y_pred, probs = fit(Xtrain, y_train, Xtest, num_pts)

        # ``range`` instead of the Python 2-only ``xrange``, matching the
        # outer loop.
        for i in range(num_test):
            p_true = probs[i, true_cols[i]]
            # best_by == 0 means "not yet classified correctly".
            if (best_by[i] == 0 and y_pred[i] == y_true[i]
                    and p_true > lousy_conf):
                best_by[i] = num_pts
                min_conf[i] = p_true
            all_probs[i, num_pts - 1] = p_true

        summary_fpath = os.path.join(out_folder,
                                     "class_summ-%d-pts.dat" % num_pts)
        probs_fpath = os.path.join(out_folder, "probs-%d-pts.dat" % num_pts)
        with open(summary_fpath, "w") as summary_file:
            print(classification_report(y_true, y_pred), file=summary_file)
        np.savetxt(probs_fpath, probs)

    best_fpath = os.path.join(out_folder, "best-by.dat")
    conf_fpath = os.path.join(out_folder, "conf.dat")
    all_conf_fpath = os.path.join(out_folder, "all-conf.dat")

    np.savetxt(best_fpath, best_by)
    np.savetxt(conf_fpath, min_conf)
    np.savetxt(all_conf_fpath, all_probs)
def main(tseries_fpath, centroids_fpath, test_fpath, assign_fpath, out_folder,
         gamma_max):
    """Save ``fit`` outputs for prefix lengths 1 through ``gamma_max``.

    The centroids act as the training set, labelled ``0 .. n_centroids - 1``.
    Each result is written to ``<out_folder>/probs-<num_pts>-pts.dat``.

    Args:
        tseries_fpath: first argument given to ``ioutil.load_series``.
        centroids_fpath: text file with the centroids.
        test_fpath: second argument given to ``ioutil.load_series``.
        assign_fpath: accepted but not used by this function.
        out_folder: directory receiving the output files; not created here.
        gamma_max: largest prefix length to evaluate; converted with ``int``.
    """
    last_step = int(gamma_max)

    centroids = np.genfromtxt(centroids_fpath)
    test_series = ioutil.load_series(tseries_fpath, test_fpath)

    # One label per centroid row.
    centroid_labels = np.arange(centroids.shape[0])

    for step in range(last_step):
        num_pts = step + 1
        out_name = 'probs-%d-pts.dat' % num_pts
        probs = fit(centroids, centroid_labels, test_series, num_pts)
        np.savetxt(os.path.join(out_folder, out_name), probs)
def main(tseries_fpath, train_fpath, centroids_fpath, classes_fpath,
         out_folder, gamma_max):
    """Record the first prefix length at which each series is classified.

    The centroids act as the training set, labelled ``0 .. n_centroids - 1``.
    For every ``num_pts`` from 1 to ``gamma_max``, ``fit`` is called and, per
    series, the probability of its true class is stored. The first
    ``num_pts`` at which the prediction is correct with probability above
    chance (``1 / number of centroids``) is remembered together with that
    probability.

    Files written to ``out_folder`` (which is not created here):
        ``class_summ-<num_pts>-pts.dat``: classification report per step.
        ``probs-<num_pts>-pts.dat``: probabilities returned by ``fit``.
        ``best-by.dat``: first successful ``num_pts`` per series, 0 if none.
        ``conf.dat``: true-class probability at that step.
        ``all-conf.dat``: true-class probability at every step.

    Args:
        tseries_fpath: first argument given to ``ioutil.load_series``.
        train_fpath: second argument given to ``ioutil.load_series``.
        centroids_fpath: text file with the centroids, read as floats.
        classes_fpath: text file with the true classes, read as integers.
        out_folder: directory receiving the output files.
        gamma_max: largest prefix length to evaluate; converted with ``int``.
    """
    gamma_max = int(gamma_max)

    X = ioutil.load_series(tseries_fpath, train_fpath)
    C = np.genfromtxt(centroids_fpath, dtype='f')

    y_train = np.arange(C.shape[0])
    y_true = np.genfromtxt(classes_fpath, dtype='i')

    num_series = X.shape[0]
    max_pts = gamma_max
    best_by = np.zeros(num_series)
    min_conf = np.zeros(num_series)
    all_probs = np.zeros(shape=(num_series, max_pts))

    # If confidence is equal to this, the classifier did nothing.
    lousy_conf = 1.0 / C.shape[0]

    for num_pts in range(1, max_pts + 1):
        y_pred, probs = fit(C, y_train, X, y_true, num_pts)

        # ``range`` instead of the Python 2-only ``xrange``, matching the
        # outer loop.
        for i in range(num_series):
            p_true = probs[i, y_true[i]]
            # best_by == 0 means "not yet classified correctly".
            if (best_by[i] == 0 and y_pred[i] == y_true[i]
                    and p_true > lousy_conf):
                best_by[i] = num_pts
                min_conf[i] = p_true
            all_probs[i, num_pts - 1] = p_true

        summary_fpath = os.path.join(out_folder,
                                     'class_summ-%d-pts.dat' % num_pts)
        probs_fpath = os.path.join(out_folder, 'probs-%d-pts.dat' % num_pts)
        with open(summary_fpath, 'w') as summary_file:
            print(classification_report(y_true, y_pred), file=summary_file)
        np.savetxt(probs_fpath, probs)

    best_fpath = os.path.join(out_folder, 'best-by.dat')
    conf_fpath = os.path.join(out_folder, 'conf.dat')
    all_conf_fpath = os.path.join(out_folder, 'all-conf.dat')

    np.savetxt(best_fpath, best_by)
    np.savetxt(conf_fpath, min_conf)
    np.savetxt(all_conf_fpath, all_probs)
def main(tseries_fpath, train_fpath, centroids_fpath, classes_fpath,
         out_folder, gamma_max):
    """Record the first prefix length at which each series is classified.

    The centroids act as the training set, labelled ``0 .. n_centroids - 1``.
    For every ``num_pts`` from 1 to ``gamma_max``, ``fit`` is called and, per
    series, the probability of its true class is stored. The first
    ``num_pts`` at which the prediction is correct with probability above
    chance (``1 / number of centroids``) is remembered together with that
    probability.

    Files written to ``out_folder`` (which is not created here):
        ``class_summ-<num_pts>-pts.dat``: classification report per step.
        ``probs-<num_pts>-pts.dat``: probabilities returned by ``fit``.
        ``best-by.dat``: first successful ``num_pts`` per series, 0 if none.
        ``conf.dat``: true-class probability at that step.
        ``all-conf.dat``: true-class probability at every step.

    Args:
        tseries_fpath: first argument given to ``ioutil.load_series``.
        train_fpath: second argument given to ``ioutil.load_series``.
        centroids_fpath: text file with the centroids, read as floats.
        classes_fpath: text file with the true classes.
        out_folder: directory receiving the output files.
        gamma_max: largest prefix length to evaluate; converted with ``int``.
    """
    gamma_max = int(gamma_max)

    X = ioutil.load_series(tseries_fpath, train_fpath)
    C = np.genfromtxt(centroids_fpath, dtype='f')

    y_train = np.arange(C.shape[0])
    y_true = np.genfromtxt(classes_fpath)

    # genfromtxt yields floats by default and NumPy rejects float indices,
    # so keep an integer copy just for column lookups into ``probs``.
    # ``y_true`` itself is passed unchanged to ``fit`` and the report.
    true_cols = y_true.astype(int)

    num_series = X.shape[0]
    max_pts = gamma_max
    best_by = np.zeros(num_series)
    min_conf = np.zeros(num_series)
    all_probs = np.zeros(shape=(num_series, max_pts))

    # If confidence is equal to this, the classifier did nothing.
    lousy_conf = 1.0 / C.shape[0]

    for num_pts in range(1, max_pts + 1):
        y_pred, probs = fit(C, y_train, X, y_true, num_pts)

        # ``range`` instead of the Python 2-only ``xrange``, matching the
        # outer loop.
        for i in range(num_series):
            p_true = probs[i, true_cols[i]]
            # best_by == 0 means "not yet classified correctly".
            if (best_by[i] == 0 and y_pred[i] == y_true[i]
                    and p_true > lousy_conf):
                best_by[i] = num_pts
                min_conf[i] = p_true
            all_probs[i, num_pts - 1] = p_true

        summary_fpath = os.path.join(out_folder,
                                     'class_summ-%d-pts.dat' % num_pts)
        probs_fpath = os.path.join(out_folder, 'probs-%d-pts.dat' % num_pts)
        with open(summary_fpath, 'w') as summary_file:
            print(classification_report(y_true, y_pred), file=summary_file)
        np.savetxt(probs_fpath, probs)

    best_fpath = os.path.join(out_folder, 'best-by.dat')
    conf_fpath = os.path.join(out_folder, 'conf.dat')
    all_conf_fpath = os.path.join(out_folder, 'all-conf.dat')

    np.savetxt(best_fpath, best_by)
    np.savetxt(conf_fpath, min_conf)
    np.savetxt(all_conf_fpath, all_probs)