import random

import numpy as np
from sklearn import svm
from sklearn.cross_validation import StratifiedKFold  # pre-0.18 sklearn API
from sklearn.metrics import zero_one_loss

# Project-local helpers assumed importable from the surrounding package:
# learner, decoder, encoder, estimate_rho, l1_loss, generate_Y, generate_mnist,
# create_data, smote, DE_Tune_ML, DE_Tune_SMOTE.


def test_unknown_data():
    n = 100
    l = 1
    d = int(np.ceil(n ** 0.15))
    rho = 0.01
    N = int(np.log(n) / rho ** 2)
    Y = generate_Y(n, l, d, rho, N)
    Gp, Hp = learner(n, l, d, rho, Y, np.zeros((l, n, n)), np.zeros((N, n)))
    print 'Learned the network'
    Yp = decoder(Gp, Hp)
    print 'learner: training error %.4f' % l1_loss(Y, Yp)
def test_mnist():
    # fname: module-level path to the MNIST data file
    train_set, valid_set, test_set = generate_mnist(fname)
    Y, target = train_set
    Yt, targett = test_set
    # pdb.set_trace()
    # Y = np.concatenate((Y, 0*Y), axis=1)
    # Yt = np.concatenate((Yt, 0*Yt), axis=1)
    N, n = Y.shape
    l = 4
    d = int(np.ceil(n ** 0.19))
    rho = estimate_rho(l, d, Y)
    # N = int(np.log(n)/rho**2)
    if N >= Y.shape[0]:
        print 'samples N: %d, Y: %d' % (N, Y.shape[0])
    else:
        N = int(np.log(n) / rho ** 2)
    Y = Y[0:N, :]
    target = target[0:N]
    Yt = Yt[0:N, :]
    targett = targett[0:N]
    print n, l, d, rho, N
    Gp, Hp = learner(n, l, d, rho, Y)
    print 'Learned the network'
    Yp = decoder(Gp, Hp)
    print 'NN training error: %.4f' % l1_loss(Y, Yp)
    if 1:
        clf = svm.LinearSVC(loss='l2', penalty='l1', dual=False)
        clf.fit(Hp, target)
        targetp = clf.predict(Hp)
        print 'training error: %.4f' % zero_one_loss(target, targetp)
        Htp = encoder(d, Gp, Yt)
        targettp = clf.predict(Htp)
        print 'test error: %.4f' % zero_one_loss(targett, targettp)
def test_learner():
    training_data, test_data = create_data()
    G, H, Y, n, l, d, rho, N = training_data
    Gt, Ht, Yt, nt, lt, dt, rhot, Nt = test_data
    print n, l, d, rho, N
    l = 1
    d = 2
    rho = estimate_rho(l, d, Y)
    Gp, Hp = learner(n, l, d, rho, Y, G, H)
    print 'Learned the network'
    Yp = decoder(Gp, Hp)
    print 'learner: training error %.4f' % l1_loss(Y, Yp)
    Htp = encoder(d, Gp, Yt)
    Ytp = decoder(Gp, Htp)
    print 'learner: test error %.4f' % l1_loss(Yt, Ytp)
def cross_val(pd_data, learner, target_class, goal, isWhat="", fold=5, repeats=2):
    """Do `repeats` runs of stratified `fold`-fold cross-validation."""

    def tune_learner(train_X):
        train_len = len(train_X)
        # hold out 30% of the training rows for tuning; np.random.choice needs
        # an integer size, and replace=False keeps the sampled indices unique
        new_train_index = np.random.choice(range(train_len),
                                           int(train_len * 0.7), replace=False)
        new_tune_index = list(set(range(train_len)) - set(new_train_index))
        new_train_X = train_X[new_train_index]
        new_train_Y = train_Y[new_train_index]  # train_Y: closure over the fold loop below
        new_tune_X = train_X[new_tune_index]
        new_tune_Y = train_Y[new_tune_index]
        clf = learner(new_train_X, new_train_Y, new_tune_X, new_tune_Y, goal)
        tuner = DE_Tune_ML(clf, clf.get_param(), target_class, goal)
        return tuner.Tune()

    def tune_SMOTE(train_pd):
        train_len = len(train_pd)
        new_train_index = random.sample(train_pd.index, int(train_len * 0.7))
        new_train = train_pd.ix[new_train_index]
        if "_TunedSmote" in isWhat:
            new_train_X = new_train.ix[:, new_train.columns[:-1]].values
            new_train_Y = new_train.ix[:, new_train.columns[-1]].values
            new_tune = train_pd.drop(new_train_index)
            new_tune_X = new_tune.ix[:, new_tune.columns[:-1]].values
            new_tune_Y = new_tune.ix[:, new_tune.columns[-1]].values
            # clf = learner(new_train_X, new_train_Y, new_tune_X, new_tune_Y)
            A_smote = smote(new_train)
            num_range = [[int(A_smote.get_majority_num() * 0.5),
                          int(A_smote.get_majority_num() * 1.5)]] * (A_smote.label_num - 1)
            params_to_tune = {"k": [2, 20], "up_to_num": num_range}
            # pdb.set_trace()
            tuner = DE_Tune_SMOTE(learner, smote, params_to_tune, new_train,
                                  new_tune, target_class, goal)
            params = tuner.Tune()
            return params, new_train

    F = {}
    total_evaluation = 0
    for i in xrange(repeats):  # repeat the whole cross-validation `repeats` times
        kf = StratifiedKFold(pd_data.ix[:, pd_data.columns[-1]].values, fold,
                             shuffle=True)
        for train_index, test_index in kf:
            train_pd = pd_data.ix[train_index]
            test_pd = pd_data.ix[test_index]
            if "Smote" in isWhat:
                k = 5
                up_to_num = []
                if "_TunedSmote" in isWhat:
                    # use the reduced training data, not the original, because
                    # some rows were held out for tuning
                    params, train_pd = tune_SMOTE(train_pd)
                    k = params["k"]
                    up_to_num = params["up_to_num"]
                train_pd = smote(train_pd, k, up_to_num).run()
            train_X = train_pd.ix[:, train_pd.columns[:-1]].values
            train_Y = train_pd.ix[:, train_pd.columns[-1]].values
            test_X = test_pd.ix[:, test_pd.columns[:-1]].values
            test_Y = test_pd.ix[:, test_pd.columns[-1]].values
            params, evaluation = (tune_learner(train_X)
                                  if "_TunedLearner" in isWhat else ({}, 0))
            F = learner(train_X, train_Y, test_X, test_Y, goal).learn(F, **params)
            total_evaluation += evaluation
    # float() avoids Python 2 integer division when averaging the DE evaluations
    avg_evaluation = total_evaluation / float(repeats * fold)
    return avg_evaluation, F
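# A minimal usage sketch, not part of the original module: it assumes `pd_data`
# is a pandas DataFrame whose last column holds the class label, and that
# `learner` follows the (train_X, train_Y, test_X, test_Y, goal) constructor
# used above. 'data.csv', target_class=1, and goal='F' are hypothetical
# placeholders, not values taken from the source.
if __name__ == '__main__':
    import pandas as pd
    pd_data = pd.read_csv('data.csv')  # hypothetical input file
    avg_evaluation, F = cross_val(pd_data, learner, target_class=1, goal='F',
                                  isWhat="_TunedLearner", fold=5, repeats=2)
    print 'avg tuning evaluations per fold: %.1f' % avg_evaluation
    print F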