def __init__(self, init_dat, train_dat, multi_t=False, random=True,
             do_active=False, rand_task=False):
    self.trained_x = init_dat['feature']
    self.trained_y = init_dat['label']
    self.train_pool = gen_land_pool(train_dat, multi_t)
    self.pool_size = util.dat_size(train_dat)
    self.total_pool = self.pool_size
    self.init_size = util.dat_size(init_dat)
    self.multi_task = multi_t
    self.rand_task = rand_task
    # Keep track of tasks that still have training instances.
    # (Matlab indexes tasks from one; here they are zero-based.)
    self.task_online = [t for t in range(0, T)]
    if random:
        # Shuffle each per-task pool separately in the multi-task setting,
        # otherwise shuffle the single flat pool.
        if multi_t:
            for task_pool in self.train_pool:
                np.random.shuffle(task_pool)
        else:
            np.random.shuffle(self.train_pool)
    self.do_active = do_active
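# Usage sketch for the constructor above. Professor, gen_init, and the
# {'feature': ..., 'label': ...} dataset dicts are this repo's; the seed
# size 5 mirrors the data scripts, and `learner` is any model wrapper
# exposing refine_model:
#
#   init_dat, pool_dat = gen_init(train_dat, 5)
#   prof = Professor(init_dat, pool_dat, multi_t=True, random=True,
#                    do_active=True)
#   while prof.has_next():
#       x, y, tasks = prof.next_train_set(INS_SIZE, learner=learner)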
def run_stl_landm(pool_dat, init_dat, test_dat, do_active):
    ###### Train Initial Model ######
    init_size = dat_size(init_dat)
    learner = MLLearner(LogisticRegression(), init_dat)
    print "Start training..."
    prof = Professor(init_dat, pool_dat, random=True, do_active=do_active)
    total_pool_size = prof.get_pool_size()
    print "pool", total_pool_size
    init_acc = util.model_roc_score(learner, test_dat)
    test_acc = [init_acc]
    learned_size = [init_size]
    ### Train until no more data is available OR the set N_ITER is reached ###
    count = N_ITER
    while prof.has_next():
        # print "pool", prof.get_pool_size()
        selected_x, selected_y, tasks = prof.next_train_set(INS_SIZE, learner=learner)
        # Group the selected training set by task
        next_train_x = [[] for i in range(0, T)]
        next_train_y = [[] for i in range(0, T)]
        for i in range(0, selected_x.shape[0]):
            t = int(tasks[i])
            next_train_x[t].append(selected_x[i, :])
            next_train_y[t].append(selected_y[i])
        for t in range(0, T):
            if next_train_x[t]:
                learner.refine_model(np.array(next_train_x[t]),
                                     np.array(next_train_y[t]), t)
        # acc = util.model_roc_score(learner, test_dat)
        acc = util.model_score(learner, test_dat)
        test_acc.append(acc)
        learned_size.append(total_pool_size - prof.get_pool_size() + init_size)
        if ITER_ENABLE:
            if count < 0:
                break
            count -= 1
    return test_acc, learned_size
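# Example call, assuming the landmine-format dicts produced by gen_init
# (see the data scripts below):
#
#   init_dat, pool_dat = gen_init(land_train, 5)
#   test_acc, learned_size = run_stl_landm(pool_dat, init_dat, land_test,
#                                          do_active=True)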
from data_process import gen_init
import numpy as np
import pickle as pk
from config import T
from util import dat_size
# NOTE: load_dat's home module is assumed here; the original script does
# not show this import.
from util import load_dat
import util

source = 'fera_forella'

## Load From Source ##
train_dat, test_dat = load_dat(source)
print "init dat"
init_dat, pool_dat = gen_init(train_dat, 5)

print "train size", dat_size(train_dat)
print "test size", dat_size(test_dat)
print "pool size", dat_size(pool_dat)
print "init size", dat_size(init_dat)

print "----> Writing to files..."
# Save to files
pool_f = open("data/pool", "wb")
pk.dump(pool_dat, pool_f)
pool_f.close()
test_f = open("data/test", "wb")
pk.dump(test_dat, test_f)
test_f.close()
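# Sanity-check sketch: the dumps above can be read back with pickle.load
# (presumably what util.load_pool / util.load_test wrap):
#
#   pool_f = open("data/pool", "rb")
#   pool_back = pk.load(pool_f)
#   pool_f.close()
#   print "reloaded pool size", dat_size(pool_back)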
import numpy as np
from numpy.random import shuffle
from sklearn.linear_model import LogisticRegression
from util import dat_size
from util import load_test
from util import load_init
from util import load_pool
# NOTE: the home modules of gen_land_pool, MLLearner, and shuffle are
# assumed; the original fragment does not show these imports.
from data_process import gen_land_pool
from learner import MLLearner

######## Panda ######
# Comparing Multiple Active Learner vs ELLA + ATS vs ELLA + ATS + AL vs
# ELLA + AL
# This file runs Pure Multiple Active Learner
#####################

###### Load Data ######
pool_dat = load_pool()
init_dat = load_init()
test_dat = load_test()
init_size = dat_size(init_dat)
train_pool = np.array(gen_land_pool(pool_dat))
shuffle(train_pool)

###### Train Initial Model ######
learner = MLLearner(LogisticRegression(), init_dat)
print "Start training..."
print "pool", len(train_pool)
total_pool_size = len(train_pool)
test_acc = []
learned_size = []
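# A self-contained sketch of the uncertainty sampling that do_active /
# next_train_set presumably perform: score the pool with the current model
# and pick the instances whose posterior is closest to 0.5. All names below
# are local to this sketch, not this repo's API.
#
#   import numpy as np
#   from sklearn.linear_model import LogisticRegression
#
#   def pick_uncertain(model, pool_x, k):
#       # Distance of P(y=1|x) from the decision boundary; smaller = less sure.
#       margin = np.abs(model.predict_proba(pool_x)[:, 1] - 0.5)
#       return np.argsort(margin)[:k]
#
#   x = np.random.rand(100, 9)
#   y = (x[:, 0] > 0.5).astype(int)
#   clf = LogisticRegression().fit(x, y)
#   print(pick_uncertain(clf, x, 4))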
eng.addpath(eng.genpath(ELLA_DIR))
# res = eng.runExperimentActiveTask()
# print res

######## Panda ######
# Comparing Multiple Active Learner vs ELLA + ATS vs ELLA + ATS + AL vs
# ELLA + AL
# This file runs ELLA + Active Task Selection
#####################

## Load all files
test_dat = util.add_bias(load_test())
pool_dat = util.add_bias(load_pool())
init_dat = util.add_bias(load_init())
init_size = util.dat_size(init_dat)

## Init ELLA Model with init set ##
ella_model = ELLA(eng, init_dat)
init_acc = ella_score(ella_model, test_dat)
test_acc = [init_acc]
learned_size = [init_size]

prof = Professor(init_dat, pool_dat, multi_t=True, random=True)
total_pool_size = prof.get_pool_size()
print "train pool size", total_pool_size

### Training Until No more data available OR Reach the set N_ITER ###
count = N_ITER
while prof.has_next():
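# `eng` above is assumed to be a MATLAB engine session created with
# MathWorks' official Python bindings, roughly:
#
#   import matlab.engine
#   eng = matlab.engine.start_matlab()
#   eng.addpath(eng.genpath(ELLA_DIR))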
from scipy.io import loadmat
import pickle as pk
from data_process import gen_init
from util import dat_size
# NOTE: load_landset's home module is assumed; the original fragment does
# not show these imports.
from util import load_landset

landmine = loadmat('data/LandmineData.mat')

#### Data Summary ####
# counts = np.zeros(2)
# for t in range(0, T):
#     counts += np.bincount(
#         landmine['label'][:, t][0].reshape(landmine['label'][:, t][0].shape[0]))
# print counts
# exit()

print "----> Loading & Splitting data..."
land_train, land_test = load_landset()
print "init dat"
init_dat, pool_dat = gen_init(land_train, 5)
print "train size", dat_size(land_train)
print "test size", dat_size(land_test)
print "pool size", dat_size(pool_dat)
print "init size", dat_size(init_dat)

print "----> Writing to files..."
# Save to files
pool_f = open("data/pool", "wb")
pk.dump(pool_dat, pool_f)
pool_f.close()
test_f = open("data/test", "wb")
pk.dump(land_test, test_f)
test_f.close()
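# Quick sketch for inspecting the raw .mat contents (loadmat returns a dict
# of numpy arrays keyed by MATLAB variable name):
print "mat variables", sorted(k for k in landmine if not k.startswith('__'))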
def run_ml_landmine(pool_dat, init_dat, test_dat, learner_class, engine=None,
                    do_active=False, task_ctrl=TASK_NONE,
                    evaluation=config.EVAL_ACC):
    if task_ctrl == TASK_NONE:
        active_task = False
        rand_task = False
    elif task_ctrl == TASK_RAND:
        active_task = False
        rand_task = True
    elif task_ctrl == TASK_ACTIVE:
        active_task = True
        rand_task = False

    # Copy all data
    test_dat_cp = copy.deepcopy(test_dat)
    init_dat_cp = copy.deepcopy(init_dat)
    pool_dat_cp = copy.deepcopy(pool_dat)

    ###### Train Initial Model ######
    init_size = dat_size(init_dat_cp)
    if learner_class is ELLA:
        # Add bias term
        test_dat_cp = util.add_bias(test_dat_cp)
        pool_dat_cp = util.add_bias(pool_dat_cp)
        init_dat_cp = util.add_bias(init_dat_cp)
        learner = learner_class(engine, init_dat_cp)
    else:
        learner = learner_class(LogisticRegression(), init_dat_cp)
    print "Start training..."
    test_acc = []
    learned_size = []
    prof = Professor(init_dat_cp, pool_dat_cp, random=True, do_active=do_active,
                     multi_t=active_task or rand_task, rand_task=rand_task)
    total_pool_size = prof.get_pool_size()
    init_acc = model_eval(learner, test_dat_cp, evaluation)
    test_acc = [init_acc]
    learned_size = [init_size]
    task_rec = []

    ### Training Until No more data available OR Reach the set N_ITER ###
    count = N_ITER
    while prof.has_next():
        # print "pool", prof.get_pool_size()
        selected_x, selected_y, tasks = prof.next_train_set(INS_SIZE, learner=learner)
        task_rec.append(tasks[0])
        # Group selected training set by tasks
        next_train_x = [[] for i in range(0, T)]
        next_train_y = [[] for i in range(0, T)]
        for i in range(0, selected_x.shape[0]):
            t = int(tasks[i])
            next_train_x[t].append(selected_x[i, :])
            next_train_y[t].append(selected_y[i])
        for t in range(0, T):
            if next_train_x[t]:
                learner.refine_model(np.array(next_train_x[t]),
                                     np.array(next_train_y[t]), t)
        acc = model_eval(learner, test_dat_cp, evaluation)
        # print acc
        test_acc.append(acc)
        # print learner.get_trained_size()
        learned_size.append(total_pool_size - prof.get_pool_size() + init_size)
        if ITER_ENABLE:
            if count < 0:
                break
            count -= 1
        if count % 20 == 0:
            if engine is not None:
                engine.clean_mem()

    util.curve_to_csv("res/task_save" + str(do_active) + " .csv", task_rec, [0])
    return test_acc, learned_size
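# Example calls, assuming pool/init/test dicts from this repo's loaders and,
# for the ELLA variant, a running MATLAB engine session `eng`:
#
#   acc, size = run_ml_landmine(pool_dat, init_dat, test_dat, MLLearner,
#                               do_active=True, task_ctrl=TASK_NONE)
#   acc, size = run_ml_landmine(pool_dat, init_dat, test_dat, ELLA,
#                               engine=eng, do_active=True,
#                               task_ctrl=TASK_ACTIVE,
#                               evaluation=config.EVAL_ACC)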