Пример #1
0
    def __init__(self,
                 init_dat,
                 train_dat,
                 multi_t=False,
                 random=True,
                 do_active=False,
                 rand_task=False):
        """Store the seed data, build the training pool and optionally shuffle it.

        Args:
            init_dat: indexable by 'feature' and 'label'; those two entries
                become the already-trained data. Also measured with
                util.dat_size.
            train_dat: passed to gen_land_pool to build the pool and to
                util.dat_size to obtain its size.
            multi_t: forwarded to gen_land_pool. When true, the pool is
                iterated and each element is shuffled on its own.
            random: when true, shuffle in place with np.random.shuffle.
            do_active: stored unchanged on the instance.
            rand_task: stored unchanged on the instance.
        """
        # Seed data that counts as already learned.
        self.trained_x = init_dat['feature']
        self.trained_y = init_dat['label']

        # Pool construction and size bookkeeping.
        self.train_pool = gen_land_pool(train_dat, multi_t)
        pool_count = util.dat_size(train_dat)
        self.pool_size = pool_count
        self.total_pool = pool_count
        self.init_size = util.dat_size(init_dat)

        # Plain configuration flags.
        self.multi_task = multi_t
        self.rand_task = rand_task
        self.do_active = do_active

        # Tasks that currently have training instances: initially every
        # index 0..T-1 (0-based).
        self.task_online = list(range(T))

        if random:
            # A multi-task pool is shuffled task by task; a single pool is
            # shuffled as one sequence.
            pools = self.train_pool if multi_t else [self.train_pool]
            for pool in pools:
                np.random.shuffle(pool)
Пример #2
0
def run_stl_landm(pool_dat, init_dat, test_dat, do_active):
    """Refine a single learner batch by batch and record its test scores.

    An MLLearner wrapping LogisticRegression is built from init_dat, then
    repeatedly refined with the batches a Professor hands out from pool_dat.
    After each batch the learner is scored on test_dat.

    Args:
        pool_dat: data the Professor draws training batches from.
        init_dat: seed data used to create both the learner and the Professor.
        test_dat: data every score is computed on.
        do_active: forwarded to the Professor.

    Returns:
        (test_acc, learned_size): two parallel lists. The first entry of each
        describes the learner before any refinement; one entry is appended
        per batch after that.
    """
    ###### Train Initial Model ######
    init_size = dat_size(init_dat)
    learner = MLLearner(LogisticRegression(), init_dat)

    # Single-argument print calls give the same output under the Python 2
    # print statement and the Python 3 print function.
    print("Start training...")

    prof = Professor(init_dat, pool_dat, random=True, do_active=do_active)
    total_pool_size = prof.get_pool_size()
    # The pool size comes from the Professor; the function has no local or
    # parameter that holds the raw pool.
    print("pool %s" % (total_pool_size,))

    # NOTE(review): this first point is scored with model_roc_score while the
    # points added in the loop use model_score -- confirm the curve is meant
    # to mix the two.
    init_acc = util.model_roc_score(learner, test_dat)
    test_acc = [init_acc]
    learned_size = [init_size]

    ### Training Until No more data available OR Reach the set N_ITER ###
    count = N_ITER
    while prof.has_next():
        selected_x, selected_y, tasks = prof.next_train_set(INS_SIZE,
                                                            learner=learner)

        # Group selected training set by tasks
        next_train_x = [[] for _ in range(T)]
        next_train_y = [[] for _ in range(T)]

        for i in range(selected_x.shape[0]):
            t = int(tasks[i])
            next_train_x[t].append(selected_x[i, :])
            next_train_y[t].append(selected_y[i])

        # Only tasks that actually received instances are refined.
        for t in range(T):
            if next_train_x[t]:
                learner.refine_model(np.array(next_train_x[t]),
                                     np.array(next_train_y[t]), t)

        acc = util.model_score(learner, test_dat)

        test_acc.append(acc)
        # Instances consumed from the pool so far, plus the seed set.
        learned_size.append(total_pool_size - prof.get_pool_size() + init_size)

        if ITER_ENABLE:
            if count < 0:
                break
            count -= 1

    return test_acc, learned_size
Пример #3
0
from data_process import gen_init
import numpy as np
import pickle as pk
from config import T
from util import dat_size
import util

source = 'fera_forella'

## Load From Source ##
# NOTE(review): load_dat is not among the imports visible in this file --
# confirm where it comes from.
train_dat, test_dat = load_dat(source)

# Single-argument print calls give the same output under the Python 2 print
# statement and the Python 3 print function.
print("init dat")
init_dat, pool_dat = gen_init(train_dat, 5)

print("train size %s" % (dat_size(train_dat),))
print("test  size %s" % (dat_size(test_dat),))
print("pool  size %s" % (dat_size(pool_dat),))
print("init  size %s" % (dat_size(init_dat),))

print("----> Writing to files...")

# Save to files. The with-blocks close each file even if pickling fails.
with open("data/pool", "wb") as pool_f:
    pk.dump(pool_dat, pool_f)

with open("data/test", "wb") as test_f:
    pk.dump(test_dat, test_f)
from util import load_test
from util import load_init
from util import load_pool

######## Panda ######
# Comparing Multiple Active Learner vs ELLA + ATS vs ELLA + ATS + AL vs
# ELLA + AL
# This file runs Pure Multiple Active Learner
#####################

###### Load Data ######
# Pool, seed and test sets come from the util loaders imported above.
pool_dat = load_pool()
init_dat = load_init()
test_dat = load_test()

# Size of the seed set as reported by dat_size.
init_size = dat_size(init_dat)

# Build the pool with gen_land_pool, wrap it in an ndarray and shuffle it.
# NOTE(review): `shuffle` is not imported in the visible part of the file and
# its return value is discarded -- confirm that it shuffles in place.
train_pool = np.array(gen_land_pool(pool_dat))
shuffle(train_pool)

###### Train Initial Model ######

# The learner wraps a fresh LogisticRegression and is given the seed set.
learner = MLLearner(LogisticRegression(), init_dat)

print "Start training..."

print "pool", len(train_pool)
total_pool_size = len(train_pool)
# Result accumulators; they start empty and nothing in the visible code
# fills them.
test_acc = []
learned_size = []
Пример #5
0
# Add ELLA_DIR, expanded through eng.genpath, to the engine's path.
# (presumably `eng` is a MATLAB engine handle -- TODO confirm)
eng.addpath(eng.genpath(ELLA_DIR))
# res = eng.runExperimentActiveTask()
# print res


######## Panda ######
# Comparing Multiple Active Learner vs ELLA + ATS vs ELLA + ATS + AL vs
# ELLA + AL
# This file runs ELLA + Active Task Selection
#####################

## Load all files
# Every set is passed through util.add_bias right after loading.
test_dat = util.add_bias(load_test())
pool_dat = util.add_bias(load_pool())
init_dat = util.add_bias(load_init())
init_size = util.dat_size(init_dat)

## Init ELLA Model with init set ##
ella_model = ELLA(eng, init_dat)
# The score of the freshly initialised model and the seed-set size become the
# first entry of each result list.
init_acc = ella_score(ella_model, test_dat)
test_acc = [init_acc]
learned_size = [init_size]


# The Professor is created with multi_t=True and with shuffling requested.
prof = Professor(init_dat, pool_dat, multi_t=True, random=True)
total_pool_size = prof.get_pool_size()
print "train pool size", total_pool_size

# ### Training Until No more data available OR Reach the set N_ITER ###
# presumably consumed by the training loop that follows -- its body is not
# visible here.
count = N_ITER
while prof.has_next():
Пример #6
0
# Raw landmine data. Nothing in the visible code reads it apart from the
# commented-out summary below.
landmine = loadmat('data/LandmineData.mat')

#### Data Summary ####
# counts = np.zeros(2)
# for t in range(0, T):
# counts += np.bincount(landmine['label'][:, t][0].reshape(landmine['label'][:, t][0].shape[0]))
# print counts
# exit()

# Single-argument print calls give the same output under the Python 2 print
# statement and the Python 3 print function.
print("----> Loading & Splitting data...")
land_train, land_test = load_landset()
print("init dat")
init_dat, pool_dat = gen_init(land_train, 5)

print("train size %s" % (dat_size(land_train),))
print("test  size %s" % (dat_size(land_test),))
print("pool  size %s" % (dat_size(pool_dat),))
print("init  size %s" % (dat_size(init_dat),))

print("----> Writing to files...")

# Save to files. The with-blocks close each file even if pickling fails.
with open("data/pool", "wb") as pool_f:
    pk.dump(pool_dat, pool_f)

with open("data/test", "wb") as test_f:
    pk.dump(land_test, test_f)
Пример #7
0
def run_ml_landmine(pool_dat,
                    init_dat,
                    test_dat,
                    learner_class,
                    engine=None,
                    do_active=False,
                    task_ctrl=TASK_NONE,
                    evaluation=config.EVAL_ACC):
    """Refine a learner batch by batch and record its evaluation curve.

    The three data sets are deep-copied first, so the caller's objects are
    not modified. A learner of type learner_class is built from the seed
    data and refined with the batches a Professor hands out from the pool.
    After each batch it is evaluated on the test data.

    Args:
        pool_dat: data the Professor draws training batches from.
        init_dat: seed data used to create the learner and the Professor.
        test_dat: data every evaluation is computed on.
        learner_class: learner type. ELLA is built as
            learner_class(engine, init) after util.add_bias is applied to all
            three data sets; anything else is built as
            learner_class(LogisticRegression(), init).
        engine: passed to ELLA. When not None, its clean_mem() is called
            periodically.
        do_active: forwarded to the Professor and used in the name of the
            CSV file that stores the task record.
        task_ctrl: one of TASK_NONE, TASK_RAND or TASK_ACTIVE.
        evaluation: forwarded to model_eval.

    Returns:
        (test_acc, learned_size): two parallel lists. The first entry of each
        describes the learner before any refinement; one entry is appended
        per batch after that.

    Raises:
        ValueError: if task_ctrl is none of the three known values.
    """
    if task_ctrl == TASK_NONE:
        active_task, rand_task = False, False
    elif task_ctrl == TASK_RAND:
        active_task, rand_task = False, True
    elif task_ctrl == TASK_ACTIVE:
        active_task, rand_task = True, False
    else:
        # Without this branch both flags stay unbound and the Professor call
        # below fails with a confusing UnboundLocalError.
        raise ValueError("unknown task_ctrl: %r" % (task_ctrl,))

    # Copy all data
    test_dat_cp = copy.deepcopy(test_dat)
    init_dat_cp = copy.deepcopy(init_dat)
    pool_dat_cp = copy.deepcopy(pool_dat)

    ###### Train Initial Model ######
    # Measured before any bias column is added.
    init_size = dat_size(init_dat_cp)

    if learner_class is ELLA:
        # Add bias term
        test_dat_cp = util.add_bias(test_dat_cp)
        pool_dat_cp = util.add_bias(pool_dat_cp)
        init_dat_cp = util.add_bias(init_dat_cp)

        learner = learner_class(engine, init_dat_cp)
    else:
        learner = learner_class(LogisticRegression(), init_dat_cp)

    # A single-argument print call gives the same output under the Python 2
    # print statement and the Python 3 print function.
    print("Start training...")

    prof = Professor(init_dat_cp,
                     pool_dat_cp,
                     random=True,
                     do_active=do_active,
                     multi_t=active_task or rand_task,
                     rand_task=rand_task)
    total_pool_size = prof.get_pool_size()

    init_acc = model_eval(learner, test_dat_cp, evaluation)
    test_acc = [init_acc]
    learned_size = [init_size]
    task_rec = []

    ## Training Until No more data available OR Reach the set N_ITER ###
    count = N_ITER
    # `count` only moves when ITER_ENABLE is set, so the periodic cleanup is
    # driven by a counter that ticks on every pass. It starts at N_ITER so
    # the cleanup cadence is unchanged when ITER_ENABLE is set.
    tick = N_ITER
    while prof.has_next():
        selected_x, selected_y, tasks = prof.next_train_set(INS_SIZE,
                                                            learner=learner)
        # Only the task of the first selected instance is recorded.
        task_rec.append(tasks[0])

        # Group selected training set by tasks
        next_train_x = [[] for _ in range(T)]
        next_train_y = [[] for _ in range(T)]

        for i in range(selected_x.shape[0]):
            t = int(tasks[i])
            next_train_x[t].append(selected_x[i, :])
            next_train_y[t].append(selected_y[i])

        # Only tasks that actually received instances are refined.
        for t in range(T):
            if next_train_x[t]:
                learner.refine_model(np.array(next_train_x[t]),
                                     np.array(next_train_y[t]), t)

        acc = model_eval(learner, test_dat_cp, evaluation)
        test_acc.append(acc)
        # Instances consumed from the pool so far, plus the seed set.
        learned_size.append(total_pool_size - prof.get_pool_size() + init_size)

        tick -= 1
        if ITER_ENABLE:
            if count < 0:
                break
            count -= 1

        if tick % 20 == 0 and engine is not None:
            engine.clean_mem()

    # NOTE(review): the space before ".csv" looks accidental; it is kept
    # because readers of this file may depend on the exact name.
    util.curve_to_csv("res/task_save" + str(do_active) + " .csv", task_rec,
                      [0])

    return test_acc, learned_size