def __init__(self, Q_0, hypotheses, examples, eta, delta, eps,
             teacher_type, exp_iter, mu, top_k):
    self.Q_0 = Q_0
    self.hypotheses = hypotheses
    self.examples = examples
    self.eta = eta
    self.delta = delta
    self.eps = eps
    self.teacher_type = teacher_type
    self.exp_iter = exp_iter
    self.accumulator = {}
    self.max_iter = 150
    self.mu = mu
    self.top_k = top_k
    self.learner = learner.learner(Q_0=Q_0, H=hypotheses, eta=eta)
    self.teacher = teacher.teacher(H=hypotheses,
                                   examples=examples,
                                   Q_0=Q_0,
                                   eta_learner=eta,
                                   delta=delta,
                                   eps=eps,
                                   teacher_type=teacher_type,
                                   max_iter=self.max_iter,
                                   mu=self.mu,
                                   top_k=self.top_k)
Example No. 2
def end_to_end_teaching(self):
    M_0 = self.env.get_M_0()
    M_t, feasible = self.teacher.get_target_M(M_0)
    env_t = self.modify_env(self.env, M_t)
    print(env_t.reward)
    self.learner = learner.learner(env_t)
    self.learner.UCRL(alpha=self.alpha_UCRL,
                      n_rounds=self.T_UCRL,
                      pi_no_attack=self.pi_no_attack,
                      M_0=M_0,
                      M_t=M_t,
                      cost_p=self.attackers_cost_p)
Example No. 3
    def __init__(self,
                 H,
                 examples,
                 Q_0,
                 eta_learner,
                 delta,
                 eps,
                 teacher_type,
                 max_iter,
                 mu,
                 top_k=10):
        self.H = H
        self.examples = examples
        self.Q_0 = Q_0
        self.Q_of_h_given_S = copy.deepcopy(Q_0)
        self.eta_learner = eta_learner
        self.delta = delta
        self.teacher_type = teacher_type
        if teacher_type == "plus_delta":
            self.eta_teacher = eta_learner + self.delta
        elif teacher_type == "minus_delta":
            self.eta_teacher = eta_learner - self.delta
        elif teacher_type in ("greedy", "random", "approximate_Q",
                              "noise_feature", "limited_ground_truth"):
            self.eta_teacher = eta_learner
        else:
            print("Unknown teacher type --", self.teacher_type)
            print("Please choose one of the following: [greedy, plus_delta, "
                  "minus_delta, random, approximate_Q, noise_feature, "
                  "limited_ground_truth]")
            exit(1)
        self.learner = learner.learner(Q_0=Q_0, H=H, eta=self.eta_teacher)
        self.eps = eps
        self.max_iter = max_iter
        self.mu = mu
        self.top_k = top_k
        self.h_star_index = utils.find_h_star_index_given_examples(H, examples)
        self.h_star = self.H[self.h_star_index]
        self.examples_train = utils.remove_all_inconsistent_examples(
            self.h_star, examples)
        self.list_of_error_h = utils.error_for_every_h(H, self.examples_train)
        if teacher_type == "approximate_Q":
            self.Q_0 = self.get_perturbed_initial_distribution()
        self.C_eps = self.get_C_eps()
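To make the rate dispatch above concrete, here is a minimal standalone sketch (the helper name and the numeric values are illustrative, not from the repo):

def teacher_rate(eta_learner, delta, teacher_type):
    # Mirrors the branch above: plus/minus_delta perturb the learner's rate;
    # every other recognized type reuses it unchanged.
    if teacher_type == "plus_delta":
        return eta_learner + delta
    if teacher_type == "minus_delta":
        return eta_learner - delta
    if teacher_type in ("greedy", "random", "approximate_Q",
                        "noise_feature", "limited_ground_truth"):
        return eta_learner
    raise ValueError("unknown teacher type: %s" % teacher_type)

print(teacher_rate(0.1, 0.02, "plus_delta"))   # ~0.12
print(teacher_rate(0.1, 0.02, "minus_delta"))  # ~0.08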
Example No. 4
    def compare_neighbor(self, y0, yn, var=None):
        """
        Compare full-size images over the neighbouring area.
        """
        # Build a fresh estimator sized to the image and update it per RGB channel.
        estr = learner((y0.shape[1], y0.shape[0]), [4, 4]).new_estimator()
        estr.update(y0[:, :, 0], yn[:, :, 0])
        estr.update(y0[:, :, 1], yn[:, :, 1])
        estr.update(y0[:, :, 2], yn[:, :, 2])
        n = len(estr.neighbor_similarity_flat)
        best_sim = np.zeros((n, 1))  # float array; an int array would truncate similarities
        for k in range(n):
            best_sim[k] = max(estr.neighbor_similarity_flat[k])
        if var is not None:
            best_sim = best_sim * var.flat
        return np.mean(best_sim)
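The reduction inside compare_neighbor (take the best similarity per region, optionally weight it, then average) can be sketched standalone in NumPy; the shapes and random data here are illustrative assumptions:

import numpy as np

rng = np.random.default_rng(0)
neighbor_similarity = rng.random((16, 9))   # 16 regions x 9 neighbour offsets
weights = rng.random(16)                    # e.g. per-region variance weights

best_sim = neighbor_similarity.max(axis=1)  # best match per region
print(best_sim.mean())                      # unweighted score
print((best_sim * weights).mean())          # weighted score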
Example No. 6
def dataProcess(self,
                file,
                data_path,
                model,
                feature_selector,
                normalize=False,
                missingValueTreatment=False,
                featureSelect=False):
    self.excel_data = pd.read_csv(file)
    self.model = model
    self.headers = self.excel_data.columns.values.tolist()
    for __header in self.headers:
        if self.header_ignore in __header:
            self.excel_data.drop([__header], axis=1, inplace=True)
        if self.normalization_ignore in __header:
            self.df[__header] = self.excel_data[__header]
            self.excel_data.drop([__header], axis=1, inplace=True)
        if self.class_s in __header:
            self.class_label = __header
    self.processed_headers = self.excel_data.columns.values.tolist()
    if missingValueTreatment:
        self.excel_data = pd.DataFrame(data=self.imputation(self.excel_data),
                                       columns=self.processed_headers)
    if normalize:
        self.dependent = self.excel_data[self.class_label]
        self.excel_data.drop([self.class_label], axis=1, inplace=True)
        self.excel_data = pd.concat(
            [self.DataNormalize(self.excel_data), self.df], axis=1)
        self.excel_data[self.class_label] = self.dependent
    if featureSelect:
        model = learner.learner(2)
        self.clf = model.selectedLearner(self.model)
        self.excel_data = self.featureSelction(self.excel_data,
                                               feature_selector)
    if self.excel_data.isnull().values.any():
        print("There are blank cells, please check.")
        print(self.excel_data.isnull().sum())
    self.excel_data.to_pickle(os.path.join(data_path, "processed_data.pkl"))
    self.excel_data.to_csv(os.path.join(data_path, "processed_data.csv"))
Example No. 7
def main():
    # Alternatively: "doms and probs/rover_domain (2).pddl" and
    # "doms and probs/rover_problem (2).pddl", or read the paths from sys.argv.
    domain_path = "doms and probs/satellite_domain_multi.pddl"
    problem_path = "doms and probs/satellite_problem_multi.pddl"

    # Extract the problem name from the problem file
    file = open(problem_path, 'r')
    policy_name = file.readline().replace("(define (problem ", "")
    while policy_name == "\n":
        policy_name = file.readline().replace("(define (problem ", "")
    policy_name = policy_name[0:policy_name.find(")")] + "_Q_Table"
    file.close()

    file = open(domain_path)
    is_deterministic = file.read().find("probabilistic") == -1
    file.close()

    file = open(problem_path)
    is_transparent = file.read().find("reveal") == -1
    file.close()

    # if (is_deterministic == False):
    # load/create the Q-table
    q = q_table.q_table()
    q.build_from_file(policy_name)
    # if sys.argv[1] == "-E":
    # execute mode
    #
    # if is_deterministic == False:
    #print LocalSimulator().run(domain_path, problem_path, Q_learner_executer.executer(q))
    # else:
    #print LocalSimulator().run(domain_path, problem_path, graph_executor.executer(q,Graph.Read("roverMqsdkJedCB9zj3HW+RaxCJQcsI6i4w3XNYU8vDEdkFo=.graphml", "graphml")))

    # elif sys.argv[1] == "-L":
    # atexit.register(q.save_to_file, policy_path=policy_name)
    # learn mode
    print(LocalSimulator().run(domain_path, problem_path, learner.learner(q)))
Example No. 8
import datagate
import learner
import evaluator
import sys

NumFeat = 46
NumberOfGames = int(sys.argv[1])
pastdata = datagate.getdata(NumberOfGames)

# This part uses learner.py, which applies linear regression and returns a vector of coefficients.
print("Begin learning...")
coe = learner.learner(pastdata, NumFeat)
print(coe)

# Test whether the learner works reasonably by allowing manual input.
while True:
    inp = [int(x) for x in input().split(" ")]
    if inp[0] == -1:
        break
    if len(inp) != NumFeat:
        print("Not a legal vector\n")
    else:
        print(evaluator.evaluator(coe, inp, NumFeat))

Example No. 9
import numpy as np
import pandas as pd

import learner
import machine_learning

# Scale the features so each row vector has unit norm.
textData.bagofwords = textData.tfidf_df.apply(lambda x: x / np.linalg.norm(x), 1)
#Include dummy variables for each class label in the dataframe
#####1 - Positive#####
#####0 - Negative#####
textData.bagofwords["classLabel"] = pd.get_dummies(all_data['sentiment'])['pos']
#Include the original text in the tfidf-dataframe
textData.bagofwords["origText"] = all_data.text

# Choose one example from each class: positive & negative
labeled_data = textData.bagofwords.loc[[0, 500]]

# Shuffle the remaining dataset
shuffle = textData.bagofwords.loc[np.random.permutation(
    textData.bagofwords[~textData.bagofwords.index.isin([0, 500])].index)]

#Use 150 for the pool of unlabeled, and 50 for the test data
unlabeled_data = shuffle[0:150]
test_data = shuffle[150::]

data1 = machine_learning.ActiveLearningDataset(labeled_data,classLabel="classLabel",origText="origText")
data2 = machine_learning.ActiveLearningDataset(unlabeled_data,classLabel="classLabel",origText="origText")
data3 = machine_learning.ActiveLearningDataset(test_data,classLabel="classLabel",origText="origText")

active_learner = learner.learner(data1,test_datasets=data3,probability=0,NBC=True)
length = len(data1.data)
active_learner.pick_initial_training_set(length)
active_learner.rebuild_models(undersample_first=True)

active_learner.unlabeled_datasets.add_data(data2.data)

active_learner.active_learn(10, num_to_label_at_each_iteration=2)
Example No. 10
def __init__(self):
    self.env = chess_env()
    self.learner = learner(self.env)
    self.learner.learn()
Example No. 11
    #         svm.set_data(XTrain, YTrain)
    #         svm.set_kernel('rbf')
    #         svm.make_kernel_matrix(gamma=gamma)
    #         svm.train(c)
    #         RQS = svm.test_error(XVal, YVal)
    #         if RQS<BestRSQ:
    #             BestC=c
    #             BestRSQ=RQS
    #
    # # get test error

    print "SVM -Pegasus"
    # Pegasus:
    Bestgamma = 0
    Bestmax_epochs=0
    BestRSQ = float('Inf')
    for gamma in range(1,5):
        for max_epochs in range (100,102):
            svm = learner()
            svm.set_data(XTrain, YTrain)
            svm.set_kernel('rbf')
            svm.make_kernel_matrix(gamma=gamma)
            svm.train_pegasos_kernelized(1, max_epochs)
            RQS = svm.test_error(XVal, YVal)
            if RQS<BestRSQ:
                Bestgamma=gamma
                Bestmax_epochs=max_epochs
                BestRSQ=RQS
    # get test error

Example No. 12
import numpy as np
import matplotlib.pyplot as plt

import learner as lnr  # assumption: `lnr` below refers to this page's learner module


def main():
    # For the first run, initialize random weights for the synapses;
    # for run (N+1), reuse the output synapses from run (N).

    # for first run
    hl = {1: [50, 100, 140]}
    hls = hl[1]
    input_size = 2
    num_of_iterations = 1000
    # mass = [0.145, 0.50, 0.05, 1, 1.50 , 2]
    mass = np.linspace(0.01, 2, 31)
    # mass = [0.145]
    frame = 0

    actions = actions_creator()

    act_mat = 1000 * np.ones([len(actions), len(actions)])
    R_mat = np.zeros([len(actions), len(actions)])
    q_mat = np.zeros([len(actions), len(actions)])

    errors_vec = 1000 * np.ones([len(actions)])

    # for other runs
    training_error = np.zeros([len(mass), num_of_iterations, 2])
    # synapse_0, synapse_1, synapse_2, synapse_3 = lnr.initialize_synapses(hls, input_size)
    mass_std = np.zeros(len(mass))
    for m in range(len(mass)):
        actions_count = np.zeros(len(actions))
        # print m
        synapse_0, synapse_1, synapse_2, synapse_3 = lnr.initialize_synapses(
            hls, input_size)
        for i in range(num_of_iterations):
            training_error[m, i, 0] = i
            if i == 0:
                current_action = np.random.randint(len(actions))
                prev_action = current_action

            action = actions[current_action]
            actions_count[current_action] += 1
            (l4_error, training_error[m, i, 1], synapse_0, synapse_1,
             synapse_2, synapse_3) = lnr.learner(synapse_0, synapse_1,
                                                 synapse_2, synapse_3,
                                                 action, mass[m])
            # print action, l4_error**2
            act_mat, next_action = critic_actor_v1(prev_action, current_action,
                                                   act_mat, l4_error)
            prev_action = current_action
            current_action = next_action
            # if m == 50: TODO: random learner
            #     current_action = np.random.randint(100)
            # if i % 1000 == 999 and m == 0:
            #     st = '{0}, {1}'.format(mass[m],i)
            #     plt.figure(st)
            #     ax = plt.subplot2grid((1, 1), (0, 0))
            #     ax.bar(np.linspace(0, len(actions), len(actions)), actions_count)
            # print actions_count
        mass_std[m] = np.std(actions_count)
    plt.figure('errors')
    colormap = plt.cm.gist_ncar
    # set_color_cycle was removed from Matplotlib; set_prop_cycle is the replacement.
    plt.gca().set_prop_cycle(color=[colormap(i) for i in np.linspace(0, 0.99, 10)])

    for m in range(len(mass)):
        if m % 15 == 0:
            c = ['r', 'b', 'g', 'y', 'k', 'm']

            # plt.figure('m = '+str(mass[m])+'kg')
            pl = 'm = %.2fkg' % (mass[m])
            plt.plot(training_error[m, :, 0],
                     training_error[m, :, 1],
                     label=pl)
            # plt.plot(training_error[m, :, 0], training_error[m, :, 1], label='m = ' + str(mass[m]) + 'kg')
            plt.xlabel('Steps')
            plt.ylabel('Loss function')
            plt.ylim([0, 40])
            image_names = 'graphs'
            save = False
            if save:
                for t in range(10):
                    plt.savefig('./figs1/' + image_names +
                                str(frame).zfill(5) + '.png',
                                format='png')
                    frame += 1
    plt.legend()

    plt.figure('std vs. mass')
    plt.scatter(mass, mass_std)
    plt.show()
Example No. 13
        curr_testData['classLabel'] = classDummies[col].loc[
            curr_testData.index]

        data1 = machine_learning.ActiveLearningDataset(curr_labeledData,
                                                       classLabel="classLabel",
                                                       origText="origText")
        data2 = machine_learning.ActiveLearningDataset(curr_unlabeledData,
                                                       classLabel="classLabel",
                                                       origText="origText")
        data3 = machine_learning.ActiveLearningDataset(curr_testData,
                                                       classLabel="classLabel",
                                                       origText="origText")

        #Create learner, with labeled dataset as initial training
        active_learner = learner.learner(data1,
                                         test_datasets=data3,
                                         NBC=False,
                                         className=classDefinitions[col])
        active_learner.load()
        classifiers[col] = active_learner
        # Confirm before starting a new classifier:
        while True:
            var = input(
                'Would you like to continue building a classifier for class %s? \nY or N? \nAnswer:'
                % classDefinitions[col])
            if var not in ('Y', 'N'):
                print('Choose either Y or N')
                continue
            else:
                break
        if var == 'N':
            continue
Example No. 14
__author__ = 'davidvinegar'
from learner import learner
import pandas as pd
import util
import matplotlib.pyplot as plt
import datetime as dt
import KNNLearner as knn

l = learner()
dates = pd.date_range('2007-12-31', '2009-12-31')
symbol = "IBM"

momentumDF = l.getMomentum(dates, symbol)
fiveDayPriceChange = l.getWeekPercentPriceChange(dates, symbol)
volatilityDF = l.getVolatility(dates, symbol)
bollingerBandDf = l.getBollingerBandVAlue(symbol, dates, volatilityDF)

stats = l.getStats(momentumDF, volatilityDF, bollingerBandDf)

bollingerBandDf = l.normalizeDataFrame(bollingerBandDf)
momentumDF = l.normalizeDataFrame(momentumDF)
volatilityDF = l.normalizeDataFrame(volatilityDF)

unalteredPrices = util.get_data([symbol], dates, addSPY=False).dropna()
fiveDayPriceChange, trainX, trainY, unalteredPrices = l.prepareTrainXandY(
    bollingerBandDf, fiveDayPriceChange, momentumDF, unalteredPrices,
    volatilityDF, symbol)

# To use linear regression instead of KNN, uncomment the LinRegLearner line
# and comment out the KNNLearner line.
# learner = lrl.LinRegLearner(verbose=True)  # create a LinRegLearner
learner = knn.KNNLearner(2, verbose=True)  # create a KNN learner
Example No. 15
    data_path: folder path where the dataset lives
    algo: required only when using recursive_feature_selector as the feature selector
    feature_selector: the feature-selection method to apply
    The last three flags control normalization, missing-value treatment,
    and feature selection, respectively.
++++++++++++++++++++++++++++++++++++
"""
dProcessor.dataProcess(file_path, data_path, algo, feature_selector, True,
                       True, False)
"""
++++++++++++++++++++++++++++++++++++
Cross-validation parameter:
    nFold = integer number of folds to create
++++++++++++++++++++++++++++++++++++
"""
nFold = 10
model = learner.learner(nFold)
 """
 ++++++++++++++++++++++++++++++++++++
 Learner to Run:
     Available options are -
     1) Naive Bayes: NB
     2) Decision Tree: DT
     3) Random Forest: RF
     4) Support Vector Machine: SVM
     5) Logistic Regression: LR
     6) Neural Network: MLP
     7) Our Implementation of Neural Network: NN
     8) AdaBoost: ADA
     9) Tree Based Naive Bayes with depth 2: NBL2
     10) Tree Based Naive Bayes with depth 3: NBL3
     11) Our Implementation of Naive Bayes: NBO
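Putting the two calls together, a minimal usage sketch assembled from the snippets on this page (the nFold constructor appears above and the 'DT' option in Example No. 20 below; this is an illustration, not code from the repo):

import learner

model = learner.learner(10)  # 10-fold cross-validation, per the nFold parameter above
model.train('DT')            # train one of the listed options, here a Decision Tree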
Example No. 16
import poker
import random

import learner

if __name__ == "__main__":
    deck = poker.poker()
    deck.shuffle()
    player1 = 'b'
    deck.registerPlayer(player1)
    player2 = 'learnerc'
    deck.registerPlayer(player2)
    hand1 = deck.deal(player1)
    hand2 = deck.deal(player2)
    l = learner.learner(hand2)
    l.learnerid = player2
    print("{}: {}".format(player1, " ".join(hand1)))
    print("{}: {}".format(player2, " ".join(hand2)))
    select1 = random.randint(0, 3)
    hand1 = deck.swap(player1, random.sample(hand1, select1))
    print("{} is swapping {} random cards.".format(player1, select1))
    print("\t{}".format(" ".join(hand1)))
    hand2 = deck.swap(player2, l.chooseswap())
    print("{} is swapping {} cards.".format(player2, len(hand2)))
    print("\t{}".format(" ".join(hand2)))

    # print("{}: {}".format(player1, " ".join(hand1)))
    # print("{}: {}".format(player2, " ".join(hand2)))
    winner = deck.resolve()
    print("Winner: {}".format(winner))
    l.recordwin(winner)
    # print(deck.deck)
Example No. 17
def find_neighbor_indices(self, y0):
    # pdb.set_trace()
    self.estimator = learner((y0.shape[1], y0.shape[0]),
                             [4, 4]).new_estimator()
    self.estimator.init_structures(y0[:, :, 0])
Example No. 18
import matplotlib.pyplot as plt
import numpy

from bandit import Bandit
from learner import learner

ARMS = 10
N_BANDITS = 2000
PLAYS = 1000

# pylint: disable=C0103
bandits = [Bandit() for _ in range(N_BANDITS)]
average_oracle = [
    numpy.average([bandits[i].oracle_value for i in range(N_BANDITS)])
] * PLAYS

history_value, history_optimal = learner(bandits, PLAYS, 'greedy')
history_value_0_01, history_optimal_0_01 = learner(bandits,
                                                   PLAYS,
                                                   'greedy',
                                                   eps=0.01)
history_value_0_1, history_optimal_0_1 = learner(bandits,
                                                 PLAYS,
                                                 'greedy',
                                                 eps=0.1)

plt.plot(range(len(history_value)),
         average_oracle,
         label="oracle")
Example No. 19
        with open(custom) as f:
            custom_args = yaml.safe_load(f)  # safe_load avoids PyYAML's unsafe default loader
        args.update(custom_args)
    args['cuda'] = args['cuda'] and torch.cuda.is_available()
    log_dir = f'results/{datetime.now().strftime("%Y-%m-%d-%H:%M:%S")}-{args["env"]}-{args["prefix"]}'
    log_dir = MPI.COMM_WORLD.bcast(log_dir, root=0)
    args['log_dir'] = log_dir
    if MPI.COMM_WORLD.Get_rank() == 0:
        os.makedirs(log_dir)
        with open(log_dir + '/configuration.yaml', 'w') as f:
            f.write(yaml.dump(args))
    args['device'] = torch.device('cuda' if args['cuda'] else 'cpu')
    return args


if __name__ == '__main__':
    torch.set_num_threads(1)
    args = parse()
    set_context(args['num_units'], args['num_actors'])
    if global_dict['unit_idx'] == args['num_units']:
        evaluator(args)
    else:
        rank = global_dict['rank_local']
        if rank == global_dict['rank_learner']:
            learner(args)
        elif rank == global_dict['rank_replay']:
            replay(args)
        else:
            actor_id = rank
            actor(args, actor_id)
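The rank-based dispatch above (one learner process, one replay process, the rest actors, plus a separate evaluator unit) is a common MPI pattern; a minimal self-contained sketch, with illustrative role names rather than the repo's globals:

from mpi4py import MPI

rank = MPI.COMM_WORLD.Get_rank()
size = MPI.COMM_WORLD.Get_size()

# One process becomes the learner, one the replay buffer, the rest act.
if rank == 0:
    role = "learner"
elif rank == 1:
    role = "replay"
else:
    role = "actor-%d" % rank
print("process %d of %d -> %s" % (rank, size, role))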
Example No. 20
# -*- coding: utf-8 -*-
"""
Created on Fri Mar  2 11:51:43 2018

@author: suvod
"""

import os
import pandas as pd
import dataProcessor
import learner

if __name__ == "__main__":
    data_loc = 'data'
    dataSet = "songDataSet.csv"
    cwd = os.getcwd()
    data_path = os.path.join(cwd, data_loc)
    file_path = os.path.join(data_path, dataSet)
    dProcessor = dataProcessor.dataProcessor()
    dProcessor.dataProcess(file_path, data_path, True, True, False)
    model = learner.learner()
    model.train('DT')
Example No. 21

        # Make copies of the datasets
        curr_labeledData = labeledData.copy()
        curr_unlabeledData = unlabeledData.copy()
        curr_testData = testData.copy()

        # Overwrite the old classLabel with binary class labels
        curr_labeledData['classLabel'] = classDummies[col].loc[curr_labeledData.index]
        curr_unlabeledData['classLabel'] = classDummies[col].loc[curr_unlabeledData.index]
        curr_testData['classLabel'] = classDummies[col].loc[curr_testData.index]

        data1 = machine_learning.ActiveLearningDataset(curr_labeledData, classLabel="classLabel", origText="origText")
        data2 = machine_learning.ActiveLearningDataset(curr_unlabeledData, classLabel="classLabel", origText="origText")
        data3 = machine_learning.ActiveLearningDataset(curr_testData, classLabel="classLabel", origText="origText")

        # Create the learner, with the labeled dataset as initial training
        active_learner = learner.learner(data1, test_datasets=data3, NBC=False, className=classDefinitions[col])
        active_learner.load()
        classifiers[col] = active_learner
        # Confirm before starting a new classifier:
        while True:
            var = input('Would you like to continue building a classifier for class %s? \nY or N? \nAnswer:' % classDefinitions[col])
            if var not in ('Y', 'N'):
                print('Choose either Y or N')
                continue
            else:
                break
        if var == 'N':
            continue
        length = len(data1.data)
        active_learner.pick_initial_training_set(length)
        active_learner.rebuild_models(undersample_first=True)
Example No. 22
from db import spartandb
from PyDictionary import PyDictionary
from learner import learner

read_file = open("subjects.txt", "r")
keywords = read_file.read().split(",")
keywords = list(set(keywords))
dbclient = spartandb()
dictionary = PyDictionary()
for keyword in keywords:
    dbclient.insert_subject(keyword.lower())
read_file.close()

read_file = open("objects.txt", "r")
keywords = read_file.read().split(",")
keywords = list(set(keywords))
dbclient = spartandb()
dictionary = PyDictionary()
for keyword in keywords:
    dbclient.insert_object(keyword.lower())
    for key in keyword.split(" "):
        if dictionary.synonym(key) is not None:
            for synonym in dictionary.synonym(key):
                dbclient.insert_object(synonym)

learner_mod = learner()
learner_mod.read()

#print dbclient.get_keywords()
Example No. 23
def main():
    filename = 'Eur-Lex'
    path = '../' + filename + '/' + filename
    # small checks
    # al=min_max_variance_active_learner(path)
    # al.active_select()
    # return
    """with open(path+'.seed.0','r') as fp:
        line=fp.readline()
    print len(line.strip().split(' '))
    return
    l=learner(path)
    l.read_sparse_data(path+'.train.norm',nfeatures=1837)
    
    sparse_list_data=[1,2,3,4,5,6,7,8]
    sparse_list_row=[0,1,2,3,4,5,6,7]
    sparse_list_col=[0,1,0,1,0,1,0,1]
    Q=csr_matrix((np.array(sparse_list_data),(np.array(sparse_list_row),np.array(sparse_list_col))),shape=(8,2))
    print Q.todense()
    Q=normalize(Q, axis=0, norm='l2')
    print Q.todense()
    return
    """
    # change tradeoff function calls
    if len(sys.argv) > 1:
        input_text = sys.argv[1]
        if input_text in ['tradeoff', 'check']:
            input_method = sys.argv[2]
    else:
        # Defaults; other values used during development: 'tradeoff', 'random',
        # 'check_minmax_l2distance', 'preprocess', 'multiple_seed', 'maxquery_vary_delta'.
        input_text = 'generate_seed'
        input_method = 'random'
    
    #**********************
    #***Tradeoff Section***
    #**********************
    if input_text in 'tradeoff':
        seed_file_idx=['0']#[str(i) for i in range(5)]
        seed_files=[path+'.seed.'+idx for idx in seed_file_idx]
        max_iter=2
        #**********************************
        if input_method in 'max_query_vary_delta':
            set_of_delta=list(np.arange(.1,.2,.01))
            for fseed in seed_files:
                for delta in set_of_delta:
                    active_learner_general(max_query_active_learner(path,delta=delta), fseed, fsave_ext='.delta_'+str(delta), max_iter=max_iter)
            return
        #*********************************
        if input_method in 'minmax':
            al= min_max_inner_prod(path)
        if input_method in 'minsum':#
            al= min_sum_inner_prod(path)
        if input_method in 'minmin':#
            al= min_min_inner_prod(path)
        if input_method in 'maxquery':#
            delta=.1
            save_fig_interval=50
            al=max_query_active_learner(path,delta=delta, save_fig_interval=save_fig_interval)
        if input_method in 'minmaxvar':
            al=min_max_variance_active_learner(path, count=5)
        if input_method in 'random':#
            al= random_active_learner(path)
        run_tradeoff_exp(al,seed_files,max_iter=max_iter)
    #**********************************************************
    #****************CHECK*************************************
    #**********************************************************
    if input_text in 'check':
        if input_method in 'tuning':
            l=learner(path)
            l.tune()
        if input_method in 'popular_n_rare':
            l=learner(' ')
            acc = l.train_test()
            with open('acc','a') as fp : 
                fp.write(' '.join([str(acc_val) for acc_val in acc])+'\n')
            #d=data_process()
            #d.get_seed(frac=.5,ftrain='train')
            #d.get_label()
            #l.read_out_file()
            #l.decide_popular_n_rare_labels()
            #l.get_acc()
            return
            """
            X,Y,Q=generate_sparse_data(nsamples=5,nlabels=2,nfeatures=2,density=.75)
            al=min_max_inner_prod(path)
            al.Xtrain=X 
            al.Ytrain=Y 
            al.Q=Q 
            for i in range(5):
                al.active_select()
            return"""
        if input_method in 'min_max_l2distance':
            check_min_max_dist_outer(path)    
        if input_method in 'ball_tree':
            X,Y,Q = generate_sparse_data(nsamples=8,nlabels=2,nfeatures=2,density=.75)
            al=min_max_l2distance_lower(path,leaf_size=2)
            al.Xtrain=X 
            al.Ytrain=Y 
            al.Q=Q
            al.create_ball_tree()
            al.brute_force_l2_norm(X,np.array(range(X.shape[0])))
            print('************\n\ndoing active select\n\n *******************')
            al.active_select()
            #al.show_ball_tree_n_points()
        if input_method in 'inner_prod':# checking is left
            k=1
            l=learner(path)
            l.read_metadata()
            # change ftrain  to ftrain_src
            l.ftrain=l.ftrain_src
            # run train and test * create Q, create out file , decide a rank value
            l.train_test(k=k,acc_method='popular_n_rare')# define rank, default k=1
            # read test file, 
            l.Ytest,l.Xtest=l.read_sparse_data(l.ftest, l.nfeatures)
            # compute out file values
            pred_computed = l.compute_k_scores(l.Xtest, l.Q, k=k) # change the output if required later
            Ytest_comp=[]
            score_comp=[]
            for sample_score in pred_computed:
                Ytest_comp.append(sample_score[0][0])
                score_comp.append(sample_score[0][1])
            # compare and report the result
            Ytest_pred, score_pred = l.read_out_file()
            for label1,label2,score1,score2 in zip(Ytest_comp,Ytest_pred, score_comp, score_pred):
                print('label1:'+str(label1)+',label2:'+str(label2)+',score1:'+str(score1)+',score2:'+str(score2)+'\n')
    #**********************************************************
    #****************PLOT*************************************
    #**********************************************************
    if input_text in 'plot':
        if input_method in 'active_score_per_iter':
            path='../results/exp7rep/bibtex.maxquery.count.'
            nfiles=40
            plot_active_score_per_iter(path, nfiles)
        if input_method in 'samples per labels':
            l=learner(path)
            l.read_metadata()
            Ytrain=l.read_sparse_data(l.ftrain_src,l.nfeatures)[0]
            l.find_samples_per_label(l.nlabels,Ytrain)
        if input_method in 'single_seed':
            plot_single_seed()
        if input_method in 'multiple_seed':
            seed_set=[str(i) for i in range(5) ]
            #seed_set=['']
            path='../results/testing/bibtex.acc.'  
            method_list=['minmaxdev','random']
            dp = data_process()
            dp.readfiles_outer(path, method_list, seed_set, num_of_acc=2,fsave=path) # modify
        if input_method in 'ad-hoc':
            path='../results/exp5/e/bibtex.acc.'
            method_list=['maxquery','random']
            file_list = [path+m for m in method_list]
            plot_your_choice(2 , file_list, method_list)
    #**********************************************************
    #****************OTHER*************************************
    #**********************************************************
    if input_text in 'preprocess':
        l = learner(path)
        list_of_files=[path+ext for ext in ['.train', '.test', '.heldout']]
        l.read_metadata()
        l.read_normalize_write_sparse_files(list_of_files,l.nfeatures)
    if input_text in 'min_max_l2distance':
        print('minmax l2 distance')
        leaf_size=20
        bound='lower'
        minmaxl2=min_max_l2distance(path,leaf_size,bound)
        fseed=path+'.seed.0'
        active_learner_general(minmaxl2,fseed)
        #active_learner_general(random_al,frac_seed)
    if input_text in 'generate_seed':
        #print 'seed generation'
        start_idx=0
        nseed_required=1#10
        gen_seed=data_process()
        frac_seed=.1
        ftrain=path+'.train.norm'
        for i in range(nseed_required):
            fseed=path+'.seed.'+str(i+start_idx)
            #print fseed
            #list_of_labels,nlabels =gen_seed.get_label(ftrain)
            seeds=gen_seed.get_seed(frac_seed,ftrain)# how to get nlabels
            #print len(seeds)
            
            #with open(fseed,'w') as fp:
            #    fp.write(' '.join(str(s) for s in seeds))
                
        #seed_generator.get_seed(frac_seed,fseed) 
    if input_text in 'changing_seed':# incomplete
        frac_l=0
        frac_u=0
        frac_diff=0
        gen_seed=data_process()
        for frac in range(frac_l,frac_u, frac_diff):
            seeds=gen_seed.get_seed(frac_seed,ftrain)
            with open(fseed,'w') as fp:
                fp.write(' '.join(str(s) for s in seeds))