def writeFilesToDisk(self):
    # print 10*"=", "writeFilesToDisk"
    save(self.files, self.file_files)
    save(self.folders, self.file_folders)
    save(self.copyFiles, self.file_copyFiles)
    save(self.newFiles, self.file_newFiles)
    save(self.newFolders, self.file_newFolders)
def headers(self):
    """Build request headers with a bearer token and persist auth preferences."""
    headers = {
        "Authorization": "Bearer " + str(self.auth.Bearer(self.user_Id))
    }
    save(self.auth.prefs, self.file_prefs)
    return headers
def save_predictor(self, file_name):
    """
    saves predictor to file system for later use

    @param file_name: file name to save predictor
    @type file_name: str
    """
    print "saving predictor to", file_name
    print self.predictor

    try:
        helper.save(file_name, self.predictor, "gzip")
    except Exception, detail:
        print "error writing predictor"
        print detail
def main():
    (total, General, Inventory) = traverse()
    print(total)
    # General = hp.load('genre.txt')
    hp.save(General, 'genre.txt')
    hp.save(Inventory, 'inventory.txt')
    raw_genre_list = hp.sort_genre(General)
    print(raw_genre_list)

    # test-part
    # traverse()
    # structure('TRAAADZ128F9348C2E.h5', True)
    # print(extract('TRAAAAW128F429D538.h5'))
    print()
def main(): base_dir = "/fml/ag-raetsch/home/cwidmer/Documents/phd/projects/multitask/data/translation_start/" organisms = os.listdir(base_dir) for org_name in organisms: work_dir = base_dir + org_name + "/" (neg, pos) = create_seq_data(org_name, work_dir) result = {} result["pos"] = pos result["neg"] = neg print "=======================" print "%s pos=%i, neg=%i" % (org_name, len(pos), len(neg)) save_fn = work_dir + "seqs.pickle" helper.save(save_fn, result)
def main(): base_dir = "data/splice" organisms = os.listdir(base_dir) for org_name in organisms: print "processing", org_name work_dir = base_dir + org_name + "/" (neg, pos) = create_seq_data(org_name, work_dir) result = {} result["pos"] = pos result["neg"] = neg print "=======================" print "%s pos=%i, neg=%i" % (org_name, len(pos), len(neg)) save_fn = work_dir + "seqs_acc.pickle" helper.save(save_fn, result)
        labels_test, parameter_dict, init_dir, all_init_files, cross_validate='test')

    """ create hebbian convolution neural network """
    net = hebbian_cnn.Network(**parameter_dict)

    """ train network """
    perf_train = net.train(images_train, labels_train)

    """ test network """
    perf_test = net.test(images_test, labels_test)

    """ plot weights of the network """
    plots = helper.generate_plots(net)

    """ save network to disk """
    save_name = helper.save(net, overwrite=False, plots=plots, save_path=save_path)

    """ collect results from multiple runs """
    perf_train_all, perf_test_all = helper.mutliruns_collect(
        n_runs, r, perf_train, perf_test, perf_train_all, perf_test_all,
        save_path_multiruns)

    """ print run time """
    run_stop = time.time()
    print '\nrun name:\t' + save_name
    print 'start time:\t' + time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(run_start))
    print 'end time:\t' + time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime(run_stop))
    print 'train time:\t' + str(datetime.timedelta(seconds=run_stop - run_start))
import math

import helper

VAR = 3
EPS = 1e-5

xs = range(-5, 6)
sums = []
ns = []

for x in xs:
    fx = VAR * x
    sum = fx
    i = 3
    n = 0
    while math.fabs(fx) > EPS:
        # float() avoids integer (floor) division if this runs under Python 2
        fx *= -((VAR * x) ** 2) / float(i * (i - 1))
        sum += fx
        i += 2
        n += 1
    sums.append(round(sum, 4))
    ns.append(n)

helper.print_table(table_headers=['x', 'f(x)', 'n'],
                   table_values=[xs, sums, ns],
                   table_title='Taylor sum')
helper.save(value_list=[xs, sums], to='values.txt')
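The loop above accumulates what looks like the Maclaurin series of sin(VAR * x): each step multiplies the previous term by -t^2 / (i * (i - 1)) with i = 3, 5, 7, ... A quick sanity check of that reading (not part of the original script, and assuming the variables above are still in scope) could compare the rounded partial sums against math.sin:

# hypothetical sanity check: each rounded partial sum should match sin(VAR * x)
# to within rounding (1e-4) plus truncation (EPS) error
for x, s in zip(xs, sums):
    assert abs(s - math.sin(VAR * x)) < 1e-3, (x, s, math.sin(VAR * x))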
                                          shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=64)
validloader = torch.utils.data.DataLoader(validate_data, batch_size=64)

if args.arch == 'vgg':
    input_size = 25088
    model = models.vgg16(pretrained=True)
elif args.arch == 'resnet':
    input_size = 2048
    # assumption: resnet50 was intended here (its head expects 2048 features);
    # the original loaded models.alexnet under the 'resnet' branch
    model = models.resnet50(pretrained=True)

# freeze the pretrained feature extractor
for param in model.parameters():
    param.requires_grad = False

classifier = nn.Sequential(nn.Linear(input_size, args.hidden_layers),
                           nn.ReLU(),
                           nn.Dropout(p=0.5),
                           nn.Linear(args.hidden_layers, 102),
                           nn.LogSoftmax(dim=1))

if args.arch == 'resnet':
    model.fc = classifier            # torchvision resnets expose their head as .fc
    params_to_train = model.fc.parameters()
else:
    model.classifier = classifier
    params_to_train = model.classifier.parameters()

print(model)

criterion = nn.NLLLoss()
device = args.gpu
optimizer = optim.Adam(params_to_train, args.lr)

loss, accuracy = helper.validate(model, criterion, testloader, device)
print(f"loss: {loss} \n Accuracy: {accuracy}")

epochs = args.epochs
model = helper.train(model, optimizer, criterion, epochs, trainloader, validloader, device)
helper.accuracy(model, testloader, device)
helper.save(model, train_data, args.arch, input_size, args.hidden_layers, epochs, args.lr)
        if Tag.has_key(ID):      # consider
            if I2S.has_key(ID):  # already done
                pass
            else:
                # do something
                count += 1
                (artist, album, title) = hp.abstract_title(filename)
                I2S[ID] = [artist, title]
            pass

    print 'new added: ', count
    return I2S


i2s = file('filter_ID_SONG.dic', 'r')
id_song = eval(i2s.read())
i2s.close()
print 'already has: ', len(id_song)

my_tag = file('my_tag.dic', 'r')
tag = eval(my_tag.read())
my_tag.close()
print 'total has: ', len(tag)

new_i2s = traverse(id_song, tag)
print 'new total has: ', len(new_i2s)

hp.save(new_i2s, 'new_i2s.dic')
def compare_solvers(d):
    """
    call different solvers, compare objectives

    available solvers:
     - finite_diff_primal
     - cvxopt_dual_solver
     - finite_diff_dual
     - dcd
     - dcd_shrinking
     - dcd_shogun
     - mtk_shogun
    """

    data_name = d["data_name"]
    min_interval = d["min_interval"]

    #solvers = ["dcd_shogun", "mtk_shogun"]
    solvers = ["mtk_shogun"]
    #solvers = ["dcd_shogun"]

    plot = False

    data, task_sim = get_data(data_name)

    # set up plot
    if plot:
        import pylab
        fig = pylab.figure()

    print "computing true objective"

    # determine true objective
    record_interval = 0
    solver = dcd.train_mtl_svm(data, task_sim, "dcd_shogun", 1e-9, record_interval, min_interval)
    #solver = dcd.train_mtl_svm(data, task_sim, "mtk_shogun", 1e-9)

    true_obj = -solver.final_dual_obj
    #true_obj = solver.final_primal_obj
    #true_obj = -solver.dual_objectives[-1]

    print "true objective computed:", true_obj

    for s_idx, solver_name in enumerate(solvers):

        print "processing solver", solver_name

        # new implementation
        if "dcd" in solver_name:
            eps = 1e-8
        else:
            eps = 1e-8

        solver = dcd.train_mtl_svm(data, task_sim, solver_name, eps, 100, min_interval)

        #TODO is this working correctly????
        rd = [np.abs(np.abs(true_obj) - np.abs(obj)) for obj in solver.dual_objectives]
        tt = np.array(solver.train_times, dtype=np.float64) / 1000.0 + 1.0

        # save results
        dat = {}
        dat["dual_obj"] = solver.dual_objectives
        dat["primal_obj"] = solver.primal_objectives
        dat["fun_diff"] = rd
        dat["time"] = solver.train_times
        dat["true_obj"] = true_obj
        dat["solver_obj"] = solver
        dat["name"] = solver_name

        prefix = "/fml/ag-raetsch/home/cwidmer/svn/projects/2012/mtl_dcd/"
        fn = prefix + "results/result_newkids_nitro_" + data_name + "_" + solver_name + ".pickle"
        helper.save(fn, dat)

        # plot stuff
        #pylab.semilogy(num_xt, train_time[0], "o", label=solvers[0])
        if plot:
            pylab.plot(tt, rd, "-o", label=solver_name.replace("_shogun", ""))
            pylab.yscale("log")
            pylab.xscale("log")
            pylab.xlabel("time (s)")
            pylab.ylabel("relative function difference")  #TODO relative!
            pylab.grid(True)

    # plot training time
    #pylab.semilogy(num_xt, train_time[1], "o", label=solvers[1])
    if plot:
        pylab.legend(loc="best")
        fig_name = "newkids_" + data_name + ".pdf"
        fig.savefig(fig_name)
def test_data():

    ##################################################################
    # select MSS
    ##################################################################

    mss = expenv.MultiSplitSet.get(379)

    ##################################################################
    # data
    ##################################################################

    # fetch data
    instance_set = mss.get_train_data(-1)

    # prepare data
    data = PreparedMultitaskData(instance_set, shuffle=True)

    # set parameters
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 4
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.freeze()

    ##################################################################
    # taxonomy
    ##################################################################

    taxonomy = shogun_factory.create_taxonomy(mss.taxonomy.data)

    support = numpy.linspace(0, 100, 4)

    distances = [[0, 1, 2, 2], [1, 0, 2, 2], [2, 2, 0, 1], [2, 2, 1, 0]]

    # create tree normalizer
    tree_normalizer = MultitaskKernelPlifNormalizer(support, data.task_vector_names)

    task_names = data.get_task_names()
    FACTOR = 1.0

    # init gamma matrix
    gammas = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))

    for t1_name in task_names:
        for t2_name in task_names:
            similarity = taxonomy.compute_node_similarity(taxonomy.get_id(t1_name),
                                                          taxonomy.get_id(t2_name))
            gammas[data.name_to_id(t1_name), data.name_to_id(t2_name)] = similarity

    helper.save("/tmp/gammas", gammas)

    gammas = gammas * FACTOR
    cost = param.cost * numpy.sqrt(FACTOR)

    print gammas

    ##########
    # regular normalizer

    normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

    for t1_name in task_names:
        for t2_name in task_names:
            similarity = gammas[data.name_to_id(t1_name), data.name_to_id(t2_name)]
            normalizer.set_task_similarity(data.name_to_id(t1_name),
                                           data.name_to_id(t2_name),
                                           similarity)

    ##################################################################
    # Train SVMs
    ##################################################################

    # create shogun objects
    wdk_tree = shogun_factory.create_kernel(data.examples, param)
    lab = shogun_factory.create_labels(data.labels)

    wdk_tree.set_normalizer(tree_normalizer)
    wdk_tree.init_normalizer()

    print "--->", wdk_tree.get_normalizer().get_name()

    svm_tree = SVMLight(cost, wdk_tree, lab)
    svm_tree.set_linadd_enabled(False)
    svm_tree.set_batch_computation_enabled(False)
    svm_tree.train()

    del wdk_tree
    del tree_normalizer

    print "finished training tree-norm SVM:", svm_tree.get_objective()

    wdk = shogun_factory.create_kernel(data.examples, param)
    wdk.set_normalizer(normalizer)
    wdk.init_normalizer()

    print "--->", wdk.get_normalizer().get_name()

    svm = SVMLight(cost, wdk, lab)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.train()

    print "finished training manually set SVM:", svm.get_objective()

    alphas_tree = svm_tree.get_alphas()
    alphas = svm.get_alphas()

    assert len(alphas_tree) == len(alphas)

    for i in xrange(len(alphas)):
        assert abs(alphas_tree[i] - alphas[i]) < 0.0001

    print "success: all alphas are the same"
            companbot_x.cred = int(companbot_x.cred) + 1
            text_area.x = 5
            text_area.text = "I found treasure!"
            animate(idle)
            time.sleep(0.4)
        if randomEvent == 12:
            companbot_x.xp = int(companbot_x.xp) + random.randint(1, 2)
            time.sleep(0.4)
        if randomEvent == 22:
            text_area.x = 5
            text_area.text = helper.chat()
            animate(idle)
        if randomEvent == 42:
            text_area.x = 5
            text_area.text = "Dance Time!"
            animate(dance)
        if int(companbot_x.xp) > 100:
            print("Level Up")
            companbot_x = companion.companbot.levelUp(companbot_x)
            helper.save(companbot_x)
            led[0] = (helper.get_rndRGB())
        else:
            helper.save(companbot_x)
        user_AFK, AFKTimer = sandman.sleep(minitft, user_AFK, AFKTimer)
        monoClk_last, AFKTimer, user_AFK = helper.timelasp(
            monoClk_last, AFKTimer, user_AFK)
    except Exception as e:
        print("Error Main: " + str(e))
        led[0] = (255, 0, 0)
# create dense matrices A,B,C
A = array([[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]], dtype=float64)
B = array([1, 1, 1, -1, -1, -1], dtype=float64)

# ... of type Real, LongInt and Byte
feats_train = RealFeatures(A.transpose())
kernel = GaussianKernel(feats_train, feats_train, 1.0)
kernel.io.set_loglevel(MSG_DEBUG)

lab = Labels(B)
svm = SVMLight(1, kernel, lab)
svm.train()

helper.save("/tmp/awesome_svm", svm)
svm = helper.load("/tmp/awesome_svm")
svm.train()

#sys.exit(0)

run = expenv.Run.get(1010)
#run = expenv.Run.get(974)
dat = run.get_train_data()
print dat.keys()
d = dat["thaliana"]
subset_size = 20

examples = [i.example for i in d[0:subset_size]]
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """

    root = param.taxonomy.data

    print ">>>" + str(param.taxonomy.data) + "<<<"
    print "initial root weight:", root.edge_weight
    print "tasks", train_data.keys()
    print "tax keys", root.get_data_keys()

    numpy.random.seed(1)

    # prepare data splits for inner validation
    #
    # set up validation strategy
    # this has to be done here, because the training set CANNOT contain
    # any examples that will be used to evaluate further down the tree
    #
    # also by doing it this way, we have equally many examples from each
    # task in each split

    inner_train_data = {}
    inner_eval_data = {}

    for task_id in root.get_data_keys():

        idx = range(len(train_data[task_id]))

        idx_pos = [idx for idx in range(len(train_data[task_id])) if train_data[task_id][idx].label == 1]
        idx_neg = [idx for idx in range(len(train_data[task_id])) if train_data[task_id][idx].label == -1]

        numpy.random.shuffle(idx_pos)
        numpy.random.shuffle(idx_neg)

        # distribute pos/negs evenly across splits
        splits_pos = helper.split_list(idx_pos, FOLD)
        splits_neg = helper.split_list(idx_neg, FOLD)

        eval_split_id = 0

        train_idx_pos = list(helper.flatten([splits_pos[j] for j in xrange(FOLD) if j != eval_split_id]))
        train_idx_neg = list(helper.flatten([splits_neg[j] for j in xrange(FOLD) if j != eval_split_id]))

        train_idx = train_idx_pos
        train_idx.extend(train_idx_neg)
        numpy.random.shuffle(train_idx)

        eval_idx_pos = splits_pos[eval_split_id]
        eval_idx_neg = splits_neg[eval_split_id]

        eval_idx = eval_idx_pos
        eval_idx.extend(eval_idx_neg)
        numpy.random.shuffle(eval_idx)

        # numpy.random.shuffle(idx)
        #
        # splits = helper.split_list(idx, FOLD)
        #
        # eval_split_id = 0
        # train_idx = list(helper.flatten([splits[j] for j in xrange(FOLD) if j != eval_split_id]))
        # eval_idx = splits[eval_split_id]

        # make sure idx lists are disjoint
        assert len(set(train_idx).intersection(set(eval_idx))) == 0

        print "len train data", len(train_data[task_id]), task_id

        # select data sets
        inner_train_data[task_id] = [train_data[task_id][idx] for idx in train_idx]
        inner_eval_data[task_id] = [train_data[task_id][idx] for idx in eval_idx]

    ###########################################################
    # Learn Taxonomy Parameters
    ###########################################################

    grey_nodes = [root]

    # initialize inner cost
    inner_cost = param.cost

    while len(grey_nodes) > 0:

        # fetch next node to process
        node = grey_nodes.pop(0)  # pop first item

        # enqueue children
        if not node.is_leaf():
            grey_nodes.extend(node.children)

        ###################################
        # train current node
        ###################################

        # concatenate instances from all task for nodes below
        instance_set_train = list(helper.flatten([inner_train_data[key] for key in node.get_data_keys()]))
        instance_set_eval = list(helper.flatten([inner_eval_data[key] for key in node.get_data_keys()]))

        # shuffle to avoid having instances from one task in consecutive order
        numpy.random.shuffle(instance_set_train)
        numpy.random.shuffle(instance_set_eval)

        # extract examples and labels
        train_examples = [inst.example for inst in instance_set_train]
        train_labels = [inst.label for inst in instance_set_train]

        eval_examples = [inst.example for inst in instance_set_eval]
        eval_labels = [inst.label for inst in instance_set_eval]

        #import copy
        #debug_examples = copy.copy(train_examples)
        #debug_examples.extend(eval_examples)
        #debug_labels = copy.copy(train_labels)
        #debug_labels.extend(eval_labels)

        # only local xval for leaves
        #if node.is_root():
        #    inner_param = 0.0
        #    predictor = self._train_inner_classifier(node, train_examples, train_labels, param, inner_param, param.cost)
        #else:

        #TODO: also perform inner validation on non-leaves
        if node.is_leaf():  # not node.is_root():

            print "performing inner xval at node", node.name

            # perform local model selection
            result_dict = self._perform_inner_xval(node, train_examples, train_labels, eval_examples, eval_labels, param)

            # use dict for returning args to avoid order glitches
            inner_edge_weight = result_dict["best_edge_weight"]
            inner_cost = result_dict["best_inner_cost"]
            predictor = result_dict["best_predictor"]

        else:
            # for non-leaves train without model selection
            inner_edge_weight = param.transform
            inner_cost = param.cost
            predictor = self._train_inner_classifier(node, train_examples, train_labels, param, inner_edge_weight, inner_cost)
            #predictor = self._train_inner_classifier(node, debug_examples, debug_labels, param, inner_edge_weight, inner_cost)

        node.predictor = predictor
        node.edge_weight = inner_edge_weight
        node.cost = inner_cost

    ###########################################################
    # Retrain on whole training set with optimal parameters
    ###########################################################

    grey_nodes = [root]

    while len(grey_nodes) > 0:

        node = grey_nodes.pop(0)  # pop first item

        # enqueue children
        if not node.is_leaf():
            grey_nodes.extend(node.children)

        # fetch all data that belongs to leaves underneath current node
        instance_set_retrain = list(helper.flatten([train_data[key] for key in node.get_data_keys()]))

        # shuffle instances
        numpy.random.shuffle(instance_set_retrain)

        # extract examples and labels
        examples = [inst.example for inst in instance_set_retrain]
        labels = [inst.label for inst in instance_set_retrain]

        print "FINAL TRAIN on " + node.name + " C=" + str(node.cost) + " B=" + str(node.edge_weight)

        predictor = self._train_inner_classifier(node, examples, labels, param, node.edge_weight, node.cost)

        # attach predictor to node
        node.predictor = predictor

    #####################################################
    # Wrap things up
    #####################################################

    # wrap up predictors for later use
    predictors = {}

    for leaf in root.get_leaves():
        assert leaf.predictor is not None
        predictors[leaf.name] = leaf.predictor

    # make sure we have the same keys (potentially in a different order)
    sym_diff_keys = set(train_data.keys()).symmetric_difference(set(predictors.keys()))
    assert len(sym_diff_keys) == 0, "symmetric difference between keys non-empty: " + str(sym_diff_keys)

    # save graph plot
    mypath = "/fml/ag-raetsch/share/projects/multitask/graphs/"
    filename = mypath + "graph_" + str(param.id)
    filename_perf = mypath + "performances_" + str(param.id)
    helper.save(filename_perf, result_dict["performances"])
    print "saving results to:", filename_perf

    root.plot(filename, plot_cost=True, plot_B=True)

    return predictors
def learning_curve(data_name, solvers):
    """
    call different solvers, compare objectives

    available solvers:
     - finite_diff_primal
     - cvxopt_dual_solver
     - finite_diff_dual
     - dcd
     - dcd_shrinking
     - dcd_shogun
     - mtk_shogun
    """

    #solvers = ["mtk_shogun"]
    #solvers = ["dcd_shogun"]

    num_runs = 10
    #fractions = np.linspace(0.1, 1.0, num_runs)
    fractions = [float(c) for c in np.exp(np.linspace(np.log(0.1), np.log(1.0), num_runs))]

    # keep track of training time
    num_xt = np.zeros(num_runs)
    train_times = np.zeros((2, num_runs))

    for run_id, fraction_data in enumerate(fractions):

        data, task_sim = get_data(data_name)
        #fig = pylab.figure()

        data_subset = defaultdict(dict)
        num_xt[run_id] = 0

        for task_name in data:
            num_total = len(data[task_name]["xt"])
            num_subset = int(float(num_total) * fraction_data)
            xt, lt = coshuffle(data[task_name]["xt"], data[task_name]["lt"])
            data_subset[task_name]["xt"] = xt[0:num_subset]
            data_subset[task_name]["lt"] = lt[0:num_subset]
            num_xt[run_id] += num_subset

        for s_idx, solver in enumerate(solvers):

            eps = 1e-3

            start_time = time.time()
            dcd.train_mtl_svm(data_subset, task_sim, solver, eps, 0, 0)
            ttime = time.time() - start_time

            print "training time:", ttime, "seconds"
            train_times[s_idx, run_id] = ttime

            # write progress to file
            fn = "results/learning_curve_" + data_name + "_" + solver + ".txt"
            txt_file = file(fn, "a")
            txt_file.write("num_xt:\t%i\ttime:\t%i\n" % (num_xt[run_id], ttime))
            txt_file.close()

    # save results
    fn = "results/learning_curve_" + data_name + ".pickle"
    helper.save(fn, {"num_xt": num_xt, "time": train_times})
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """

    # split for training weak_learners and boosting
    (train_weak, train_boosting) = split_data(train_data, 4)

    # merge data sets
    data = PreparedMultitaskData(train_weak, shuffle=True)

    # create shogun label
    lab = shogun_factory.create_labels(data.labels)

    ##################################################
    # define pockets
    ##################################################

    pockets = [0] * 9

    pockets[0] = [1, 5, 6, 7, 8, 31, 32, 33, 34]
    pockets[1] = [1, 2, 3, 4, 6, 7, 8, 9, 11, 21, 31]
    pockets[2] = [11, 20, 21, 22, 29, 31]
    pockets[3] = [8, 30, 31, 32]
    pockets[4] = [10, 11, 30]
    pockets[5] = [10, 11, 12, 13, 20, 29]
    pockets[6] = [10, 12, 20, 22, 26, 27, 28, 29]
    pockets[7] = [12, 14, 15, 26]
    pockets[8] = [13, 15, 16, 17, 18, 19, 20, 23, 24, 25, 26]

    pockets = []
    for i in xrange(35):
        pockets.append([i])

    #new_pockets = []
    # merge neighboring pockets
    #for i in range(8):
    #    new_pockets.append(list(set(pockets[i]).union(set(pockets[i+1]))))
    #pockets = new_pockets

    ########################################################
    print "creating a kernel:"
    ########################################################

    # init seq handler
    pseudoseqs = SequencesHandler()

    classifiers = []

    for pocket in pockets:

        print "creating normalizer"
        #import pdb
        #pdb.set_trace()
        normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

        print "processing pocket", pocket

        # set similarity
        for task_name_lhs in data.get_task_names():
            for task_name_rhs in data.get_task_names():

                similarity = 0.0
                for pseudo_seq_pos in pocket:
                    similarity += float(pseudoseqs.get_similarity(task_name_lhs, task_name_rhs, pseudo_seq_pos - 1))

                # normalize
                similarity = similarity / float(len(pocket))

                print "pocket %s (%s, %s) = %f" % (str(pocket), task_name_lhs, task_name_rhs, similarity)

                normalizer.set_task_similarity(data.name_to_id(task_name_lhs), data.name_to_id(task_name_rhs), similarity)

        print "creating empty kernel"
        kernel = shogun_factory.create_kernel(data.examples, param)

        print "setting normalizer"
        kernel.set_normalizer(normalizer)

        print "training SVM for pocket", pocket
        svm = self._train_single_svm(param, kernel, lab)

        classifiers.append(svm)

    print "done obtaining weak learners"

    # save additional info
    #self.additional_information["svm_objective"] = svm.get_objective()
    #self.additional_information["svm num sv"] = svm.get_num_support_vectors()
    #self.additional_information["post_weights"] = combined_kernel.get_subkernel_weights()
    #print self.additional_information

    ##################################################
    # combine weak learners for each task
    ##################################################

    # set constants
    some = 0.9
    import cvxmod

    # wrap up predictors
    svms = {}

    # use a reference to the same svm several times
    for task_name in train_boosting.keys():

        instances = train_boosting[task_name]

        N = len(instances)
        F = len(pockets)

        examples = [inst.example for inst in instances]
        labels = [inst.label for inst in instances]

        # dim = (F x N)
        out = cvxmod.zeros((N, F))

        for i in xrange(F):
            svm = classifiers[i]
            tmp_out = self._predict_weak(svm, examples, data.name_to_id(task_name))

            out[:, i] = numpy.sign(tmp_out)
            #out[:, i] = tmp_out

        #TODO: fix
        helper.save("/tmp/out_sparse", (out, labels))
        pdb.set_trace()

        weights = solve_boosting(out, labels, some, solver="mosek")

        svms[task_name] = (data.name_to_id(task_name), svm)

    return svms
def get_presvm(B=2.0):

    examples_presvm = [numpy.array([2.1788894, 3.89163458, 5.55086917, 6.4022742, 3.14964751, -0.4622959, 5.38538904, 5.9962938, 6.29690849]),
                       numpy.array([2.1788894, 3.89163458, 5.55086917, 6.4022742, 3.14964751, -0.4622959, 5.38538904, 5.9962938, 6.29690849]),
                       numpy.array([0.93099452, 0.38871617, 1.57968949, 1.25672527, -0.8123137, 0.20786586, 1.378121, 1.15598866, 0.80265343]),
                       numpy.array([0.68705535, 0.15144113, -0.81306157, -0.7664577, 1.16452945, -0.2712956, 0.483094, -0.16302007, -0.39094812]),
                       numpy.array([-0.71374437, -0.16851719, 1.43826895, 0.95961166, -0.2360497, -0.30425755, 1.63157052, 1.15990427, 0.63801465]),
                       numpy.array([0.68705535, 0.15144113, -0.81306157, -0.7664577, 1.16452945, -0.2712956, 0.483094, -0.16302007, -0.39094812]),
                       numpy.array([-0.71374437, -0.16851719, 1.43826895, 0.95961166, -0.2360497, -0.30425755, 1.63157052, 1.15990427, 0.63801465]),
                       numpy.array([-0.98028302, -0.23974489, 2.1687206, 1.99338824, -0.67070205, -0.33167281, 1.3500379, 1.34915685, 1.13747975]),
                       numpy.array([0.67109612, 0.12662017, -0.48254886, -0.49091898, 1.31522237, -0.34108933, 0.57832179, -0.01992828, -0.26581628]),
                       numpy.array([0.3193611, 0.44903416, 3.62187778, 4.1490827, 1.58832961, 1.95583397, 1.36836023, 1.92521945, 2.41114998])]

    labels_presvm = [-1.0, -1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0]

    examples = [numpy.array([-0.49144487, -0.19932263, -0.00408188, -0.21262012, 0.14621013, -0.50415481, 0.32317317, -0.00317602, -0.21422637]),
                numpy.array([0.0511817, -0.04226666, -0.30454651, -0.38759116, 0.31639514, 0.32558471, 0.49364473, 0.04515591, -0.06963456]),
                numpy.array([-0.30324369, -0.11909251, -0.03210278, -0.2779561, 1.31488853, -0.33165365, 0.60176018, -0.00384946, -0.15603975]),
                numpy.array([0.59282756, -0.0039991, -0.26028983, -0.26722552, 1.63314995, -0.51199338, 0.33340685, -0.0170519, -0.19211039]),
                numpy.array([-0.18338766, -0.07783465, 0.42019824, 0.201753, 2.01160098, 0.33326111, 0.75591909, 0.36631525, 0.1761829]),
                numpy.array([0.10273793, -0.02189574, 0.91092358, 0.74827973, 0.51882902, -0.1286531, 0.64463658, 0.67468349, 0.55587266]),
                numpy.array([-0.09727099, -0.13413522, 0.18771062, 0.19411594, 1.48547364, -0.43169608, 0.55064534, 0.24331473, 0.10878847]),
                numpy.array([-0.22494375, -0.15492964, 0.28017737, 0.29794467, 0.96403895, 0.43880289, 0.08053425, 0.07456818, 0.12102371]),
                numpy.array([-0.18161417, -0.17692039, 0.19554942, -0.00785625, 1.38315115, -0.05923183, -0.05723568, -0.15463646, -0.24249483]),
                numpy.array([-0.36538359, -0.20040061, -0.38384388, -0.40206556, -0.25040256, 0.94205875, 0.40162798, 0.00327328, -0.24107393])]

    labels = [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0]

    examples_test = [numpy.array([-0.45159799, -0.11401394, 1.28574573, 1.09144306, 0.92253119, -0.47230164, 0.77032486, 0.83047366, 0.74768906]),
                     numpy.array([0.42613105, 0.0092778, -0.78640296, -0.71632445, 0.41154244, 0.88380309, 0.19475759, -0.14195876, -0.30479425]),
                     numpy.array([-0.09727099, -0.13413522, 0.18771062, 0.19411594, 1.48547364, -0.43169608, 0.55064534, 0.24331473, 0.10878847]),
                     numpy.array([0.11558796, -0.08867647, -0.26432074, -0.30924546, -1.08243017, -0.1339607, -0.1956124, -0.2428358, -0.25761213]),
                     numpy.array([1.23679696, 0.18753081, -0.25593329, -0.12051991, 0.64976989, -0.17184101, 0.14951337, 0.01988587, -0.0356698]),
                     numpy.array([1.03355002, 0.05316195, -0.97905368, -0.75482121, 0.28673776, 2.27142733, 0.02654739, -0.31109851, -0.44555277]),
                     numpy.array([-0.53662325, -0.21434756, -0.12105795, -0.27531257, 0.66947047, 0.05474302, -0.00717455, -0.17700575, -0.22253444]),
                     numpy.array([0.11272632, -0.12674826, -0.49736457, -0.51445609, 0.88518932, -0.51558669, -0.12000557, -0.32973613, -0.38488736]),
                     numpy.array([0.8372111, 0.06972199, -1.00454229, -0.79869642, 1.19376333, -0.40160273, -0.25122157, -0.46417918, -0.50234858]),
                     numpy.array([-0.36325018, -0.12206184, 0.10525247, -0.15663416, 1.03616948, -0.51699463, 0.59566286, 0.35363369, 0.10545559])]

    #############################################
    # compute pre-svm
    #############################################

    # create real-valued features as first step
    examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
    examples_presvm = numpy.transpose(examples_presvm)

    feat_presvm = RealFeatures(examples_presvm)
    lab_presvm = Labels(numpy.array(labels_presvm))
    wdk_presvm = LinearKernel(feat_presvm, feat_presvm)

    presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
    presvm_liblinear.set_max_iterations(10000)
    presvm_liblinear.set_bias_enabled(False)
    presvm_liblinear.train()

    #return presvm_liblinear
    #def get_da_svm(presvm_liblinear):

    #############################################
    # compute linear term manually
    #############################################

    examples = numpy.array(examples, dtype=numpy.float64)
    examples = numpy.transpose(examples)

    feat = RealFeatures(examples)
    lab = Labels(numpy.array(labels))

    dasvm_liblinear = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_liblinear, B)
    dasvm_liblinear.set_bias_enabled(False)
    dasvm_liblinear.train()

    helper.save("/tmp/svm", presvm_liblinear)
    presvm_pickle = helper.load("/tmp/svm")

    dasvm_pickle = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_pickle, B)
    dasvm_pickle.set_bias_enabled(False)
    dasvm_pickle.train()

    helper.save("/tmp/dasvm", dasvm_liblinear)
    dasvm_pickle2 = helper.load("/tmp/dasvm")

    #############################################
    # load test data
    #############################################

    examples_test = numpy.array(examples_test, dtype=numpy.float64)
    examples_test = numpy.transpose(examples_test)
    feat_test = RealFeatures(examples_test)

    # check if pickled and unpickled classifiers behave the same
    out1 = dasvm_liblinear.classify(feat_test).get_labels()
    out2 = dasvm_pickle.classify(feat_test).get_labels()

    # compare outputs
    for i in xrange(len(out1)):
        try:
            assert abs(out1[i] - out2[i]) <= 0.001
        except:
            print "(%.5f, %.5f)" % (out1[i], out2[i])

    print "classification agrees."