def main(args=None):
    """Train an RNN language model and report validation/test perplexity.

    Args:
        args: Optional command-line argument list. Defaults to
            ``sys.argv[1:]`` resolved at call time. (The original signature
            used ``args=sys.argv[1:]``, which freezes the arguments at
            import time — a default-argument pitfall.)
    """
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)
    logging.basicConfig(level=args.logging)
    # Fall back to CPU when CUDA is unavailable or explicitly disabled.
    device = torch.device(
        "cpu" if args.no_cuda or not torch.cuda.is_available() else "cuda")
    vocab = Vocab()
    # Load data now to know the whole vocabulary when training model.
    train_data = data_loader.load(data_loader.path("train"), vocab)
    valid_data = data_loader.load(data_loader.path("valid"), vocab)
    test_data = data_loader.load(data_loader.path("test"), vocab)
    model = RnnLm(len(vocab), args.embedding_dim, args.gru_hidden,
                  args.gru_layers, not args.untied,
                  args.gru_dropout).to(device)
    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    for epoch_ind in range(args.epochs):
        logging.info("Training epoch %d", epoch_ind)
        train_epoch(train_data, model, optimizer, args, device)
        logging.info("Validation perplexity: %.1f",
                     evaluate(valid_data, model, args.batch_size, device))
    # Evaluate on the held-out test set once training is complete.
    logging.info("Test perplexity: %.1f",
                 evaluate(test_data, model, args.batch_size, device))
def run(training_data, test_data, problog_file):
    """Train the multi-label DeepProbLog model and print test-query scores."""
    train_queries = load(training_data)
    eval_queries = load(test_data)
    with open(problog_file, 'r') as f:
        program_text = f.read()
    torch_net = MultiLabelNet()
    net = Network(torch_net, 'multilabel_net', neural_predicate)
    net.optimizer = torch.optim.Adam(torch_net.parameters(), lr=0.001)
    model = Model(program_text, [net], caching=False)
    train_model(model,
                train_queries,
                nr_epochs=50,
                optimizer=Optimizer(model, 2),
                test_iter=len(train_queries) * 10,
                # test=multilabel_test,
                log_iter=500,
                snapshot_iter=len(train_queries))
    # Solve each held-out query and print the per-label probabilities.
    for query in eval_queries:
        print(query)
        for atom, prob in model.solve(query).items():
            label = CLASSES_BY_LABEL[int(query.args[0])][str(atom.args[1])]
            print('\t{}: {:.4f}\t{}'.format(atom.args[1], prob[0], label))
def main(config):
    """Build data loaders and a Trainer, then train or test per `config`."""
    prepare_dirs_and_logger(config)
    # NOTE(review): `rng` is created but never used below — confirm whether
    # downstream code relies on the global numpy seed instead.
    rng = np.random.RandomState(config.random_seed)
    tf.set_random_seed(config.random_seed)
    # NOTE(review): argument-less `load()` — purpose not visible from here.
    load()
    # Train and test loaders; each yields (data, label, loc, mask) queues.
    train_data_loader, train_label_loader, train_loc_loader, train_mask_loader = get_loader(
        config.data_path, config.batch_size, 0, 'train', True)
    # NOTE(review): the test loader also requests the 'train' split (with a
    # different offset of 5) — verify this is intentional.
    test_data_loader, test_label_loader, test_loc_loader, test_mask_loader = get_loader(
        config.data_path, config.batch_size_test, 5, 'train', True)
    trainer = Trainer(config, train_data_loader, train_label_loader,
                      train_loc_loader, train_mask_loader, test_data_loader,
                      test_label_loader, test_loc_loader, test_mask_loader)
    print("loaded trainer")
    if config.is_train:
        save_config(config)
        trainer.train()
        print("finished train")
    else:
        # Testing requires a pretrained checkpoint to restore from.
        if not config.load_path:
            raise Exception(
                "[!] You should specify `load_path` to load a pretrained model"
            )
        trainer.test()
def run_linear(training_data, test_data, problog_files,
               problog_train_files=(), problog_test_files=()):
    """Train the VGGish-based sound DeepProbLog model.

    `problog_files` are shared between train and test programs;
    `problog_train_files`/`problog_test_files` are appended to build the
    split-specific programs.
    """
    queries = load(training_data)
    test_queries = load(test_data)
    # Alternative network architectures kept for reference:
    # network = SoundLinearNet()
    # network = SoundCNNet()
    network = SoundVGGish()
    problog_string = add_files_to(problog_files, '')
    problog_train_string = add_files_to(problog_train_files, problog_string)
    problog_test_string = add_files_to(problog_test_files, problog_string)
    net = Network(network, 'sound_net', neural_predicate_vggish)
    net.optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
    # Separate train/test models share the same neural network `net`.
    model_to_train = Model(problog_train_string, [net], caching=False)
    optimizer = Optimizer(model_to_train, 2)
    model_to_test = Model(problog_test_string, [net], caching=False)
    train_model(model_to_train, queries, nr_epochs=10, optimizer=optimizer,
                test_iter=len(queries),
                test=lambda _: my_test(model_to_test, test_queries),
                log_iter=500,
                snapshot_iter=len(queries),
                # NOTE(review): the leading space in this path looks like a
                # typo — confirm snapshots really go to ' SequenceDetection…'.
                snapshot_name=' SequenceDetectionSnapshots/model')
def predict(): """ An example of how to load a trained model and use it to predict labels. """ # load the saved model classifier = cPickle.load(open('best_model.pkl')) # compile a predictor function predict_model = theano.function( inputs=[classifier.input], outputs=classifier.y_pred) # We can test it on some examples from test test dataset='data/mnist.pkl.gz' training_set, validation_set, testing_set, = data_loader.load(dataset) testing_set_x , testing_set_y = testing_set testing_set_x = testing_set_x.get_value() testing_set_y = testing_set_y.eval()[:30] predicted_values = predict_model(testing_set_x[:30]) print ("Predicted values for the first 10 examples in test set:") print predicted_values print ("answers:") print
def main():
    """Train a VBSGPR model cluster-by-cluster and report test RMSE per epoch."""
    tf.set_random_seed(2018)
    # Last column of dtrain/dtest holds the cluster id; second-to-last is y.
    dtrain, dtest, z, y_std = data_loader.load('airplane.csv',
                                               n_clusters=config.clusters,
                                               n_induce=config.num_inducing,
                                               sgp=config.sgp)
    N, _ = dtrain.shape
    model = VBSGPR(N, config.log_beta, config.log_sf2, config.log_theta, z,
                   whiten=True)
    clusters = [i for i in range(config.clusters)]
    lb = model.lower_bound()
    fmu, fcov = model.predict_f()
    gp_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'vbsgpr')
    gp_opt = tf.train.AdamOptimizer(0.01, beta1=0.9, name='gp_opt')
    # Best: 40.459
    # gp_opt = tf.train.MomentumOptimizer(0.01, momentum=0.9, use_nesterov=False)
    # Maximize the variational lower bound by minimizing its negation.
    gp_train_op = gp_opt.minimize(-lb, var_list=gp_vars)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(config.epochs):
            # Visit clusters in a fresh random order each epoch.
            random.shuffle(clusters)
            for i, cluster in enumerate(clusters):
                data_batch = dtrain[np.where(dtrain[:, -1] == cluster)]
                # Features are all but the last two columns; target is the
                # second-to-last column.
                X, y = data_batch[:, :-2], data_batch[:, -2:-1]
                _, lb_ = sess.run([gp_train_op, lb],
                                  {model.x: X, model.y: y,
                                   model.batch: y.shape[0]})
                if i % 100 == 0:
                    print ('Epoch: [{}], The {}-th Cluster: [{}], Lower Bound: [{}]'.format(
                        epoch, i, cluster, lb_))
            # Evaluate on the full test set; rescale by y_std to undo
            # target normalization before computing RMSE.
            X_test, y_test = dtest[:, :-2], dtest[:, -2:-1]
            f_test, _ = sess.run([fmu, fcov], {model.x: X_test})
            rmse = np.sqrt(np.mean(y_std**2 * ((y_test - f_test))**2))
            print ('Epoch {} test RMSE: {}'.format(epoch, rmse))
def main():
    """Parse command-line options, load a chain, and return its summary table."""
    # GUI-based file selection, kept for reference:
    # datafile = open_file_gui(add_pattern="*.txt")
    # infofile = open_file_gui(add_pattern="*.txt")
    parser = arg_parser(description='Superplot summary tool',
                        conflict_handler='resolve')
    parser.add_argument('--data_file', '-d',
                        help='Chain file to summarise',
                        type=str,
                        required=True)
    parser.add_argument('--info_file', '-i',
                        help='Info file to summarise',
                        type=str,
                        default=None,
                        required=False)
    parsed = vars(parser.parse_args())

    chain_path = os.path.abspath(parsed['data_file'])
    info_path = parsed['info_file']
    if info_path:
        info_path = os.path.abspath(info_path)

    # Load and label the chain, then build the summary table.
    labels, data = data_loader.load(info_path, chain_path)
    return _summary_table(labels, data, datafile=chain_path, infofile=info_path)
def train(train_data, ep, bz):
    """Fit a freshly-built model on `train_data` for `ep` epochs, batch `bz`."""
    features, labels = data_loader.load(train_data, dim, t)
    print('Training model ...')
    classifier = create_model()
    classifier.fit(features, labels, epochs=ep, batch_size=bz, verbose=2)
    # save(directory + 'model/', m_name, model)
    evaluate(create_model, seed, features, labels, t, ep, bz)
def run_coauthor(fold_i):
    """Train the co-author DeepProbLog model for cross-validation fold `fold_i`."""
    def neural_predicate(network, i, dataset='train'):
        # Fetch MNIST example `i` from the requested split and run the net.
        i = int(i)
        dataset = str(dataset)
        if dataset == 'train':
            d, l = mnist_train_data[i]
        elif dataset == 'test':
            d, l = mnist_test_data[i]
        d = Variable(d.unsqueeze(0))
        output = network.net(d)
        return output.squeeze(0)

    queries = load('train_data.txt')
    with open('coauthor_rules.pl') as f:
        problog_string = f.read()
    network = coauthor_net()
    # Bug fix: the original referenced undefined `i` here (NameError) — the
    # per-fold network name must be derived from the `fold_i` parameter.
    net = Network(network, f'coauthor {fold_i + 1}', neural_predicate)
    net.optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
    model = Model(problog_string, [net], caching=False)
    optimizer = Optimizer(model, 2)
    train_model(model, queries, 1, optimizer, test_iter=1000,
                test=test_coauthor, snapshot_iter=10000)
def run(data_dir: str, save_estimator=False,
        segment_size=query_name_learner.SEGMENT_SIZE,
        overlap: float = query_name_learner.OVERLAP_FRACTION,
        is_segment_size_in_seconds: bool = False,
        ipython_when_done=False):
    """Load DNS capture columns, configure the learner, and run one-vs-all."""
    wanted_columns = ['frame_time_relative', 'dns_qry_name', 'dns_qry_type']
    data_loader.load(data_dir, wanted_columns)
    # Push the requested segmentation settings into the learner module.
    query_name_learner.SEGMENT_SIZE = segment_size
    query_name_learner.OVERLAP_FRACTION = overlap
    query_name_learner.IS_SEGMENT_SIZE_IN_SECONDS = is_segment_size_in_seconds
    states = query_name_learner.run_one_v_all(save_estimator)
    if ipython_when_done:
        # Drop into an interactive shell for post-run inspection.
        from IPython import embed
        embed()
def main():
    """Plot the tile-wide mean LAI time series for h27v07."""
    frame = data_loader.make_df("../input_data/data_h27v07")

    def tile_mean_lai(path):
        # Average the Lai_500m band over the entire tile.
        return np.mean(data_loader.load(path)["Lai_500m"])

    frame["mean_lai"] = frame.path.map(tile_mean_lai)
    frame.sort_values("date", ascending=True).plot(x="date", y="mean_lai")
    plt.show()
def run(training_data, test_data, problog_files, problog_train_files=(),
        problog_test_files=(), config_file=None, net_mode='init', cfg=None):
    """Train a sound-classification DeepProbLog model driven by a JSON config.

    Args:
        training_data / test_data: query files for the two splits.
        problog_files: ProbLog sources shared by train and test programs.
        problog_train_files / problog_test_files: split-specific sources.
        config_file: path to the JSON configuration for SoundsUtils.
        net_mode / cfg: forwarded into the config dict.
    """
    # Bug fix: the original `json.load(open(config_file))` leaked the file
    # handle; use a context manager so it is closed deterministically.
    with open(config_file) as f:
        config = json.load(f)
    config['net_mode'] = net_mode
    config['cfg'] = cfg
    queries = load(training_data)
    test_queries = load(test_data)
    sounds = SoundsUtils(config)
    problog_string = add_files_to(problog_files, '')
    problog_train_string = add_files_to(problog_train_files, problog_string)
    problog_test_string = add_files_to(problog_test_files, problog_string)
    network = sounds.network
    net = Network(network, 'sound_net', sounds.neural_predicate)
    net.optimizer = sounds.optimizer
    # Train/test models share the same neural network.
    model_to_train = Model(problog_train_string, [net], caching=False)
    optimizer = Optimizer(model_to_train, 2)
    model_to_test = Model(problog_test_string, [net], caching=False)
    train_model(model_to_train, queries, 5, optimizer,
                test_iter=len(queries),
                # Evaluate with the predicate switched to inference mode.
                test=lambda _: my_test(
                    model_to_test,
                    test_queries,
                    test_functions={
                        'sound_net': lambda *args, **kwargs: sounds.neural_predicate(
                            *args, **kwargs, in_training=False)
                    },
                ),
                snapshot_iter=len(queries))
def main():
    """Pick chain/info files via the GUI and return their summary table."""
    # Select chain and info file with a GUI.
    chain_file = open_file_gui()
    info_file = open_file_gui()
    # Load and label the data, then summarise it.
    labels, data = data_loader.load(info_file, chain_file)
    return _summary_table(labels, data, datafile=chain_file, infofile=info_file)
def main():
    """Pick *.txt chain/info files via the GUI and return their summary table."""
    # Select chain and info file with a GUI (restricted to *.txt).
    chain_file = open_file_gui(add_pattern="*.txt")
    info_file = open_file_gui(add_pattern="*.txt")
    # Load and label the data, then build the summary.
    labels, data = data_loader.load(info_file, chain_file)
    return _summary_table(labels, data, datafile=chain_file, infofile=info_file)
def load_data(args):
    """Load a dataset, split into train/val/test, normalize, and optionally
    carve out OOD classes.

    Returns either ``utils.prepare_ood(...)`` (multi-label targets with
    ``args.n_ood > 0``) or the 8-tuple
    ``(x_train, x_val, x_test, y_train, y_val, y_test, 0, 0)``.
    """
    data = data_loader.load(args.dataset,
                            n_train=args.n_train,
                            n_test=args.n_test,
                            train_noise=args.train_noise,
                            test_noise=args.test_noise)
    # These datasets cannot be stratified (see list below).
    stratify = args.dataset not in ["abalone", "segment"]
    if args.dataset not in [
            'arcene', 'moon', 'toy_Story', 'toy_Story_ood', 'segment'
    ]:
        # Generic path: features/labels come as one blob we split ourselves.
        print(args.dataset)
        x = data_loader.prepare_inputs(data['features'])
        y = data['labels']
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, train_size=args.train_test_ratio,
            stratify=y if stratify else None)
    else:
        # These datasets ship with a predefined train/val split.
        if args.dataset == 'moon' or args.dataset == 'toy_Story' or \
                args.dataset == 'toy_Story_ood':
            x_train, x_test = data['x_train'], data['x_val']
        else:
            x_train, x_test = data_loader.prepare_inputs(
                data['x_train'], data['x_val'])
        y_train, y_test = data['y_train'], data['y_val']
    # Generate validation split
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, train_size=args.train_test_ratio,
        stratify=y_train if stratify else None)
    x_train = x_train.astype(np.float32)
    x_val = x_val.astype(np.float32)
    x_test = x_test.astype(np.float32)
    # Standardize all splits using train statistics only.
    n_mean = np.mean(x_train, axis=0)
    n_std = np.var(x_train, axis=0)**.5
    x_train = (x_train - n_mean) / n_std
    x_val = (x_val - n_mean) / n_std
    x_test = (x_test - n_mean) / n_std
    try:
        # Only possible for 2-D (one-hot / multi-label) targets; 1-D label
        # vectors raise AttributeError on `.shape[1]` and fall through.
        if args.n_ood > 0 and y_val.shape[1] > args.n_ood:
            n_ood = y_val.shape[1] - args.n_ood - 1
            return utils.prepare_ood(x_train, x_val, x_test, y_train, y_val,
                                     y_test, n_ood, args.norm)
    except AttributeError:
        #print(x_train, x_val, x_test, y_train, y_val, y_test)
        return x_train, x_val, x_test, y_train, y_val, y_test, 0, 0
    return x_train, x_val, x_test, y_train, y_val, y_test, 0, 0
def run(training_data, test_data, problog_files):
    """Train the MNIST DeepProbLog model from the given ProbLog source files."""
    queries = load(training_data)
    test_queries = load(test_data)
    # Concatenate every ProbLog source, each followed by a blank line.
    sources = []
    for problog_file in problog_files:
        with open(problog_file) as f:
            sources.append(f.read())
    problog_string = ''.join(src + '\n\n' for src in sources)
    mnist_net = MNIST_Net()
    net = Network(mnist_net, 'mnist_net', neural_predicate)
    net.optimizer = torch.optim.Adam(mnist_net.parameters(), lr=0.001)
    model = Model(problog_string, [net], caching=False)
    optimizer = Optimizer(model, 2)
    train_model(model, queries, 1, optimizer, test_iter=1000, test=test_MNIST,
                snapshot_iter=10000)
def run(training_data, test_data, problog_files, problog_train_files=(),
        problog_test_files=()):
    """Train the MNIST DeepProbLog model with separate train/test programs.

    `problog_files` are shared; train/test-specific sources are appended to
    produce the two programs. Evaluation runs the neural predicate against
    the test split.
    """
    queries = load(training_data)
    test_queries = load(test_data)
    problog_string = add_files_to(problog_files, '')
    problog_train_string = add_files_to(problog_train_files, problog_string)
    problog_test_string = add_files_to(problog_test_files, problog_string)
    network = MNIST_Net()
    net = Network(network, 'mnist_net', neural_predicate)
    net.optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
    # Both models share the same underlying neural network.
    model_to_train = Model(problog_train_string, [net], caching=False)
    optimizer = Optimizer(model_to_train, 2)
    model_to_test = Model(problog_test_string, [net], caching=False)
    train_model(model_to_train, queries, 1, optimizer,
                test_iter=len(queries),
                # Evaluate with the predicate pointed at the test dataset.
                test=lambda _: my_test(
                    model_to_test,
                    test_queries,
                    test_functions={
                        'mnist_net': lambda *args, **kwargs: neural_predicate(
                            *args, **kwargs, dataset='test')
                    },
                ),
                log_iter=1000,
                snapshot_iter=len(queries))
def indexATIS(): train_set, valid_set, dicts = load( 'atis.pkl') # load() from data_loader.py w2idx, la2idx = dicts['words2idx'], dicts['labels2idx'] idx2w = {w2idx[k]: k for k in w2idx} idx2la = {la2idx[k]: k for k in la2idx} indexes = { "idx2w": idx2w, "idx2la": idx2la, "w2idx": w2idx, "la2idx": la2idx } with open('embeddings/word_indexes.json', 'w') as f: json.dump(indexes, f) log("Word Indexes saved at (embeddings/word_indexes.json)...") train_x, _, train_label = train_set valid_x, _, valid_label = valid_set MAX_LEN = max(max([len(s) for s in train_x]), max([len(s) for s in valid_x])) # Add padding train_x = pad_sequences(train_x, maxlen=MAX_LEN, padding='post', value=w2idx["<UNK>"]) train_label = pad_sequences(train_label, maxlen=MAX_LEN, padding='post', value=la2idx["O"]) valid_x = pad_sequences(valid_x, maxlen=MAX_LEN, padding='post', value=w2idx["<UNK>"]) valid_label = pad_sequences(valid_label, maxlen=MAX_LEN, padding='post', value=la2idx["O"]) train_set = (train_x, train_label) # packing only train_x and train_label valid_set = (valid_x, valid_label) return (train_set, valid_set, indexes)
def main():
    """Train the configured classifier, log learning curves, and report scores."""
    input_path = os.path.abspath(os.path.join('./data', args.dataset))
    dataset = os.path.splitext(args.dataset)[0]
    logger.info('Load {}'.format(input_path))
    params = {'test_size': 0.2, 'random_state': 1, 'cluster': 'kmeans'}
    X_train, X_test, y_train, y_test = data_loader.load(input_path, **params)
    logger.info('Split into train and test subsets: {}'.format(params))
    # `params` is reused: it now holds model hyperparameters from YAML.
    params_path = os.path.abspath(os.path.join('./params', args.params))
    with open(params_path) as file_:
        params = yaml.load(file_, Loader=yaml.SafeLoader)
    logger.info('Load {}'.format(params_path))
    logger.info('Hyperparameters: {}'.format(params))
    # Dispatch table from model name to classifier class.
    models = {
        'MLP': nn.MLPClassifier,
        'CNN': nn.CNNClassifier,
        'RNN': nn.RNNClassifier
    }
    clf = models[args.model](**params)
    estimator = clf.__class__.__name__
    logger.info('Train {} on {}'.format(estimator, dataset))
    clf.fit(X_train, y_train)
    output_dir = os.path.abspath(args.output)
    os.makedirs(output_dir, exist_ok=True)
    # Persist the per-epoch loss/score curves as a timestamped CSV.
    csv_log = pd.DataFrame({
        'loss': clf.loss_curve_,
        'train_score': clf.training_scores_,
        'val_score': clf.validation_scores_
    })
    csv_log_path = os.path.join(output_dir,
                                time.strftime('%Y%m%d-%H%M%S.csv'))
    csv_log.to_csv(csv_log_path)
    logger.info('Save learning log to {}'.format(csv_log_path))
    if args.plot:
        plot_path = os.path.join(output_dir,
                                 time.strftime('%Y%m%d-%H%M%S.png'))
        plotting.plot_learning_curve(csv_log_path,
                                     '{} on {}'.format(estimator, dataset),
                                     plot_path)
        logger.info('Save learning curves to {}'.format(plot_path))
    logger.info('Training score: {}'.format(clf.score(X_train, y_train)))
    logger.info('Testing score: {}'.format(clf.score(X_test, y_test)))
    logger.info('Done')
def predict(path, output):
    """Load the saved pipeline, score `path`, print accuracy, and write a CSV."""
    print('Prediction ...')
    x, y = data_loader.load(path, dim, t)
    pipe = load_model(directory + 'model/', m_name)
    # Probability of the first class for each sample.
    pred = pipe.predict_proba(x)[:, 0]
    # NOTE(review): `.round(2).astype(int)` truncates every probability < 1
    # to 0 — was `pred.round().astype(int)` (threshold at 0.5) intended?
    new_p = pred.round(2).astype(int)
    # Count exact matches against the ground-truth labels.
    count = 0
    for i in range(len(new_p)):
        if new_p[i] == y[i]:
            count = count + 1
    print('Test result: ', count, '/', len(y), ' (',
          round(count / len(y) * 100, 2), '%)', sep='')
    df = pd.DataFrame({'Expected': y, 'Predicted': new_p})
    df.to_csv(output)
def train(model_name, category_type, dump=False):
    """Train a TF-IDF pipeline, report accuracy, and optionally pickle it.

    Regression models ('svr', 'linreg') are rounded/clipped to the 0-7 label
    range and scored off-by-one; classifiers are scored exactly.
    Returns the fitted classifier. (Python 2 module.)
    """
    clf = tfidf_pipeline.make(model_name)
    categories = names.categories[category_type]
    print 'Loading data...'
    data = data_loader.load('full', categories)
    # Hold out 10% of the data for testing.
    train_X, train_y, test_X, test_y = data_loader.split(data, 0.1)
    print 'Done.'
    print 'Training...'
    clf.fit(train_X, train_y)
    print 'Done.'
    print 'Testing...'
    predicted = clf.predict(test_X)
    if model_name in ['svr', 'linreg']:
        # Regression output -> nearest integer label in [0, 7].
        predicted = np.clip(np.round(predicted), 0, 7)
        accuracy = scorers.err1(test_y, predicted)
        print 'Off-by-one accuracy: ' + str(accuracy)
    else:
        accuracy = scorers.err0(test_y, predicted)
        print 'Exact accuracy: ' + str(accuracy)
    print classification_report(test_y, predicted, target_names=categories)
    cm = confusion_matrix(test_y, predicted)
    print cm
    plot.plot_confusion_matrix(cm, category_type)
    if dump:
        print 'Saving classifier...'
        if not exists('dumps'):
            makedirs('dumps')
        joblib.dump(clf, join('dumps', category_type + '_' + model_name +
                              '_classifier.pkl'))
        print 'Done.'
    return clf
def main():
    """Compute per-5km-cell mean LAI for every tile file and save as CSV."""
    df = data_loader.make_df("../input_data/data_h27v07")
    # NOTE(review): `bin` shadows the builtin; it is the cell edge in pixels.
    bin = 10
    # One entry per grid cell: 100 columns x 70 rows of 10px cells.
    rang = np.arange(0, int(1000 / bin) * int(700 / bin))
    print(rang)
    col_areas = []
    for i in rang:
        df[f"mean_lai_{i}"] = 0
        col_areas.append(f"mean_lai_{i}")
    #print(df)
    for idx in df.index:
        val = data_loader.load(df.loc[idx, "path"])["Lai_500m"]
        for i in rang:
            # NOTE(review): j divides by 1000/bin but k wraps at 700/bin, so
            # (j, k) does not enumerate the grid bijectively — some cells are
            # repeated and others skipped. Confirm the intended decomposition.
            j = i // int(1000 / bin)
            k = i % int(700 / bin)
            # Column offset 1000 selects the right half of the tile.
            df.loc[idx, f"mean_lai_{i}"] = np.mean(
                val[j * bin:(j + 1) * bin,
                    1000 + k * bin:1000 + (k + 1) * bin])
        #print(df.loc[idx, :])
    print(df.shape)
    # df.sort_values("date", ascending=True).plot(x="date", y="mean_lai")
    # plt.show()
    df.to_csv("../input_data/area_lai_5km_mean.csv")
import matplotlib.pyplot as plt
import numpy as np

import data_loader
from lda import lda_experiment
from logistic import logistic_regression_experiment
from linear import linear_regression_experiment
from qda import qda_experiment

# Run each classification experiment on the three datasets A/B/C.
if __name__ == "__main__":
    # Train datasets
    X_trainA, Y_trainA = data_loader.load("data/trainA")
    X_trainB, Y_trainB = data_loader.load("data/trainB")
    X_trainC, Y_trainC = data_loader.load("data/trainC")

    # Test datasets
    X_testA, Y_testA = data_loader.load("data/testA")
    X_testB, Y_testB = data_loader.load("data/testB")
    X_testC, Y_testC = data_loader.load("data/testC")

    # 2.1 : LDA
    print("{:=^30}".format("LDA"))
    lda_experiment(X_trainA, Y_trainA, X_testA, Y_testA, "A")
    lda_experiment(X_trainB, Y_trainB, X_testB, Y_testB, "B")
    lda_experiment(X_trainC, Y_trainC, X_testC, Y_testC, "C")

    # 2.2 : logistic regression
    print("{:=^30}".format("Logistic Regression"))
    logistic_regression_experiment(X_trainA, Y_trainA, X_testA, Y_testA, "A")
    logistic_regression_experiment(X_trainB, Y_trainB, X_testB, Y_testB, "B")
    logistic_regression_experiment(X_trainC, Y_trainC, X_testC, Y_testC, "C")
def dump_results(results, n_clusters):
    """Write the benchmark results for `n_clusters` to a pretty-printed JSON."""
    out_file = f"./output/algs_{n_clusters}clusters.json"
    with open(out_file, 'w') as f:
        json.dump(results, f, indent=1)


if __name__ == "__main__":
    args = parse_arguments()
    nr_clusters = args.n_clusters
    REPEATS = args.repeats
    cluster_metrics = args.metrics
    # The data lives one directory up; chdir there and back to load it.
    path = os.getcwd()
    os.chdir('..')
    X = data_loader.load(
        "0_data_generators/data_{}_shuffled.csv".format(nr_clusters))
    X = np.array(X)
    print("Done loading, shape:", X.shape)
    os.chdir(path)
    raster = Raster(precision=4, threshold=5, min_size=5)
    clustering_algorithms = [] if args.no_raster else [('RASTER', raster)]
    # 20 for 10 clusters, 300-500 for 100 clusters.
    # Don't even try 1000 clusters (a run takes days).
    tau = 5 / (X.size)  # Clique equivalent of RASTER's threshold
    # One CLIQUE configuration per requested xsi value.
    for xsi in args.xsi:
        clique = clique_fit.Clique(xsi=xsi, tau=tau)
        name = "CLIQUE_xsi" + str(xsi)
        clustering_algorithms.append((name, clique))
import json
from train import train_model
from data_loader import load
from examples.NIPS.MNIST.mnist import test_MNIST, MNIST_Net, MNIST_Net2, neural_predicate
from model import Model
from optimizer import Optimizer
from network import Network
import torch

# Evaluate a pretrained MNIST recognition CNN inside the `abs.pl` DeepProbLog
# program and dump the per-query accuracy log as JSON.
queries = load('train_data.txt')
with open('abs.pl') as f:
    problog_string = f.read()
network = MNIST_Net()
# Restore pretrained weights and switch to inference mode.
network.load_state_dict(torch.load('sd_rec_cnn.pt'))
network.eval()
net = Network(network, 'mnist_net', neural_predicate)
net.optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
model = Model(problog_string, [net], caching=False)
optimizer = Optimizer(model, 2)
# test_MNIST returns (key, value) pairs; collect them into a dict.
log = {}
logs = test_MNIST(model)
for e in logs:
    log[e[0]] = e[1]
with open('notl_rec2abs_dpl.json', 'w') as outfile:
    json.dump(log, outfile)
def __init__(self, data_file, info_file, xindex=2, yindex=3, zindex=4,
             default_plot_type=0):
    """Build the SuperPlot GTK window for plotting a chain.

    Args:
        data_file: chain file to load.
        info_file: info file describing/labelling the chain columns.
        xindex, yindex, zindex: initially selected column for each axis.
        default_plot_type: index into the plot-type combo box.
    """
    self.data_file = data_file
    self.info_file = info_file
    self.xindex = xindex
    self.yindex = yindex
    self.zindex = zindex
    self.plot_limits = default("plot_limits")
    self.bin_limits = default("bin_limits")
    self.fig = None
    self.plot = None
    self.options = None

    # Load data from files
    self.labels, self.data = data_loader.load(info_file, data_file)

    # Enumerate available plot types and keep an ordered
    # dict mapping descriptions to classes.
    # Using an ordered dict means the order in which classes
    # are listed in plot_types will be preserved in the GUI.
    self.plots = OrderedDict()
    for plot_class in plots.plot_types:
        self.plots[plot_class.description] = plot_class

    #######################################################################
    # Combo-box for various plot types
    typetitle = gtk.Button("Plot type:")
    self.typebox = gtk.combo_box_new_text()
    for description in self.plots.keys():
        self.typebox.append_text(description)
    self.typebox.set_active(default_plot_type)  # Set to default plot type

    #######################################################################
    # Combo box for selecting x-axis variable
    xtitle = gtk.Button("x-axis variable:")
    self.xbox = gtk.combo_box_new_text()
    for label in self.labels.itervalues():
        self.xbox.append_text(label)
    self.xbox.set_wrap_width(5)
    self.xbox.connect('changed', self._cx)
    self.xtext = gtk.Entry()
    self.xtext.set_text(self.labels[self.xindex])
    self.xtext.connect("changed", self._cxtext)
    self.xbox.set_active(self.xindex)

    #######################################################################
    # Combo box for selecting y-axis variable
    ytitle = gtk.Button("y-axis variable:")
    self.ybox = gtk.combo_box_new_text()
    for label in self.labels.itervalues():
        self.ybox.append_text(label)
    self.ybox.set_wrap_width(5)
    self.ybox.connect('changed', self._cy)
    self.ytext = gtk.Entry()
    self.ytext.set_text(self.labels[self.yindex])
    self.ytext.connect("changed", self._cytext)
    self.ybox.set_active(self.yindex)

    #######################################################################
    # Combo box for selecting z-axis variable
    ztitle = gtk.Button("z-axis variable:")
    self.zbox = gtk.combo_box_new_text()
    for label in self.labels.itervalues():
        self.zbox.append_text(label)
    self.zbox.set_wrap_width(5)
    self.zbox.connect('changed', self._cz)
    self.ztext = gtk.Entry()
    self.ztext.set_text(self.labels[self.zindex])
    self.ztext.connect("changed", self._cztext)
    self.zbox.set_active(self.zindex)

    #######################################################################
    # Check buttons for log Scaling
    self.logx = gtk.CheckButton('Log x-data.')
    self.logy = gtk.CheckButton('Log y-data.')
    self.logz = gtk.CheckButton('Log z-data.')

    #######################################################################
    # Text box for plot title
    tplottitle = gtk.Button("Plot title:")
    self.plottitle = gtk.Entry()
    self.plottitle.set_text(default("plot_title"))

    #######################################################################
    # Legend properties

    # Text box for legend title
    tlegtitle = gtk.Button("Legend title:")
    self.legtitle = gtk.Entry()
    self.legtitle.set_text("")

    # Combo box for legend position
    tlegpos = gtk.Button("Legend position:")
    self.legpos = gtk.combo_box_new_text()
    for loc in ["best", "upper right", "lower left", "lower right", "right",
                "center left", "center right", "lower center",
                "upper center", "center", "no legend"]:
        self.legpos.append_text(loc)
    self.legpos.set_active(0)  # Default is first in above list - "best"

    #######################################################################
    # Spin button for number of bins per dimension
    tbins = gtk.Button("Bins per dimension:")
    self.bins = gtk.SpinButton()
    self.bins.set_increments(10, 10)
    self.bins.set_range(5, 10000)
    self.bins.set_value(default("nbins"))

    #######################################################################
    # Axes limits
    alimits = gtk.Button("Comma separated plot limits\n"
                         "x_min, x_max, y_min, y_max:")
    self.alimits = gtk.Entry()
    self.alimits.connect("changed", self._calimits)
    self.alimits.append_text("")

    #######################################################################
    # Bin limits
    blimits = gtk.Button("Comma separated bin limits\n"
                         "x_min, x_max, y_min, y_max:")
    self.blimits = gtk.Entry()
    self.blimits.connect("changed", self._cblimits)
    self.blimits.append_text("")

    #######################################################################
    # Check buttons for optional plot elements (all enabled by default)
    self.show_best_fit = gtk.CheckButton("Best-fit")
    self.show_posterior_mean = gtk.CheckButton("Posterior mean")
    self.show_credible_regions = gtk.CheckButton("Credible regions")
    self.show_conf_intervals = gtk.CheckButton("Confidence intervals")
    self.show_posterior_pdf = gtk.CheckButton("Posterior PDF")
    self.show_prof_like = gtk.CheckButton("Profile Likelihood")
    self.show_best_fit.set_active(True)
    self.show_posterior_mean.set_active(True)
    self.show_credible_regions.set_active(True)
    self.show_conf_intervals.set_active(True)
    self.show_posterior_pdf.set_active(True)
    self.show_prof_like.set_active(True)

    #######################################################################
    # Make plot button
    makeplot = gtk.Button('Make plot.')
    makeplot.connect("clicked", self._pmakeplot)

    #######################################################################
    # Check boxes to control what is saved (note we only attach them to the
    # window after showing a plot)
    self.save_image = gtk.CheckButton('Save image')
    self.save_image.set_active(True)
    self.save_summary = gtk.CheckButton('Save statistics in plot')
    self.save_summary.set_active(True)
    self.save_pickle = gtk.CheckButton('Save pickle of plot')
    self.save_pickle.set_active(True)

    #######################################################################
    # Layout - GTK Table (widgets placed by (column, row) cell ranges)
    self.gridbox = gtk.Table(17, 5, False)

    self.gridbox.attach(typetitle, 0, 1, 0, 1, xoptions=gtk.FILL)
    self.gridbox.attach(self.typebox, 1, 2, 0, 1, xoptions=gtk.FILL)

    self.gridbox.attach(xtitle, 0, 1, 1, 2, xoptions=gtk.FILL)
    self.gridbox.attach(self.xbox, 1, 2, 1, 2, xoptions=gtk.FILL)
    self.gridbox.attach(self.xtext, 1, 2, 2, 3, xoptions=gtk.FILL)

    self.gridbox.attach(ytitle, 0, 1, 3, 4, xoptions=gtk.FILL)
    self.gridbox.attach(self.ybox, 1, 2, 3, 4, xoptions=gtk.FILL)
    self.gridbox.attach(self.ytext, 1, 2, 4, 5, xoptions=gtk.FILL)

    self.gridbox.attach(ztitle, 0, 1, 5, 6, xoptions=gtk.FILL)
    self.gridbox.attach(self.zbox, 1, 2, 5, 6, xoptions=gtk.FILL)
    self.gridbox.attach(self.ztext, 1, 2, 6, 7, xoptions=gtk.FILL)

    self.gridbox.attach(self.logx, 0, 1, 2, 3, xoptions=gtk.FILL)
    self.gridbox.attach(self.logy, 0, 1, 4, 5, xoptions=gtk.FILL)
    self.gridbox.attach(self.logz, 0, 1, 6, 7, xoptions=gtk.FILL)

    self.gridbox.attach(tplottitle, 0, 1, 9, 10, xoptions=gtk.FILL)
    self.gridbox.attach(self.plottitle, 1, 2, 9, 10, xoptions=gtk.FILL)

    self.gridbox.attach(tlegtitle, 0, 1, 10, 11, xoptions=gtk.FILL)
    self.gridbox.attach(self.legtitle, 1, 2, 10, 11, xoptions=gtk.FILL)

    self.gridbox.attach(tlegpos, 0, 1, 11, 12, xoptions=gtk.FILL)
    self.gridbox.attach(self.legpos, 1, 2, 11, 12, xoptions=gtk.FILL)

    self.gridbox.attach(tbins, 0, 1, 12, 13, xoptions=gtk.FILL)
    self.gridbox.attach(self.bins, 1, 2, 12, 13, xoptions=gtk.FILL)

    self.gridbox.attach(alimits, 0, 1, 13, 14, xoptions=gtk.FILL)
    self.gridbox.attach(self.alimits, 1, 2, 13, 14, xoptions=gtk.FILL)

    self.gridbox.attach(blimits, 0, 1, 14, 15, xoptions=gtk.FILL)
    self.gridbox.attach(self.blimits, 1, 2, 14, 15, xoptions=gtk.FILL)

    # Two rows of optional-element check boxes, stacked in a VBox.
    point_plot_container = gtk.VBox()
    point_plot_box_upper = gtk.HBox(homogeneous=True)
    point_plot_box_lower = gtk.HBox(homogeneous=True)

    for check_box in [self.show_conf_intervals,
                      self.show_credible_regions,
                      self.show_best_fit]:
        point_plot_box_upper.pack_start_defaults(check_box)

    for check_box in [self.show_posterior_mean,
                      self.show_posterior_pdf,
                      self.show_prof_like]:
        point_plot_box_lower.pack_start_defaults(check_box)

    point_plot_container.pack_start_defaults(point_plot_box_upper)
    point_plot_container.pack_start_defaults(point_plot_box_lower)

    self.gridbox.attach(point_plot_container, 0, 2, 15, 16,
                        xoptions=gtk.FILL)

    self.gridbox.attach(makeplot, 0, 2, 16, 17, xoptions=gtk.FILL)

    #######################################################################
    # Make main GUI window
    self.window = gtk.Window()
    self.window.maximize()
    self.window.set_title("SuperPlot")

    # Quit if cross is pressed
    self.window.connect('destroy', lambda w: gtk.main_quit())

    # Add the table to the window and show
    self.window.add(self.gridbox)
    self.gridbox.show()
    self.window.show_all()

    return
import sys
from sklearn.externals.joblib import dump
import data_loader
import names
import tfidf_pipeline
import model_presets

# Train and pickle the two classifiers used by the web app:
# a linear regressor for star ratings and an SVC for binary sentiment.
# (Python 2 script.)
if __name__ == '__main__':
    for (category_name, model_name) in [('stars', 'linreg'),
                                        ('binary', 'svc')]:
        print 'Loading ' + category_name + ' data'
        train, _ = data_loader.load('split', names.categories[category_name])
        print 'Training ' + model_name
        clf = tfidf_pipeline.make(model_name)
        clf.fit(train.data, train.target)
        print 'Dumping ' + model_name
        dump(clf, 'web_clf_' + category_name + '.pkl')
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='data/mnist.pkl.gz',
                           batch_size=600):
    """Train logistic regression on MNIST with minibatch SGD + early stopping.

    Saves the best classifier (by validation error) to 'best_model.pkl'.
    (Python 2 module: `/` below is integer division on these int operands.)
    """
    training_set, validation_set, testing_set, = data_loader.load(dataset)
    training_set_x, training_set_y = training_set
    validation_set_x, validation_set_y = validation_set
    testing_set_x, testing_set_y = testing_set

    # compute number of minibatches for training, validation and testing
    n_train_batches = training_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = validation_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = testing_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = tensor.lscalar()

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = tensor.matrix('x')
    y = tensor.ivector('y')

    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: testing_set_x[index * batch_size: (index + 1) * batch_size],
            y: testing_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: validation_set_x[index * batch_size: (index + 1) * batch_size],
            y: validation_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = tensor.grad(cost=cost, wrt=classifier.W)
    g_b = tensor.grad(cost=cost, wrt=classifier.b)

    # update the parameters of the model
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: training_set_x[index * batch_size: (index + 1) * batch_size],
            y: training_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = 5 * n_train_batches  # frequency of validation
                                                # (every 5 epochs)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # iter: number of minibatches used)
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # update best_validation_loss
                    best_validation_loss = this_validation_loss
                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )
                    # save the best model
                    # NOTE(review): opened in text mode 'w' — pickling
                    # binary data usually wants 'wb'; confirm on the target
                    # platform.
                    with open('best_model.pkl', 'w') as f:
                        cPickle.dump(classifier, f)
            if patience <= iter:
                done_looping = True
                break
        epoch = epoch + 1
    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%,'
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
from train import train_model from model import Model from network import Network from standard_networks import FC from data_loader import load from optimizer import Optimizer import torch train = 2 test = 8 train_queries = load('data/train{}_test{}_train.txt'.format(train, test)) test_queries = load('data/train{}_test{}_test.txt'.format(train, test)) def neural_pred(network, i1, i2): d = torch.zeros(20) d[int(i1)] = 1.0 d[int(i2) + 10] = 1.0 d = torch.autograd.Variable(d.unsqueeze(0)) output = network.net(d) return output.squeeze(0) fc1 = FC(20, 2) adam = torch.optim.Adam(fc1.parameters(), lr=1.0) swap_net = Network(fc1, 'swap_net', neural_pred, optimizer=adam) #with open('compare.pl') as f: with open('quicksort.pl') as f: problog_string = f.read()
import numpy as np import sklearn.tree import sklearn.cross_validation import data_loader from sklearn.metrics import make_scorer # Load train data (X_train, Y_train) = data_loader.load("Dataset/churn.data.txt", standardize=False) (X_test, Y_test) = data_loader.load("Dataset/churn.test.txt", standardize=False) def custom_scorer(ground_truth, predictions): ground_truth = ground_truth predictions = predictions prec = sklearn.metrics.precision_score(ground_truth, predictions) rec = sklearn.metrics.recall_score(ground_truth, predictions) f1 = sklearn.metrics.f1_score(ground_truth, predictions) print "prec: " + str(prec) print "rec: " + str(rec) print "f1: " + str(f1) return f1 model = sklearn.tree.DecisionTreeClassifier() model.fit(X_train, Y_train) Y_pred = model.predict(X_test) score = custom_scorer(Y_test, Y_pred)
import numpy as np from sklearn.svm import * import sklearn.cross_validation import data_loader from sklearn.metrics import make_scorer from sklearn.feature_selection import SelectFromModel # Load train data (X_train, Y_train) = data_loader.load("Dataset/churn.data.txt") (X_test, Y_test) = data_loader.load("Dataset/churn.test.txt") def custom_scorer(ground_truth, predictions): ground_truth = ground_truth predictions = predictions prec = sklearn.metrics.precision_score(ground_truth, predictions) rec = sklearn.metrics.recall_score(ground_truth, predictions) f1 = sklearn.metrics.f1_score(ground_truth, predictions) print "prec: " + str(prec) print "rec: " + str(rec) print "f1: " + str(f1) return f1 # Build linear SVM classifier, l1 regularization to perform implicit feature selection lsvc = LinearSVC(C=0.01, penalty="l1", dual=False).fit(X_train, Y_train) model = SelectFromModel(lsvc, prefit=True) features_selected = [elem for selected, elem in zip(model.get_support(), data_loader.get_feature_names()) if selected]
def main():
    """Train the Text_CNN classifier end to end.

    Loads the dataset, builds (x, y) DataLoaders for the three splits, trains
    with BCE loss + Adam and step LR decay, reports per-epoch train/val
    accuracy, then evaluates on the test split and saves the weights.

    Relies on module-level names defined outside this view: `load`,
    `data_path`, `batch_size`, `Text_CNN`, `device`, `learning_rate`,
    `num_epoch`, `learing_rate_decay` (sic), `eval`, `f1_score`.
    """
    print("data loading...")
    loading = load(data_path)
    train_size, test_size, val_size = loading.return_len()

    # Placeholder tensors sized per split; shape: (N, 127, 200) — presumably
    # (samples, sequence length, embedding dim), TODO confirm against loader.
    # Fixed: the original allocated val with test_size and test with val_size.
    x_train_torch = torch.empty(train_size, 127, 200)
    x_val_torch = torch.empty(val_size, 127, 200)
    x_test_torch = torch.empty(test_size, 127, 200)
    y_train_torch = torch.empty(train_size)
    y_val_torch = torch.empty(val_size)
    y_test_torch = torch.empty(test_size)
    print(x_train_torch.shape)
    print(x_test_torch.shape)
    print(x_val_torch.shape)

    # The placeholders are immediately replaced by the real processed tensors.
    (x_train_torch, x_val_torch, x_test_torch,
     y_train_torch, y_val_torch, y_test_torch) = loading.main_processing()
    print(x_train_torch.shape, x_val_torch.shape, x_test_torch.shape,
          y_train_torch.shape, y_val_torch.shape, y_test_torch.shape)
    print("data loading success")

    def _make_loader(tensor):
        # All splits share the same deterministic loader settings; x and y
        # are iterated in lockstep below, so shuffling must stay off.
        return torch.utils.data.DataLoader(tensor, batch_size=batch_size,
                                           shuffle=False, num_workers=0,
                                           drop_last=True)

    x_train_loader = _make_loader(x_train_torch)
    y_train_loader = _make_loader(y_train_torch)
    x_val_loader = _make_loader(x_val_torch)
    y_val_loader = _make_loader(y_val_torch)
    x_test_loader = _make_loader(x_test_torch)
    y_test_loader = _make_loader(y_test_torch)

    model = Text_CNN().to(device)  # move model to the configured device
    loss_func = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Per-epoch, per-batch loss table; filled by index inside the loop.
    loss_arr = [[0 for _ in range(len(x_train_loader))]
                for _ in range(num_epoch)]
    train_acc = []
    val_acc = []
    for i in range(num_epoch):
        start = time.time()  # wall-clock per epoch
        for j, (data, label) in enumerate(zip(x_train_loader, y_train_loader)):
            x = data.to(device)
            y_ = label.to(device)
            optimizer.zero_grad()
            output = model(x)  # idiomatic call; was model.forward(x)
            loss = loss_func(output, y_)
            loss.backward()
            optimizer.step()
            loss_arr[i][j] = loss.item()

            if j == len(x_train_loader) - 1:
                # End of epoch: report mean loss and split accuracies.
                print("Epoch :", i + 1, " Loss :",
                      sum(loss_arr[i], 0.0) / len(loss_arr[i]))
                train_acc.append(eval(x_train_loader, y_train_loader, model))
                print("Accuracy of Train Data : {}".format(train_acc[i]))
                val_acc.append(eval(x_val_loader, y_val_loader, model))
                print("Accuracy of Validation Data : {}".format(val_acc[i]))
                # Fixed: removed a stray `loss_arr.append(...)` here that
                # appended epoch-level scalars onto the 2-D loss table; the
                # extra entries were never read.
                print("running time :", time.time() - start)
                print('---------------------------------------------------------')

        # Step LR decay: divide by 10 every `learing_rate_decay` epochs
        # (module-level name kept as spelled — defined outside this view).
        lr = learning_rate * (0.1 ** (i // learing_rate_decay))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    f1_score(x_val_loader, y_val_loader, model, device)
    # Final accuracy on the held-out test split.
    print("Accuracy of Test Data : {}".format(
        eval(x_test_loader, y_test_loader, model)))
    # Persist trained weights.
    torch.save(model.state_dict(), './test1.pth')
def run():
    """Load DATASET, build an OOD-aware train/val/test split, train MODEL
    with mixup generators, and write evaluation metrics to disk.

    Relies on many module-level experiment constants (DATASET, N_TRAIN,
    MODEL_NAME, BATCH_SIZE, ...) and helpers (build_model, mixup,
    prepare_ood_from_args, plot_to_image) defined outside this view.
    """
    data = data_loader.load(DATASET,
                            n_train=N_TRAIN,
                            n_test=N_TEST,
                            train_noise=TRAIN_NOISE,
                            test_noise=TEST_NOISE,
                            ood=OOD)
    # These datasets cannot be split with stratification.
    stratify = DATASET not in ["abalone", "segment"]
    if DATASET not in [
            'arcene', 'moon', 'toy_Story', 'toy_Story_ood', 'segment'
    ]:
        print(DATASET)
        x = data_loader.prepare_inputs(data['features'])
        y = data['labels']
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, train_size=TRAIN_TEST_RATIO,
            stratify=y if stratify else None)
    else:
        # Toy datasets ship with a predefined train/val split.
        if DATASET == 'moon' or DATASET == 'toy_Story' or DATASET == 'toy_Story_ood':
            x_train, x_test = data['x_train'], data['x_val']
        else:
            x_train, x_test = data_loader.prepare_inputs(
                data['x_train'], data['x_val'])
        y_train, y_test = data['y_train'], data['y_val']

    if 'N_OOD' in globals() and N_OOD >= 1:
        n_ood = prepare_ood_from_args(data, DATASET, N_OOD)
        n_in = y_train.shape[1] - n_ood
        # Partition training rows into in-distribution vs OOD by argmax class;
        # labels are truncated to the in-distribution columns.
        train_in_idxs = np.argmax(y_train, axis=1) < n_in
        train_ood_idxs = np.argmax(y_train, axis=1) >= n_in
        x_train_in = x_train[train_in_idxs]
        y_train_in = y_train[train_in_idxs][:, 0:n_in]
        x_train_out = x_train[train_ood_idxs]
        y_train_out = y_train[train_ood_idxs][:, 0:n_in]
        # Generate validation split; all OOD rows go to validation.
        x_train_in, x_val_in, y_train_in, y_val_in = train_test_split(
            x_train_in, y_train_in, train_size=TRAIN_TEST_RATIO,
            stratify=y_train_in if stratify else None)
        x_val = np.concatenate((x_train_out, x_val_in), axis=0)
        y_val = np.concatenate((y_train_out, y_val_in), axis=0)
        y_test = y_test[:, 0:n_in]
        y_val = y_val[:, 0:n_in]
        x_train = x_train_in.astype(np.float32)
        x_val = x_val.astype(np.float32)
    else:
        # NOTE(review): on this path x_train_in / y_train_in are never
        # defined, yet the training generator below references them, so this
        # branch would raise NameError at runtime — confirm intended usage.
        x_train, x_val, y_train, y_val = train_test_split(
            x_train, y_train, train_size=TRAIN_TEST_RATIO,
            stratify=y_train if stratify else None)

    print('Finish loading data')

    # Per-run output tree keyed by launch timestamp:
    # ./experiments_ood/<MODEL_NAME>/<t>/{logs,ckpt,model,results.txt}
    gdrive_rpath = './experiments_ood'
    t = int(time.time())
    log_dir = os.path.join(gdrive_rpath, MODEL_NAME, '{}/logs'.format(t))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    file_writer_cm = tf.summary.create_file_writer(log_dir + '/cm')
    checkpoint_filepath = os.path.join(gdrive_rpath, MODEL_NAME,
                                       '{}/ckpt/'.format(t))
    if not os.path.exists(checkpoint_filepath):
        os.makedirs(checkpoint_filepath)
    # Fixed: was '{}/model'.format(format(t)) — redundant double format().
    model_path = os.path.join(gdrive_rpath, MODEL_NAME, '{}/model'.format(t))
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    # Keep only the weights with the best validation OOD AUC.
    model_cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_auc_of_ood',
        mode='max',
        save_best_only=True)

    model = build_model(x_train.shape[1], y_train.shape[1], MODEL, args)

    def plot_boundary(epoch, logs):
        # Render the decision surface over a fixed [-10, 10]^2 grid to
        # TensorBoard; class scores weighted by index to colour regions.
        xy = np.mgrid[-10:10:0.1, -10:10:0.1].reshape(2, -1).T
        hat_z = tf.nn.softmax(model(xy, training=False), axis=1)
        c = np.sum(np.arange(hat_z.shape[1] + 1)[1:] * hat_z, axis=1)
        figure = plt.figure(figsize=(8, 8))
        plt.scatter(xy[:, 0], xy[:, 1], c=c, cmap="brg")
        image = plot_to_image(figure)
        with file_writer_cm.as_default():
            tf.summary.image("Boundaries", image, step=epoch)

    def plot_boundary_pretrain(epoch, logs):
        # Same as plot_boundary but on the toy-data grid and with a fixed
        # 5-class weighting; logged under a separate tag.
        xy = np.mgrid[-1:1.1:0.01, -2:2.1:0.01].reshape(2, -1).T
        hat_z = tf.nn.softmax(model(xy, training=False), axis=1)
        c = np.sum(np.arange(6)[1:] * hat_z, axis=1)
        figure = plt.figure(figsize=(8, 8))
        plt.scatter(xy[:, 0], xy[:, 1], c=c, cmap="brg")
        image = plot_to_image(figure)
        with file_writer_cm.as_default():
            tf.summary.image("Boundaries_pretrain", image, step=epoch)

    # Defined for optional use; not attached to model.fit by default.
    border_callback_pretrain = tf.keras.callbacks.LambdaCallback(
        on_epoch_end=plot_boundary_pretrain)
    border_callback = tf.keras.callbacks.LambdaCallback(
        on_epoch_end=plot_boundary)

    training_generator = mixup.data_generator(x_train_in,
                                              y_train_in,
                                              batch_size=BATCH_SIZE,
                                              n_channels=N_CHANNELS,
                                              shuffle=SHUFFLE,
                                              mixup_scheme=MIXUP_SCHEME,
                                              k=N_NEIGHBORS,
                                              alpha=ALPHA,
                                              local=LOCAL_RANDOM,
                                              out_of_class=OUT_OF_CLASS,
                                              manifold_mixup=MANIFOLD_MIXUP)
    # Validation/test run as single full-size batches with mixup disabled.
    validation_generator = mixup.data_generator(x_val,
                                                y_val,
                                                batch_size=x_val.shape[0],
                                                n_channels=N_CHANNELS,
                                                shuffle=False,
                                                mixup_scheme='none',
                                                alpha=0,
                                                manifold_mixup=MANIFOLD_MIXUP)
    test_generator = mixup.data_generator(x_test,
                                          y_test,
                                          batch_size=x_test.shape[0],
                                          n_channels=N_CHANNELS,
                                          shuffle=False,
                                          mixup_scheme='none',
                                          alpha=0,
                                          manifold_mixup=MANIFOLD_MIXUP)

    training_history = model.fit(
        x=training_generator,
        validation_data=validation_generator,
        epochs=EPOCHS,
        callbacks=[
            tensorboard_callback,
            model_cp_callback,
        ],
    )
    print(model.summary())

    # Restore the best checkpoint before exporting and evaluating.
    model.load_weights(checkpoint_filepath)
    model.save(model_path)
    print('Tensorboard callback directory: {}'.format(log_dir))

    metric_file = os.path.join(gdrive_rpath, MODEL_NAME,
                               '{}/results.txt'.format(t))
    loss = model.evaluate(test_generator, return_dict=True)
    test_outputs = model.predict(test_generator)
    with open(metric_file, "w") as f:
        f.write(str(loss))
def run_multiclass(data_dir: str): data_loader.load(data_dir, ['frame_time_relative', 'dns_qry_name', 'dns_qry_type']) query_name_learner.run_multiclass()
visualize = 0 torch.manual_seed(2) ############################################################################# ############################################################################# ############################################################################# ############################## Load Data #################################### ############################################################################# if load_train_data: from data_loader import load DATA = load(horizon=21, num_nodes=125, num_layers=3, num_rsc=7) X_train, Y_train = DATA.read_train(num_sample=10000) X_train = 1 - X_train # "X" and "Y" are Nxn matrices where "N" is the number of # scenarios and "n" is the number of nodes. Each row of # "X" is a binary vector which has a "0" when the node is # damaged and "1" when the node is repaired. Each element # of "Y" gives the time-step at which the node is repair- # ed and "0" if the node is not damaged. print("\nTraining data was successfully loaded!\n") if load_test_data:
num_processes): ## Step 1: Projection (Parallel) (tiles, scalar) = c_prime.par_map_to_tiles(all_points, precision, threshold, num_processes) ## Step 2: Agglomeration (Sequential) clusters = c_prime.raster_clustering_tiles(tiles, min_size) return (clusters, scalar) if __name__ == "__main__": # load input data data_path = "../0_data_generators/output/data_1000_shuffled.csv" all_points = dl.load(data_path) """ 1) RASTER clusters RASTER projects points to tiles and disregards the former after the projection has been performed. Thus, it requires merely constant space, assuming bounded integers or a bounded coordinate system like the GPS coordinate system for our planet. Input is projected to points that represent tiles. """ precision = 3 threshold = 5 min_size = 4
import data_loader _, test_data = data_loader.load() import mnist_loader training_data, validation_data, _ = mnist_loader.load_data_wrapper() # import network # net = network.Network([784, 30, 10]) # net.learn(training_data, 30, 10, 3.0, test_data) import network2 net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost) net.large_weight_initializer() net.SGD(training_data, 30, 10, 0.5, evaluation_data=test_data, monitor_evaluation_accuracy=True)
def run():
    """Load DATASET, carve train/val/test (plus optional OOD) splits, train
    MODEL with mixup generators, and append evaluation metrics to
    ./experiments_all/results.txt.

    Relies on module-level experiment constants (DATASET, MODEL, MONITOR,
    BATCH_SIZE, ...) and helpers (build_model, mixup, cb, update_n_ood,
    prepare_ood, plot_to_image) defined outside this view.
    """
    data = data_loader.load(DATASET,
                            n_train=N_TRAIN,
                            n_test=N_TEST,
                            train_noise=TRAIN_NOISE,
                            test_noise=TEST_NOISE)
    # These datasets cannot be split with stratification.
    stratify = DATASET not in ["abalone", "segment"]
    if DATASET not in [
            'arcene', 'moon', 'toy_Story', 'toy_Story_ood', 'segment'
    ]:
        print(DATASET)
        x = data_loader.prepare_inputs(data['features'])
        y = data['labels']
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, train_size=TRAIN_TEST_RATIO,
            stratify=y if stratify else None, random_state=0)
    else:
        # Toy datasets ship with a predefined train/val split.
        if DATASET == 'moon' or DATASET == 'toy_Story' or DATASET == 'toy_Story_ood':
            x_train, x_test = data['x_train'], data['x_val']
        else:
            x_train, x_test = data_loader.prepare_inputs(
                data['x_train'], data['x_val'])
        y_train, y_test = data['y_train'], data['y_val']

    # Generate validation split
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, train_size=TRAIN_TEST_RATIO,
        stratify=y_train if stratify else None, random_state=0)
    x_train = x_train.astype(np.float32)
    x_val = x_val.astype(np.float32)
    x_test = x_test.astype(np.float32)

    if 'N_OOD' in globals() and N_OOD >= 1:
        n_ood = update_n_ood(data, DATASET, N_OOD)
        n_ood = y_val.shape[1] - n_ood - 1
        # Fixed: original printed the literal "{n_ood}" — missing f prefix.
        print(f"Number of ood classes: {n_ood}")
        x_train, x_val, x_test, y_train, y_val, y_test, x_ood, y_ood = prepare_ood(
            x_train, x_val, x_test, y_train, y_val, y_test, n_ood, NORM)
        # Split held-out OOD rows between validation and test evaluation.
        x_ood_val, x_ood_test, y_ood_val, y_ood_test = train_test_split(
            x_ood, y_ood, test_size=0.5, random_state=0)
        x_test_with_ood = np.concatenate([x_test, x_ood_test], axis=0)
        y_test_with_ood = np.concatenate([y_test, y_ood_test], axis=0)
        x_val_with_ood = np.concatenate([x_val, x_ood_val], axis=0)
        y_val_with_ood = np.concatenate([y_val, y_ood_val], axis=0)
    else:
        n_ood = 0

    print('Finish loading data')

    # Per-run output tree keyed by launch timestamp:
    # ./experiments_all/<MODEL_NAME>/<t>/{ckpt,model,args.txt}
    gdrive_rpath = './experiments_all'
    t = int(time.time())
    log_dir = os.path.join(gdrive_rpath, MODEL_NAME, '{}'.format(t))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    file_writer_cm = tf.summary.create_file_writer(log_dir + '/cm')
    checkpoint_filepath = os.path.join(log_dir, 'ckpt')
    if not os.path.exists(checkpoint_filepath):
        os.makedirs(checkpoint_filepath)
    model_path = os.path.join(log_dir, 'model')
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    # Keep only the weights that maximize the MONITOR metric.
    model_cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor=MONITOR,
        mode='max',
        save_best_only=True,
        verbose=1)

    model = build_model(x_train.shape[1], y_train.shape[1], MODEL, args)

    def plot_boundary(epoch, logs):
        # Render the decision surface over a fixed [-10, 10]^2 grid to
        # TensorBoard; class scores weighted by index to colour regions.
        xy = np.mgrid[-10:10:0.1, -10:10:0.1].reshape(2, -1).T
        hat_z = tf.nn.softmax(model(xy, training=False), axis=1)
        c = np.sum(np.arange(hat_z.shape[1] + 1)[1:] * hat_z, axis=1)
        figure = plt.figure(figsize=(8, 8))
        plt.scatter(xy[:, 0], xy[:, 1], c=c, cmap="brg")
        image = plot_to_image(figure)
        with file_writer_cm.as_default():
            tf.summary.image("Boundaries", image, step=epoch)

    border_callback = tf.keras.callbacks.LambdaCallback(
        on_epoch_end=plot_boundary)

    training_generator = mixup.data_generator(x_train,
                                              y_train,
                                              batch_size=BATCH_SIZE,
                                              n_channels=N_CHANNELS,
                                              shuffle=SHUFFLE,
                                              mixup_scheme=MIXUP_SCHEME,
                                              k=N_NEIGHBORS,
                                              alpha=ALPHA,
                                              local=LOCAL_RANDOM,
                                              out_of_class=OUT_OF_CLASS,
                                              manifold_mixup=MANIFOLD_MIXUP)
    # Validation/test run as single full-size batches with mixup disabled.
    validation_generator = mixup.data_generator(x_val,
                                                y_val,
                                                batch_size=x_val.shape[0],
                                                n_channels=N_CHANNELS,
                                                shuffle=False,
                                                mixup_scheme='none',
                                                alpha=0,
                                                manifold_mixup=MANIFOLD_MIXUP)
    test_generator = mixup.data_generator(x_test,
                                          y_test,
                                          batch_size=x_test.shape[0],
                                          n_channels=N_CHANNELS,
                                          shuffle=True,
                                          mixup_scheme='none',
                                          alpha=0,
                                          manifold_mixup=MANIFOLD_MIXUP)
    # NOTE(review): the bare `N_OOD > 0` checks below assume N_OOD is always
    # defined, unlike the guarded check above — confirm.
    if N_OOD > 0:
        in_out_test_generator = mixup.data_generator(
            x_test_with_ood,
            y_test_with_ood,
            batch_size=x_test_with_ood.shape[0],
            n_channels=N_CHANNELS,
            shuffle=True,
            mixup_scheme='none',
            alpha=0,
            manifold_mixup=MANIFOLD_MIXUP)

    callbacks = [tensorboard_callback, model_cp_callback]
    if DATASET == 'Toy_story' or DATASET == 'Toy_story_ood':
        # Toy 2-D data: also plot the decision boundary each epoch.
        border_callback = tf.keras.callbacks.LambdaCallback(
            on_epoch_end=cb.plot_boundary)
        callbacks += [border_callback]
    if MODEL in ['jem', 'jemo', 'jehm', 'jehmo', 'jehmo_mix']:
        callbacks += [cb.jem_n_epochs()]

    ## training ##
    t_train_start = int(time.time())
    training_history = model.fit(x=training_generator,
                                 validation_data=validation_generator,
                                 epochs=EPOCHS,
                                 callbacks=callbacks)
    t_train_end = int(time.time())
    used_time = t_train_end - t_train_start

    # Restore the best checkpoint before evaluating.
    model.load_weights(checkpoint_filepath)
    print('Tensorboard callback directory: {}'.format(log_dir))

    ood_loss = 0
    metric_file = os.path.join(gdrive_rpath, 'results.txt')
    loss = model.evaluate(test_generator, return_dict=True)
    if N_OOD > 0:
        ood_loss = model.evaluate(in_out_test_generator, return_dict=True)
        with open(metric_file, "a+") as f:
            f.write(f"{MODEL}, {MIXUP_SCHEME}, {DATASET}, {t}, {loss['accuracy']:.3f}," \
                    f"{loss['ece_metrics']:.3f}, {loss['oe_metrics']:.3f}," \
                    f"{ood_loss['accuracy']:.3f}," \
                    f"{ood_loss['ece_metrics']:.3f}, {ood_loss['oe_metrics']:.3f},"
                    f"{n_ood}, {ood_loss['auc_of_ood']}, {used_time}\n")
    else:
        with open(metric_file, "a+") as f:
            f.write(f"{MODEL}, {MIXUP_SCHEME}, {DATASET}, {t}, {loss['accuracy']:.3f}," \
                    f"{loss['ece_metrics']:.3f}, {loss['oe_metrics']:.3f}," \
                    f"None, " \
                    f"None, None,"
                    f"{n_ood}, None, {used_time}\n")

    # Record the experiment arguments next to the run's logs.
    arg_file = os.path.join(log_dir, 'args.txt')
    with open(arg_file, "w+") as f:
        f.write(str(args))
def __init__(self, grams_file): self.data = data_loader.load(grams_file) tk = self.data.keys()[1] if self.data.keys()[0] == '*' else self.data.keys()[0] self.gram_size = len(tk)