def ComputePrecisionK(modelfile, testfile, K_list):
    CURRENT_DIR = os.path.dirname(os.path.abspath("./WikiCategoryLabelling/"))
    sys.path.append(os.path.dirname(CURRENT_DIR + "/WikiCategoryLabelling/"))

    maxParagraphLength = 250
    maxParagraphs = 10
    labels = 1001
    vocabularySize = 76390
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels, vocabularySize)
    testing.getDataFromfile(testfile)
    print("data loading done")
    print("no of test examples: " + str(testing.totalPages))

    model.load(modelfile)
    print("model loading done")

    batchSize = 10
    testing.restore()

    truePre = []
    pred = []
    # Run full batches first, then one final partial batch for the remainder.
    for i in range(math.ceil(testing.totalPages / batchSize)):
        if i < testing.totalPages // batchSize:
            data = testing.nextBatch(batchSize)
        else:
            data = testing.nextBatch(testing.totalPages % batchSize)
        truePre.extend(data[0])
        pre = model.predict(data)
        pred.extend(pre[0].tolist())

    # Precision@K: fraction of the top-K predicted labels that are true labels,
    # averaged over all test examples.
    avgPrecK = [0] * len(K_list)
    for i, p in enumerate(pred):
        sortedL = sorted(range(len(p)), key=p.__getitem__, reverse=True)
        for k, K in enumerate(K_list):
            labelK = sortedL[:K]
            precK = 0
            for l in labelK:
                if truePre[i][l] == 1:
                    precK += 1
            avgPrecK[k] += precK / float(K)

    avgPrecK = [float(a) / len(pred) for a in avgPrecK]
    for p in avgPrecK:
        print(str(p))
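# Hedged usage sketch: the checkpoint and test-file names below are assumptions,
# not the original project's; ComputePrecisionK itself relies on os, sys, math,
# Model and DataParser being imported at module level.
if __name__ == "__main__":
    ComputePrecisionK("model2_l1000_90", "data/vocab_test.txt", K_list=[1, 3, 5])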
def main():
    FROM = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0).timestamp()
    TO = datetime.utcnow().timestamp()
    PAIR = 'BTCEUR'
    db = psycopg2.connect('dbname=johnny5 user=johnny5')
    cur = db.cursor()
    opts, args = getopt(sys.argv[1:], 'f:lp:t:')
    for o, a in opts:
        if o == '-f':
            FROM = date2ts(a)
        elif o == '-p':
            PAIR = a
        elif o == '-t':
            TO = date2ts(a)
        elif o == '-l':
            cur.execute('SELECT MIN(ts) FROM historical')
            FROM = cur.fetchone()[0]
        else:
            raise Exception("Unknown option: %s" % o)
    cur.execute('SELECT id, kname FROM pairs WHERE name=%s', (PAIR, ))
    pair_id, KPAIR = cur.fetchone()

    df = open('plot.data', 'wt')
    # Grid search over Model2's low/high thresholds: replay the historical feed
    # for each (pct_high, pct_low) pair and record the final balance.
    for pct_high in range(1, 50):
        for pct_low in range(1, 50):
            print()
            print("pct_low=%d, pct_high=%d" % (pct_low, pct_high))
            f = HistoricalFeed(db, pair_id, FROM, 600 * 2)
            m = Model2(db, pair_id, 6 * 3600, 600 * 2, pct_low=pct_low, pct_high=pct_high)
            while True:
                try:
                    ts, value = f.next()
                except Feed.NoMore:
                    break
                if ts >= TO:
                    break
                action = m.newpoint(ts, value)
            df.write('%d %d %0.2f\n' % (pct_high, pct_low, m.total_balance(value)))
            sys.stdout.flush()
        df.write('\n')
    f.close()
def _initialize(self, interactions):
    self._num_items = interactions.num_items
    self._num_users = interactions.num_users
    self.test_sequence = interactions.test_sequences

    self._net = Model2(self._num_users, self._num_items, self.model_args).to(self._device)
    self._optimizer = optim.Adam(self._net.parameters(),
                                 weight_decay=self._l2,
                                 lr=self._learning_rate)
def main(args):
    start = time.time()

    # seed number generator for experiment reproducibility
    np.random.seed(args.seed)

    X_train, y_train = load_SPECT_data(args.train_path)
    X_test, y_test = load_SPECT_data(args.test_path)

    # create balanced validation set from test set
    X_val = X_test[-30:, :]
    y_val = y_test[-30:]

    # instantiate model
    if args.num_layers == 1:
        dimensions = [X_train.shape[1], args.num_neurons1, 2]
        model = Model(dimensions, args.lr, activation=args.activation)
    elif args.num_layers == 2:
        dimensions = [X_train.shape[1], args.num_neurons1, args.num_neurons2, 2]
        model = Model2(dimensions, args.lr, activation=args.activation)

    # train model
    history = model.fit(X_train, y_train, X_val, y_val, args.num_epochs,
                        args.batch_size, args.patience)

    # determine accuracies for data sets
    train_acc, train_metrics = model.evaluate(X_train, y_train)
    val_acc, val_metrics = model.evaluate(X_val, y_val)
    test_acc, test_metrics = model.evaluate(X_test, y_test)

    print(f'Training accuracy: {train_acc:.4f}, precision: {train_metrics["precision"]:.4f},',
          f'recall: {train_metrics["recall"]:.4f}, F1: {train_metrics["f1"]:.4f}')
    print(f'Validation accuracy: {val_acc:.4f}, precision: {val_metrics["precision"]:.4f},',
          f'recall: {val_metrics["recall"]:.4f}, F1: {val_metrics["f1"]:.4f}')
    print(f'Test accuracy: {test_acc:.4f}, precision: {test_metrics["precision"]:.4f},',
          f'recall: {test_metrics["recall"]:.4f}, F1: {test_metrics["f1"]:.4f}')

    plot_history(history)
    print(f'Script completed in {time.time()-start:.2f} secs')
    return 0
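# Hedged invocation sketch: the argument names mirror the attributes main()
# actually reads; the defaults and help text are assumptions, not the original
# script's.
if __name__ == '__main__':
    import argparse
    import sys

    parser = argparse.ArgumentParser(description='Train an MLP on the SPECT data set')
    parser.add_argument('--train_path', type=str, required=True)
    parser.add_argument('--test_path', type=str, required=True)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--num_layers', type=int, choices=[1, 2], default=1)
    parser.add_argument('--num_neurons1', type=int, default=16)
    parser.add_argument('--num_neurons2', type=int, default=8)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--activation', type=str, default='sigmoid')
    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--patience', type=int, default=10)
    sys.exit(main(parser.parse_args()))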
def __init__(self, pop_size=64, n_process=None, n_eval=1, p_keep=0.5,
             n_candidate_eval=8, n_candidates=3, no_rew_early_stop=20,
             rnn_size=256, controller_size=128, output_size=3):
    self.pop_size = pop_size
    self.n_process = n_process
    self.n_candidate_eval = n_candidate_eval
    self.n_candidates = n_candidates
    self.no_rew_early_stop = no_rew_early_stop
    if n_process is None:
        self.n_process = os.cpu_count()
    self.n_eval = n_eval
    self.p_keep = p_keep
    self.rnn_size = rnn_size
    self.controller_size = controller_size
    self.output_size = output_size
    self.elite = Model2(no_rew_early_stop, rnn_size=rnn_size,
                        controller_size=controller_size)
    self.gen = 0
    self.results_queue = Queue()
    self.training_queue = Queue()
    self.pool = Pool(self.n_process, distribute,
                     (self.training_queue, self.results_queue, True))
def main():
    FROM = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0).timestamp()
    TO = datetime.utcnow().timestamp()
    PAIR = 'BTCEUR'
    db = psycopg2.connect('dbname=johnny5 user=johnny5')
    cur = db.cursor()
    opts, args = getopt(sys.argv[1:], 'f:lp:t:')
    for o, a in opts:
        if o == '-f':
            FROM = date2ts(a)
        elif o == '-p':
            PAIR = a
        elif o == '-t':
            TO = date2ts(a)
        elif o == '-l':
            cur.execute('SELECT MIN(ts) FROM historical')
            FROM = cur.fetchone()[0]
        else:
            raise Exception("Unknown option: %s" % o)
    cur.execute('SELECT id, kname FROM pairs WHERE name=%s', (PAIR, ))
    pair_id, KPAIR = cur.fetchone()

    f = HistoricalFeed(db, pair_id, FROM, 600)
    m = Model2(db, pair_id, 6 * 3600, 600 * 2)
    # Replay the historical feed through the model until it runs out or TO is reached.
    while True:
        try:
            ts, value = f.next()
        except Feed.NoMore:
            break
        if ts >= TO:
            break
        action = m.newpoint(ts, value)
from DataParser import DataParser
from model2 import Model2 as Model

maxParagraphLength = 250
maxParagraphs = 10
labels = 1000
vocabularySize = 15000
model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

training = DataParser(maxParagraphLength, maxParagraphs, labels, vocabularySize)
training.getDataFromfile("data/vocab_3L_l1000_sampled_10000_red_train.txt")

batchSize = 50
epoch = 0
epochEnd = 10
for e in range(epoch, epochEnd):
    print('Epoch: ' + str(e))
    cost = 0
    for itr in range(int(training.totalPages / batchSize)):
        cost += model.train(training.nextBatch(batchSize))
    print(str(cost))
    if e % 10 == 0:
        model.save("model2_l1000_" + str(e))
logitse1 = tf.placeholder(tf.float32, shape=[None, 10])
logitse2 = tf.placeholder(tf.float32, shape=[None, 10])

# Make model 1
model1 = Model1(X, Y, keep_prob)
logits1, predictions1 = model1.build()
loss_op1 = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=model1.Y))
train_op1 = tf.train.AdamOptimizer(
    learning_rate=model1.learning_rate).minimize(loss_op1)
accuracy1 = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(predictions1, 1), tf.argmax(model1.Y, 1)), tf.float32))

# Make model 2
model2 = Model2(X2, Y2, keep_prob2)
logits2, predictions2 = model2.build()
loss_op2 = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=model2.Y2))
train_op2 = tf.train.AdamOptimizer(
    learning_rate=model2.learning_rate).minimize(loss_op2)
accuracy2 = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(predictions2, 1), tf.argmax(model2.Y2, 1)), tf.float32))

# Make model 3
model3 = Model3(X3, Y3, keep_prob3)
logits3, predictions3 = model3.build()
loss_op3 = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits3, labels=model3.Y3))
train_op3 = tf.train.AdamOptimizer(
    learning_rate=model3.learning_rate).minimize(loss_op3)
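# Hedged training-step sketch (TF1 style) for the graph above. Only the ops and
# placeholders already defined (X, Y, keep_prob, train_op1, loss_op1, accuracy1)
# come from the original; the dummy batch, its input width of 784, and the
# keep probability are assumptions for illustration.
import numpy as np

dummy_x = np.random.rand(32, 784).astype(np.float32)      # assumed input shape
dummy_y = np.eye(10)[np.random.randint(0, 10, size=32)]   # one-hot labels, 10 classes
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # one illustrative step for model 1; models 2 and 3 would be driven the same way
    _, batch_loss, batch_acc = sess.run(
        [train_op1, loss_op1, accuracy1],
        feed_dict={X: dummy_x, Y: dummy_y, keep_prob: 0.8})
    print('loss=%.4f acc=%.4f' % (batch_loss, batch_acc))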
from model2 import Model2

factor = 0.2
params = Model2.params
outputs = {}

# One-at-a-time sensitivity analysis: perturb each parameter by +/- 20%,
# re-solve the model, and record the spread of the solutions.
for key in params.keys():
    original = params[key]
    outputs[key] = {}

    # Increase by factor
    params[key] = original * (1 + factor)
    model = Model2(params)
    outputs[key]['increased'] = model.solve()

    # Decrease by factor
    params[key] = original * (1 - factor)
    model = Model2(params)
    outputs[key]['decreased'] = model.solve()

    # Original value
    params[key] = original
    model = Model2(params)
    outputs[key]['original'] = model.solve()

    # range
    outputs[key]['range'] = outputs[key]['increased'] - outputs[key]['decreased']
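# Hedged follow-up sketch: ranking the parameters by sensitivity. This assumes
# Model2.solve() returns a scalar, so the 'range' values above are plain numbers.
ranked = sorted(outputs.items(), key=lambda kv: abs(kv[1]['range']), reverse=True)
for key, result in ranked:
    print(f"{key}: original={result['original']:.4g}, range={result['range']:.4g}")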
    model1.fit(*train1)
    model1.save_model(model1_path)
    train1_tags = list(chain(*train1[1]))
    train1_pred_tags = list(chain(*model1.predict(train1[0])))
    print(f'Model 1 Train Accuracy: {accuracy(train1_tags, train1_pred_tags)}')
finally:
    test1 = preprocess(Path('data/test1.wtag'))
    test1_true_tags = list(chain(*test1[1]))
    test1_pred_tags = list(chain(*model1.predict(test1[0])))
    print(f'Model 1 Test Accuracy: {accuracy(test1_true_tags, test1_pred_tags)}')

    # Print the confusion counts for the 10 tags the model confuses most.
    confusion = confusion_matrix(test1_true_tags, test1_pred_tags, model1.tag_vocabulary, n=10)
    tags = [tag for tag in model1.tag_vocabulary if (tag, tag) in confusion]
    tags.sort(key=lambda tag: confusion[tag, tag])
    print('Model 1 Test Confusion Matrix:')
    print(''.ljust(5) + ''.join(t.ljust(5) for t in tags))
    rows = [r.ljust(5) + ''.join([f'{confusion[r, c]:.2f}'.ljust(5) for c in tags]) for r in tags]
    print('\n'.join(rows))

# Model 2
model2_path = Path('model/model2')
try:
    model2 = load_model(model2_path)
except FileNotFoundError:
    train2 = preprocess(Path('data/train2.wtag'))
    model2 = Model2(lambda_=0.1, beam=3)
    model2.fit(*train2)
    model2.save_model(model2_path)
    train2_tags = list(chain(*train2[1]))
    train2_pred_tags = list(chain(*model2.predict(train2[0])))
    print(f'Model 2 Train Accuracy: {accuracy(train2_tags, train2_pred_tags)}')

print(f'Total runtime: {time() - total:.2f}s')
def ComputeFscore(modelfile, testfile, outputfile):
    CURRENT_DIR = os.path.dirname(os.path.abspath("./WikiCategoryLabelling/"))
    sys.path.append(os.path.dirname(CURRENT_DIR + "/WikiCategoryLabelling/"))

    maxParagraphLength = 250
    maxParagraphs = 10
    labels = 1000
    vocabularySize = 150000
    model = Model(maxParagraphLength, maxParagraphs, labels, vocabularySize)

    testing = DataParser(maxParagraphLength, maxParagraphs, labels, vocabularySize)
    testing.getDataFromfile(testfile)
    model.load(modelfile)
    print("loading done")

    testing.restore()
    truePre = []
    pred = []
    for itr in range(testing.totalPages):
        data = testing.nextBatch()
        truePre.append(data[0])
        pre = model.predict(data)
        pred.append(pre[0])

    # Tune a per-label threshold on the first 35% of the test pages, then
    # report the F1 score and confusion matrix on the remaining 65%.
    thres = 0.5
    valid = int(len(truePre) * 0.35)
    labelsCount = {}
    ConfusionMa = {}
    fScr = {}
    thresLab = {}
    for la in range(1000):
        if la % 25 == 0:
            print("Current label", la)
        t = []
        p = []
        for i in range(valid):
            t.append(truePre[i][la])
            p.append(pred[i][la])
        bestF, bestThre = thresholdTuning(t, p)

        t = []
        p = []
        for i in range(valid, len(truePre)):
            t.append(truePre[i][la])
            p.append(pred[i][la])
        p = np.array(p)
        fScr[la] = f1_score(t, p >= bestThre)
        ConfusionMa[la] = confusion_matrix(t, p >= bestThre)
        thresLab[la] = bestThre

    f = open(outputfile, "w")
    for i in range(1000):
        inp = str(i) + "," + str(thresLab[i]) + "," + str(fScr[i]) + "\n"
        f.write(inp)
    f.close()
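# Hedged usage sketch: the file names below are assumptions; ComputeFscore
# relies on os, sys, np, f1_score, confusion_matrix, thresholdTuning, Model and
# DataParser being imported at module level.
if __name__ == "__main__":
    ComputeFscore("model2_l1000_90", "data/vocab_test.txt", "fscore_per_label.csv")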