def loadDat(rootDir='data', m=None):
    # Training set
    X = np.load(rootDir + '/src.npy')
    Y = np.load(rootDir + '/dst.npy')

    # Randomly shuffle the data
    permInds = np.random.permutation(X.shape[0])
    X = X[permInds]
    Y = Y[permInds]

    # 80/10/10 train/test/validation split; slice indices must be ints.
    # m optionally caps the number of examples kept in each split.
    M = X.shape[0]
    XTrain, YTrain = X[:int(.8 * M)][:m], Y[:int(.8 * M)][:m]
    XTest, YTest = X[int(.8 * M):int(.9 * M)][:m], Y[int(.8 * M):int(.9 * M)][:m]
    XVal, YVal = X[int(.9 * M):][:m], Y[int(.9 * M):][:m]

    batchTrain = batcher.Batcher(XTrain, YTrain)
    batchVal = batcher.Batcher(XVal, YVal)
    batchTest = batcher.Batcher(XTest, YTest)

    # Vocabulary dictionaries (stored as pickled objects, hence allow_pickle)
    vocabSrc = np.load(rootDir + '/vocabSrc.npy', encoding='latin1', allow_pickle=True).tolist()
    vocabDst = np.load(rootDir + '/vocabDst.npy', encoding='latin1', allow_pickle=True).tolist()
    vocabSrcSz = len(vocabSrc)
    vocabDstSz = len(vocabDst)

    return batchTrain, batchVal, batchTest, vocabSrc, vocabDst, vocabSrcSz, vocabDstSz
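# Usage sketch for loadDat (a minimal example, assuming ./data contains the .npy
# files referenced above; m=None keeps every example in each split).
batchTrain, batchVal, batchTest, vocabSrc, vocabDst, vocabSrcSz, vocabDstSz = loadDat(
    rootDir='data', m=None)
print('source vocab size:', vocabSrcSz, 'target vocab size:', vocabDstSz)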
def main():
    parser = argparse.ArgumentParser(description='IFT6135')
    parser.add_argument('--batch-size', type=int, default=32)
    # argparse's type=bool treats any non-empty string as True, so expose this as a flag
    parser.add_argument('--use-cuda', action='store_true', default=False)
    parser.add_argument('--model', default=None)
    args = parser.parse_args()

    params = parameters.params

    test_batch_loader = batcher.Batcher("./Data/test/", shuffle=False, test=True)

    model = mdl.ConvNet(params)
    model.load_state_dict(t.load(args.model))
    if args.use_cuda:
        model = model.cuda()

    test_step = model.tester()
    img, pred = test_batch_loader.test(test_step, use_cuda=args.use_cuda)

    # Write one "id,label" row per test image (id = filename without its extension)
    with open('./Results/current_result.csv', mode='w') as csv_file:
        csv_file.write("id,label\n")
        for i in range(len(img)):
            csv_file.write(img[i].split('/')[-1][:-4] + ',' +
                           test_batch_loader.classes[pred[i]] + '\n')

    return
def main():
    print("main")
    bs = 8
    model = ClassificationCNN(shapein=(bs, 450, 600, 3),
                              convpoolSizes=[(20, 11, 11), (20, 11, 11), (20, 5, 5)],
                              denseSizes=[300, 100],
                              k=7)
    batcher = bt.Batcher()
    model.fit(batcher.generate(bs), batcher.testGenerate(bs))
def get_images(tfrecord_paths, label_name='wealthpooled', return_meta=False):
    '''
    Args
    - tfrecord_paths: list of str, length N <= 32, paths of TFRecord files
    - label_name: str, name of the label feature to read from the TFRecords
    - return_meta: bool, if True return the full batch dict instead of only the images

    Returns: np.array, shape [N, 224, 224, 8], type float32
        (or, if return_meta=True, the batch dict from Batcher.get_batch() evaluated to arrays)
    '''
    init_iter, batch_op = batcher.Batcher(
        tfrecord_files=tfrecord_paths,
        dataset=DATASET,
        batch_size=32,
        ls_bands='ms',
        nl_band='merge',
        label_name=label_name,
        shuffle=False,
        augment=False,
        negatives='zero',
        normalize=True).get_batch()
    with tf.Session() as sess:
        sess.run(init_iter)
        if return_meta:
            ret = sess.run(batch_op)
        else:
            ret = sess.run(batch_op['images'])
    return ret
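# Usage sketch for get_images; the TFRecord paths below are hypothetical placeholders.
example_paths = ['tfrecords/sample_00.tfrecord.gz', 'tfrecords/sample_01.tfrecord.gz']
imgs = get_images(example_paths)                      # np.array, shape [2, 224, 224, 8]
batch = get_images(example_paths, return_meta=True)   # dict with 'images' plus the other batch arrays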
def main():
    # Parameters
    learning_rate = 0.005
    batch_size = 32
    test_batch_size = 128
    training_epochs = 10

    # Use logits as a proxy to test whether we've already defined everything
    try:
        defined_test = logits
    except NameError:
        print('logits did not exist, creating')

        # tf Graph input
        x = tf.placeholder("float", [None, 32, 32, 1], name='X_placeholder')
        y = tf.placeholder("float", [None, n_classes], name='Y_placeholder')

        # Define model
        logits = model.SignModel(x, n_classes)

        # Define loss and optimizer (keyword args are required by this TF op)
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate).minimize(cost)
    else:
        print('Model, variables, placeholders, and operations already defined')

    last_cp = tf.train.latest_checkpoint('.', latest_filename=None)
    if last_cp is None:
        print('Checkpoint not found')
        # Initializing the variables
        init = tf.global_variables_initializer()
    else:
        print('Checkpoint found: {}'.format(last_cp))

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # Launch the graph
    with tf.Session() as sess:
        if last_cp is not None:
            # Restore variables from disk.
            print('Restoring: {}'.format(last_cp))
            saver.restore(sess, last_cp)
        else:
            print('No checkpoint to restore, initializing')
            sess.run(init)

        # Training cycle
        data = batcher.Batcher(GCN_train, y_train)
        for epoch in range(training_epochs):
            total_batch = int(n_train / batch_size)
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y = data.next_batch(batch_size)
                # Run optimization op (backprop)
                sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            # Display logs per epoch step
            c = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c))
        print("Optimization Finished!")

        # Save the variables to disk.
        save_path = saver.save(sess, "./SignsModel.ckpt")
        print("Model saved in file: %s" % save_path)

        # Test model
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        # Calculate accuracy in batches to avoid running out of memory
        testdata = batcher.Batcher(GCN_test, y_test)
        test_batches = int(n_test / test_batch_size)
        total_acc = 0
        for i in range(test_batches):
            test_batch_x, test_batch_y = testdata.next_batch(test_batch_size)
            total_acc += accuracy.eval({x: test_batch_x, y: test_batch_y})
        print("Accuracy:", total_acc / test_batches)
    '--results-logs-path', default='./results/', metavar='RP',
    help='folder where to store the results (default: ./results/)')
parser.add_argument('--input-file', metavar='IF', help='input file')
parser.add_argument('--output-file', metavar='OF', help='output file')
parser.add_argument('--model', metavar="MODEL", help='model file')
parser.add_argument('--decoder', metavar="DEC", default=0, type=int,
                    help='decoder to use for translation')
args = parser.parse_args()

batch_loader = batcher.Batcher("./data/")
params = parameters.params
params["vocab_size"] = batch_loader.vocab_size
params["dropout"] = args.dropout

model = model.RNNAutoEncoder(params)
model.load_state_dict(t.load(args.model))
if args.use_cuda:
    model.cuda()
model.train()

translate = model.translator(batch_loader)

input_file = open(args.input_file, 'r')
output_file = open(args.output_file, 'w')

translate(input_file,
def main(args, params, valid_acc_thresh=0):
    train_batch_loader = batcher.Batcher("./Data/train/")
    valid_batch_loader = batcher.Batcher("./Data/valid/")

    model = mdl.ConvNet(params)
    if args.use_cuda:
        model = model.cuda()

    learning_rate = params["learning_rate"]
    optimizer = SGD(model.parameters(), learning_rate)
    train_step = model.trainer(optimizer)
    valid_step = model.validator()

    tracking_valid_loss = []
    tracking_valid_acc = []
    tracking_train_loss = []
    tracking_train_loss_epoch = []
    tracking_train_acc = []
    tracking_train_acc_epoch = []

    current_epoch = 0
    while current_epoch < args.epochs:
        iteration = 0
        while current_epoch == train_batch_loader.epoch:
            batch = train_batch_loader.next_batch(batch_size=args.batch_size)
            loss, acc = train_step(batch, use_cuda=args.use_cuda)
            tracking_train_loss.append(loss)
            tracking_train_acc.append(acc)
            # Log the training loss every 50 iterations
            if iteration % 50 == 0:
                print("Epoch: " + str(current_epoch + 1) +
                      ", It: " + str(iteration + 1) +
                      ", Loss: " + str(loss))
            iteration += 1
        current_epoch += 1

        # End of epoch: evaluate on the validation set and track epoch averages
        loss_valid, acc_valid = valid_batch_loader.eval(valid_step, use_cuda=args.use_cuda)
        tracking_valid_loss.append(loss_valid)
        tracking_valid_acc.append(acc_valid)
        tracking_train_loss_epoch.append(
            sum(tracking_train_loss) / float(len(tracking_train_loss)))
        tracking_train_loss = []
        tracking_train_acc_epoch.append(
            sum(tracking_train_acc) / float(len(tracking_train_acc)))
        tracking_train_acc = []

        print('\n')
        print("***VALIDATION***")
        print("Epoch: " + str(current_epoch) +
              ", Loss: " + str(loss_valid) +
              ", Acc: " + str(acc_valid))
        print("****************")
        print('\n')

        if tracking_valid_acc[-1] < valid_acc_thresh:
            break

        # Halve the learning rate when the validation loss stops improving
        if current_epoch >= 3:
            if tracking_valid_loss[-2] <= tracking_valid_loss[-1]:
                learning_rate = learning_rate / 2
                optimizer = SGD(model.parameters(), learning_rate)
                train_step = model.trainer(optimizer)
                print("learning rate adapted to " + str(learning_rate))

        t.save(model.state_dict(),
               "./models/" + args.model_name +
               "_acc" + str(tracking_valid_acc[-1]) +
               "_e" + str(current_epoch) + ".model")

    # Loss curves
    plt.plot(range(len(tracking_train_loss_epoch)), tracking_train_loss_epoch, label="train")
    plt.plot(range(len(tracking_train_loss_epoch)), tracking_valid_loss, label="valid")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend()
    plt.show()

    # Accuracy curves
    plt.plot(range(len(tracking_train_loss_epoch)), tracking_train_acc_epoch, label="train")
    plt.plot(range(len(tracking_train_loss_epoch)), tracking_valid_acc, label="valid")
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.legend()
    plt.show()

    with open("./rescc" + ("_m" if args.modified_loss else "") + ".txt", 'w') as f:
        f.write(str(tracking_train_loss_epoch))
        f.write('\n')
        f.write(str(tracking_train_acc_epoch))
        f.write('\n')
        f.write(str(tracking_valid_loss))
        f.write('\n')
        f.write(str(tracking_valid_acc))

    return tracking_valid_loss[-1], tracking_valid_acc[-1]
def getData(m, T):
    # Generate a synthetic training set of m examples (sequences padded to length T)
    # and wrap it in a Batcher.
    xTrain, yTrain = genDat(m, T)
    batchGen = batcher.Batcher(xTrain, yTrain)
    return batchGen
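# Usage sketch for getData. The next_batch call below is an assumption borrowed from
# the other Batcher usages in this collection; the actual method name may differ.
batchGen = getData(1000, 30)              # 1000 examples, max sequence length 30
xBatch, yBatch = batchGen.next_batch(16)  # draw one batch of 16 examples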
outRef = open('translatedRef.txt', 'w')

m = 130000
T = 30  # Max vector length, padded
H = 256
batchSize = 16
maxIters = 100010
numLayers = 4
embedDim = 128
vocabIn = 47861
vocabOut = 22731
savename = 'NMTeng2viet'

# Load in data
X = np.load('datFixed/testSrc.npy')[:m]
Y = np.load('datFixed/testDst.npy')[:m]
batchTrain = batcher.Batcher(X, Y)

# Validation set
X = np.load('datFixed/valSrc.npy')[:m]
Y = np.load('datFixed/valDst.npy')[:m]
batchVal = batcher.Batcher(X, Y)

# Vocabulary lookup dictionaries (pickled objects, hence allow_pickle)
vocabDict = np.load('lookupDict.npy', allow_pickle=True)[0]
vocabDictSrc = np.load('lookupDictSrc.npy', allow_pickle=True)[0]

print('Initializing Variables')

# Special token ids
UNK = 0
START = 1
STOP = 2
    test, word2id=word2id, split='test',
    save_path='data/test/test.hdf5', resize=resize)
dev_numeric = data_ops.load_or_make_numeric_examples(
    dev, word2id=word2id, split='dev',
    save_path='data/dev/dev.hdf5', resize=resize)

# Batchers: shuffle/pad/repeat for training, fixed-order batches for test and dev
batcher_train = batcher.Batcher(data=train_numeric, size=20,
                                shuffle=True, pad=True, repeat=True)
batcher_test = batcher.Batcher(data=test_numeric, size=20)
batcher_dev = batcher.Batcher(data=dev_numeric, size=len(dev_numeric['image']))

# Graph inputs
images_t = tf.placeholder(tf.float32, [None, *resize, 3],
                          name='images_t')  # 3 is the number of RGB channels
questions_t = tf.placeholder(tf.int32, [None, None], name='questions_t')
labels_t = tf.placeholder(tf.int32, [None], name='labels_t')