import tensorflow as tf

# trainSet / validationSet are assumed to be dataset objects loaded earlier
# in the script.
imageWidth = trainSet.imgSize[1]
nChannels = trainSet.imgs.shape[3]

Xtrain = trainSet.imgs
Xvalid = validationSet.imgs

Ytrain = getLabelsForDataset(trainSet)
Yvalid = getLabelsForDataset(validationSet)

# Evaluate on only the first len(Xvalid) training samples, so train and
# validation errors are computed over the same number of images.
testIdxsTrainSet = range(len(Xvalid))
testIdxsValidSet = range(len(Xvalid))

meanImg = trainSet.meanImg
stdDevImg = trainSet.stdDevImg
initLandmarks = trainSet.initLandmarks[0].reshape((1, 136))

dan = DAN(initLandmarks)

STAGE = 2

with tf.Session() as sess:
    Saver = tf.train.Saver()
    Writer = tf.summary.FileWriter("logs/", sess.graph)

    if STAGE < 2:
        # Stage 1 trains from scratch.
        sess.run(tf.global_variables_initializer())
    else:
        # Stage 2 fine-tunes a pre-trained stage-1 model.
        Saver.restore(sess, './Model/Model')
        print('Pre-trained model has been loaded!')

    # Landmark68Test(MeanShape, ImageMean, ImageStd, sess)

    print("Starting training...")
    for epoch in range(2):
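        # The epoch body is missing from the excerpt. What follows is a
        # minimal sketch of stage-2 minibatch training; dan['InputImage'],
        # dan['GroundTruth'], dan['S2_Optimizer'] and dan['S2_Cost'] are
        # hypothetical handles into the DAN graph, not names confirmed by
        # the original code, and batchSize is an assumed value.
        batchSize = 64
        for start in range(0, len(Xtrain), batchSize):
            feed = {dan['InputImage']: Xtrain[start:start + batchSize],
                    dan['GroundTruth']: Ytrain[start:start + batchSize]}
            _, cost = sess.run([dan['S2_Optimizer'], dan['S2_Cost']], feed)
        print('Epoch %d done, last batch cost: %f' % (epoch, cost))
        Saver.save(sess, './Model/Model')  # checkpoint after every epoch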
import os

import tensorflow as tf

# log_path is not defined in this excerpt; 'logs' is an assumed value.
log_path = 'logs'
if not os.path.exists(log_path):
    os.mkdir(log_path)
model_path = 'model'
if not os.path.exists(model_path):
    os.mkdir(model_path)

train_number, valid_number = samples_counter()
nb_epochs = 20
batch_size = 128
stage = 2

meanshape = init_meanshape()
model = DAN(meanshape)

# Queue-based TFRecord input pipelines for training and validation.
train_image, train_landmark = read_and_decode('data/train_dataset.tfrecords', batch_size)
valid_image, valid_landmark = read_and_decode('data/valid_dataset.tfrecords', batch_size)

saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())

    # Start the threads that fill the input queues.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    if stage >= 2:
        # Stage 2 resumes from an earlier stage-2 checkpoint.
        saver.restore(sess, 'model/model_step2_02.ckpt')
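    # The excerpt ends before the training loop. A minimal sketch, assuming
    # hypothetical model attributes model.x_placeholder, model.gt_placeholder,
    # model.s2_train_op and model.s2_loss; substitute the real graph handles.
    try:
        steps_per_epoch = train_number // batch_size
        for epoch in range(nb_epochs):
            for step in range(steps_per_epoch):
                # Pull one decoded batch from the queue runners.
                images, landmarks = sess.run([train_image, train_landmark])
                _, loss = sess.run([model.s2_train_op, model.s2_loss],
                                   feed_dict={model.x_placeholder: images,
                                              model.gt_placeholder: landmarks})
            saver.save(sess, os.path.join(model_path,
                                          'model_step2_%02d.ckpt' % (epoch + 1)))
    finally:
        # Stop and join the queue threads even if training raises.
        coord.request_stop()
        coord.join(threads)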
import logging
import os

import pandas as pd
import torch
import torch.multiprocessing as mp  # assumed; plain multiprocessing would also fit
import torch.nn as nn
import torch.optim as optim

# Project-local modules, assumed from usage below:
# run (train/test loops), DAN, DataLoader, to_int, plot_loss


def main(args):
    """Experiment logic"""
    # Get file separator and construct paths
    sep = "\t" if args.file_type == "tsv" else ","
    train_path = os.path.join(args.data_dir, "train.{}".format(args.file_type))
    test_path = os.path.join(args.data_dir, "test.{}".format(args.file_type))

    # Read column headings
    headings = pd.read_csv(train_path, sep=sep, nrows=1).columns
    text, label = "text", "gold_label_{}".format(args.task_type)

    if args.elmo:
        from elmo import TabularReader, ElmoLoader

        # Pretrained URLs
        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"

        # Read dataset
        reader = TabularReader(text, label, sep)
        loader = ElmoLoader(reader, train_path, test_path, args.batch_dims)

        # Build model
        label_map = loader.label_map
        embedding_size = 1024
        model = DAN(to_int(args.layers), len(label_map),
                    embedding_size=embedding_size,
                    elmo_config=(options_file, weight_file))
    else:
        # Build data loader
        loader = DataLoader(
            args.data_dir,
            args.file_type,
            headings,
            text,
            label,
            to_int(args.batch_dims),
            (args.glove_type, args.glove_dim),
            args.temp_dir,
        )

        # Build model
        vocab, label_map = loader.vocab, loader.label_map
        model = DAN(to_int(args.layers), len(label_map),
                    vocab_size=len(vocab),
                    embedding_size=args.glove_dim,
                    pretrained_vecs=vocab.vectors)

    # Define training functions
    optimiser = optim.SGD(model.parameters(), lr=args.lr)
    loss_fn = nn.CrossEntropyLoss()

    # Train
    logging.info("\n\nStarting training...\n\n")
    if args.num_processes > 1:
        # Hogwild-style training: share the model across worker processes.
        model.share_memory()
        processes = []
        for pid in range(args.num_processes):
            p = mp.Process(target=run.training_process,
                           args=(pid, loader, model, optimiser, loss_fn,
                                 args.num_steps // args.num_processes))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
    else:
        report_every = int(args.num_steps * 0.01)
        losses = run.train(loader, model, optimiser, loss_fn, label_map,
                           args.num_steps, report_every)
        if args.plot:
            logging.info("\n\nPlotting training schedule...\n\n")
            plot_loss(losses, report_every, args.temp_dir)

    # Save the trained model
    logging.info("\n\nNow saving...\n\n")
    torch.save(model, os.path.join(args.temp_dir, "saved_model.pt"))

    # Test
    model_acc = run.test(loader, label_map, args.temp_dir)
    if args.baseline:
        logging.info("\n\nComparing with multinomial naive Bayes baseline...\n\n")
        from bayes import multi_nb

        train = pd.read_csv(train_path, sep=sep)
        test = pd.read_csv(test_path, sep=sep)
        train_txt, test_txt = (train[text], train[label]), (test[text], test[label])
        base_acc = multi_nb(train_txt, test_txt)

        logging.info("Model accuracy: {:.6g}".format(model_acc))
        logging.info("Baseline accuracy: {:.6g}".format(base_acc))
        logging.info("{}".format(
            "Model wins!" if model_acc > base_acc else "Baseline wins!"))
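# A sketch of a command-line entry point for main(). The flag names mirror the
# attributes read above; the defaults shown are illustrative assumptions, not
# values from the original project.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Train a DAN text classifier")
    parser.add_argument("--data_dir", required=True,
                        help="directory holding train/test files")
    parser.add_argument("--file_type", default="tsv", choices=["tsv", "csv"])
    parser.add_argument("--task_type", default="simple",
                        help="suffix of the gold label column")
    parser.add_argument("--elmo", action="store_true",
                        help="use ELMo embeddings instead of GloVe")
    parser.add_argument("--batch_dims", default="32",
                        help="comma-separated batch dimensions")
    parser.add_argument("--layers", default="300,300",
                        help="comma-separated hidden layer sizes")
    parser.add_argument("--glove_type", default="6B")
    parser.add_argument("--glove_dim", type=int, default=300)
    parser.add_argument("--temp_dir", default="/tmp")
    parser.add_argument("--lr", type=float, default=0.01)
    parser.add_argument("--num_steps", type=int, default=10000)
    parser.add_argument("--num_processes", type=int, default=1)
    parser.add_argument("--plot", action="store_true")
    parser.add_argument("--baseline", action="store_true")

    logging.basicConfig(level=logging.INFO)
    main(parser.parse_args())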