def main(args):
    """Train the LAC sequence-tagging model end to end.

    Builds the static-graph input descriptors, wires up the lexical-analysis
    data pipeline, and runs Model.fit with periodic evaluation and saving.
    """
    run_place = set_device(args.device)
    # Imperative (dygraph) execution is opt-in via --dynamic.
    if args.dynamic:
        fluid.enable_dygraph(run_place)

    net_inputs = [
        Input([None, args.max_seq_len], 'int64', name='words'),
        Input([None, args.max_seq_len], 'int64', name='target'),
        Input([None], 'int64', name='length'),
    ]
    net_labels = [Input([None, args.max_seq_len], 'int64', name='labels')]
    # Static graph needs concrete feed variables; dygraph feeds eagerly.
    if args.dynamic:
        feeds = None
    else:
        feeds = [holder.forward() for holder in net_inputs + net_labels]

    corpus = LacDataset(args)
    train_loader = create_dataloader(
        create_lexnet_data_generator(
            args,
            reader=corpus,
            file_name=os.path.join(args.data, "train.tsv"),
            place=run_place,
            mode="train"),
        run_place,
        feed_list=feeds)
    test_loader = create_dataloader(
        create_lexnet_data_generator(
            args,
            reader=corpus,
            file_name=os.path.join(args.data, "test.tsv"),
            place=run_place,
            mode="test"),
        run_place,
        feed_list=feeds)

    model = SeqTagging(args, corpus.vocab_size, corpus.num_labels)
    optim = AdamOptimizer(
        learning_rate=args.base_learning_rate,
        parameter_list=model.parameters())
    model.prepare(
        optim,
        LacLoss(),
        ChunkEval(corpus.num_labels),
        inputs=net_inputs,
        labels=net_labels,
        device=args.device)

    # Optionally resume training from a previous checkpoint.
    if args.resume is not None:
        model.load(args.resume)

    model.fit(train_loader,
              test_loader,
              epochs=args.epoch,
              batch_size=args.batch_size,
              eval_freq=args.eval_freq,
              save_freq=args.save_freq,
              save_dir=args.save_dir)
def main():
    """Train a MNIST classifier with the high-level Model API."""
    dev = set_device(FLAGS.device)
    # Switch to imperative (dygraph) execution only when requested.
    if FLAGS.dynamic:
        fluid.enable_dygraph(dev)

    # Flattened 28x28 images and integer class labels.
    image_spec = [Input([None, 784], 'float32', name='image')]
    label_spec = [Input([None, 1], 'int64', name='label')]

    net = MNIST()
    sgd = Momentum(
        learning_rate=FLAGS.lr,
        momentum=.9,
        parameter_list=net.parameters())
    net.prepare(
        sgd,
        CrossEntropy(),
        Accuracy(topk=(1, 2)),
        image_spec,
        label_spec,
        device=FLAGS.device)

    # Optionally restore weights (and optimizer state) from a checkpoint.
    if FLAGS.resume is not None:
        net.load(FLAGS.resume)

    net.fit(MnistDataset(mode='train'),
            MnistDataset(mode='test'),
            epochs=FLAGS.epoch,
            batch_size=FLAGS.batch_size,
            save_dir='mnist_checkpoint')
def main():
    """Train or evaluate the ImageNet classifier chosen by FLAGS.arch."""
    dev = set_device(FLAGS.device)
    if FLAGS.dynamic:
        fluid.enable_dygraph(dev)

    # Use pretrained weights only for pure evaluation without a resume
    # checkpoint; otherwise weights come from scratch or FLAGS.resume.
    net = models.__dict__[FLAGS.arch](
        pretrained=FLAGS.eval_only and not FLAGS.resume)
    if FLAGS.resume is not None:
        net.load(FLAGS.resume)

    image_spec = [Input([None, 3, 224, 224], 'float32', name='image')]
    label_spec = [Input([None, 1], 'int64', name='label')]

    train_set = ImageNetDataset(
        os.path.join(FLAGS.data, 'train'), mode='train')
    val_set = ImageNetDataset(os.path.join(FLAGS.data, 'val'), mode='val')

    # Steps per epoch across all ranks — presumably drives the LR schedule
    # inside make_optimizer (TODO confirm against its definition).
    steps_per_epoch = np.ceil(
        len(train_set) * 1. / FLAGS.batch_size / ParallelEnv().nranks)
    optim = make_optimizer(steps_per_epoch, parameter_list=net.parameters())

    net.prepare(optim, CrossEntropy(), Accuracy(topk=(1, 5)), image_spec,
                label_spec)

    if FLAGS.eval_only:
        net.evaluate(
            val_set,
            batch_size=FLAGS.batch_size,
            num_workers=FLAGS.num_workers)
        return

    # Timestamped checkpoint directory, created by the local rank-0 process.
    output_dir = os.path.join(
        FLAGS.output_dir, FLAGS.arch,
        time.strftime('%Y-%m-%d-%H-%M', time.localtime()))
    if ParallelEnv().local_rank == 0 and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    net.fit(train_set,
            val_set,
            batch_size=FLAGS.batch_size,
            epochs=FLAGS.epoch,
            save_dir=output_dir,
            num_workers=FLAGS.num_workers)
def main():
    """Run single-direction CycleGAN style transfer over a glob of images.

    Loads the combined generator from FLAGS.init_model, translates every
    image matching FLAGS.input in the direction given by FLAGS.input_style,
    and writes the results under FLAGS.output + "/single".

    Raises:
        ValueError: if FLAGS.input_style is neither "A" nor "B".
    """
    place = set_device(FLAGS.device)
    if FLAGS.dynamic:
        fluid.enable_dygraph(place)

    # BUG FIX: previously an unrecognized input_style left `fake` unbound —
    # a NameError on the first image, or worse, silent reuse of the previous
    # image's output on later iterations. Validate once, up front.
    if FLAGS.input_style not in ("A", "B"):
        raise ValueError(
            "input_style must be 'A' or 'B', got {!r}".format(
                FLAGS.input_style))

    # Generators
    g_AB = Generator()
    g_BA = Generator()
    g = GeneratorCombine(g_AB, g_BA, is_train=False)

    im_shape = [-1, 3, 256, 256]
    input_A = Input(im_shape, 'float32', 'input_A')
    input_B = Input(im_shape, 'float32', 'input_B')
    g.prepare(inputs=[input_A, input_B])
    g.load(FLAGS.init_model, skip_mismatch=True, reset_optimizer=True)

    out_path = FLAGS.output + "/single"
    os.makedirs(out_path, exist_ok=True)  # idempotent, race-free creation

    for f in glob.glob(FLAGS.input):
        image_name = os.path.basename(f)
        # Normalize to a 1x3x256x256 float32 tensor scaled to [-1, 1].
        image = Image.open(f).convert('RGB')
        image = image.resize((256, 256), Image.BICUBIC)
        image = np.array(image) / 127.5 - 1
        image = image[:, :, 0:3].astype("float32")
        data = image.transpose([2, 0, 1])[np.newaxis, :]
        # g.test returns a 4-tuple; the slot holding the translated image
        # depends on direction (positions taken from the original code —
        # TODO confirm against GeneratorCombine).
        if FLAGS.input_style == "A":
            _, fake, _, _ = g.test([data, data])
        else:  # "B" — validated above
            fake, _, _, _ = g.test([data, data])
        fake = np.squeeze(fake[0]).transpose([1, 2, 0])
        opath = "{}/fake{}{}".format(out_path, FLAGS.input_style, image_name)
        # Map back from [-1, 1] to [0, 255] before saving.
        imsave(opath, ((fake + 1) * 127.5).astype(np.uint8))
        print("transfer {} to {}".format(f, opath))
def fit(self, dynamic, is_mlp=False):
    """End-to-end fit/evaluate/predict smoke test on MNIST.

    Trains either the conv MNIST net or an MLP for two epochs, then checks
    that predict() covers the whole test set and that accuracy recomputed
    from raw predictions matches the evaluate() metric.
    """
    dev = set_device('gpu')
    if dynamic:
        fluid.enable_dygraph(dev)

    batch = 128
    image_spec = [Input((-1, 784), 'float32', name='image')]
    label_spec = [Input([None, 1], 'int64', name='label')]

    train_data = MnistDataset(mode='train')
    val_data = MnistDataset(mode='test')
    test_data = TestMnistDataset()

    # Pick the network and its matching loss for the variant under test.
    net = MLP() if is_mlp else MNIST()
    criterion = MyCrossEntropy() if is_mlp else CrossEntropy()
    optim = fluid.optimizer.Momentum(
        learning_rate=0.01, momentum=.9, parameter_list=net.parameters())
    net.prepare(optim, criterion, Accuracy(), image_spec, label_spec,
                device=dev)

    net.fit(train_data,
            val_data,
            epochs=2,
            batch_size=batch,
            callbacks=ProgBarLogger(50))

    eval_result = net.evaluate(val_data, batch_size=batch)
    output = net.predict(test_data, batch_size=batch)

    # Every test sample must yield exactly one prediction row.
    np.testing.assert_equal(output[0].shape[0], len(test_data))
    # evaluate()'s metric must agree with accuracy recomputed offline.
    acc = get_predict_accuracy(output[0], val_data.labels)
    np.testing.assert_allclose(acc, eval_result['acc'])
# save results args = parser.parse_args() # convert argument parser input to a variable used in a function device_selection = args.device_selection data_dir = args.flowers_data_directory pretrained_model_selection = args.pretrained_model_selection checkpoint_filename = args.checkpoint_filename learning_rate = args.learning_rate cat_to_name_filename = args.cat_to_name_filename image_filepath = args.image_filepath topk = args.topk #select device device = set_device(device_selection) # load model loaded_model, criterion, optimizer, checkpoint = load_checkpoint( checkpoint_filename, pretrained_model_selection, learning_rate, device) # Extract and Transform data train_data, valid_data, test_data, trainloader, testloader, validloader = load_and_transform_data( data_dir) # check device print("Is our device GPU?") print(device == torch.device("cuda")) # test the model but only if it's GPU, on CPU it'll run forever. Purpose: to see if the model is fine after saving a checkpoint and loading it if device == torch.device("cuda"):
def main():
    # Train CycleGAN: two generators (A<->B) and two discriminators,
    # alternating one generator update with one update per discriminator
    # each step. Update ORDER is behavior-critical; do not reorder.
    place = set_device(FLAGS.device)
    # Imperative (dygraph) execution is opt-in via --dynamic.
    fluid.enable_dygraph(place) if FLAGS.dynamic else None

    # Generators
    g_AB = Generator()
    g_BA = Generator()

    # Discriminators
    d_A = Discriminator()
    d_B = Discriminator()

    g = GeneratorCombine(g_AB, g_BA, d_A, d_B)

    # Disjoint parameter sets: each discriminator and the combined
    # generator get their own optimizer instance.
    da_params = d_A.parameters()
    db_params = d_B.parameters()
    g_params = g_AB.parameters() + g_BA.parameters()
    da_optimizer = opt(da_params)
    db_optimizer = opt(db_params)
    g_optimizer = opt(g_params)

    im_shape = [None, 3, 256, 256]
    input_A = Input(im_shape, 'float32', 'input_A')
    input_B = Input(im_shape, 'float32', 'input_B')
    fake_A = Input(im_shape, 'float32', 'fake_A')
    fake_B = Input(im_shape, 'float32', 'fake_B')

    g_AB.prepare(inputs=[input_A])
    g_BA.prepare(inputs=[input_B])
    g.prepare(g_optimizer, GLoss(), inputs=[input_A, input_B])
    # Each discriminator is fed a real image from its domain plus a fake.
    d_A.prepare(da_optimizer, DLoss(), inputs=[input_B, fake_B])
    d_B.prepare(db_optimizer, DLoss(), inputs=[input_A, fake_A])

    if FLAGS.resume:
        g.load(FLAGS.resume)

    loader_A = paddle.io.DataLoader(data.DataA(),
                                    places=place,
                                    shuffle=True,
                                    return_list=True,
                                    batch_size=FLAGS.batch_size)
    loader_B = paddle.io.DataLoader(data.DataB(),
                                    places=place,
                                    shuffle=True,
                                    return_list=True,
                                    batch_size=FLAGS.batch_size)
    # NOTE(review): ImagePool presumably buffers previously generated fakes
    # (the standard CycleGAN history trick) — confirm in data.ImagePool.
    A_pool = data.ImagePool()
    B_pool = data.ImagePool()

    for epoch in range(FLAGS.epoch):
        # zip() stops at the shorter of the two loaders.
        for i, (data_A, data_B) in enumerate(zip(loader_A, loader_B)):
            # Static-graph loaders wrap each batch one level deeper than
            # dygraph loaders, hence the extra [0].
            data_A = data_A[0][0] if not FLAGS.dynamic else data_A[0]
            data_B = data_B[0][0] if not FLAGS.dynamic else data_B[0]
            start = time.time()
            # Forward-only passes produce the fakes used below.
            fake_B = g_AB.test(data_A)[0]
            fake_A = g_BA.test(data_B)[0]

            g_loss = g.train([data_A, data_B])[0]

            # Discriminators train on pool-sampled fakes, not the fresh ones.
            fake_pb = B_pool.get(fake_B)
            da_loss = d_A.train([data_B, fake_pb])[0]

            fake_pa = A_pool.get(fake_A)
            db_loss = d_B.train([data_A, fake_pa])[0]
            t = time.time() - start
            if i % 20 == 0:
                print("epoch: {} | step: {:3d} | g_loss: {:.4f} | " \
                      "da_loss: {:.4f} | db_loss: {:.4f} | s/step {:.4f}".
                      format(epoch, i, g_loss[0], da_loss[0], db_loss[0], t))
        # Checkpoint the combined model once per epoch.
        g.save('{}/{}'.format(FLAGS.checkpoint_path, epoch))
def do_predict(args):
    """Run beam-search inference with a trained Transformer.

    Builds the inference data pipeline, restores weights from
    args.init_from_params, and writes up to args.n_best byte-string
    hypotheses per input line to args.output_file.

    Raises:
        AssertionError: if args.init_from_params is unset.
    """
    device = set_device("gpu" if args.use_cuda else "cpu")
    if args.eager_run:
        fluid.enable_dygraph(device)

    # Encoder-side inputs plus the decoder-over-source attention bias.
    inputs = [
        Input([None, None], "int64", name="src_word"),
        Input([None, None], "int64", name="src_pos"),
        Input([None, args.n_head, None, None], "float32",
              name="src_slf_attn_bias"),
        Input([None, args.n_head, None, None], "float32",
              name="trg_src_attn_bias"),
    ]

    # define data
    dataset = Seq2SeqDataset(
        fpattern=args.predict_file,
        src_vocab_fpath=args.src_vocab_fpath,
        trg_vocab_fpath=args.trg_vocab_fpath,
        token_delimiter=args.token_delimiter,
        start_mark=args.special_token[0],
        end_mark=args.special_token[1],
        unk_mark=args.special_token[2],
        byte_data=True)
    args.src_vocab_size, args.trg_vocab_size, args.bos_idx, args.eos_idx, \
        args.unk_idx = dataset.get_vocab_summary()
    trg_idx2word = Seq2SeqDataset.load_dict(
        dict_path=args.trg_vocab_fpath, reverse=True, byte_data=True)
    batch_sampler = Seq2SeqBatchSampler(
        dataset=dataset,
        use_token_batch=False,
        batch_size=args.batch_size,
        max_length=args.max_length)
    data_loader = DataLoader(
        dataset=dataset,
        batch_sampler=batch_sampler,
        places=device,
        collate_fn=partial(
            prepare_infer_input,
            bos_idx=args.bos_idx,
            eos_idx=args.eos_idx,
            src_pad_idx=args.eos_idx,  # pad with EOS on the source side
            n_head=args.n_head),
        num_workers=0,
        return_list=True)

    # define model
    transformer = InferTransformer(
        args.src_vocab_size,
        args.trg_vocab_size,
        args.max_length + 1,
        args.n_layer,
        args.n_head,
        args.d_key,
        args.d_value,
        args.d_model,
        args.d_inner_hid,
        args.prepostprocess_dropout,
        args.attention_dropout,
        args.relu_dropout,
        args.preprocess_cmd,
        args.postprocess_cmd,
        args.weight_sharing,
        args.bos_idx,
        args.eos_idx,
        beam_size=args.beam_size,
        max_out_len=args.max_out_len)
    transformer.prepare(inputs=inputs)

    # load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")
    transformer.load(args.init_from_params)

    # TODO: use model.predict when support variant length
    # BUG FIX: the output file was opened but never closed, leaking the
    # handle and risking unflushed output on error; a context manager
    # guarantees close/flush on every exit path.
    with open(args.output_file, "wb") as f:
        for data in data_loader():
            finished_seq = transformer.test(inputs=flatten(data))[0]
            # Transpose so beams are iterated per instance — assumes the raw
            # layout is (batch, seq, beam); TODO confirm against
            # InferTransformer's output.
            finished_seq = np.transpose(finished_seq, [0, 2, 1])
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    if beam_idx >= args.n_best:
                        break
                    id_list = post_process_seq(beam, args.bos_idx,
                                               args.eos_idx)
                    word_list = [trg_idx2word[id] for id in id_list]
                    sequence = b" ".join(word_list) + b"\n"
                    f.write(sequence)
def do_train(args):
    """Train the Transformer translation model.

    Optionally fixes the random seed (for CE reproducibility), builds the
    input/label descriptors and data loaders, prepares the model with
    Adam + noam decay and label-smoothed cross entropy, restores a
    checkpoint or pretrained weights when given, and runs fit().
    """
    import ast  # local: only needed to parse the CE seed safely

    device = set_device("gpu" if args.use_cuda else "cpu")
    if args.eager_run:
        fluid.enable_dygraph(device)

    # set seed for CE
    # BUG FIX: eval() executes arbitrary code supplied on the command line;
    # ast.literal_eval accepts the same values this path handles
    # ("None" / integer literals) without that risk.
    random_seed = ast.literal_eval(str(args.random_seed))
    if random_seed is not None:
        fluid.default_main_program().random_seed = random_seed
        fluid.default_startup_program().random_seed = random_seed

    # define inputs: source/target tokens, positions, and attention biases.
    inputs = [
        Input([None, None], "int64", name="src_word"),
        Input([None, None], "int64", name="src_pos"),
        Input([None, args.n_head, None, None], "float32",
              name="src_slf_attn_bias"),
        Input([None, None], "int64", name="trg_word"),
        Input([None, None], "int64", name="trg_pos"),
        Input([None, args.n_head, None, None], "float32",
              name="trg_slf_attn_bias"),
        Input([None, args.n_head, None, None], "float32",
              name="trg_src_attn_bias"),
    ]
    # Gold labels plus per-token loss weights (e.g. to mask padding).
    labels = [
        Input([None, 1], "int64", name="label"),
        Input([None, 1], "float32", name="weight"),
    ]

    # def dataloader
    train_loader, eval_loader = create_data_loader(args, device)

    # define model
    transformer = Transformer(
        args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
        args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model,
        args.d_inner_hid, args.prepostprocess_dropout,
        args.attention_dropout, args.relu_dropout, args.preprocess_cmd,
        args.postprocess_cmd, args.weight_sharing, args.bos_idx,
        args.eos_idx)
    transformer.prepare(
        fluid.optimizer.Adam(
            learning_rate=fluid.layers.noam_decay(
                args.d_model,
                args.warmup_steps,
                learning_rate=args.learning_rate),
            beta1=args.beta1,
            beta2=args.beta2,
            epsilon=float(args.eps),
            parameter_list=transformer.parameters()),
        CrossEntropyCriterion(args.label_smooth_eps),
        inputs=inputs,
        labels=labels)

    ## init from some checkpoint, to resume the previous training
    if args.init_from_checkpoint:
        transformer.load(args.init_from_checkpoint)
    ## init from some pretrain models, to better solve the current task
    if args.init_from_pretrain_model:
        transformer.load(args.init_from_pretrain_model, reset_optimizer=True)

    # model train
    transformer.fit(train_data=train_loader,
                    eval_data=eval_loader,
                    epochs=args.epoch,
                    eval_freq=1,
                    save_freq=1,
                    save_dir=args.save_model,
                    callbacks=[TrainCallback(args)])