def beam_search(FLAGS):
    """Evaluate the beam-search inference model on the test split.

    Builds the attention seq2seq inference network, restores weights from
    ``FLAGS.init_model`` and runs ``evaluate`` with a beam-search accuracy
    metric over the test data loader.
    """
    if FLAGS.static:
        paddle.enable_static()
    place = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")

    # Input specs: image batches plus teacher-forcing label ids; label specs
    # carry the shifted targets and padding mask consumed by the metric.
    # yapf: disable
    in_specs = [
        Input([None, 1, 48, 384], "float32", name="pixel"),
        Input([None, None], "int64", name="label_in")
    ]
    label_specs = [
        Input([None, None], "int64", name="label_out"),
        Input([None, None], "float32", name="mask")
    ]
    # yapf: enable

    net = Seq2SeqAttInferModel(
        encoder_size=FLAGS.encoder_size,
        decoder_size=FLAGS.decoder_size,
        emb_dim=FLAGS.embedding_dim,
        num_classes=FLAGS.num_classes,
        beam_size=FLAGS.beam_size)
    model = paddle.Model(net, inputs=in_specs, labels=label_specs)
    model.prepare(metrics=SeqBeamAccuracy())
    model.load(FLAGS.init_model)

    eval_set = data.test()
    collate = BatchCompose([data.Resize(), data.Normalize(), data.PadTarget()])
    sampler = data.BatchSampler(
        eval_set,
        batch_size=FLAGS.batch_size,
        drop_last=False,
        shuffle=False)
    loader = paddle.io.DataLoader(
        eval_set,
        batch_sampler=sampler,
        places=place,
        num_workers=0,
        return_list=True,
        collate_fn=collate)

    model.evaluate(
        eval_data=loader,
        callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
def main(FLAGS):
    """Evaluate the attention seq2seq model (greedy decoding) on the test set.

    Uses the legacy fluid/hapi API: the model object itself exposes
    ``prepare``/``load``/``evaluate``.
    """
    place = set_device("gpu" if FLAGS.use_gpu else "cpu")
    if FLAGS.dynamic:
        fluid.enable_dygraph(place)

    net = Seq2SeqAttModel(
        encoder_size=FLAGS.encoder_size,
        decoder_size=FLAGS.decoder_size,
        emb_dim=FLAGS.embedding_dim,
        num_classes=FLAGS.num_classes)

    # yapf: disable
    in_specs = [
        Input([None, 1, 48, 384], "float32", name="pixel"),
        Input([None, None], "int64", name="label_in")
    ]
    label_specs = [
        Input([None, None], "int64", name="label_out"),
        Input([None, None], "float32", name="mask")
    ]
    # yapf: enable

    net.prepare(
        loss_function=WeightCrossEntropy(),
        metrics=SeqAccuracy(),
        inputs=in_specs,
        labels=label_specs,
        device=place)
    net.load(FLAGS.init_model)

    eval_set = data.test()
    collate = BatchCompose([data.Resize(), data.Normalize(), data.PadTarget()])
    sampler = data.BatchSampler(
        eval_set,
        batch_size=FLAGS.batch_size,
        drop_last=False,
        shuffle=False)
    loader = fluid.io.DataLoader(
        eval_set,
        batch_sampler=sampler,
        places=place,
        num_workers=0,
        return_list=True,
        collate_fn=collate)

    net.evaluate(
        eval_data=loader,
        callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
def main(FLAGS):
    """Run beam-search inference over a folder of images and print the
    decoded character sequence for every beam of every image."""
    place = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")
    if FLAGS.dynamic:
        fluid.enable_dygraph(place)

    in_specs = [
        Input([None, 1, 48, 384], "float32", name="pixel"),
    ]
    model = paddle.Model(
        Seq2SeqAttInferModel(
            encoder_size=FLAGS.encoder_size,
            decoder_size=FLAGS.decoder_size,
            emb_dim=FLAGS.embedding_dim,
            num_classes=FLAGS.num_classes,
            beam_size=FLAGS.beam_size),
        in_specs)
    model.prepare()
    model.load(FLAGS.init_model)

    def grayscale_loader(path):
        # Decode every image as single-channel ("L") to match the net input.
        return Image.open(path).convert('L')

    folder = ImageFolder(FLAGS.image_path, loader=grayscale_loader)
    collate = BatchCompose([data.Resize(), data.Normalize()])
    loader = fluid.io.DataLoader(
        folder,
        places=place,
        num_workers=0,
        return_list=True,
        collate_fn=collate)
    paths = folder.samples

    sample_idx = 0
    for image, in loader:
        if not FLAGS.dynamic:
            image = image[0]  # static mode wraps the feed in an extra list
        pred = model.test_batch([image])[0]
        if len(pred.shape) == 2:
            # Add a trailing axis so single-beam output matches the
            # (batch, seq, beam) layout — presumably beam-last; confirm.
            pred = pred[:, :, np.newaxis]
        pred = np.transpose(pred, [0, 2, 1])
        for beams in pred:
            impath = paths[sample_idx]
            sample_idx += 1
            print('Image {}: {}'.format(sample_idx, impath))
            for beam_idx, beam in enumerate(beams):
                sequence = "".join(index2word(postprocess(beam)))
                print('{}: {}'.format(beam_idx, sequence))
# --- script-level hyper-parameters and training-set construction ---
batch_sz = 32       # mini-batch size
n_of_epochs = 2000  # total training epochs
in_file = '../dataset/input_data.txt'
out_file = '../dataset/output_data.txt'
test_in_file = '../dataset/input_test_data.txt'
test_out_file = '../dataset/output_test_data.txt'
net_dump_folder = 'nets/'  # where trained network snapshots are dumped

# Training set loading
# NOTE(review): in_trim_sx/in_trim_dx/out_trim_sx/out_trim_dx/trim_length/
# inf_replacement_n/nan_replacement_n are not defined in this span — they are
# presumably module-level constants set earlier in the file; confirm they are
# bound before this point.
# Input pipeline: take the [in_trim_sx, in_trim_dx] interval, trim to a fixed
# length, replace inf/nan with sentinel values, then apply hd.Normalize(0, 30)
# (semantics of the (0, 30) pair live in the hd module — verify there).
trans_in = transforms.Compose([
    hd.Interval(in_trim_sx, in_trim_dx),
    hd.TrimToLength(trim_length),
    hd.ReplaceInf(inf_replacement_n),
    hd.ReplaceNan(nan_replacement_n),
    hd.Normalize(0, 30)
])
# Output pipeline: same sanitation, plus rescaling the target to the length
# of the input interval before trimming, so input and target stay aligned.
trans_out = transforms.Compose([
    hd.Interval(out_trim_sx, out_trim_dx),
    hd.ScaleToLength(in_trim_dx - in_trim_sx),
    hd.TrimToLength(trim_length),
    hd.ReplaceInf(inf_replacement_n),
    hd.ReplaceNan(nan_replacement_n),
    hd.Normalize(0, 30)
])
train_dataset = hd.HallucinatingDataset(csv_in_file=in_file,
                                        csv_out_file=out_file,
                                        transform_in=trans_in,
                                        transform_out=trans_out)
def main(FLAGS):
    """Train the attention seq2seq OCR model, evaluating after each epoch.

    Builds the training network, an Adam optimizer with optional piecewise
    lr decay and global-norm gradient clipping, then drives ``model.fit``
    over the train/test loaders, checkpointing to ``FLAGS.checkpoint_path``.
    """
    if FLAGS.static:
        paddle.enable_static()
    place = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")

    # yapf: disable
    in_specs = [
        Input([None,1,48,384], "float32", name="pixel"),
        Input([None, None], "int64", name="label_in"),
    ]
    label_specs = [
        Input([None, None], "int64", name="label_out"),
        Input([None, None], "float32", name="mask"),
    ]
    # yapf: enable

    model = paddle.Model(
        Seq2SeqAttModel(
            encoder_size=FLAGS.encoder_size,
            decoder_size=FLAGS.decoder_size,
            emb_dim=FLAGS.embedding_dim,
            num_classes=FLAGS.num_classes),
        in_specs, label_specs)

    # Learning rate: step decay at 200k/250k iterations when requested,
    # otherwise a constant rate.
    base_lr = FLAGS.lr
    if FLAGS.lr_decay_strategy == "piecewise_decay":
        lr_value = fluid.layers.piecewise_decay(
            [200000, 250000], [base_lr, base_lr * 0.1, base_lr * 0.01])
    else:
        lr_value = base_lr
    clipper = fluid.clip.GradientClipByGlobalNorm(FLAGS.gradient_clip)
    adam = fluid.optimizer.Adam(
        learning_rate=lr_value,
        parameter_list=model.parameters(),
        grad_clip=clipper)
    model.prepare(adam, WeightCrossEntropy(), SeqAccuracy())

    def make_loader(split_dataset, sampler, workers):
        # Both splits share the same resize/normalize/pad collate pipeline.
        return paddle.io.DataLoader(
            split_dataset,
            batch_sampler=sampler,
            places=place,
            num_workers=workers,
            return_list=True,
            collate_fn=BatchCompose(
                [data.Resize(), data.Normalize(), data.PadTarget()]))

    train_set = data.train()
    train_loader = make_loader(
        train_set,
        data.BatchSampler(
            train_set, batch_size=FLAGS.batch_size, shuffle=True),
        FLAGS.num_workers)

    test_set = data.test()
    test_loader = make_loader(
        test_set,
        data.BatchSampler(
            test_set,
            batch_size=FLAGS.batch_size,
            drop_last=False,
            shuffle=False),
        0)

    model.fit(train_data=train_loader,
              eval_data=test_loader,
              epochs=FLAGS.epoch,
              save_dir=FLAGS.checkpoint_path,
              callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
def main():
    """Train the CTC attention encoder for sign-language recognition.

    Parses CLI args and an INI config, builds train/dev datasets with
    optional scaling augmentation, constructs separate SGD optimizers for
    the CNN and LSTM parts, and runs the train/evaluate loop with
    resume-from-checkpoint support (--cont).
    """
    parser = argparse.ArgumentParser(description="Attn Encoder")
    parser.add_argument("--img", type=str, help="image dir")
    parser.add_argument("--prior", type=str, help="prior dir")
    parser.add_argument("--csv", type=str, help="csv dir")
    parser.add_argument("--conf", type=str, help="config file")
    parser.add_argument("--output", type=str, help="output dir")
    parser.add_argument("--pretrain", type=str, default=None, help="pretrain path")
    parser.add_argument("--cont", action="store_true", help="continue training")
    parser.add_argument("--epoch", type=int, default=1, help="epoch")
    parser.add_argument("--optim_step_size", type=int, default=30, help="lr decay step size")
    parser.add_argument("--optim_gamma", type=float, default=0.1, help="lr decay rate")
    parser.add_argument("--scaling", action="store_true", help="data augmentation (scaling)")
    parser.add_argument("--img_scale", type=float, default=1., nargs="+", help="image scales")
    parser.add_argument("--map_scale", type=int, default=13, nargs="+", help="map scales")
    args = parser.parse_args()

    # Output artifacts: best/latest checkpoints, text log, pickled hypers.
    if not os.path.isdir(args.output):
        os.makedirs(args.output)
    best_path = os.path.join(args.output, "best.pth")
    latest_path = os.path.join(args.output, "latest.pth")
    log = os.path.join(args.output, "log")
    hyper_path = os.path.join(args.output, "hyper.pth")

    # Model/language/image hyper-parameters from the INI config file.
    config = configparser.ConfigParser()
    config.read(args.conf)
    model_cfg, lang_cfg, img_cfg = config['MODEL'], config['LANG'], config['IMAGE']
    hidden_size, attn_size, n_layers = model_cfg.getint('hidden_size'), model_cfg.getint('attn_size'), model_cfg.getint('n_layers')
    prior_gamma = model_cfg.getfloat('prior_gamma')
    learning_rate = model_cfg.getfloat('learning_rate')
    batch_size = model_cfg.getint('batch_size')
    char_list = lang_cfg['chars']  # e.g. " '&.@acbedgfihkjmlonqpsrutwvyxz"
    # Per-channel normalization stats, e.g. [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
    immean, imstd = [float(x) for x in config['IMAGE']['immean'].split(',')], [float(x) for x in config['IMAGE']['imstd'].split(',')]
    upper_len = model_cfg.getint('upper_length')
    clip = model_cfg.getfloat('clip')
    save_interval = model_cfg.getint('interval')
    epochs = args.epoch
    optim_step_size, optim_gamma = args.optim_step_size, args.optim_gamma
    train_csv, dev_csv = os.path.join(args.csv, 'train.csv'), os.path.join(args.csv, 'dev.csv')
    device, cpu = torch.device('cuda'), torch.device('cpu')
    vocab_map, inv_vocab_map, char_list = utils.get_ctc_vocab(char_list)

    # argparse nargs="+" yields a list when flags are given, the scalar
    # default otherwise — normalize both shapes to parallel range lists.
    if type(args.img_scale) == list and type(args.map_scale) == list:
        scale_range, hw_range = args.img_scale, [(x, x) for x in args.map_scale]
    elif type(args.img_scale) == float and type(args.map_scale) == int:
        scale_range, hw_range = [args.img_scale], [(args.map_scale, args.map_scale)]
    else:
        raise AttributeError('scale: list or float/int')

    # Without --scaling both pipelines keep the original scale; with it the
    # training pipeline rescales (augmentation) while eval stays fixed.
    if not args.scaling:
        tsfm_train = transforms.Compose([dataset.ToTensor(device),
                                         dataset.Rescale(scale_range, hw_range, origin_scale=True),
                                         dataset.Normalize(immean, imstd, device)])
        tsfm_test = transforms.Compose([dataset.ToTensor(device),
                                        dataset.Rescale(scale_range, hw_range, origin_scale=True),
                                        dataset.Normalize(immean, imstd, device)])
    else:
        # scale_range = [1]  # [1, 0.8, 1.2]  # [1, 0.8]
        # hw_range = [(13, 13)]  # [(13, 13), (10, 10), (15, 15)]  # [(13, 13), (10, 10)]
        tsfm_train = transforms.Compose([dataset.ToTensor(device),
                                         dataset.Rescale(scale_range, hw_range),
                                         dataset.Normalize(immean, imstd, device)])
        tsfm_test = transforms.Compose([dataset.ToTensor(device),
                                        dataset.Rescale(scale_range, hw_range, origin_scale=True),
                                        dataset.Normalize(immean, imstd, device)])

    # Dev split keeps every sample (no upper length cap).
    sld_train_data = dataset.SLData(args.img, args.prior, train_csv, vocab_map, transform=tsfm_train, upper_len=upper_len)
    sld_dev_data = dataset.SLData(args.img, args.prior, dev_csv, vocab_map, transform=tsfm_test, upper_len=float('inf'))
    # dataset.Rescale([1], [(13, 13)])

    encoder = AttnEncoder(hidden_size=hidden_size, attn_size=attn_size, output_size=len(char_list),
                          n_layers=n_layers, prior_gamma=prior_gamma, pretrain=args.pretrain)
    encoder.to(device)
    if torch.cuda.device_count() > 1:
        print('Using %d GPUs' % (torch.cuda.device_count()))
        encoder = nn.DataParallel(encoder)

    # Training state: global step, last finished epoch, best dev accuracy,
    # and a fixed sample permutation (kept so a resumed run replays order).
    hypers = {'step': 0, 'epoch': 0, 'best_dev_acc': -1,
              'perm': np.random.permutation(len(sld_train_data)).tolist()}
    if args.cont:
        print("Load %s, %s" % (latest_path, hyper_path))
        encoder.load_state_dict(torch.load(latest_path))
        try:
            with open(hyper_path, 'rb') as fo:
                hypers = pickle.load(fo)
        except Exception as err:
            # Best-effort resume: fall back to fresh state if hypers are unreadable.
            print("Error loading %s: %s" % (hyper_path, err))
            hypers = {'step': 0, 'epoch': 0, 'best_dev_acc': -1,
                      'perm': np.random.permutation(len(sld_train_data)).tolist()}

    train_loader = tud.DataLoader(sld_train_data, batch_size=batch_size, shuffle=True, collate_fn=dataset.collate_fn_ctc)
    dev_loader = tud.DataLoader(sld_dev_data, batch_size=batch_size, shuffle=False, collate_fn=dataset.collate_fn_ctc)

    print('Optimizer, decay %.5f after %d epochs' % (optim_gamma, optim_step_size))
    # CNN backbone and LSTM/embedding parts get separate SGD optimizers,
    # each with its own StepLR schedule.
    cnn_optimizer = optim.SGD(encoder.conv.parameters(), lr=learning_rate)
    lstm_optimizer = optim.SGD(list(encoder.encoder_cell.parameters()) + list(encoder.lt.parameters()), lr=learning_rate)
    cnn_scheduler = optim.lr_scheduler.StepLR(cnn_optimizer, step_size=optim_step_size, gamma=optim_gamma)
    lstm_scheduler = optim.lr_scheduler.StepLR(lstm_optimizer, step_size=optim_step_size, gamma=optim_gamma)
    decoder = Decoder(char_list)
    ctc_loss = CTCLoss()  # normalize over batch

    print('%d training epochs' % (epochs))
    for ep in range(epochs):
        # NOTE(review): scheduler.step() is called at the TOP of the epoch,
        # before the resume-skip below — this advances the lr schedule for
        # epochs skipped on resume, but shifts decay one epoch early
        # relative to the PyTorch >=1.1 optimizer-then-scheduler ordering.
        # Confirm this is intentional before changing.
        cnn_scheduler.step()
        lstm_scheduler.step()
        if ep < hypers['epoch']:
            continue  # resume: fast-forward past already-finished epochs
        for p in cnn_optimizer.param_groups:
            print('CNN', p['lr'])
        for p in lstm_optimizer.param_groups:
            print('LSTM', p['lr'])
        train(encoder, train_loader, clip, hypers, cnn_optimizer, lstm_optimizer, ctc_loss, decoder,
              log, latest_path, hyper_path, device, save_interval)
        dl, dacc = evaluate(encoder, dev_loader, ctc_loss, decoder, device)
        pcont = 'Epoch %d, dev loss: %.3f, dev acc (LEV): %.3f' % (ep, dl, dacc)
        print(pcont)
        with open(log, 'a+') as fo:
            fo.write(pcont + "\n")
        # save model and hyperparameter setting
        hypers['epoch'] = ep
        if hypers['best_dev_acc'] < dacc:
            hypers['best_dev_acc'] = dacc
            with open(best_path, 'wb') as fo:
                torch.save(encoder.state_dict(), fo)
        with open(hyper_path, 'wb') as fo:
            pickle.dump(hypers, fo)
    return
def main():
    """Extract attention betas or per-frame probabilities with a trained encoder.

    Loads the checkpoint given by --model and, depending on --task, dumps
    beta values over all three splits ('beta') or probabilities for the
    split selected by --partition ('prob') into --output.
    """
    parser = argparse.ArgumentParser(description="Attn Encoder")
    parser.add_argument("--img", type=str, help="image dir")
    parser.add_argument("--prior", type=str, help="prior dir")
    parser.add_argument("--csv", type=str, help="csv dir")
    parser.add_argument("--conf", type=str, help="config file")
    parser.add_argument("--output", type=str, help="output dir")
    parser.add_argument("--model", type=str, help="model path")
    parser.add_argument("--partition", type=str, help="train|dev|test")
    parser.add_argument("--task", type=str, help="beta|prob")
    args = parser.parse_args()

    if not os.path.isdir(args.output):
        os.makedirs(args.output)

    # Model hyper-parameters come from the INI config's MODEL section.
    config = configparser.ConfigParser()
    config.read(args.conf)
    model_cfg, lang_cfg, img_cfg = config['MODEL'], config['LANG'], config['IMAGE']
    hidden_size = model_cfg.getint('hidden_size')
    attn_size = model_cfg.getint('attn_size')
    n_layers = model_cfg.getint('n_layers')
    prior_gamma = model_cfg.getfloat('prior_gamma')
    batch_size = 1  # extraction processes one sample per batch
    char_list = lang_cfg['chars']
    immean = [float(x) for x in config['IMAGE']['immean'].split(',')]
    imstd = [float(x) for x in config['IMAGE']['imstd'].split(',')]

    train_csv = os.path.join(args.csv, 'train.csv')
    dev_csv = os.path.join(args.csv, 'dev.csv')
    test_csv = os.path.join(args.csv, 'test.csv')
    device, cpu = torch.device('cuda'), torch.device('cpu')
    vocab_map, inv_vocab_map, char_list = utils.get_ctc_vocab(char_list)

    encoder = AttnEncoder(hidden_size=hidden_size,
                          attn_size=attn_size,
                          output_size=len(char_list),
                          n_layers=n_layers,
                          prior_gamma=prior_gamma,
                          pretrain=None)
    encoder.to(device)
    if torch.cuda.device_count() > 1:
        print('Using %d GPUs' % (torch.cuda.device_count()))
        encoder = nn.DataParallel(encoder)
    print('Load model: %s' % (args.model))
    encoder.load_state_dict(torch.load(args.model))

    # Placeholder ranges: with origin_scale=True the Rescale transform
    # presumably keeps the original image scale — confirm in dataset module.
    scale_range = [0]
    hw_range = [(0, 0)]
    tsfm = transforms.Compose([
        dataset.ToTensor(device),
        dataset.Rescale(scale_range, hw_range, origin_scale=True),
        dataset.Normalize(immean, imstd, device)
    ])

    # One deterministic (unshuffled) loader per split, all uncapped.
    loaders = {}
    for split_name, csv_path in (('train', train_csv),
                                 ('dev', dev_csv),
                                 ('test', test_csv)):
        split_data = dataset.SLData(args.img, args.prior, csv_path, vocab_map,
                                    transform=tsfm, upper_len=float('inf'))
        loaders[split_name] = tud.DataLoader(split_data,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             collate_fn=dataset.collate_fn_ctc)

    if args.task == 'beta':
        get_beta(encoder,
                 [loaders['train'], loaders['dev'], loaders['test']],
                 args.output, device)
    elif args.task == 'prob':
        if args.partition not in loaders:
            raise ValueError('partition: train|dev|test')
        get_prob(encoder, loaders[args.partition], args.output, device)
    return