@classmethod
def load(cls, model_path, use_cuda=False, new_config=None) -> 'RenamingModel':
    params = torch.load(model_path, map_location=lambda storage, loc: storage)
    config = params['config']

    # Re-point the saved config at the local copies of the vocabulary and datasets.
    config['decoder']['vocab_file'] = 'data/vocab.bpe10000/vocab'
    config['data']['dev_file'] = 'data/preprocessed_data/dev.tar'
    config['data']['train_file'] = 'data/preprocessed_data/train-shard-*.tar'
    config['data']['vocab_file'] = 'data/vocab.bpe10000/vocab'
    config['encoder']['graph_encoder']['vocab_file'] = 'data/vocab.bpe10000/vocab'
    config['encoder']['seq_encoder']['vocab_file'] = 'data/vocab.bpe10000/vocab'

    if new_config:
        config = util.update(config, new_config)

    kwargs = params['kwargs'] if params['kwargs'] is not None else dict()
    model = cls.build(config, **kwargs)
    model.load_state_dict(params['state_dict'], strict=False)

    # Only move to the GPU when requested; the original unconditionally called
    # model.cuda(), which fails on CPU-only machines despite the use_cuda flag.
    if use_cuda:
        model = torch.nn.DataParallel(model.cuda())
    model.eval()
    return model
@classmethod
def build(cls, config):
    params = util.update(cls.default_params(), config)
    vocab = torch.load(params['vocab_file'])
    model = cls(params['ast_node_encoding_size'], vocab)
    model.config = params
    return model
@classmethod
def build(cls, config):
    params = util.update(cls.default_params(), config)
    vocab = Vocab.load(params['vocab_file'])
    model = cls(params['variable_encoding_size'],
                params['hidden_size'],
                params['dropout'],
                params['tie_embedding'],
                params['input_feed'],
                vocab)
    model.config = params
    return model
@classmethod
def build(cls, config):
    params = util.update(cls.default_params(), config)

    # Resolve the concrete encoder/decoder classes by name and build each one
    # from its own sub-config.
    encoder = globals()[config['encoder']['type']].build(config['encoder'])
    decoder = globals()[config['decoder']['type']].build(config['decoder'])
    model = cls(encoder, decoder)

    params = util.update(params, {'encoder': encoder.config,
                                  'decoder': decoder.config})
    model.config = params
    model.decoder.encoder = encoder  # give the decoder a reference to the encoder

    # assign the shared batcher to sub-modules
    encoder.batcher = model.batcher
    decoder.batcher = model.batcher

    print('Current Configuration:', file=sys.stderr)
    pp = pprint.PrettyPrinter(indent=2, stream=sys.stderr)
    pp.pprint(model.config)

    return model
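For orientation, a sketch of the nested config `build` consumes. The `type` fields are resolved through `globals()`, so they must name classes defined in the module; the decoder class name and the minimal key set below are illustrative assumptions, not the shipped configuration.

# Hypothetical minimal config; 'type' values are looked up via globals().
sample_config = {
    'encoder': {'type': 'GraphASTEncoder',           # defined in this module
                'vocab_file': 'data/vocab.bpe10000/vocab'},
    'decoder': {'type': 'RecurrentSubtokenDecoder',  # illustrative class name
                'vocab_file': 'data/vocab.bpe10000/vocab'},
}
# model = RenamingModel.build(sample_config)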
@classmethod
def load(cls, model_path, use_cuda=False, new_config=None):
    # type: (...) -> 'RenamingModel'
    device = torch.device("cuda:0" if use_cuda else "cpu")
    params = torch.load(model_path, map_location=lambda store, _: store)

    # Initialize config from the checkpoint unconditionally; the original only
    # assigned it inside the new_config branch, raising NameError otherwise.
    config = params['config']
    if new_config is not None:
        config = util.update(config, new_config)

    kwargs = params['kwargs'] if params['kwargs'] is not None else dict()
    model = cls.build(config, **kwargs)
    model.load_state_dict(params['state_dict'], strict=False)
    model = model.to(device)
    model.eval()
    return model
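A minimal usage sketch of `load` for CPU-side inference; the checkpoint path and the config override are hypothetical, not taken from the repository.

# Hypothetical usage: the path and override keys are illustrative only.
model = RenamingModel.load('saved_models/model.bin',
                           use_cuda=False,
                           new_config={'decoder': {'beam_size': 5}})
with torch.no_grad():
    ...  # run the loaded model on preprocessed examples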
@classmethod
def build(cls, config):
    params = util.update(GraphASTEncoder.default_params(), config)
    print(params)

    # Each connection type contributes one or two edge types (e.g. edges to and
    # from master nodes are modeled as two directed edge types).
    connections = params['connections']
    connection2edge_type = {
        'top_down': 1,
        'bottom_up': 1,
        'variable_master_nodes': 2,
        'terminals': 2,
        'master_node': 2,
        'var_usage': 2,
        'func_root_to_arg': 1,
    }
    num_edge_types = sum(connection2edge_type[key] for key in connections)

    gnn = GatedGraphNeuralNetwork(
        hidden_size=params['gnn']['hidden_size'],
        layer_timesteps=params['gnn']['layer_timesteps'],
        residual_connections=params['gnn']['residual_connections'],
        num_edge_types=num_edge_types,
    )

    vocab = Vocab.load(params['vocab_file'])
    node_type_embedder = NodeTypeEmbedder(
        len(vocab.grammar.variable_types),
        params['node_type_embedding_size'],
    )
    node_content_embedder = SubTokenEmbedder(
        vocab.obj_name.subtoken_model_path,
        params['node_content_embedding_size'],
    )

    model = cls(gnn,
                params['connections'],
                params['node_syntax_type_embedding_size'],
                params['decoder_hidden_size'],
                node_type_embedder,
                node_content_embedder,
                vocab,
                config=params)
    return model
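A quick check of the edge-type arithmetic above, with a hypothetical connection list:

# With connections = ['top_down', 'bottom_up', 'variable_master_nodes'],
# num_edge_types = 1 + 1 + 2 = 4, so the GGNN allocates four edge-type-specific
# message-passing parameter sets.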
@classmethod
def build(cls, config):
    params = util.update(cls.default_params(), config)
    return cls(params)
@classmethod
def build(cls, config):
    params = util.update(XfmrSequentialEncoder.default_params(), config)
    return cls(params)
def train(args):
    work_dir = args['--work-dir']
    config = json.loads(_jsonnet.evaluate_file(args['CONFIG_FILE']))
    config['work_dir'] = work_dir

    if not os.path.exists(work_dir):
        print(f'creating work dir [{work_dir}]', file=sys.stderr)
        os.makedirs(work_dir)

    if args['--extra-config']:
        extra_config = json.loads(args['--extra-config'])
        config = util.update(config, extra_config)

    # Use a context manager so the config file handle is closed after writing.
    with open(os.path.join(work_dir, 'config.json'), 'w') as f:
        json.dump(config, f, indent=2)

    model = RenamingModel.build(config)
    config = model.config
    model.train()
    if args['--cuda']:
        model = model.cuda()

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=0.001)
    nn_util.glorot_init(params)

    # set the padding index for embedding layers to zeros
    # model.encoder.var_node_name_embedding.weight[0].fill_(0.)

    train_set = Dataset(config['data']['train_file'])
    dev_set = Dataset(config['data']['dev_file'])
    batch_size = config['train']['batch_size']
    print(f'Training set size {len(train_set)}, dev set size {len(dev_set)}', file=sys.stderr)

    # training loop
    train_iter = epoch = cum_examples = 0
    log_every = config['train']['log_every']
    evaluate_every_nepoch = config['train']['evaluate_every_nepoch']
    max_epoch = config['train']['max_epoch']
    max_patience = config['train']['patience']
    cum_loss = 0.
    patience = 0
    t_log = time.time()

    history_accs = []
    while True:
        # load training dataset, which is a collection of ASTs and maps of gold-standard renamings
        train_set_iter = train_set.batch_iterator(
            batch_size=batch_size,
            return_examples=False,
            config=config,
            progress=True,
            train=True,
            num_readers=config['train']['num_readers'],
            num_batchers=config['train']['num_batchers'])
        epoch += 1

        for batch in train_set_iter:
            train_iter += 1
            optimizer.zero_grad()

            nn_util.to(batch.tensor_dict, model.device)
            result = model(batch.tensor_dict, batch.tensor_dict['prediction_target'])

            loss = -result['batch_log_prob'].mean()
            cum_loss += loss.item() * batch.size
            cum_examples += batch.size
            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(params, 5.)
            optimizer.step()
            del loss

            if train_iter % log_every == 0:
                print(f'[Learner] train_iter={train_iter} avg. loss={cum_loss / cum_examples}, '
                      f'{cum_examples} examples ({cum_examples / (time.time() - t_log)} examples/s)',
                      file=sys.stderr)
                cum_loss = cum_examples = 0.
                t_log = time.time()

        print(f'[Learner] Epoch {epoch} finished', file=sys.stderr)

        if epoch % evaluate_every_nepoch == 0:
            print(f'[Learner] Perform evaluation', file=sys.stderr)
            t1 = time.time()
            # Alternative metric: perplexity on functions whose bodies were not seen in training.
            # ppl = Evaluator.evaluate_ppl(model, dev_set, config,
            #                              predicate=lambda e: not e['function_body_in_train'])
            eval_results = Evaluator.decode_and_evaluate(model, dev_set, config)
            print(f'[Learner] Evaluation result {eval_results} (took {time.time() - t1}s)',
                  file=sys.stderr)

            dev_metric = eval_results['func_body_not_in_train_acc']['accuracy']
            # dev_metric = -ppl  # when using the perplexity metric above

            if len(history_accs) == 0 or dev_metric > max(history_accs):
                patience = 0
                model_save_path = os.path.join(work_dir, 'model.bin')
                model.save(model_save_path)
                print(f'[Learner] Saved currently the best model to {model_save_path}',
                      file=sys.stderr)
            else:
                patience += 1
                if patience == max_patience:
                    print(f'[Learner] Reached max patience {max_patience}, exiting...',
                          file=sys.stderr)
                    exit()

            history_accs.append(dev_metric)

        if epoch == max_epoch:
            print(f'[Learner] Reached max epoch', file=sys.stderr)
            exit()

        t1 = time.time()
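The `args` mapping is a docopt-style dictionary; the keys below mirror the lookups in `train` ('CONFIG_FILE', '--work-dir', '--extra-config', '--cuda'), while the paths and values are hypothetical.

# Hypothetical invocation of the DIRE trainer; paths are illustrative.
args = {
    'CONFIG_FILE': 'configs/model.hybrid.jsonnet',
    '--work-dir': 'exp_runs/run1',
    '--extra-config': '{"train": {"max_epoch": 30}}',
    '--cuda': True,
}
train(args)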
def train(args):
    config = json.loads(_jsonnet.evaluate_file(args["CONFIG_FILE"]))
    if args["--extra-config"]:
        extra_config = json.loads(args["--extra-config"])
        config = util.update(config, extra_config)

    # dataloaders
    batch_size = config["train"]["batch_size"]
    train_set = Dataset(config["data"]["train_file"], config["data"],
                        percent=float(args["--percent"]))
    dev_set = Dataset(config["data"]["dev_file"], config["data"])
    train_loader = DataLoader(
        train_set,
        batch_size=batch_size,
        collate_fn=Dataset.collate_fn,
        num_workers=16,
        pin_memory=True,
    )
    val_loader = DataLoader(
        dev_set,
        batch_size=batch_size,
        collate_fn=Dataset.collate_fn,
        num_workers=8,
        pin_memory=True,
    )

    # model
    model = TypeReconstructionModel(config)

    wandb_logger = WandbLogger(name=args["--expname"], project="dire", log_model=True)
    wandb_logger.log_hyperparams(config)

    resume_from_checkpoint = args["--eval-ckpt"] if args["--eval-ckpt"] else args["--resume"]
    if resume_from_checkpoint == "":
        resume_from_checkpoint = None

    trainer = pl.Trainer(
        max_epochs=config["train"]["max_epoch"],
        logger=wandb_logger,
        gpus=1 if args["--cuda"] else None,
        auto_select_gpus=True,
        gradient_clip_val=1,
        callbacks=[
            EarlyStopping(
                monitor="val_retype_acc" if config["data"]["retype"] else "val_rename_acc",
                mode="max",
                patience=config["train"]["patience"],
            )
        ],
        check_val_every_n_epoch=config["train"]["check_val_every_n_epoch"],
        progress_bar_refresh_rate=10,
        accumulate_grad_batches=config["train"]["grad_accum_step"],
        resume_from_checkpoint=resume_from_checkpoint,
    )

    if args["--eval-ckpt"]:
        # HACK: necessary to make pl test work for IterableDataset
        Dataset.__len__ = lambda self: 1000000
        test_set = Dataset(config["data"]["test_file"], config["data"])
        test_loader = DataLoader(
            test_set,
            batch_size=config["test"]["batch_size"],
            collate_fn=Dataset.collate_fn,
            num_workers=8,
            pin_memory=True,
        )
        trainer.test(model, test_dataloaders=test_loader, ckpt_path=args["--eval-ckpt"])
    else:
        trainer.fit(model, train_loader, val_loader)
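The Lightning variant reads the same docopt-style mapping; the keys below mirror the lookups above, and the values shown are hypothetical.

# Hypothetical invocation of the Lightning trainer; names and paths are illustrative.
args = {
    "CONFIG_FILE": "configs/dirty.jsonnet",
    "--extra-config": "",
    "--percent": "1.0",
    "--expname": "dirty-run1",
    "--cuda": True,
    "--eval-ckpt": "",  # set to a checkpoint path to run trainer.test instead
    "--resume": "",
}
train(args)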