def __init__(self, data_config, model_config="emb:32|conv:48|l1:16|do:0.1"):
    """Character-level convolutional model.

    Args:
        data_config: dict providing 'num_tokens' (character-vocabulary size).
        model_config: pipe-separated hyperparameter string; keys read here
            are emb, conv, l1 and optionally do (dropout rate, default 0).
    """
    super(Model, self).__init__()

    hparams = utils.parse_model_params(model_config)
    vocab_size = data_config['num_tokens']
    embedding_dim = hparams['emb']
    n_filters = hparams['conv']

    # index 0 is reserved as the padding token
    self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
    self.dropout = torch.nn.Dropout(p=hparams.get("do", 0))

    # three parallel convolutions with different receptive fields
    self.conv1 = ConvolutionLayer(embedding_dim, n_filters, 3)
    self.conv2 = ConvolutionLayer(embedding_dim, n_filters, 5, dilation=3)
    self.conv3 = ConvolutionLayer(embedding_dim, n_filters, 9, dilation=2)

    # two-layer head emitting one logit per position (binary decision)
    self.linear1 = nn.Linear(n_filters, hparams['l1'])
    self.linear2 = nn.Linear(hparams['l1'], 1)

    # kept so the configuration can be persisted alongside the weights
    self.model_params = model_config
def __init__(self, data_config, model_config="embs:8|conv:16|l1:16|do:0.0|oc:BI"):
    """Syllable-level iterated-dilated-convolution tagger.

    Args:
        data_config: dict providing 'num_tokens' (syllable-vocabulary size)
            and whatever prepare_embedding needs to build the embedding table.
        model_config: pipe-separated hyperparameter string; keys read here
            are conv, l1, oc (output-tag scheme) and optionally do and crf.
    """
    super(Model, self).__init__()
    no_syllables = data_config['num_tokens']
    log.info("no. syllables: %d" % no_syllables)

    config = utils.parse_model_params(model_config)
    conv_filters = config['conv']
    dropout_rate = config.get("do", 0)

    self.output_scheme = output_tags.get_scheme(config["oc"])
    self.sy_embeddings = prepare_embedding(data_config, config)

    # Fix: gate on the parsed value, not mere key presence, so "crf:0"
    # disables the CRF layer (consistent with the LSTM model's gating).
    if config.get("crf", 0):
        self.crf = CRF(self.output_scheme.num_tags, batch_first=True)

    # embedding width is taken from the prepared table (may be pretrained)
    emb_dim = self.sy_embeddings.weight.shape[1]
    self.id_conv = IteratedDilatedConvolutions(emb_dim, conv_filters, dropout_rate)

    self.linear1 = nn.Linear(conv_filters, config['l1'])
    self.linear2 = nn.Linear(config['l1'], self.output_scheme.num_tags)

    # kept so the configuration can be persisted alongside the weights
    self.model_params = model_config
def __init__(self, data_config, model_config="embc:16|embt:16|conv:48|l1:16|do:0.1|oc:BI"):
    """Character + character-type iterated-dilated-convolution tagger.

    Args:
        data_config: dict providing "num_tokens" (character-vocabulary size).
        model_config: pipe-separated hyperparameter string; keys read here
            are embc, embt, conv, l1, oc and optionally do (default 0).
    """
    super(Model, self).__init__()

    hparams = utils.parse_model_params(model_config)
    vocab_size = data_config["num_tokens"]

    self.output_scheme = output_tags.get_scheme(hparams["oc"])

    # character embeddings (index 0 = padding) plus char-type embeddings
    self.ch_embeddings = nn.Embedding(vocab_size, hparams["embc"], padding_idx=0)
    self.ch_type_embeddings = nn.Embedding(
        char_type.get_total_char_types(),
        hparams["embt"],
    )

    # the two embeddings are concatenated before the convolution stack
    combined_dim = hparams["embc"] + hparams["embt"]
    n_filters = hparams["conv"]
    self.id_conv = IteratedDilatedConvolutions(
        combined_dim, n_filters, hparams.get("do", 0))

    self.linear1 = nn.Linear(n_filters, hparams['l1'])
    self.linear2 = nn.Linear(hparams['l1'], self.output_scheme.num_tags)

    # kept so the configuration can be persisted alongside the weights
    self.model_params = model_config
def __init__(self, data_config, model_config="embc:16|embs:8|conv:16|l1:16|do:0.0"):
    """Character + syllable convolution/batch-norm model with a single-logit head.

    Args:
        data_config: dict providing 'num_char_tokens' and 'num_tokens'
            (character and syllable vocabulary sizes).
        model_config: pipe-separated hyperparameter string; keys read here
            are embc, embs, conv, l1 and optionally do (default 0).
    """
    super(Model, self).__init__()

    char_vocab = data_config['num_char_tokens']
    log.info("no. characters: %d" % char_vocab)
    syllable_vocab = data_config['num_tokens']
    log.info("no. syllables: %d" % syllable_vocab)

    hparams = utils.parse_model_params(model_config)
    n_filters = hparams['conv']

    # separate embedding tables for characters and syllables (0 = padding)
    self.ch_embeddings = nn.Embedding(char_vocab, hparams['embc'], padding_idx=0)
    self.sy_embeddings = nn.Embedding(syllable_vocab, hparams['embs'], padding_idx=0)

    self.dropout = torch.nn.Dropout(p=hparams.get("do", 0))

    # convolutions operate on the concatenation of both embeddings
    combined_dim = hparams['embc'] + hparams['embs']
    self.conv1 = ConvolutionBatchNorm(combined_dim, n_filters, 3)
    self.conv2 = ConvolutionBatchNorm(combined_dim, n_filters, 5, dilation=3)
    self.conv3 = ConvolutionBatchNorm(combined_dim, n_filters, 9, dilation=2)

    # two-layer head emitting one logit per position (binary decision)
    self.linear1 = nn.Linear(n_filters, hparams['l1'])
    self.linear2 = nn.Linear(hparams['l1'], 1)

    # kept so the configuration can be persisted alongside the weights
    self.model_params = model_config
def __init__(self, data_config, model_config="embs:16|conv:48|l1:16|do:0.1|oc:BI"):
    """Syllable-level single-convolution tagger.

    Args:
        data_config: dict providing "num_tokens" (syllable-vocabulary size).
        model_config: pipe-separated hyperparameter string; keys read here
            are embs, conv, l1, oc and optionally do (default 0).
    """
    super(Model, self).__init__()

    hparams = utils.parse_model_params(model_config)
    vocab_size = data_config["num_tokens"]

    self.output_scheme = output_tags.get_scheme(hparams["oc"])

    # syllable embeddings; index 0 is reserved as the padding token
    self.sy_embeddings = nn.Embedding(vocab_size, hparams["embs"], padding_idx=0)
    self.dropout = torch.nn.Dropout(p=hparams.get("do", 0))

    # a single convolution over the embedded sequence
    n_filters = hparams["conv"]
    self.conv1 = ConvolutionLayer(hparams["embs"], n_filters, 3)

    self.linear1 = nn.Linear(n_filters, hparams['l1'])
    self.linear2 = nn.Linear(hparams['l1'], self.output_scheme.num_tags)

    # kept so the configuration can be persisted alongside the weights
    self.model_params = model_config
def __init__(self, data_config, model_config="embs:8|cells:32|l1:16|oc:BI|crf:1"):
    """Syllable-level (Bi)LSTM tagger with an optional CRF layer.

    Args:
        data_config: dict providing 'num_tokens' (syllable-vocabulary size).
        model_config: pipe-separated hyperparameter string; keys read here
            are embs, cells, l1, oc and optionally crf, bi and do.
    """
    super(Model, self).__init__()
    no_syllables = data_config['num_tokens']
    log.info("no. syllables: %d" % no_syllables)

    config = utils.parse_model_params(model_config)
    self.output_scheme = output_tags.get_scheme(config["oc"])

    # index 0 is reserved as the padding token
    self.sy_embeddings = nn.Embedding(no_syllables, config["embs"], padding_idx=0)

    if config.get("crf", 0):
        self.crf = CRF(self.output_scheme.num_tags, batch_first=True)

    emb_dim = config["embs"]
    # Fix: the default model_config has no "bi" key, so config["bi"] raised
    # KeyError; default to 0 (unidirectional) when not specified.
    num_cells, num_lstm_output, bi_direction = utils.compute_lstm_output_dim(
        config["cells"], config.get("bi", 0))
    # Fix: likewise "do" is absent from the default config; default dropout
    # to 0, matching the other model constructors in this file.
    self.dropout = nn.Dropout(config.get("do", 0))
    self.lstm = nn.LSTM(emb_dim, num_cells,
                        bidirectional=bi_direction, batch_first=True)

    self.linear1 = nn.Linear(num_lstm_output, config["l1"])
    self.linear2 = nn.Linear(config["l1"], self.output_scheme.num_tags)

    # kept so the configuration can be persisted alongside the weights
    self.model_params = model_config
def __init__(self, data_config, model_config="embc:16|embt:8|embs:8|conv:16|l1:16|do:0.0|oc:BI"):
    """Character + char-type + syllable two-stage convolutional tagger.

    Args:
        data_config: dict providing 'num_char_tokens' and 'num_tokens'
            (character and syllable vocabulary sizes).
        model_config: pipe-separated hyperparameter string; keys read here
            are embc, embt, embs, conv, l1, oc and optionally do (default 0).
    """
    super(Model, self).__init__()

    char_vocab = data_config['num_char_tokens']
    log.info("no. characters: %d" % char_vocab)
    syllable_vocab = data_config['num_tokens']
    log.info("no. syllables: %d" % syllable_vocab)

    hparams = utils.parse_model_params(model_config)
    n_filters = hparams['conv']

    self.output_scheme = output_tags.get_scheme(hparams["oc"])

    # three embedding tables: char type, character and syllable
    # (index 0 is padding for the character/syllable tables)
    self.ch_type_embeddings = nn.Embedding(
        char_type.get_total_char_types(),
        hparams["embt"],
    )
    self.ch_embeddings = nn.Embedding(
        char_vocab, hparams["embc"], padding_idx=0
    )
    self.sy_embeddings = nn.Embedding(
        syllable_vocab, hparams["embs"], padding_idx=0
    )

    self.dropout = torch.nn.Dropout(p=hparams.get("do", 0))

    # first conv mixes the concatenated embeddings; second conv stacks on top
    combined_dim = hparams["embc"] + hparams["embs"] + hparams["embt"]
    self.conv1 = ConvolutionLayer(combined_dim, n_filters, 3)
    self.conv2 = ConvolutionLayer(n_filters, n_filters, 3, dilation=1)

    self.linear1 = nn.Linear(n_filters, hparams['l1'])
    self.linear2 = nn.Linear(hparams['l1'], self.output_scheme.num_tags)

    # kept so the configuration can be persisted alongside the weights
    self.model_params = model_config
def __init__(self, data_config, model_config="embc:16|embt:8|cells:32|l1:16|bi:1|oc:BI"):
    """Character + char-type (Bi)LSTM tagger.

    Args:
        data_config: dict providing "num_tokens" (character-vocabulary size).
        model_config: pipe-separated hyperparameter string; keys read here
            are embc, embt, cells, l1, bi, oc and optionally do.
    """
    super(Model, self).__init__()
    no_chars = data_config["num_tokens"]
    log.info("no. characters: %d" % no_chars)

    config = utils.parse_model_params(model_config)
    self.output_scheme = output_tags.get_scheme(config["oc"])

    # char-type table plus character table (index 0 = padding)
    self.ch_type_embeddings = nn.Embedding(
        char_type.get_total_char_types(),
        config["embt"],
    )
    self.ch_embeddings = nn.Embedding(no_chars, config["embc"], padding_idx=0)

    # the two embeddings are concatenated before the LSTM
    emb_dim = config["embc"] + config["embt"]
    num_cells, num_lstm_output, bi_direction = utils.compute_lstm_output_dim(
        config["cells"], config["bi"])
    # Fix: the default model_config has no "do" key, so config["do"] raised
    # KeyError; default dropout to 0 like the other constructors in this file.
    self.dropout = nn.Dropout(config.get("do", 0))
    self.lstm = nn.LSTM(emb_dim, num_cells,
                        bidirectional=bi_direction, batch_first=True)

    self.linear1 = nn.Linear(num_lstm_output, config["l1"])
    self.linear2 = nn.Linear(config["l1"], self.output_scheme.num_tags)

    # kept so the configuration can be persisted alongside the weights
    self.model_params = model_config
def main(model_name, data_dir, epoch=10, lr=0.001, batch_size=64,
         weight_decay=0.0, checkpoint=5, model_params="", output_dir="",
         no_workers=4, lr_schedule="", prev_model=""):
    """Train a binary (BCE-with-logits) segmentation model.

    Args:
        model_name: registered model name resolved via models.get_model.
        data_dir: directory holding preprocessed data and the dictionary.
        epoch: number of training epochs.
        lr: learning rate (forced even when resuming from prev_model).
        batch_size: minibatch size for both loaders.
        weight_decay: Adam weight decay.
        checkpoint: save an extra snapshot every N epochs (0 disables).
        model_params: optional model_config string forwarded to the model.
        output_dir: where models, optimizer state and configs are written.
        no_workers: DataLoader worker processes.
        lr_schedule: optional "step:N|gamma:G" StepLR configuration.
        prev_model: optional directory to resume model/optimizer state from.
    """
    model_cls = models.get_model(model_name)
    dataset_cls = model_cls.dataset()
    training_set: dl.SequenceDataset = dataset_cls.load_preprocessed_file_with_suffix(
        data_dir, "training.txt")
    validation_set: dl.SequenceDataset = dataset_cls.load_preprocessed_file_with_suffix(
        data_dir, "val.txt")

    # only required
    data_config = training_set.setup_featurizer("%s/dictionary" % data_dir)

    device = get_device()
    print("Using device: %s" % device)

    params = {}
    if model_params:
        params['model_config'] = model_params
        print(">> model configuration: %s" % model_params)

    if prev_model:
        print("Initiate model from %s" % prev_model)
        model = models.get_model(model_name).load(prev_model, data_config, **params)
    else:
        model = models.get_model(model_name)(data_config, **params)
    model = model.to(device)

    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    if prev_model:
        # Fix: corrected "optmizer" typo in the log message.
        print("Loading prev optimizer's state")
        optimizer.load_state_dict(torch.load("%s/optimizer.pth" % prev_model))
        print("Previous learning rate", get_lr(optimizer))

        # force torch to use the given lr, not previous one
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
            param_group['initial_lr'] = lr

        print("Current learning rate", get_lr(optimizer))

    if lr_schedule:
        schedule_params = utils.parse_model_params(lr_schedule)
        scheduler = optim.lr_scheduler.StepLR(
            optimizer,
            step_size=schedule_params['step'],
            gamma=schedule_params['gamma'],
        )

    dataloader_params = dict(batch_size=batch_size,
                             num_workers=no_workers,
                             collate_fn=dataset_cls.collate_fn)

    print("Using dataset: %s" % type(dataset_cls).__name__)

    training_generator = data.DataLoader(training_set,
                                         shuffle=True,
                                         **dataloader_params)
    validation_generator = data.DataLoader(validation_set,
                                           shuffle=False,
                                           **dataloader_params)

    total_train_size = len(training_set)
    total_test_size = len(validation_set)
    print("We have %d train samples and %d test samples" %
          (total_train_size, total_test_size))

    # for FloydHub
    print('{"metric": "%s:%s", "value": %s}' %
          ("model", model_name, model.total_trainable_params()))

    utils.maybe_create_dir(output_dir)
    copy_files("%s/dictionary/*.json" % data_dir, output_dir)
    utils.save_training_params(
        output_dir,
        utils.ModelParams(name=model_name, params=model.model_params))

    for e in range(1, epoch + 1):
        print("===EPOCH %d ===" % (e))

        if lr_schedule:
            curr_lr = get_lr(optimizer)
            print_floydhub_metrics(dict(lr=curr_lr), step=e, prefix="global")
            print("lr: ", curr_lr)

        # Fix: switch to train mode before training, matching the sibling
        # training script; without this, Dropout/BatchNorm would keep their
        # previous mode across phases.
        with utils.Timer("epoch-training"):
            model.train()
            do_iterate(
                model,
                training_generator,
                prefix="training",
                step=e,
                device=device,
                optimizer=optimizer,
                criterion=criterion,
            )

        # Fix: eval mode for validation so Dropout/BatchNorm are disabled.
        with utils.Timer("epoch-validation") as timer, \
                torch.no_grad():
            model.eval()
            do_iterate(
                model,
                validation_generator,
                prefix="validation",
                step=e,
                device=device,
                criterion=criterion,
            )

        if lr_schedule:
            scheduler.step()

        # periodic snapshot in addition to the final save below
        if checkpoint and e % checkpoint == 0:
            model_path = "%s/model-e-%d.pth" % (output_dir, e)
            print("Saving model to %s" % model_path)
            torch.save(model.state_dict(), model_path)

    model_path = "%s/model.pth" % output_dir
    opt_path = "%s/optimizer.pth" % output_dir
    print("Saving model to %s" % model_path)
    torch.save(model.state_dict(), model_path)
    torch.save(optimizer.state_dict(), opt_path)
""", } if __name__ == "__main__": with open("./hyperopt-results.yml", "r") as fh, open(OUTPUT, "w") as fw: data = yaml.safe_load(fh) for i, row in enumerate(data): path = row["path"] df = pd.read_csv(path) print(f"loading {path}") max_val_f1 = df["best-val:word_level:f1"].max() best_model = df[df["best-val:word_level:f1"] == max_val_f1].to_dict("row")[0] arch_config = utils.parse_model_params(best_model["params"]) if "ID-CNN-XL" in row["name"]: fam_param_tmp = family_specific_param["ID-CNN-XL"] elif "ID-CNN" in row["name"]: fam_param_tmp = family_specific_param["ID-CNN"] elif "BiLSTM-XL" in row["name"]: fam_param_tmp = family_specific_param["BiLSTM-XL"] elif "BiLSTM" in row["name"]: fam_param_tmp = family_specific_param["BiLSTM"] else: raise ValueError(row["name"], "doesn't exist!") fam_param = fam_param_tmp % arch_config tt = table % dict(
def test_parse_model_params():
    """parse_model_params splits pipe-separated key:value pairs and casts numerics."""
    parsed = utils.parse_model_params("emb:32|l1:48|do:0.5")
    assert parsed == {"emb": 32, "l1": 48, "do": 0.5}
def main(
    model_name,
    data_dir,
    epoch=10,
    lr=0.001,
    batch_size=64,
    weight_decay=0.0,
    checkpoint=0,  # NOTE(review): accepted for CLI compatibility but unused here
    model_params="",
    output_dir="",
    no_workers=4,
    prev_model="",
):
    """Train a tag-scheme segmentation model (CRF or cross-entropy loss).

    Keeps only the best model by validation loss and records timing plus
    training hyperparameters next to the weights.

    Args:
        model_name: registered model name resolved via models.get_model.
        data_dir: directory holding preprocessed data and the dictionary.
        epoch: number of training epochs.
        lr: learning rate (forced even when resuming from prev_model).
        batch_size: minibatch size for both loaders.
        weight_decay: Adam weight decay.
        checkpoint: unused in this variant (best-model saving is automatic).
        model_params: model_config string; must contain "oc" (output scheme).
        output_dir: where models, optimizer state and configs are written.
        no_workers: DataLoader worker processes.
        prev_model: optional directory to resume model/optimizer state from.
    """
    model_cls = models.get_model(model_name)
    output_scheme = output_tags.get_scheme(
        utils.parse_model_params(model_params)["oc"])
    dataset_cls = model_cls.dataset
    training_set: dl.SequenceDataset = dataset_cls.load_preprocessed_file_with_suffix(
        data_dir, "training.txt", output_scheme)
    validation_set: dl.SequenceDataset = dataset_cls.load_preprocessed_file_with_suffix(
        data_dir, "val.txt", output_scheme)

    # only required
    data_config = training_set.setup_featurizer()

    device = models.get_device()
    print("Using device: %s" % device)

    params = {}
    if model_params:
        params['model_config'] = model_params
        print(">> model configuration: %s" % model_params)

    if prev_model:
        print("Initiate model from %s" % prev_model)
        model = models.get_model(model_name).load(prev_model, data_config, **params)
    else:
        model = models.get_model(model_name)(data_config, **params)
    model = model.to(device)

    # models with a CRF layer use the CRF likelihood, others cross-entropy
    if hasattr(model, "crf"):
        criterion = loss.crf
    else:
        criterion = loss.cross_ent

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    if prev_model:
        # Fix: corrected "optmizer" typo in the log message.
        print("Loading prev optimizer's state")
        optimizer.load_state_dict(torch.load("%s/optimizer.pth" % prev_model))
        print("Previous learning rate", get_lr(optimizer))

        # force torch to use the given lr, not previous one
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
            param_group['initial_lr'] = lr

        print("Current learning rate", get_lr(optimizer))

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min",
                                                     patience=0, verbose=True)

    dataloader_params = dict(batch_size=batch_size,
                             num_workers=no_workers,
                             collate_fn=dataset_cls.collate_fn)

    # Fix: dataset_cls is the class itself here (no call), so
    # type(dataset_cls).__name__ printed the metaclass name, not the dataset's.
    print("Using dataset: %s" % dataset_cls.__name__)

    training_generator = data.DataLoader(training_set,
                                         shuffle=True,
                                         **dataloader_params)
    validation_generator = data.DataLoader(validation_set,
                                           shuffle=False,
                                           **dataloader_params)

    total_train_size = len(training_set)
    total_test_size = len(validation_set)
    print("We have %d train samples and %d test samples" %
          (total_train_size, total_test_size))

    # for FloydHub
    print('{"metric": "%s:%s", "value": %s}' %
          ("model", model_name, model.total_trainable_params()))

    os.makedirs(output_dir, exist_ok=True)
    copy_files("%s/dictionary/*.json" % data_dir, output_dir)

    start_training_time = time.time()
    best_val_loss = np.inf
    for e in range(1, epoch + 1):
        print("===EPOCH %d ===" % (e))
        st_time = time.time()
        curr_lr = get_lr(optimizer)
        print(f"lr={curr_lr}")

        with utils.Timer("epoch-training"):
            model.train()
            _ = do_iterate(
                model,
                training_generator,
                prefix="training",
                step=e,
                device=device,
                optimizer=optimizer,
                criterion=criterion,
            )

        with utils.Timer("epoch-validation"), \
                torch.no_grad():
            model.eval()
            val_loss = do_iterate(
                model,
                validation_generator,
                prefix="validation",
                step=e,
                device=device,
                criterion=criterion,
            )

        elapsed_time = (time.time() - st_time) / 60.
        print(f"Time took: {elapsed_time:.4f} mins")

        scheduler.step(val_loss)

        # keep only the best model (by validation loss) on disk
        if val_loss < best_val_loss:
            model_path = "%s/model.pth" % output_dir
            opt_path = "%s/optimizer.pth" % output_dir
            print("Saving model to %s" % model_path)
            torch.save(model.state_dict(), model_path)
            torch.save(optimizer.state_dict(), opt_path)
            best_val_loss = val_loss

    training_took = time.time() - start_training_time
    print(f"[training] total time: {training_took}")

    config = utils.parse_model_params(model_params)
    # a string "embs" value names a pretrained embedding file to ship along
    # Fix: isinstance instead of type(...) == str.
    if "embs" in config and isinstance(config["embs"], str):
        emb = config["embs"]
        copy_files(f"{data_dir}/dictionary/sy-emb-{emb}.npy", output_dir)

    utils.save_training_params(
        output_dir,
        utils.ModelParams(name=model_name,
                          params=model.model_params,
                          training_took=training_took,
                          num_trainable_params=model.total_trainable_params(),
                          lr=lr,
                          weight_decay=weight_decay,
                          epoch=epoch))