Example No. 1
# Load the index-to-character mapping, skipping the CSV header row
idx2char = {}
with open(conf['meta_variable']['data_path']+'/idx2chap.csv','r') as f:
    for line in f:
        if 'idx' in line: continue
        idx2char[int(line.split(',')[0])] = line.rstrip('\n').split(',')[1]

# Load pre-trained model if needed
if conf['training_parameter']['use_pretrained']:
    global_step = conf['training_parameter']['pretrained_step']
    listener = torch.load(conf['training_parameter']['pretrained_listener_path'])
    speller = torch.load(conf['training_parameter']['pretrained_speller_path'])
else:
    global_step = 0
    listener = Listener(**conf['model_parameter'])
    speller = Speller(**conf['model_parameter'])
optimizer = torch.optim.Adam([{'params':listener.parameters()}, {'params':speller.parameters()}],
                              lr=conf['training_parameter']['learning_rate'])

best_ler = 1.0
record_gt_text = False
log_writer = SummaryWriter(conf['meta_variable']['training_log_dir']+conf['meta_variable']['experiment_name'])

# Training
print('Training starts...',flush=True)
while global_step < total_steps:

    # Teacher-forcing rate decays linearly from the upper bound to the lower bound
    tf_rate = tf_rate_upperbound - (tf_rate_upperbound-tf_rate_lowerbound)*min((float(global_step)/tf_decay_step),1)

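The linear decay above is easy to sanity-check in isolation. The following is a minimal standalone sketch of the same schedule; the bound and step values (0.9, 0.6, 100000) are assumptions for illustration, not values read from the config.

# Standalone sketch of the linear teacher-forcing decay used above.
# The concrete values below are assumed for illustration only.
tf_rate_upperbound = 0.9   # assumed initial teacher-forcing rate
tf_rate_lowerbound = 0.6   # assumed final teacher-forcing rate
tf_decay_step = 100000     # assumed number of steps over which the rate decays

def teacher_forcing_rate(step):
    # Interpolate linearly from the upper to the lower bound,
    # then hold the lower bound once tf_decay_step is reached.
    progress = min(float(step) / tf_decay_step, 1.0)
    return tf_rate_upperbound - (tf_rate_upperbound - tf_rate_lowerbound) * progress

for step in (0, 50000, 100000, 200000):
    print(step, teacher_forcing_rate(step))   # 0.9, 0.75, 0.6, 0.6
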
Example No. 2
# Load the index-to-character mapping, skipping the CSV header row
idx2char = {}
with open(conf['meta_variable']['data_path'] + '/idx2chap.csv', 'r') as f:
    for line in f:
        if 'idx' in line: continue
        idx2char[int(line.split(',')[0])] = line.rstrip('\n').split(',')[1]

# Load pre-trained model if needed
if conf['training_parameter']['use_pretrained']:
    global_step = conf['training_parameter']['pretrained_step']
    listener = torch.load(
        conf['training_parameter']['pretrained_listener_path'])
    speller = torch.load(conf['training_parameter']['pretrained_speller_path'])
else:
    global_step = 0
    listener = Listener(**conf['model_parameter'])
    speller = Speller(**conf['model_parameter'])

#las_module = LAS()
#las_module.set_submodules(listener, speller)

# make module run parallel
#las_module = torch.nn.DataParallel(las_module, device_ids=[0, 1, 2 ,3])

optimizer = torch.optim.Adam([{'params': listener.parameters()},
                              {'params': speller.parameters()}],
                             lr=conf['training_parameter']['learning_rate'])

#optimizer = torch.optim.SGD([{'params':listener.parameters()}, {'params':speller.parameters()}],
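
The two-entry parameter-group list passed to Adam above is what allows per-module optimizer settings. Below is a self-contained sketch of that mechanism using stand-in nn.Linear modules; the learning-rate values are assumptions for illustration, not values from the config.

import torch
import torch.nn as nn

# Stand-in modules; in the snippet above these would be the Listener and Speller.
listener = nn.Linear(8, 8)
speller = nn.Linear(8, 8)

# Each dict is one parameter group; a group without its own 'lr' falls back to
# the default lr given to the optimizer.
optimizer = torch.optim.Adam(
    [{'params': listener.parameters(), 'lr': 1e-4},   # assumed per-group LR
     {'params': speller.parameters()}],               # uses the default lr
    lr=1e-3)                                          # assumed default LR

for group in optimizer.param_groups:
    print(group['lr'])   # 0.0001, then 0.001
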
Example No. 3
def main(args):
    # Tensorboard logging
    # Writer will output to ./runs/ directory by default
    writer = SummaryWriter(comment=args.experiment_name)
    # Fix seed
    seed = 17
    np.random.seed(seed)
    torch.manual_seed(seed)
    random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    print("---------------------------------------")
    print("Loading Config...", flush=True)
    # Load config file for experiment
    config_path = args.config_path
    print("Loading configure file at", config_path)
    with open(config_path, "r") as f:
        params = yaml.load(f, Loader=yaml.FullLoader)
    data_name = params["data"]["name"]

    tf_rate_upperbound = params["training"]["tf_rate_upperbound"]
    tf_rate_lowerbound = params["training"]["tf_rate_lowerbound"]
    tf_decay_step = params["training"]["tf_decay_step"]
    epochs = params["training"]["epochs"]

    # Load datasets
    print("---------------------------------------")
    print("Processing datasets...", flush=True)
    train_dataset = AudioDataset(params, "train")
    train_loader = AudioDataLoader(
        train_dataset, shuffle=True,
        num_workers=params["data"]["num_works"]).loader
    dev_dataset = AudioDataset(params, "test")
    dev_loader = AudioDataLoader(
        dev_dataset, num_workers=params["data"]["num_works"]).loader

    print("---------------------------------------")
    print("Creating model architecture...", flush=True)
    # Create listener and speller
    listener = Listener(**params["model"]["listener"])
    speller = Speller(**params["model"]["speller"])
    las = LAS(listener, speller)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        las = nn.DataParallel(las)
    print(las)
    las.cuda()
    # Create optimizer
    optimizer = torch.optim.Adam(
        params=las.parameters(),
        lr=params["training"]["lr"],
    )
    if params["training"]["continue_from"]:
        print("Loading checkpoint model %s" %
              params["training"]["continue_from"])
        package = torch.load(params["training"]["continue_from"])
        las.load_state_dict(package["state_dict"])
        optimizer.load_state_dict(package["optim_dict"])
        start_epoch = int(package.get("epoch", 1))
    else:
        start_epoch = 0

    print("---------------------------------------")
    print("Training...", flush=True)

    # import pdb

    # pdb.set_trace()
    global_step = 0 + (len(train_loader) * start_epoch)
    best_cv_loss = 10e5
    my_fields = {"loss": 0}
    for epoch in tqdm(range(start_epoch, epochs), desc="Epoch training"):
        epoch_step = 0
        train_loss = []
        train_ler = []
        batch_loss = 0
        for i, data in tqdm(enumerate(train_loader),
                              total=len(train_loader),
                              leave=False,
                              desc=f"Epoch number {epoch}"):
            # print(
            #     f"Current Epoch: {epoch} Loss {np.round(batch_loss, 3)} | Epoch step: {epoch_step}/{len(train_loader)}",
            #     end="\r",
            #     flush=True,
            # )
            my_fields["loss"] = batch_loss
            # Adjust teacher-forcing rate (linear decay)
            tf_rate = tf_rate_upperbound - (
                tf_rate_upperbound - tf_rate_lowerbound) * min(
                    (float(global_step) / tf_decay_step), 1)
            inputs = data[1]["inputs"].cuda()
            labels = data[2]["targets"].cuda()
            print(f"INPUT SHAPE {inputs.shape} LABELS SHAPE: {labels.shape}")
            batch_loss, batch_ler = batch_iterator(
                batch_data=inputs,
                batch_label=labels,
                las_model=las,
                optimizer=optimizer,
                tf_rate=tf_rate,
                is_training=True,
                max_label_len=params["model"]["speller"]["max_label_len"],
                label_smoothing=params["training"]["label_smoothing"],
                vocab_dict=train_dataset.char2idx,
            )
            if i % 100 == 0:
                torch.cuda.empty_cache()
            train_loss.append(batch_loss)
            train_ler.extend(batch_ler)
            global_step += 1
            epoch_step += 1
            # print(batch_ler)
            writer.add_scalar("loss/train-step", batch_loss, global_step)
            writer.add_scalar("ler/train-step",
                              np.array([sum(train_ler) / len(train_ler)]),
                              global_step)

        train_loss = np.array([sum(train_loss) / len(train_loss)])
        train_ler = np.array([sum(train_ler) / len(train_ler)])
        writer.add_scalar("loss/train-epoch", train_loss, epoch)
        writer.add_scalar("ler/train-epoch", train_ler, epoch)
        # Validation
        val_loss = []
        val_ler = []
        val_step = 0
        for i, data in tqdm(enumerate(dev_loader),
                              total=len(dev_loader),
                              leave=False,
                              desc="Validation"):
            # print(
            #     f"Current Epoch: {epoch} | Epoch step: {epoch_step}/{len(train_loader)} Validating step: {val_step}/{len(dev_loader)}",
            #     end="\r",
            #     flush=True,
            # )

            inputs = data[1]["inputs"].cuda()
            labels = data[2]["targets"].cuda()

            batch_loss, batch_ler = batch_iterator(
                batch_data=inputs,
                batch_label=labels,
                las_model=las,
                optimizer=optimizer,
                tf_rate=tf_rate,
                is_training=False,
                max_label_len=params["model"]["speller"]["max_label_len"],
                label_smoothing=params["training"]["label_smoothing"],
                vocab_dict=dev_dataset.char2idx,
            )
            if i % 100 == 0:
                torch.cuda.empty_cache()
            val_loss.append(batch_loss)
            val_ler.extend(batch_ler)
            val_step += 1

        val_loss = np.array([sum(val_loss) / len(val_loss)])
        val_ler = np.array([sum(val_ler) / len(val_ler)])
        writer.add_scalar("loss/dev", val_loss, epoch)
        writer.add_scalar("ler/dev", val_ler, epoch)
        # Save a checkpoint each epoch, keeping only the last 10 epochs
        if params["training"]["checkpoint"]:
            # If the checkpoint from 10 epochs ago exists, delete it
            file_path_old = os.path.join(
                params["training"]["save_folder"],
                f"{data_name}-epoch{epoch - 10}.pth.tar")
            if os.path.exists(file_path_old):
                os.remove(file_path_old)

            file_path = os.path.join(params["training"]["save_folder"],
                                     f"{data_name}-epoch{epoch}.pth.tar")
            torch.save(
                las.serialize(optimizer=optimizer,
                              epoch=epoch,
                              tr_loss=train_loss,
                              val_loss=val_loss),
                file_path,
            )
            print()
            print("Saving checkpoint model to %s" % file_path)

        if val_loss < best_cv_loss:  # We found a new best model, let's save it too
            best_cv_loss = val_loss
            file_path = os.path.join(
                params["training"]["save_folder"],
                f"{data_name}-BEST_LOSS-epoch{epoch}.pth.tar")
            # purge(params["training"]["save_folder"], "*BEST_LOSS*")  # Remove
            # previous best models
            torch.save(
                las.serialize(optimizer=optimizer,
                              epoch=epoch,
                              tr_loss=train_loss,
                              val_loss=val_loss),
                file_path,
            )
            print("Saving BEST model to %s" % file_path)

        # writer.add_scalars("cer", {"train": np.array([np.array(batch_ler).mean()])}, global_step)
        # pdb.set_trace()
        # print(inputs.size())
        print()
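
For reference, the resume block near the top of main() reads "state_dict", "optim_dict", and "epoch" out of the loaded checkpoint, so las.serialize presumably packs at least those keys. The snippet below is only a hedged sketch of a compatible serialize helper, not the actual LAS implementation.

import torch
import torch.nn as nn

class LASSketch(nn.Module):
    # Sketch of a serialize() method matching the keys main() reads back.
    def serialize(self, optimizer, epoch, tr_loss=None, val_loss=None):
        return {
            "state_dict": self.state_dict(),       # model weights
            "optim_dict": optimizer.state_dict(),  # optimizer state (Adam moments, lr)
            "epoch": epoch,                        # used as start_epoch when resuming
            "tr_loss": tr_loss,
            "val_loss": val_loss,
        }

# Saving and resuming would then mirror the checkpoint code above:
# torch.save(model.serialize(optimizer, epoch, tr_loss, val_loss), file_path)
# package = torch.load(file_path)
# model.load_state_dict(package["state_dict"])
# optimizer.load_state_dict(package["optim_dict"])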