示例#1
0
def update(config):
    """Adapt a feed-forward network with a frozen RNN autoencoder as the loss.

    The classifier is loaded from ``config.model`` and the performance
    monitoring autoencoder from ``config.pm`` (its parameters are frozen).
    In every epoch the utterances listed in ``config.scp`` are streamed
    through a Kaldi feature pipeline and through the classifier; the second
    element of the classifier output has the mean returned by
    ``get_cmvn(config.cmvn)`` subtracted and is padded/cropped to
    ``config.max_seq_len`` frames. Each time ``config.batch_size`` utterances
    have been collected, one optimizer step is taken on the mean
    autoencoder reconstruction error, and the dev set in ``config.dev_egs``
    is re-scored. Dev loss / FER histories are saved once per epoch under
    ``<store_path>/<experiment_name>.dir``.

    Args:
        config: parsed options. Attributes read here: ``model``, ``pm``,
            ``cmvn``, ``scp``, ``egs_config``, ``dev_egs``, ``store_path``,
            ``experiment_name``, ``use_gpu``, ``optimizer``,
            ``learning_rate``, ``lr_factor``, ``batch_size``, ``epochs``,
            ``max_seq_len`` and ``time_shifts`` (comma-separated integers).

    Raises:
        NotImplementedError: if ``config.optimizer`` is not one of
            ``adam``, ``adadelta``, ``sgd``, ``adagrad`` or ``rmsprop``.
    """

    def to_device(tensor):
        """Return ``tensor`` on the GPU when ``config.use_gpu`` is set."""
        return tensor.cuda() if config.use_gpu else tensor

    # Load model
    # NOTE: torch.load / pickle.load unpickle their input; only point them at
    # trusted files.
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()

    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    # Fixed: this line used to print feature_dim under the hidden-dim label.
    logging.info('Hidden Dimension: %d' % (nnet['hidden_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Time shifts: %s' % (config.time_shifts))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    dev_criterion = nn.CrossEntropyLoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    lr = config.learning_rate

    ts_list = [int(t) for t in config.time_shifts.split(',')]
    max_ts = max(ts_list)

    # Figure out all feature stuff

    # Writes a shuffled copy of the scp list to ./temp (current directory).
    shell_cmd = "cat {:s} | shuf > temp".format(config.scp)
    subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)

    with open(config.egs_config, 'rb') as egs_file:
        feats_config = pickle.load(egs_file)

    # Default to "no transform" so an empty feat_type no longer leaves these
    # names unbound.
    feat_type = None
    trans_path = None
    if feats_config['feat_type']:
        feat_parts = feats_config['feat_type'].split(',')
        feat_type = feat_parts[0]
        trans_path = feat_parts[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, 'temp')
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, 'temp')
    else:
        # 'temp' is an scp list, not an ark; convert it to an ark stream so
        # it can be read by read_mat_ark and piped into splice-feats below.
        cmd = "copy-feats scp:{} ark:- |".format('temp')

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Load performance monitoring model

    pm_model = torch.load(config.pm, map_location=lambda storage, loc: storage)
    ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                              pm_model['bn_dim'],
                              pm_model['encoder_num_layers'],
                              pm_model['decoder_num_layers'],
                              pm_model['hidden_dim'])
    ae_model.load_state_dict(pm_model['model_state_dict'])

    if config.use_gpu:
        ae_model.cuda()

    # Do not update performance monitoring block
    for p in ae_model.parameters():
        p.requires_grad = False

    mean, _ = get_cmvn(config.cmvn)
    # Built once instead of once per utterance.
    mean_t = to_device(torch.FloatTensor(mean))

    ep_loss_dev = []
    ep_fer_dev = []

    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=config.batch_size,
                                              shuffle=True)

    def score_dev():
        """Return per-batch (losses, FERs) of the current model on the dev set."""
        losses = []
        fers = []
        # No gradients are needed for scoring; avoids building graphs.
        with torch.no_grad():
            for dev_x, dev_l in data_loader:
                dev_x = to_device(Variable(dev_x))
                dev_l = to_device(Variable(dev_l))

                _, dev_out = model(dev_x)
                losses.append(dev_criterion(dev_out, dev_l).item())
                fers.append(
                    compute_fer(dev_out.cpu().data.numpy(),
                                dev_l.cpu().data.numpy()))
        return losses, fers

    # Compute initial performance on dev set
    val_losses, val_fer = score_dev()
    ep_loss_dev.append(np.mean(val_losses))
    ep_fer_dev.append(np.mean(val_fer))

    print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
        np.mean(val_losses), np.mean(val_fer))

    logging.info(print_log)

    for epoch in range(config.epochs):

        posts = []  # padded per-utterance outputs of the current batch
        lens = []   # matching (clipped) utterance lengths

        val_losses = []
        val_fer = []

        train_losses_pos = []
        train_losses_neg = []

        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            model.eval()
            out = model(to_device(Variable(torch.FloatTensor(mat))))
            post = out[1] - mean_t

            lens.append(min(post.shape[0], config.max_seq_len))
            # A negative pad amount crops utterances longer than max_seq_len.
            posts.append(
                F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0))))
            sys.stdout.flush()

            if len(posts) != config.batch_size:
                continue

            #### DO THE ADAPTATION

            lens_t = torch.IntTensor(lens)
            _, indices = torch.sort(lens_t, descending=True)
            batch_x = torch.stack(posts, 0)[indices]
            batch_l = lens_t[indices]

            outputs = ae_model(batch_x, batch_l)

            # Compare only frames [max_ts, -max_ts - 1) so that every shifted
            # slice used below stays inside the sequence.
            centre = outputs[:, max_ts:-max_ts - 1, :]

            # First positive loss
            mse_pos = samplewise_mse(centre,
                                     batch_x[:, max_ts:-max_ts - 1, :])

            # Now find negative loss: reconstruction vs. time-shifted input
            s = centre.size()
            mse_neg = to_device(torch.zeros(s[0], s[1]))

            for t in ts_list:
                mse_neg += samplewise_mse(
                    centre, batch_x[:, max_ts + t:-max_ts - 1 + t, :])
                mse_neg += samplewise_mse(
                    centre, batch_x[:, max_ts - t:-max_ts - 1 - t, :])

            mse_neg = mse_neg / (2 * len(ts_list))

            # Only the positive term is optimised; the negative term is
            # tracked for logging.
            loss = mse_pos.mean()  # (mse_pos / mse_neg).mean()
            train_losses_pos.append(loss.item())
            train_losses_neg.append(mse_neg.mean().item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            posts = []
            lens = []

            # CHECK IF ADAPTATION IS WORKING AT ALL
            step_losses, step_fers = score_dev()
            val_losses.extend(step_losses)
            val_fer.extend(step_fers)

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} update, Tr +ve MSE Loss: {:.3f} :: Tr -ve MSE Loss: {:.3f} :: Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
            epoch, np.mean(train_losses_pos), np.mean(train_losses_neg),
            np.mean(val_losses), np.mean(val_fer))

        logging.info(print_log)

        # Passing a path lets torch.save open and close the file itself.
        torch.save(
            ep_loss_dev,
            os.path.join(model_dir, "dev_epoch{:d}.loss".format(epoch + 1)))
        torch.save(
            ep_fer_dev,
            os.path.join(model_dir, "dev_epoch{:d}.fer".format(epoch + 1)))

        # Rescale the learning rate using config.lr_factor
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
示例#2
0
def update(config):
    """Adapt a feed-forward network with RNN-AE updates and self-labelling.

    The classifier is loaded from ``config.model`` and the frozen
    performance-monitoring autoencoder from ``config.pm``. Every epoch has
    up to three stages:

    1. While no utterance has yet been selected (``unsup_up``), an
       unsupervised pass: classifier outputs (minus the mean from
       ``get_cmvn(config.cmvn)``) are batched and the classifier is updated
       on the autoencoder MSE.
    2. A scoring pass: each utterance whose autoencoder loss is below
       ``config.score_threshold`` is kept, labelled with the classifier's
       own argmax.
    3. If any utterance was kept, a supervised cross-entropy pass over the
       kept frames.

    The dev set in ``config.dev_egs`` is scored after each epoch and the
    histories are saved under ``<store_path>/<experiment_name>.dir``.

    Args:
        config: parsed options. Attributes read here: ``model``, ``pm``,
            ``cmvn``, ``scp``, ``egs_config``, ``dev_egs``, ``store_path``,
            ``experiment_name``, ``use_gpu``, ``optimizer``,
            ``learning_rate``, ``lr_factor``, ``batch_size``, ``epochs``,
            ``max_seq_len``, ``time_shift`` and ``score_threshold``.
            ``score_threshold`` is modified in place (multiplied by 1.1 after
            every 20 epochs in which utterances were selected).

    Raises:
        NotImplementedError: if ``config.optimizer`` is not one of
            ``adam``, ``adadelta``, ``sgd``, ``adagrad`` or ``rmsprop``.
    """

    def to_device(tensor):
        """Return ``tensor`` on the GPU when ``config.use_gpu`` is set."""
        return tensor.cuda() if config.use_gpu else tensor

    # Load model
    # NOTE: torch.load / pickle.load unpickle their input; only point them at
    # trusted files.
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()

    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    # Fixed: this line used to print feature_dim under the hidden-dim label.
    logging.info('Hidden Dimension: %d' % (nnet['hidden_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.MSELoss()
    dev_criterion = nn.CrossEntropyLoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    lr = config.learning_rate
    # Figure out all feature stuff

    # Writes a shuffled copy of the scp list to ./temp (current directory).
    shell_cmd = "cat {:s} | shuf > temp".format(config.scp)
    subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)

    with open(config.egs_config, 'rb') as egs_file:
        feats_config = pickle.load(egs_file)

    # Default to "no transform" so an empty feat_type no longer leaves these
    # names unbound.
    feat_type = None
    trans_path = None
    if feats_config['feat_type']:
        feat_parts = feats_config['feat_type'].split(',')
        feat_type = feat_parts[0]
        trans_path = feat_parts[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, 'temp')
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, 'temp')
    else:
        # 'temp' is an scp list, not an ark; convert it to an ark stream so
        # it can be read by read_mat_ark and piped into splice-feats below.
        cmd = "copy-feats scp:{} ark:- |".format('temp')

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Load performance monitoring model

    pm_model = torch.load(config.pm, map_location=lambda storage, loc: storage)
    ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                              pm_model['bn_dim'],
                              pm_model['encoder_num_layers'],
                              pm_model['decoder_num_layers'],
                              pm_model['hidden_dim'])
    ae_model.load_state_dict(pm_model['model_state_dict'])

    if config.use_gpu:
        ae_model.cuda()

    # Do not update performance monitoring block
    for p in ae_model.parameters():
        p.requires_grad = False

    mean, _ = get_cmvn(config.cmvn)
    # Built once instead of once per utterance.
    mean_t = to_device(Variable(torch.FloatTensor(mean)))

    ep_loss_dev = []
    ep_fer_dev = []

    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader_check = torch.utils.data.DataLoader(dataset,
                                                    batch_size=5000,
                                                    shuffle=True)

    def score_dev():
        """Return per-batch (losses, FERs) of the current model on the dev set."""
        losses = []
        fers = []
        # No gradients are needed for scoring; avoids building graphs.
        with torch.no_grad():
            for dev_x, dev_l in data_loader_check:
                dev_x = to_device(Variable(dev_x))
                dev_l = to_device(Variable(dev_l))

                dev_out = model(dev_x)
                losses.append(dev_criterion(dev_out, dev_l).item())
                fers.append(
                    compute_fer(dev_out.cpu().data.numpy(),
                                dev_l.cpu().data.numpy()))
        return losses, fers

    # Compute initial performance on dev set
    val_losses, val_fer = score_dev()
    ep_loss_dev.append(np.mean(val_losses))
    ep_fer_dev.append(np.mean(val_fer))

    print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
        np.mean(val_losses), np.mean(val_fer))

    logging.info(print_log)
    unsup_up = True
    cc = 0
    # Kept across epochs: once unsupervised updates stop, the epoch log keeps
    # reporting the AE loss of the last unsupervised pass.
    ae_loss = []

    for epoch in range(config.epochs):

        if unsup_up:
            # First lets do an unsupervised update with RNN-AE
            posts = []  # padded per-utterance outputs of the current batch
            lens = []   # matching (clipped) utterance lengths
            ae_loss = []
            model.train()
            for utt_id, mat in kaldi_io.read_mat_ark(cmd):

                post = model(to_device(Variable(
                    torch.FloatTensor(mat)))) - mean_t

                lens.append(min(post.shape[0], config.max_seq_len))
                # A negative pad amount crops utterances longer than
                # max_seq_len.
                posts.append(
                    F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0))))
                sys.stdout.flush()

                if len(posts) != config.batch_size:
                    continue

                #### DO THE ADAPTATION

                lens_t = torch.IntTensor(lens)
                _, indices = torch.sort(lens_t, descending=True)
                batch_x = torch.stack(posts, 0)[indices]
                batch_l = lens_t[indices]
                if config.time_shift == 0:
                    outputs = ae_model(batch_x, batch_l)
                    target = batch_x
                else:
                    # Feed frames [0, T - shift) and compare the output with
                    # frames [shift, T).
                    outputs = ae_model(batch_x[:, :-config.time_shift, :],
                                       batch_l - config.time_shift)
                    target = batch_x[:, config.time_shift:, :]

                optimizer.zero_grad()

                loss = criterion(outputs, target)
                ae_loss.append(loss.item() /
                               (config.max_seq_len * config.batch_size))

                loss.backward()
                optimizer.step()

                posts = []
                lens = []

            logging.info('Finished unsupervised update of nnet')
        else:
            logging.info('Skipping unsupervised update of nnet')

        # Check if any utterance has a good RNN-AE score

        selected = []  # one (frames, features + label) tensor per utterance
        new_utt_count = 0
        # Scoring never back-propagates, so skip graph construction.
        with torch.no_grad():
            for utt_id, mat in kaldi_io.read_mat_ark(cmd):

                # One forward pass serves both the AE score and the labels.
                logits = model(to_device(Variable(torch.FloatTensor(mat))))
                post = (logits - mean_t)[None, :, :]

                # Fixed: the length used to be a Python list, so subtracting
                # the time shift raised TypeError. Use a tensor, as the
                # batched update above does.
                utt_len = torch.IntTensor([post.shape[1]])
                if config.time_shift == 0:
                    outputs = ae_model(post, utt_len)
                    target = post
                else:
                    outputs = ae_model(post[:, :-config.time_shift, :],
                                       utt_len - config.time_shift)
                    target = post[:, config.time_shift:, :]

                loss = criterion(outputs, target).item() / config.max_seq_len

                # Add the utterance for supervised update
                if loss < config.score_threshold:
                    new_utt_count += 1
                    labs = np.argmax(logits.cpu().data.numpy(), axis=1)
                    add_egs = np.hstack((mat, labs[:, np.newaxis]))
                    selected.append(torch.FloatTensor(add_egs))

        logging.info(
            'Added {:d} utterances from new domain to training set'.format(
                new_utt_count))

        ##  Update with these new utterances

        if new_utt_count == 0:
            logging.info(
                'No supervised updates with zero utterances, skipping to next epoch... '
            )
        else:
            cc += 1
            if cc == 20:
                # Relax the selection threshold by 10% every 20 such epochs.
                config.score_threshold = config.score_threshold * 1.1
                cc = 0

            unsup_up = False
            # Concatenate once instead of growing the tensor per utterance.
            new_egs = torch.cat(selected)
            train_data = new_egs[:, 0:-1]
            train_labels = new_egs[:, -1].long()
            train_set = nnetDataset(train_data, train_labels)

            data_loader = torch.utils.data.DataLoader(train_set,
                                                      batch_size=5000,
                                                      shuffle=True)
            model.train()
            train_losses = []
            tr_fer = []

            for batch_x, batch_l in data_loader:
                batch_x = to_device(Variable(batch_x))
                batch_l = to_device(Variable(batch_l))

                batch_x = model(batch_x)
                optimizer.zero_grad()
                loss = dev_criterion(batch_x, batch_l)
                train_losses.append(loss.item())
                tr_fer.append(
                    compute_fer(batch_x.cpu().data.numpy(),
                                batch_l.cpu().data.numpy()))

                loss.backward()
                optimizer.step()

            # These statistics were collected but never reported before.
            logging.info(
                'Supervised update Tr loss: {:.3f} :: Tr FER: {:.2f}'.format(
                    np.mean(train_losses), np.mean(tr_fer)))

        ## CHECK IF ADAPTATION IS WORKING AT ALL

        model.eval()
        val_losses, val_fer = score_dev()

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} AE Loss: {:.3f} update, Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
            epoch, np.mean(ae_loss), np.mean(val_losses), np.mean(val_fer))

        logging.info(print_log)

        # Passing a path lets torch.save open and close the file itself.
        torch.save(
            ep_loss_dev,
            os.path.join(model_dir, "dev_epoch{:d}.loss".format(epoch + 1)))
        torch.save(
            ep_fer_dev,
            os.path.join(model_dir, "dev_epoch{:d}.fer".format(epoch + 1)))

        # Rescale the learning rate using config.lr_factor
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
示例#3
0
def update(config):
    """Adapt a feed-forward network with several frozen RNN autoencoders.

    The classifier is loaded from ``config.model``. One performance
    monitoring autoencoder is loaded per path in ``config.pms`` and one mean
    vector per path in ``config.cmvns``; both lists must have the same
    length. Autoencoder 0 is applied to the second element of the classifier
    output (``out[1]``); autoencoder ``idx >= 1`` is applied to
    ``out[0][idx]``. In every epoch, each time ``config.batch_size``
    utterances have been collected the classifier is updated once per
    autoencoder on that autoencoder's MSE. The dev set in ``config.dev_egs``
    is scored after each epoch and the histories are saved under
    ``<store_path>/<experiment_name>.dir``.

    Args:
        config: parsed options. Attributes read here: ``model``, ``pms``,
            ``cmvns``, ``scp``, ``egs_config``, ``dev_egs``, ``store_path``,
            ``experiment_name``, ``use_gpu``, ``optimizer``,
            ``learning_rate``, ``lr_factor``, ``batch_size``, ``epochs``,
            ``max_seq_len`` and ``time_shift``.

    Raises:
        NotImplementedError: if ``config.optimizer`` is not one of
            ``adam``, ``adadelta``, ``sgd``, ``adagrad`` or ``rmsprop``.
        SystemExit: if the numbers of cmvn and model paths differ.
    """

    def to_device(tensor):
        """Return ``tensor`` on the GPU when ``config.use_gpu`` is set."""
        return tensor.cuda() if config.use_gpu else tensor

    # Load model
    # NOTE: torch.load / pickle.load unpickle their input; only point them at
    # trusted files.
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'], nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    # Fixed: this line used to print feature_dim under the hidden-dim label.
    logging.info('Hidden Dimension: %d' % (nnet['hidden_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.MSELoss()
    dev_criterion = nn.CrossEntropyLoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    lr = config.learning_rate
    # Figure out all feature stuff

    # Writes a shuffled copy of the scp list to ./temp (current directory).
    shell_cmd = "cat {:s} | shuf > temp".format(config.scp)
    subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)

    with open(config.egs_config, 'rb') as egs_file:
        feats_config = pickle.load(egs_file)

    # Default to "no transform" so an empty feat_type no longer leaves these
    # names unbound.
    feat_type = None
    trans_path = None
    if feats_config['feat_type']:
        feat_parts = feats_config['feat_type'].split(',')
        feat_type = feat_parts[0]
        trans_path = feat_parts[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, 'temp')
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, 'temp')
    else:
        # 'temp' is an scp list, not an ark; convert it to an ark stream so
        # it can be read by read_mat_ark and piped into splice-feats below.
        cmd = "copy-feats scp:{} ark:- |".format('temp')

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])

    # Load performance monitoring models
    pm_paths = config.pms.split(',')

    pm_models = []
    for path in pm_paths:

        pm_model = torch.load(path, map_location=lambda storage, loc: storage)
        ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'], pm_model['bn_dim'],
                                  pm_model['encoder_num_layers'], pm_model['decoder_num_layers'],
                                  pm_model['hidden_dim'])
        ae_model.load_state_dict(pm_model['model_state_dict'])

        if config.use_gpu:
            ae_model.cuda()

        # Do not update performance monitoring block
        for p in ae_model.parameters():
            p.requires_grad = False

        pm_models.append(ae_model)

    cmvn_paths = config.cmvns.split(',')
    means = []
    for path in cmvn_paths:
        mean, _ = get_cmvn(path)
        means.append(mean)

    if len(cmvn_paths) != len(pm_paths):
        logging.error("Number of cmvn paths not equal to number of model paths, exiting training!")
        sys.exit(1)
    num_pm_models = len(pm_paths)

    # Built once instead of once per utterance.
    mean_ts = [to_device(Variable(torch.FloatTensor(m))) for m in means]

    ep_loss_dev = []
    ep_fer_dev = []

    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=50000, shuffle=True)

    def score_dev():
        """Return per-batch (losses, FERs) of the current model on the dev set."""
        losses = []
        fers = []
        # No gradients are needed for scoring; avoids building graphs.
        with torch.no_grad():
            for dev_x, dev_l in data_loader:
                dev_x = to_device(Variable(dev_x))
                dev_l = to_device(Variable(dev_l))

                _, dev_out = model(dev_x)
                losses.append(dev_criterion(dev_out, dev_l).item())
                fers.append(compute_fer(dev_out.cpu().data.numpy(), dev_l.cpu().data.numpy()))
        return losses, fers

    # Compute initial performance on dev set
    val_losses, val_fer = score_dev()
    ep_loss_dev.append(np.mean(val_losses))
    ep_fer_dev.append(np.mean(val_fer))

    print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
        np.mean(val_losses),
        np.mean(val_fer))

    logging.info(print_log)

    for epoch in range(config.epochs):

        # batches[idx] holds the padded inputs for autoencoder idx
        batches = [[] for _ in range(num_pm_models)]
        lens = []
        tr_losses = [[] for _ in range(num_pm_models)]

        # I want to dump all the posteriors first

        for utt_id, mat in kaldi_io.read_mat_ark(cmd):

            out = model(to_device(Variable(torch.FloatTensor(mat))))

            for idx in range(num_pm_models):
                # Autoencoder 0 monitors the second output element, the
                # others monitor the idx-th entry of the first element.
                layer_out = out[1] if idx == 0 else out[0][idx]
                post = layer_out - mean_ts[idx]
                # A negative pad amount crops utterances longer than
                # max_seq_len.
                batches[idx].append(F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0))))

            # Fixed: the length used to be read after padding, so it was
            # always max_seq_len. Record the real (clipped) frame count.
            lens.append(min(out[1].size(0), config.max_seq_len))
            sys.stdout.flush()

            if len(lens) != config.batch_size:
                continue

            ## DO THE ADAPTATION

            lens_t = torch.IntTensor(lens)
            _, indices = torch.sort(lens_t, descending=True)
            batch_l = lens_t[indices]

            # NOTE(review): the optimizer is stepped between backward passes
            # that share a retained graph; recent PyTorch versions may reject
            # this as an in-place modification -- verify on the target version.
            for idx, ae_model in enumerate(pm_models):
                batch_x = torch.stack(batches[idx], 0)[indices]
                logging.debug('batch %s, lengths %s', batch_x.size(), batch_l.size())

                if config.time_shift == 0:
                    outputs = ae_model(batch_x, batch_l)
                    target = batch_x
                else:
                    # Feed frames [0, T - shift) and compare the output with
                    # frames [shift, T).
                    outputs = ae_model(batch_x[:, :-config.time_shift, :], batch_l - config.time_shift)
                    target = batch_x[:, config.time_shift:, :]

                optimizer.zero_grad()

                loss = criterion(outputs, target)
                tr_losses[idx].append(loss.item() / (config.max_seq_len * config.batch_size))
                if idx < num_pm_models - 1:
                    # The classifier graph is shared by the remaining losses.
                    loss.backward(retain_graph=True)
                else:
                    loss.backward()
                optimizer.step()

            batches = [[] for _ in range(num_pm_models)]
            lens = []

        logging.info("Finished unsupervised adaptation for epoch {:d} with multi-layer RNN-AE Loss".format(epoch))

        # CHECK IF ADAPTATION IS WORKING AT ALL

        val_losses, val_fer = score_dev()

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} update ".format(epoch)
        for idx in range(num_pm_models):
            print_log = print_log + "Tr loss layer {:d} = {:.3f} | ".format(idx, np.mean(tr_losses[idx]))

        print_log = print_log + "Dev loss: {:.3f} | Dev FER: {:.2f}".format(np.mean(val_losses), np.mean(val_fer))

        logging.info(print_log)

        # Passing a path lets torch.save open and close the file itself.
        torch.save(ep_loss_dev, os.path.join(model_dir, "dev_epoch{:d}.loss".format(epoch + 1)))
        torch.save(ep_fer_dev, os.path.join(model_dir, "dev_epoch{:d}.fer".format(epoch + 1)))

        # Rescale the learning rate using config.lr_factor
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
示例#4
0
def _pm_to_device(tensor, use_gpu):
    """Return *tensor* moved to the GPU when *use_gpu* is set, else unchanged."""
    return tensor.cuda() if use_gpu else tensor


def _pm_feature_command(config, feats_config):
    """Build the feature source string handed to ``kaldi_io.read_mat_ark``.

    Args:
        config: Run configuration; ``scp`` and ``override_trans_path`` are read.
        feats_config: Mapping with ``'feat_type'`` (``"<type>,<transform path>"``
            or a falsy value) and ``'concat_feats'`` (``"<left>,<right>"`` or a
            falsy value).

    Returns:
        The command pipeline (or the bare scp path when no known feature
        transform is requested).
    """
    # Bind both names up front: the original left them undefined (NameError)
    # whenever feats_config['feat_type'] was empty.
    feat_type = None
    trans_path = None
    if feats_config['feat_type']:
        type_fields = feats_config['feat_type'].split(',')
        feat_type = type_fields[0]
        trans_path = type_fields[1]

    if config.override_trans_path is not None:
        trans_path = config.override_trans_path

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = config.scp

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    return cmd


def _pm_utterance_score(outputs, batch_x, criterion, ts_list, max_ts,
                        neg_weight, use_gpu):
    """Compute one performance-monitoring score for a single utterance.

    With time shifts, the score is the mean over frames of
    ``positive_loss / (neg_weight * average_negative_loss)``, where the
    negative loss compares the reconstruction with the input shifted by
    ``+t`` and ``-t`` frames for every ``t`` in *ts_list*. ``max_ts`` frames
    at the start and ``max_ts + 1`` frames at the end are trimmed so every
    shifted window stays inside the utterance.

    Without time shifts (``max_ts`` falsy) there is no negative term, so the
    score falls back to the mean positive loss.

    Args:
        outputs: Autoencoder reconstruction, indexed as (batch, time, dim).
        batch_x: Autoencoder input with the same layout as *outputs*.
        criterion: Loss callable; presumably returns one value per
            (batch, time) position -- TODO confirm against samplewise_mse /
            samplewise_abs.
        ts_list: Time shifts in frames (may be empty).
        max_ts: ``max(ts_list)``, or a falsy value when there are no shifts.
        neg_weight: Multiplier applied to the averaged negative loss.
        use_gpu: Whether the accumulator must live on the GPU.

    Returns:
        The score as a Python float.
    """
    if not max_ts:
        # The original crashed here with a TypeError (slicing with None);
        # score with the plain reconstruction loss instead.
        return criterion(outputs, batch_x).mean().item()

    start, stop = max_ts, -max_ts - 1
    trimmed = outputs[:, start:stop, :]

    # First positive loss
    mse_pos = criterion(trimmed, batch_x[:, start:stop, :])

    # Now find negative loss
    mse_neg = _pm_to_device(torch.zeros(trimmed.size(0), trimmed.size(1)),
                            use_gpu)
    for t in ts_list:
        mse_neg += criterion(trimmed, batch_x[:, start + t:stop + t, :])
        mse_neg += criterion(trimmed, batch_x[:, start - t:stop - t, :])

    # Two negative terms (+t and -t) are accumulated per time shift.
    mse_neg = (mse_neg * neg_weight) / (2 * len(ts_list))
    return (mse_pos / mse_neg).mean().item()


def update(config):
    """Score every utterance with the performance-monitoring (PM) autoencoders.

    Loads the feed-forward network from ``config.model`` and one RNN
    autoencoder per path in ``config.pms``. For each utterance read from the
    feature pipeline, the network outputs are mean-normalised with the
    matching entry of ``config.cmvns``, passed through the corresponding
    autoencoder and turned into a per-utterance score. The result, a dict
    ``{pm_index: {utt_id: score}}``, is pickled to ``config.out_file``.

    Args:
        config: Run configuration. Attributes read: ``model``, ``use_gpu``,
            ``log_file``, ``neg_weight``, ``time_shifts``, ``loss``, ``scp``,
            ``egs_config``, ``override_trans_path``, ``pms``, ``cmvns`` and
            ``out_file``.

    Raises:
        SystemExit: If ``config.loss`` is neither ``"MSE"`` nor ``"L1"``, or if
            the number of CMVN paths differs from the number of PM models.
    """
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=config.log_file,
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d', nnet['num_layers'])
    # The original logged feature_dim under this label.
    logging.info('Hidden Dimension: %d', nnet['hidden_dim'])
    logging.info('Number of Classes: %d', nnet['num_classes'])
    logging.info('Data dimension: %d', nnet['feature_dim'])
    logging.info('Number of Frames: %d', nnet['num_frames'])
    logging.info('Negative loss weight: %f', config.neg_weight)
    logging.info('Contrastive time shifts: %s', config.time_shifts)

    if config.loss == "MSE":
        criterion = samplewise_mse
    elif config.loss == "L1":
        criterion = samplewise_abs
    else:
        logging.error('Loss function {:s} is not supported'.format(config.loss))
        sys.exit(1)

    if config.time_shifts:
        ts_list = [int(t) for t in config.time_shifts.split(',')]
        max_ts = max(ts_list)
    else:
        ts_list = []
        max_ts = 0

    # Figure out all feature stuff
    # NOTE: pickle executes arbitrary code on load; egs_config must come from
    # a trusted source.
    with open(config.egs_config, 'rb') as config_file:
        feats_config = pickle.load(config_file)
    cmd = _pm_feature_command(config, feats_config)

    # Validate the model/CMVN pairing before loading anything heavy.
    pm_paths = config.pms.split(',')
    cmvn_paths = config.cmvns.split(',')
    if len(cmvn_paths) != len(pm_paths):
        logging.error(
            "Number of cmvn paths not equal to number of model paths, exiting training!"
        )
        sys.exit(1)
    num_pm_models = len(pm_paths)

    # Load performance monitoring models
    pm_models = []
    for path in pm_paths:
        pm_model = torch.load(path, map_location=lambda storage, loc: storage)
        ae_model = autoencoderRNN(pm_model['feature_dim'],
                                  pm_model['feature_dim'], pm_model['bn_dim'],
                                  pm_model['encoder_num_layers'],
                                  pm_model['decoder_num_layers'],
                                  pm_model['hidden_dim'])
        ae_model.load_state_dict(pm_model['model_state_dict'])

        if config.use_gpu:
            ae_model.cuda()

        # Do not update performance monitoring block
        for p in ae_model.parameters():
            p.requires_grad = False

        pm_models.append(ae_model)

    # The means do not change between utterances, so convert them once.
    means = [
        _pm_to_device(torch.FloatTensor(get_cmvn(path)[0]), config.use_gpu)
        for path in cmvn_paths
    ]

    pm_scores = {idx: {} for idx in range(num_pm_models)}

    # Pure scoring pass: no gradients are needed, so skip graph construction.
    with torch.no_grad():
        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            lens = torch.IntTensor([mat.shape[0]])
            out = model(_pm_to_device(torch.FloatTensor(mat), config.use_gpu))

            # PM model 0 consumes out[1]; PM model idx >= 1 consumes the
            # idx-th entry from the end of out[0].
            batches = [out[1] - means[0]]
            for idx in range(1, num_pm_models):
                batches.append(out[0][-idx] - means[idx])

            ## Get the PM scores
            for idx, (ae_model, batch_x) in enumerate(zip(pm_models, batches)):
                batch_x = batch_x[None, :, :]  # add a leading batch axis of 1
                outputs = ae_model(batch_x, lens)
                pm_scores[idx][utt_id] = _pm_utterance_score(
                    outputs, batch_x, criterion, ts_list, max_ts,
                    config.neg_weight, config.use_gpu)

    with open(config.out_file, "wb") as out_file:
        pickle.dump(pm_scores, out_file)