Example #1
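This first example, like the rest of the snippets on this page, assumes a shared set of imports from a PyTorch/Kaldi hybrid ASR codebase. A minimal sketch is given below; project-local helpers (nnetRNN, nnetVAE, nnetCurlSupervised, nnetARVAE, nnetVAECNNNopool, nnetAEClassifierMultitask, nnetVAEClassifier, VAEEncodedClassifier, nnetDatasetSeq, pad2list, compute_fer, compute_latent_features, vae_loss, mmeasure_loss, compute_lhood, powerset, softmax, get_device_id) come from that codebase and are not reproduced here.

import os
import sys
import logging
import pickle
import pickle as pkl  # Example #8 uses the pkl alias

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable  # legacy wrapper; a no-op in modern PyTorch

import kaldi_io  # https://github.com/vesis84/kaldi-io-for-python
from scipy.stats import entropy  # assumed source of entropy() in Example #3
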
def run(config):
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load VAE model and define classifier
    curl = torch.load(config.curl_model,
                      map_location=lambda storage, loc: storage)
    curl_model = nnetCurlSupervised(curl['feature_dim'] * curl['num_frames'],
                                    curl['encoder_num_layers'],
                                    curl['decoder_num_layers'],
                                    curl['hidden_dim'], curl['bn_dim'],
                                    curl['comp_num'], config.use_gpu)
    curl_model.load_state_dict(curl["model_state_dict"])
    curl_model.eval()  # the CURL network is used only as a fixed feature extractor
    #curl_sampler = curlLatentSampler(config.use_gpu)

    model = nnetRNN(curl['bn_dim'], config.num_layers, config.hidden_dim,
                    config.num_classes, 0)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (config.num_layers))
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Number of Classes: %d' % (config.num_classes))
    logging.info('Data dimension: %d' % (curl['feature_dim']))
    logging.info('Number of Frames: %d' % (curl['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Dropout: %f ' % (config.dropout))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))
    sys.stdout.flush()

    if config.use_gpu:
        # Set environment variable for GPU ID
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = str(device_id)

        model = model.cuda()
        curl_model = curl_model.cuda()

    criterion = nn.CrossEntropyLoss()

    lr = config.learning_rate
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Load datasets
    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)

    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    torch.save(
        {
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, model_path)

    ep_loss_tr = []
    ep_fer_tr = []
    ep_loss_dev = []
    ep_fer_dev = []
    err_p = 0
    best_model_state = None
    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_losses = []
        tr_fer = []
        # Main training loop
        for batch_x, batch_l, lab in data_loader_train:
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            # First get CURL embeddings
            _, latent = curl_model(batch_x, batch_l)
            batch_x = compute_latent_features(latent)
            optimizer.zero_grad()

            # Main forward pass
            class_out = model(batch_x, batch_l)
            class_out = pad2list(class_out, batch_l)
            lab = pad2list(lab, batch_l)

            loss = criterion(class_out, lab)

            train_losses.append(loss.item())
            if config.use_gpu:
                tr_fer.append(
                    compute_fer(class_out.cpu().data.numpy(),
                                lab.cpu().data.numpy()))
            else:
                tr_fer.append(
                    compute_fer(class_out.data.numpy(), lab.data.numpy()))

            loss.backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       config.clip_thresh)
            optimizer.step()

        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))

        ######################
        ##### Validation #####
        ######################

        model.eval()
        val_losses = []
        val_fer = []
        # Main validation loop (no gradients are needed here)
        with torch.no_grad():
            for batch_x, batch_l, lab in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                    lab = Variable(lab[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])
                    lab = Variable(lab[indices])

                # First get CURL embeddings
                _, latent = curl_model(batch_x, batch_l)
                batch_x = compute_latent_features(latent)

                # Main forward pass
                class_out = model(batch_x, batch_l)
                class_out = pad2list(class_out, batch_l)
                lab = pad2list(lab, batch_l)

                loss = criterion(class_out, lab)

                val_losses.append(loss.item())
                if config.use_gpu:
                    val_fer.append(
                        compute_fer(class_out.cpu().data.numpy(),
                                    lab.cpu().data.numpy()))
                else:
                    val_fer.append(
                        compute_fer(class_out.data.numpy(),
                                    lab.data.numpy()))
        # Manage learning rate and revert model
        if epoch_i == 0:
            err_p = np.mean(val_losses)
            best_model_state = model.state_dict()
        else:
            if np.mean(val_losses) > (100 - config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}"
                    .format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = np.mean(val_losses)
                best_model_state = model.state_dict()

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr loss: {:.3f} :: Tr FER: {:.2f}".format(
            epoch_i + 1, lr, ep_loss_tr[-1], ep_fer_tr[-1])
        print_log += " || Val: {:.3f} :: Val FER: {:.2f}".format(
            ep_loss_dev[-1], ep_fer_dev[-1])
        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            torch.save(
                {
                    'epoch': epoch_i + 1,
                    'vaeenc': config.curl_model,
                    'feature_dim': curl['feature_dim'],
                    'num_frames': curl['num_frames'],
                    'num_classes': config.num_classes,
                    'num_layers': config.num_layers,
                    'hidden_dim': config.hidden_dim,
                    'ep_loss_tr': ep_loss_tr,
                    'ep_loss_dev': ep_loss_dev,
                    'dropout': config.dropout,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, model_path)
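run(config) takes a flat configuration object whose attributes are defined by the project's argument parser. As an illustrative sketch only (attribute names are taken from the code above; every value is a placeholder):

from types import SimpleNamespace

config = SimpleNamespace(
    store_path='exp', experiment_name='curl_rnn',
    curl_model='exp/curl.model',  # placeholder path to a trained CURL checkpoint
    num_layers=3, hidden_dim=512, num_classes=42, dropout=0.0,
    optimizer='adam', batch_size=32, learning_rate=1e-3, weight_decay=0.0,
    lrr=0.5, lr_tol=1.0, clip_thresh=5.0,
    egs_dir='egs', train_set='train', dev_set='dev',
    epochs=20, model_save_interval=5, use_gpu=False)
run(config)
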
Example #2
def get_output(config):
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    if config.ae_type == "normal":
        model = nnetAEClassifierMultitask(
            nnet['feature_dim'] * nnet['num_frames'], nnet['num_classes'],
            nnet['encoder_num_layers'], nnet['classifier_num_layers'],
            nnet['ae_num_layers'], nnet['hidden_dim'], nnet['bn_dim'],
            nnet['enc_dropout'])
    elif config.ae_type == "vae":
        model = nnetVAEClassifier(nnet['feature_dim'] * nnet['num_frames'],
                                  nnet['num_classes'],
                                  nnet['encoder_num_layers'],
                                  nnet['classifier_num_layers'],
                                  nnet['ae_num_layers'],
                                  nnet['hidden_dim'],
                                  nnet['bn_dim'],
                                  nnet['enc_dropout'],
                                  use_gpu=False)
    elif config.ae_type == "noae":
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'],
                        nnet['num_layers'], nnet['hidden_dim'],
                        nnet['num_classes'], nnet['dropout'])
    elif config.ae_type == "vaeenc":
        nnet['vaeenc'] = "exp_hybrid/hybrid_lll/nnet_vae_enc2l_dec2l_300nodes/exp_1.dir/exp_1__epoch_160.model"
        vae = torch.load(nnet['vaeenc'],
                         map_location=lambda storage, loc: storage)
        vae_model = nnetVAE(vae['feature_dim'] * vae['num_frames'],
                            vae['encoder_num_layers'],
                            vae['decoder_num_layers'], vae['hidden_dim'],
                            vae['bn_dim'], 0, False)
        model = VAEEncodedClassifier(vae_model, vae['bn_dim'],
                                     nnet['num_layers'], nnet['hidden_dim'],
                                     nnet['num_classes'])
    else:
        print("Model type {} not supported!".format(config.ae_type))
        sys.exit(1)

    model.load_state_dict(nnet['model_state_dict'])
    feats_config = pickle.load(open(config.egs_config, 'rb'))

    lsm = torch.nn.LogSoftmax(1)
    sm = torch.nn.Softmax(1)

    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(
            trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])
    if config.prior:
        prior = pickle.load(open(config.prior, 'rb'))

    post_dict = {}
    model.eval()
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        if config.ae_type == "normal":
            out, _ = model(mat, batch_l)
        elif config.ae_type == "vae":
            out, _, _ = model(mat, batch_l)
        elif config.ae_type == "noae":
            out = model(mat, batch_l)
        elif config.ae_type == "vaeenc":
            out = model(mat, batch_l)

        if config.prior:
            post_dict[utt_id] = lsm(
                out[0, :, :]).data.numpy() - config.prior_weight * prior
        else:
            if config.add_softmax:
                post_dict[utt_id] = sm(out[0, :, :]).data.numpy()
            else:
                post_dict[utt_id] = out[0, :, :].data.numpy()

    return post_dict
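get_output() returns a dictionary mapping utterance IDs to frame-by-class score matrices. With the kaldi_io package these can be written back to a Kaldi archive, for example (a sketch; 'post.ark' is a placeholder path):

post_dict = get_output(config)
with open('post.ark', 'wb') as f:
    for utt_id, mat in post_dict.items():
        # write each utterance's matrix under its ID
        kaldi_io.write_mat(f, mat, key=utt_id)
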
Example #3
def get_output(config):
    # Load all P(x) and P(c|x) models
    model_pcx = config.models_pcx.split(',')
    model_px = config.models_px.split(',')

    if len(model_pcx) != len(model_px):
        print("Number of p(x) models and p(c|x) models are not the same!")
    num_domains = len(model_px)

    all_pcx_models = []
    all_px_models = []

    for idx, m in enumerate(model_pcx):
        nnet = torch.load(model_pcx[idx], map_location=lambda storage, loc: storage)
        vae = torch.load(model_px[idx], map_location=lambda storage, loc: storage)
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'],
                        nnet['num_layers'],
                        nnet['hidden_dim'],
                        nnet['num_classes'], nnet['dropout'])
        model.load_state_dict(nnet['model_state_dict'])
        all_pcx_models.append(model)
        model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                        vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0, False)
        model.load_state_dict(vae['model_state_dict'])
        all_px_models.append(model)
        num_classes = nnet['num_classes']

    feats_config = pickle.load(open(config.egs_config, 'rb'))
    sm = torch.nn.Softmax(1)

    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])

    # Load prior
    priors = config.priors.split(',')
    priors = [pickle.load(open(f, 'rb')) for f in priors]

    if config.task_prior == "mm":
        print("using mm-measure based task priors")
    elif config.task_prior == "dp":
        print("using data based task priors")
    elif config.task_prior == "lowent":
        print("Using low-entropy prior")
    else:
        task_prior = config.task_prior.split(',')
        task_prior = [float(tp) for tp in task_prior]

    post_dict = {}
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        post = np.zeros((mat.shape[0], num_classes))
        prior_acc = np.zeros(num_classes)
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        px_save = []
        all_pcx = []
        all_px = []
        all_tp = []
        all_tp_2 = []
        for idx, model in enumerate(all_pcx_models):
            model.eval()
            out = model(mat, batch_l)
            ae_out, latent_out = all_px_models[idx](mat, batch_l)
            latent_out = (latent_out[0][0, :, :], latent_out[1][0, :, :])
            px = (vae_loss(mat[0, :, :], ae_out[0, :, :], latent_out).data.numpy())
            px_save.append(np.mean(px))
            pcx = sm(out[0, :, :])
            px = np.tile(px, (pcx.shape[1], 1)).T
            all_pcx.append(pcx.data.numpy())
            all_px.append(np.ones(px.shape))

            if config.task_prior == "mm":
                mm = mmeasure_loss(pcx).item()
                all_tp.append(mm)
                print("task {:d} , mm={:.2f}".format(idx, mm))
            elif config.task_prior == "dp":
                all_tp.append(px_save[idx])
            elif config.task_prior == "lowent":
                mm = mmeasure_loss(pcx).item()
                all_tp.append(mm)
                all_tp_2.append(px_save[idx])
            else:
                all_tp.append(task_prior[idx])

        if config.task_prior == "mm":
            all_tp = np.asarray(all_tp, dtype=np.float64)
            if config.stream_selection:
                temp = np.zeros(all_tp.shape)
                temp[np.argmax(all_tp)] = 1
                all_tp = temp
            else:
                all_tp = np.exp(all_tp) / np.sum(np.exp(all_tp))
            if np.isnan(all_tp[0]):
                print("Switching to uniform priors")
                all_tp = np.ones(num_domains) / num_domains
        elif config.task_prior == "dp":
            all_tp = np.asarray(all_tp, dtype=np.float64)
            if config.stream_selection:
                temp = np.zeros(all_tp.shape)
                temp[np.argmax(all_tp)] = 1
                all_tp = temp
            else:
                all_tp = np.exp(300 * all_tp) / np.sum(np.exp(300 * all_tp))
        elif config.task_prior == "lowent":
            all_tp = np.asarray(all_tp)
            all_tp = np.exp(all_tp) / np.sum(np.exp(all_tp))
            if np.isnan(all_tp[0]):
                print("Switching to uniform priors")
                all_tp = np.ones(num_domains) / num_domains
            all_tp_2 = np.asarray(all_tp_2)
            all_tp_2 = np.exp(300 * all_tp_2) / np.sum(np.exp(300 * all_tp_2))
            print('Entropy dp:{:.2f} and Entropy mm:{:.2f}'.format(entropy(all_tp_2), entropy(all_tp)))
            if entropy(all_tp_2) < entropy(all_tp):
                all_tp = all_tp_2
        for idx, pcx in enumerate(all_pcx):
            post += pcx * all_px[idx] * all_tp[idx]
            prior_acc += np.exp(priors[idx]) * all_tp[idx]

        print_log = ""
        for ii, x in enumerate(px_save):
            print_log += "p(x) for Task {:d} ={:.6f} with prior ={:.6f} ".format(ii, x, all_tp[ii])
        print(print_log)
        sys.stdout.flush()
        post_dict[utt_id] = np.log(post) - config.prior_weight * np.log(prior_acc)

    return post_dict
Example #4
def get_output(config):
    # Load all P(x) and P(c|x) models
    model_pcx = config.models_pcx.split(',')
    model_px = config.models_px.split(',')

    if len(model_pcx) != len(model_px):
        print("Number of p(x) models and p(c|x) models are not the same!")
    num_domains = len(model_px)
    streams = powerset(list(np.arange(num_domains)))

    all_pcx_models = []
    all_px_models = []

    for idx, m in enumerate(model_pcx):
        nnet = torch.load(model_pcx[idx], map_location=lambda storage, loc: storage)
        vae = torch.load(model_px[idx], map_location=lambda storage, loc: storage)
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'],
                        nnet['num_layers'],
                        nnet['hidden_dim'],
                        nnet['num_classes'], nnet['dropout'])
        model.load_state_dict(nnet['model_state_dict'])
        all_pcx_models.append(model)
        model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                        vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0, False)
        model.load_state_dict(vae['model_state_dict'])
        all_px_models.append(model)
        num_classes = nnet['num_classes']

    feats_config = pickle.load(open(config.egs_config, 'rb'))

    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])

    # Load prior
    priors = config.priors.split(',')
    priors = [pickle.load(open(f, 'rb')) for f in priors]

    all_pcx_models = nn.ModuleList(all_pcx_models)
    all_px_models = nn.ModuleList(all_px_models)

    if config.use_gpu:
        # Set the GPU ID before the first CUDA call; changing
        # CUDA_VISIBLE_DEVICES after CUDA has been initialized has no effect
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = str(device_id)
        priors = [torch.from_numpy(f).cuda().double() for f in priors]
        all_pcx_models.cuda()
        all_px_models.cuda()
    else:
        priors = [torch.from_numpy(f).double() for f in priors]

    if config.task_prior == "dp":
        print("using data based task priors")
        task_prior = "dp"
    else:
        task_prior = config.task_prior.split(',')
        task_prior = [float(tp) for tp in task_prior]

    post_dict = {}
    for utt_id, batch_x in kaldi_io.read_mat_ark(cmd):
        print("COMPUTING LOG-LIKELIHOOD FOR UTTERANCE {:s}".format(utt_id))
        sys.stdout.flush()

        T = torch.DoubleTensor([300])  # Initial temperature
        T.requires_grad = True
        num_frames = batch_x.shape[0]

        batch_x = Variable(torch.FloatTensor(batch_x))[None, :, :]
        batch_l = Variable(torch.IntTensor([batch_x.size(1)]))

        # Do forward passes through different models

        sm = torch.nn.Softmax(1)
        px_save = []
        all_pcx = []
        all_tp = torch.zeros(len(all_pcx_models), dtype=torch.double)
        for idx, model in enumerate(all_pcx_models):
            model.eval()
            out = model(batch_x, batch_l)
            ae_out, latent_out = all_px_models[idx](batch_x, batch_l)
            latent_out = (latent_out[0][0, :, :], latent_out[1][0, :, :])
            px = torch.exp(vae_loss(batch_x[0, :, :], ae_out[0, :, :], latent_out)).double()
            px_save.append(torch.mean(px))
            pcx = sm(out[0, :, :])
            all_pcx.append(pcx.double())

            if task_prior == "dp":
                all_tp[idx] = px_save[idx]
            else:
                all_tp[idx] = task_prior[idx]

        for it_num in range(config.num_iter):
            llh = compute_lhood(num_frames, num_classes, all_pcx, all_tp, priors, task_prior, streams, T)

            loss = -torch.mean(llh)
            print_log = "p(x|c) ={:.6f} with softmax temperature ={:.6f} ".format(loss.item(), T.item())
            print(print_log)
            sys.stdout.flush()
            # Gradient-based temperature updates are disabled; a fixed
            # temperature increment is used instead
            # loss.backward(retain_graph=True)
            # with torch.no_grad():
            #     T = T + config.lr_rate * T.grad / torch.norm(T.grad, 2)
            # T.requires_grad = True
            T = T + 100
        if config.use_gpu:
            post_dict[utt_id] = llh.cpu().data.numpy()
        else:
            post_dict[utt_id] = llh.data.numpy()

    return post_dict
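This variant (and Example #6 below) scores combinations of streams produced by powerset(), a helper that is not shown in the snippets. A plausible itertools-based sketch, where both the implementation and the choice to exclude the empty subset are assumptions:

from itertools import combinations

def powerset(items):
    # All non-empty subsets of the given stream indices (assumed behavior)
    return [list(c) for r in range(1, len(items) + 1)
            for c in combinations(items, r)]
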
Example #5
def get_output(config):
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    vae = torch.load(nnet['vaeenc'], map_location=lambda storage, loc: storage)

    if nnet['vae_type'] == "modulation":
        if config.vae_arch == "cnn":
            in_channels = [int(x) for x in vae['in_channels'].split(',')]
            out_channels = [int(x) for x in vae['out_channels'].split(',')]
            kernel = tuple([int(x) for x in vae['kernel'].split(',')])
            vae_model = nnetVAECNNNopool(vae['feature_dim'], vae['num_frames'],
                                         in_channels, out_channels, kernel,
                                         vae['nfilters'] * vae['nrepeats'],
                                         False)
        else:
            vae_model = nnetVAE(vae['feature_dim'] * vae['num_frames'],
                                vae['encoder_num_layers'],
                                vae['decoder_num_layers'], vae['hidden_dim'],
                                vae['nfilters'] * vae['nrepeats'], 0, False)
        model = nnetRNN(vae['nfilters'] * vae['nrepeats'], nnet['num_layers'],
                        nnet['hidden_dim'], nnet['num_classes'], 0)
        vae_model.load_state_dict(vae["model_state_dict"])
        model.load_state_dict(nnet["model_state_dict"])

    elif nnet['vae_type'] == "arvae":
        ar_steps = vae['ar_steps'].split(',')
        ar_steps = [int(x) for x in ar_steps]
        ar_steps.append(0)
        vae_model = nnetARVAE(vae['feature_dim'] * vae['num_frames'],
                              vae['encoder_num_layers'],
                              vae['decoder_num_layers'], vae['hidden_dim'],
                              vae['bn_dim'], 0, len(ar_steps), False)
        model = nnetRNN(vae['bn_dim'], nnet['num_layers'], nnet['hidden_dim'],
                        nnet['num_classes'], 0)
        vae_model.load_state_dict(vae["model_state_dict"])
        model.load_state_dict(nnet['model_state_dict'])
    else:
        if config.vae_arch == "cnn":
            in_channels = [int(x) for x in vae['in_channels'].split(',')]
            out_channels = [int(x) for x in vae['out_channels'].split(',')]
            kernel = tuple([int(x) for x in vae['kernel'].split(',')])
            vae_model = nnetVAECNNNopool(vae['feature_dim'], vae['num_frames'],
                                         in_channels, out_channels, kernel,
                                         vae['bn_dim'], False)
        else:
            vae_model = nnetVAE(vae['feature_dim'] * vae['num_frames'],
                                vae['encoder_num_layers'],
                                vae['decoder_num_layers'], vae['hidden_dim'],
                                vae['bn_dim'], 0, False)

        model = nnetRNN(vae['bn_dim'], nnet['num_layers'], nnet['hidden_dim'],
                        nnet['num_classes'], 0)
        vae_model.load_state_dict(vae["model_state_dict"])
        model.load_state_dict(nnet['model_state_dict'])

    feats_config = pickle.load(open(config.egs_config, 'rb'))

    lsm = torch.nn.LogSoftmax(1)
    sm = torch.nn.Softmax(1)

    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(
            trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])
    if config.prior:
        prior = pickle.load(open(config.prior, 'rb'))

    post_dict = {}
    model.eval()
    for utt_id, batch_x in kaldi_io.read_mat_ark(cmd):
        if config.vae_arch == "cnn":
            batch_l = Variable(torch.IntTensor([batch_x.shape[0]]))
            batch_x = Variable(torch.FloatTensor(batch_x))
            batch_x = batch_x[None, None, :, :]
            batch_x = torch.transpose(batch_x, 2, 3)
            _, batch_x = vae_model(batch_x)
            batch_x = torch.transpose(batch_x[0], 1, 2)
        else:
            batch_x = Variable(torch.FloatTensor(batch_x))[None, :, :]
            batch_l = Variable(torch.IntTensor([batch_x.shape[1]]))
            _, batch_x = vae_model(batch_x, batch_l)
            batch_x = batch_x[0]

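        # Per-utterance mean/variance normalization of the bottleneck
        # features along the time axis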
        batch_x = batch_x - torch.cat(
            batch_x.shape[1] * [torch.mean(batch_x, dim=1)[:, None, :]], dim=1)
        batch_x = batch_x / torch.sqrt(
            torch.cat(
                batch_x.shape[1] * [torch.var(batch_x, dim=1)[:, None, :]],
                dim=1))

        batch_x = model(batch_x, batch_l)

        if config.prior:
            post_dict[utt_id] = lsm(
                batch_x[0, :, :]).data.numpy() - config.prior_weight * prior
        else:
            if config.add_softmax:
                post_dict[utt_id] = sm(batch_x[0, :, :]).data.numpy()
            else:
                post_dict[utt_id] = batch_x[0, :, :].data.numpy()

    return post_dict
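The torch.cat-based tiling used for the normalization above can be written more simply with broadcasting; a behavior-equivalent sketch (utt_normalize is a name introduced here, not part of the original code):

def utt_normalize(batch_x):
    # Zero-mean, unit-variance over the time axis (dim=1); broadcasting
    # replaces the explicit torch.cat tiling in the loop above
    mean = batch_x.mean(dim=1, keepdim=True)
    std = batch_x.var(dim=1, keepdim=True).sqrt()
    return (batch_x - mean) / std
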
Example #6
def get_output(config):
    # Load all P(x) and P(c|x) models
    model_pcx = config.models_pcx.split(',')
    model_px = config.models_px.split(',')

    if len(model_pcx) != len(model_px):
        print("Number of p(x) models and p(c|x) models are not the same!")
    num_domains = len(model_px)
    streams = powerset(list(np.arange(num_domains)))

    all_pcx_models = []
    all_px_models = []

    for idx, m in enumerate(model_pcx):
        nnet = torch.load(model_pcx[idx],
                          map_location=lambda storage, loc: storage)
        vae = torch.load(model_px[idx],
                         map_location=lambda storage, loc: storage)
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'],
                        nnet['num_layers'], nnet['hidden_dim'],
                        nnet['num_classes'], nnet['dropout'])
        model.load_state_dict(nnet['model_state_dict'])
        all_pcx_models.append(model)
        model = nnetVAE(vae['feature_dim'] * vae['num_frames'],
                        vae['encoder_num_layers'], vae['decoder_num_layers'],
                        vae['hidden_dim'], vae['bn_dim'], 0, False)
        model.load_state_dict(vae['model_state_dict'])
        all_px_models.append(model)
        num_classes = nnet['num_classes']

    feats_config = pickle.load(open(config.egs_config, 'rb'))
    sm = torch.nn.Softmax(1)

    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(
            trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(
            trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Load prior
    priors = config.priors.split(',')
    priors = [pickle.load(open(f, 'rb')) for f in priors]

    if config.task_prior == "dp":
        print("using data based task priors")
    else:
        task_prior = config.task_prior.split(',')
        task_prior = [float(tp) for tp in task_prior]

    post_dict = {}
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        num = np.zeros((mat.shape[0], num_classes))
        denom = np.zeros(num_classes)
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        px_save = []
        all_pcx = []
        all_px = []
        all_tp = []
        for idx, model in enumerate(all_pcx_models):
            model.eval()
            out = model(mat, batch_l)
            ae_out, latent_out = all_px_models[idx](mat, batch_l)
            latent_out = (latent_out[0][0, :, :], latent_out[1][0, :, :])
            px = np.exp(
                vae_loss(mat[0, :, :], ae_out[0, :, :],
                         latent_out).data.numpy())
            px_save.append(np.mean(px))
            pcx = sm(out[0, :, :])
            px = np.tile(px, (pcx.shape[1], 1)).T
            all_pcx.append(pcx.data.numpy())
            all_px.append(np.ones(px.shape))

            if config.task_prior == "dp":
                all_tp.append(px_save[idx])
            else:
                all_tp.append(task_prior[idx])

        if config.task_prior == "dp":
            all_tp = np.asarray(all_tp, dtype=np.float64)
            all_tp = np.exp(300 * all_tp) / np.sum(np.exp(300 * all_tp))

        for idx, st in enumerate(streams):
            num_prod = np.ones((num.shape[0], num_classes))
            denom_prod = np.ones(num_classes)
            perf_mon = 1

            for b in st:
                num_prod *= all_pcx[b]  # np.power(all_pcx[b], all_tp[b])

                perf_mon *= all_tp[b]
                denom_prod *= np.exp(priors[b])
            denom_prod /= np.sum(denom_prod)
            num_prod = num_prod / np.tile(
                np.sum(num_prod, axis=1)[:, None], (1, num_prod.shape[1]))
            num += num_prod * perf_mon
            denom += denom_prod

        print_log = ""
        for ii, x in enumerate(px_save):
            print_log += "p(x) for Task {:d} ={:.6f} with prior ={:.6f} ".format(
                ii, x, all_tp[ii])
        print(print_log)
        sys.stdout.flush()
        post_dict[utt_id] = np.log(num) - config.prior_weight * np.log(denom)

    return post_dict
Example #7
def get_output(config):
    # Load all P(x) and P(c|x) models
    model_pcx = config.models_pcx.split(',')
    model_px = config.models_px.split(',')

    if len(model_pcx) != len(model_px):
        print("Number of p(x) models and p(c|x) models are not the same!")
    num_domains = len(model_px)

    all_pcx_models = []
    all_px_models = []

    for idx, m in enumerate(model_pcx):
        nnet = torch.load(model_pcx[idx], map_location=lambda storage, loc: storage)
        vae = torch.load(model_px[idx], map_location=lambda storage, loc: storage)
        model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'],
                        nnet['num_layers'],
                        nnet['hidden_dim'],
                        nnet['num_classes'], nnet['dropout'])
        model.load_state_dict(nnet['model_state_dict'])
        all_pcx_models.append(model)
        model = nnetVAE(vae['feature_dim'] * vae['num_frames'], vae['encoder_num_layers'],
                        vae['decoder_num_layers'], vae['hidden_dim'], vae['bn_dim'], 0, False)
        model.load_state_dict(vae['model_state_dict'])
        all_px_models.append(model)
        num_classes = nnet['num_classes']

    feats_config = pickle.load(open(config.egs_config, 'rb'))
    sm = torch.nn.Softmax(1)

    if config.override_trans:
        feat_type = config.override_trans.split(',')[0]
        trans_path = config.override_trans.split(',')[1]
    else:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn --norm-vars=true {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn_utt":
        cmd = "apply-cmvn --norm-vars=true scp:{} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = "copy-feats scp:{} ark:- |".format(config.scp)

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])

    # Load prior
    priors = config.priors.split(',')
    priors = [pickle.load(open(f, 'rb')) for f in priors]

    post_dict = {}
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        post = np.zeros((mat.shape[0], num_classes))
        pxx = np.zeros((num_domains, mat.shape[0], num_classes))
        prior_acc = np.zeros((mat.shape[0], num_classes))
        mat = Variable(torch.FloatTensor(mat))[None, :, :]
        batch_l = Variable(torch.IntTensor([mat.size(1)]))
        all_pcx = []
        all_px = []

        for idx, model in enumerate(all_pcx_models):
            model.eval()
            out = model(mat, batch_l)
            ae_out, latent_out = all_px_models[idx](mat, batch_l)
            latent_out = (latent_out[0][0, :, :], latent_out[1][0, :, :])
            px = np.exp(vae_loss(mat[0, :, :], ae_out[0, :, :], latent_out).data.numpy())
            pcx = sm(out[0, :, :])
            px = np.tile(px, (pcx.shape[1], 1)).T
            all_pcx.append(pcx.data.numpy())
            all_px.append(px)
            pxx[idx] = px

        pxx = softmax(pxx)
        for idx, pcx in enumerate(all_pcx):
            post += pcx * all_px[idx] * pxx[idx]
            prior_acc += np.exp(np.tile(priors[idx], (pcx.shape[0], 1))) * pxx[idx]

        post_dict[utt_id] = np.log(post) - config.prior_weight * np.log(prior_acc)

    return post_dict
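Here pxx is turned into per-domain weights by softmax(), another helper that is not shown. Assuming it normalizes across the domain axis, a numerically stable NumPy sketch might look like:

def softmax(x, axis=0):
    # Stable softmax across the domain axis (assumed helper and assumed axis)
    e = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e / np.sum(e, axis=axis, keepdims=True)
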
Example #8
def run(config):
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load feature configuration
    egs_config = pkl.load(
        open(os.path.join(config.egs_dir, config.train_set, 'egs.config'),
             'rb'))
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (config.encoder_num_layers))
    logging.info('Decoder Number of Layers: %d' % (config.decoder_num_layers))
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Bottleneck dimension: %d' % (config.bn_dim))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))
    logging.info('Relative Entropy of bottleneck (bits): %f' %
                 (config.bn_bits))

    sys.stdout.flush()

    nnet = torch.load(config.nnet_model,
                      map_location=lambda storage, loc: storage)
    nnet_model = nnetRNN(nnet['feature_dim'] * nnet['num_frames'],
                         nnet['num_layers'], nnet['hidden_dim'],
                         nnet['num_classes'], nnet['dropout'])
    nnet_model.load_state_dict(nnet['model_state_dict'])
    nnet_model.eval()  # the pre-trained classifier serves only as a fixed feature extractor

    model = nnetVAE(nnet['num_classes'], config.encoder_num_layers,
                    config.decoder_num_layers, config.hidden_dim,
                    config.bn_dim, 0, config.use_gpu)

    bn_bits = torch.FloatTensor(
        [config.bn_bits / (np.log2(np.e) * (config.bn_dim))])

    if config.use_gpu:
        # Set environment variable for GPU ID
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = str(device_id)

        model = model.cuda()
        bn_bits = bn_bits.cuda()
        nnet_model.cuda()

    lr = config.learning_rate

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    torch.save(
        {
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, model_path)

    ep_vae_rec_tr = []
    ep_vae_kl_tr = []
    ep_vae_rec_dev = []
    ep_vae_kl_dev = []

    # Load Datasets

    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)

    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    err_p = 0
    best_model_state = None

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_vae_rec_losses = []
        train_vae_kl_losses = []

        # Main training loop

        for batch_x, batch_l, lab in data_loader_train:
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])

            optimizer.zero_grad()

            batch_x = nnet_model(batch_x, batch_l)
            # Main forward pass
            ae_out, latent_out = model(batch_x, batch_l)

            # Convert all the weird tensors to frame-wise form
            batch_x = pad2list(batch_x, batch_l)

            ae_out = pad2list(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0],
                                   batch_l), pad2list(latent_out[1], batch_l))
            loss = vae_loss(batch_x, ae_out, latent_out)

            train_vae_rec_losses.append(loss[0].item())
            train_vae_kl_losses.append(loss[1].item())

            (-loss[0] + torch.max(bn_bits, -loss[1])).backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       config.clip_thresh)
            optimizer.step()

        ep_vae_rec_tr.append(np.mean(train_vae_rec_losses))
        ep_vae_kl_tr.append(np.mean(train_vae_kl_losses))

        ######################
        ##### Validation #####
        ######################

        model.eval()

        with torch.set_grad_enabled(False):

            val_vae_rec_losses = []
            val_vae_kl_losses = []

            for batch_x, batch_l, lab in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])

                batch_x = nnet_model(batch_x, batch_l)
                # Main forward pass
                ae_out, latent_out = model(batch_x, batch_l)

                # Convert all the weird tensors to frame-wise form
                batch_x = pad2list(batch_x, batch_l)

                ae_out = pad2list(ae_out, batch_l)
                latent_out = (pad2list(latent_out[0], batch_l),
                              pad2list(latent_out[1], batch_l))
                loss = vae_loss(batch_x, ae_out, latent_out)

                val_vae_rec_losses.append(loss[0].item())
                val_vae_kl_losses.append(loss[1].item())

            ep_vae_rec_dev.append(np.mean(val_vae_rec_losses))
            ep_vae_kl_dev.append(np.mean(val_vae_kl_losses))

        # Manage learning rate
        if epoch_i == 0:
            err_p = -np.mean(val_vae_rec_losses) + max(
                bn_bits.item(), -np.mean(val_vae_kl_losses))
            best_model_state = model.state_dict()
        else:
            if -np.mean(val_vae_rec_losses) + max(
                    bn_bits.item(), -np.mean(val_vae_kl_losses)) > (
                        100 + config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}"
                    .format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = -np.mean(val_vae_rec_losses) + max(
                    bn_bits.item(), -np.mean(val_vae_kl_losses))
                best_model_state = model.state_dict()

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr VAE Log-likelihood: {:.3f} :: Val VAE Log-likelihood: {:.3f}".format(
            epoch_i + 1, lr, ep_vae_kl_tr[-1] + ep_vae_rec_tr[-1],
            ep_vae_kl_dev[-1] + ep_vae_rec_dev[-1])

        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            torch.save(
                {
                    'epoch': epoch_i + 1,
                    'feature_dim': config.feature_dim,
                    'num_frames': num_frames,
                    'encoder_num_layers': config.encoder_num_layers,
                    'decoder_num_layers': config.decoder_num_layers,
                    'hidden_dim': config.hidden_dim,
                    'bn_dim': config.bn_dim,
                    'ep_vae_kl_tr': ep_vae_kl_tr,
                    'ep_vae_rec_tr': ep_vae_rec_tr,
                    'ep_vae_kl_dev': ep_vae_kl_dev,
                    'ep_vae_rec_dev': ep_vae_rec_dev,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, model_path)
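Examples #1 and #8 flatten padded batches with pad2list before applying frame-wise losses. A plausible sketch of that helper, with its exact behavior assumed from how it is called:

def pad2list(padded, lengths):
    # Flatten a padded (batch, time, ...) tensor into (total_frames, ...),
    # keeping only the first lengths[i] valid frames of each sequence
    return torch.cat([padded[i, :lengths[i]] for i in range(padded.size(0))])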