Exemplo n.º 1
0
def predict(args):
    """Evaluate a saved multi-source model on the target domain's test set.

    Loads (encoder, per-source classifiers, metric matrices Us/Ps/Ns) from
    ``args.load_model``, rebuilds DataLoaders for every source train split and
    the target test split, and reports accuracy via ``evaluate``.

    Args:
        args: parsed CLI namespace; uses ``load_model``, ``cuda``, ``train``
            (comma-separated source domain names), ``test`` (target domain
            name) and ``batch_size``.
    """
    encoder, classifiers, Us, Ps, Ns = torch.load(args.load_model)
    # BUG FIX: `map` is lazy in Python 3, so the original
    # `map(lambda m: m.eval(), ...)` never actually ran — modules stayed in
    # train mode (dropout/batchnorm active). Use explicit loops instead.
    for m in [encoder] + classifiers:
        m.eval()

    if args.cuda:
        # Same lazy-map fix: actually move every module to the GPU.
        for m in [encoder] + classifiers:
            m.cuda()
        Us = [U.cuda() for U in Us]
        Ps = [P.cuda() for P in Ps]
        Ns = [N.cuda() for N in Ns]

    say("\nTransferring from %s to %s\n" % (args.train, args.test))
    source_train_sets = args.train.split(',')
    train_loaders = []
    for source in source_train_sets:
        filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (source))
        train_dataset = AmazonDataset(filepath)
        # shuffle=False: order must be deterministic for evaluation.
        train_loader = data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=0
        )
        train_loaders.append(train_loader)

    test_filepath = os.path.join(DATA_DIR, "%s_test.svmlight" % (args.test))
    test_dataset = AmazonDataset(test_filepath)
    test_loader = data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=0
    )
    say("Corpus loaded.\n")

    # `evaluate` is expected to return ((acc, oracle_acc), confusion_mat)
    # given the metric matrices and both loaders — TODO confirm signature.
    mats = [Us, Ps, Ns]
    (acc, oracle_acc), confusion_mat = evaluate(
            encoder, classifiers,
            mats,
            [train_loaders, test_loader],
            args
        )
    say(colored("Test accuracy/oracle {:.4f}/{:.4f}\n".format(acc, oracle_acc), 'red'))
Exemplo n.º 2
0
def predict(args):
    """Evaluate a saved single-classifier model on the target domain.

    Loads (encoder, classifier) from ``args.load_model`` and evaluates on the
    target domain's *train* split (``%s_train.svmlight`` — presumably the
    unlabeled-target convention used during training; verify against caller).

    Args:
        args: parsed CLI namespace; uses ``load_model``, ``cuda``, ``test``
            and ``batch_size``.
    """
    encoder, classifier = torch.load(args.load_model)
    # BUG FIX: `map` is lazy in Python 3 — the original call never executed,
    # leaving the modules in train mode. Use explicit loops.
    for m in (encoder, classifier):
        m.eval()

    if args.cuda:
        for m in (encoder, classifier):
            m.cuda()

    test_filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (args.test))
    assert (os.path.exists(test_filepath))
    test_dataset = AmazonDataset(test_filepath)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=0)

    acc, confusion_mat, _ = evaluate(encoder, classifier, test_loader, args)
    say(colored("Test accuracy {:.4f}\n".format(acc), 'red'))
    # BUG FIX: Python-2 print statement -> print() call (valid in 2 and 3
    # for a single argument).
    print(confusion_mat)
Exemplo n.º 3
0
def train(args):
    ''' Training Strategy

    Input: source = {S1, S2, ..., Sk}, target = {T}

    Train:
        Approach 1: fix metric and learn encoder only
        Approach 2: learn metric and encoder alternatively

    Builds a shared encoder, one binary classifier per source domain, a
    domain critic, and per-source metric matrices (Us plus either Ps/Ns for
    the low-rank metric or Ws/Vs for the "biaffine" metric), then trains
    them jointly with Adam and evaluates on dev/test after each epoch.
    '''

    # test_mahalanobis_metric() and return

    # NOTE: add_config mutates the module-level `argparser` before the final
    # parse below, so encoder/critic hyper-parameters become CLI options.
    encoder_class = get_model_class("mlp")
    encoder_class.add_config(argparser)
    critic_class = get_critic_class(args.critic)
    critic_class.add_config(argparser)

    # Re-parse: the incoming `args` is replaced by the fully-configured one.
    args = argparser.parse_args()
    say(args)

    # encoder is shared across domains
    encoder = encoder_class(args)

    say("Transferring from %s to %s\n" % (args.train, args.test))
    source_train_sets = args.train.split(',')
    train_loaders = []
    # Per-source metric parameters (one entry per source domain).
    Us = []
    Ps = []
    Ns = []
    Ws = []
    Vs = []
    # Ms = []
    for source in source_train_sets:
        filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (source))
        assert (os.path.exists(filepath))
        train_dataset = AmazonDataset(filepath)
        train_loader = data.DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=0)
        train_loaders.append(train_loader)

        if args.metric == "biaffine":
            # Biaffine metric: full n_d x n_d matrix U plus vectors W, V.
            U = torch.FloatTensor(encoder.n_d, encoder.n_d)
            W = torch.FloatTensor(encoder.n_d, 1)
            nn.init.xavier_uniform_(W)
            Ws.append(W)
            V = torch.FloatTensor(encoder.n_d, 1)
            nn.init.xavier_uniform_(V)
            Vs.append(V)
        else:
            # Low-rank metric: U is n_d x m_rank.
            U = torch.FloatTensor(encoder.n_d, args.m_rank)

        nn.init.xavier_uniform_(U)
        Us.append(U)
        # P/N are created in both branches, though the biaffine path only
        # uses Ws/Vs at evaluation time (see `mats` below).
        P = torch.FloatTensor(encoder.n_d, args.m_rank)
        nn.init.xavier_uniform_(P)
        Ps.append(P)
        N = torch.FloatTensor(encoder.n_d, args.m_rank)
        nn.init.xavier_uniform_(N)
        Ns.append(N)
        # Ms.append(U.mm(U.t()))

    # Unlabeled target-domain data (its *train* split) for the domain critic.
    unl_filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (args.test))
    assert (os.path.exists(unl_filepath))
    unl_dataset = AmazonDomainDataset(unl_filepath)
    unl_loader = data.DataLoader(unl_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=0)

    # Dev split is optional; model selection is skipped when it is absent.
    valid_filepath = os.path.join(DATA_DIR, "%s_dev.svmlight" % (args.test))
    if os.path.exists(valid_filepath):
        valid_dataset = AmazonDataset(valid_filepath)
        valid_loader = data.DataLoader(valid_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=False,
                                       num_workers=0)
    else:
        valid_loader = None

    test_filepath = os.path.join(DATA_DIR, "%s_test.svmlight" % (args.test))
    assert (os.path.exists(test_filepath))
    test_dataset = AmazonDataset(test_filepath)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=0)
    say("Corpus loaded.\n")

    # One binary (2-way) classifier head per source domain.
    classifiers = []
    for source in source_train_sets:
        classifier = nn.Linear(encoder.n_out, 2)  # binary classification
        nn.init.xavier_normal_(classifier.weight)
        nn.init.constant_(classifier.bias, 0.1)
        classifiers.append(classifier)

    critic = critic_class(encoder, args)

    # if args.save_model:
    #     say(colored("Save model to {}\n".format(args.save_model + ".init"), 'red'))
    #     torch.save([encoder, classifiers, Us, Ps, Ns], args.save_model + ".init")

    if args.cuda:
        # map(lambda m: m.cuda(), [encoder, critic] + classifiers)
        encoder = encoder.cuda()
        critic = critic.cuda()
        classifiers = [x.cuda() for x in classifiers]
        # Wrap the raw metric tensors as trainable Variables on the GPU.
        Us = [Variable(U.cuda(), requires_grad=True) for U in Us]
        Ps = [Variable(P.cuda(), requires_grad=True) for P in Ps]
        Ns = [Variable(N.cuda(), requires_grad=True) for N in Ns]
        if args.metric == "biaffine":
            Ws = [Variable(W.cuda(), requires_grad=True) for W in Ws]
            Vs = [Variable(V.cuda(), requires_grad=True) for V in Vs]

    # Ms = [ U.mm(U.t()) for U in Us ]

    say("\nEncoder: {}\n".format(encoder))
    for i, classifier in enumerate(classifiers):
        say("Classifier-{}: {}\n".format(i, classifier))
    say("Critic: {}\n".format(critic))

    # Collect every trainable parameter (modules + metric tensors) for one
    # joint optimizer; `filter(requires_grad, ...)` drops frozen ones.
    requires_grad = lambda x: x.requires_grad
    task_params = list(encoder.parameters())
    for classifier in classifiers:
        task_params += list(classifier.parameters())
    task_params += list(critic.parameters())
    task_params += Us
    task_params += Ps
    task_params += Ns
    if args.metric == "biaffine":
        task_params += Ws
        task_params += Vs

    optim_model = optim.Adam(filter(requires_grad, task_params),
                             lr=args.lr,
                             weight_decay=1e-4)

    say("Training will begin from scratch\n")

    best_dev = 0
    best_test = 0
    iter_cnt = 0

    for epoch in range(args.max_epoch):
        # Metric matrices handed to train/evaluate depend on the metric type.
        if args.metric == "biaffine":
            mats = [Us, Ws, Vs]
        else:
            mats = [Us, Ps, Ns]

        iter_cnt = train_epoch(iter_cnt, encoder, classifiers, critic, mats,
                               [train_loaders, unl_loader, valid_loader], args,
                               optim_model)

        if valid_loader:
            (curr_dev, oracle_curr_dev), confusion_mat = evaluate(
                encoder, classifiers, mats, [train_loaders, valid_loader],
                args)
            say("Dev accuracy/oracle: {:.4f}/{:.4f}\n".format(
                curr_dev, oracle_curr_dev))
        (curr_test, oracle_curr_test), confusion_mat = evaluate(
            encoder, classifiers, mats, [train_loaders, test_loader], args)
        say("Test accuracy/oracle: {:.4f}/{:.4f}\n".format(
            curr_test, oracle_curr_test))

        # Model selection on dev accuracy (>= keeps the latest tie).
        if valid_loader and curr_dev >= best_dev:
            best_dev = curr_dev
            best_test = curr_test
            print(confusion_mat)
            if args.save_model:
                say(
                    colored(
                        "Save model to {}\n".format(args.save_model + ".best"),
                        'red'))
                # NOTE(review): always saves Us/Ps/Ns, but under
                # metric=="biaffine" evaluation used Us/Ws/Vs — the saved
                # checkpoint looks inconsistent for that metric; confirm
                # against the predict() loader before relying on it.
                torch.save([encoder, classifiers, Us, Ps, Ns],
                           args.save_model + ".best")
            say("\n")

    if valid_loader:
        say(colored("Best test accuracy {:.4f}\n".format(best_test), 'red'))
    # NOTE(review): `curr_test` is unbound if args.max_epoch == 0.
    say(
        colored("Test accuracy after training {:.4f}\n".format(curr_test),
                'red'))
Exemplo n.º 4
0
def visualize(args):
    """Project encoder embeddings of source + target data to 2-D with t-SNE.

    Loads a saved model (format depends on ``args.mop``), encodes every
    source train set and the target domain's train split, runs TSNE on the
    concatenated hidden vectors, saves the reduced data under ``vis/`` and
    renders it with ``ms_plot_embedding_sep``.

    Args:
        args: parsed CLI namespace; uses ``mop``, ``load_model``, ``cuda``,
            ``train``, ``test``, ``batch_size`` and ``save_image``.
    """
    if args.mop == 3:
        encoder, classifiers, source_classifier = torch.load(args.load_model)
    elif args.mop == 2:
        encoder, classifiers, Us, Ps, Ns = torch.load(args.load_model)
    else:
        say("\nUndefined --mop\n")
        return

    # BUG FIX: `map` is lazy in Python 3, so eval()/cuda() never ran in the
    # original. Use explicit loops so inference mode and device placement
    # actually take effect.
    for m in [encoder] + classifiers:
        m.eval()
    if args.cuda:
        for m in [encoder] + classifiers:
            m.cuda()

    source_train_sets = args.train.split(',')
    train_loaders = []
    for source in source_train_sets:
        filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (source))
        train_dataset = AmazonDataset(filepath)
        train_loader = data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=0
        )
        train_loaders.append(train_loader)

    # Target domain: its *train* split (unlabeled-target convention).
    test_filepath = os.path.join(DATA_DIR, "%s_train.svmlight" % (args.test))
    test_dataset = AmazonDataset(test_filepath)
    test_loader = data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=0
    )
    say("Corpus loaded.\n")

    # Encode every source set; keep per-source counts for separate plotting.
    source_hs = []
    source_ys = []
    source_num = []
    for loader in train_loaders:
        encoding_vecs = torch.FloatTensor()
        labels = torch.LongTensor()
        if args.cuda:
            encoding_vecs = encoding_vecs.cuda()
            labels = labels.cuda()

        for batch, label in loader:
            if args.cuda:
                batch = batch.cuda()
                label = label.cuda()

            batch = Variable(batch)
            hidden = encoder(batch)
            encoding_vecs = torch.cat([encoding_vecs, hidden.data])
            labels = torch.cat([labels, label.view(-1, 1)])

        source_hs.append(encoding_vecs)
        source_ys.append(labels)
        source_num.append(labels.shape[0])

    # Encode the target-domain data the same way.
    ht = torch.FloatTensor()
    yt = torch.LongTensor()
    if args.cuda:
        ht = ht.cuda()
        yt = yt.cuda()

    for batch, label in test_loader:
        if args.cuda:
            batch = batch.cuda()
            label = label.cuda()

        batch = Variable(batch)
        hidden = encoder(batch)
        ht = torch.cat([ht, hidden.data])
        yt = torch.cat([yt, label.view(-1, 1)])

    h_both = torch.cat(source_hs + [ht]).cpu().numpy()
    y_both = torch.cat(source_ys + [yt]).cpu().numpy()

    say("Dimension reduction...\n")
    tsne = TSNE(perplexity=30, n_components=2, n_iter=3300)
    vdata = tsne.fit_transform(h_both)
    # BUG FIX: Python-2 print statement -> print() call.
    print(vdata.shape, source_num)
    torch.save([vdata, y_both, source_num], 'vis/%s-%s-mop%d.vdata' % (args.train, args.test, args.mop))
    ms_plot_embedding_sep(vdata, y_both, source_num, args.save_image)