Example #1
def run():

    # load source dataset
    src_data_loader = get_data_loader(params.src_dataset)
    src_data_loader_eval = get_data_loader(params.src_dataset, train=False)

    # load models
    src_encoder = init_model(net=LeNetEncoder(),
                             restore=params.src_encoder_restore)
    src_classifier = init_model(net=LeNetClassifier(),
                                restore=params.src_classifier_restore)

    # pre-train source model
    print("=== Training classifier for source domain ===")
    print(">>> Source Encoder <<<")
    im, _ = next(iter(src_data_loader))
    summary(src_encoder, input_size=im[0].size())
    print(">>> Source Classifier <<<")
    print(src_classifier)

    if not (src_encoder.restored and src_classifier.restored and
            params.src_model_trained):
        src_encoder, src_classifier = train_src(
            src_encoder, src_classifier, src_data_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    eval_src(src_encoder, src_classifier, src_data_loader_eval)
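Every example on this page passes models through an init_model helper that takes an optional restore checkpoint and exposes a .restored flag. For reference only, a minimal sketch of such a helper could look like the following (an assumption based on how the snippets use it, not the code of any project shown here):

import os
import torch

def init_model(net, restore=None):
    # Load weights if a checkpoint exists, mark the model as restored,
    # and move it to the GPU when one is available.
    net.restored = False
    if restore is not None and os.path.exists(restore):
        net.load_state_dict(torch.load(restore))
        net.restored = True
    if torch.cuda.is_available():
        net = net.cuda()
    return net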
Example #2
def run():

    # load dataset
    src_data_loader = get_data_loader(params.src_dataset)
    tgt_data_loader = get_data_loader(params.tgt_dataset)

    # load models
    src_encoder = init_model(net=LeNetEncoder(),
                             restore=params.src_encoder_restore)
    tgt_encoder = init_model(net=LeNetEncoder(),
                             restore=params.tgt_encoder_restore)
    critic = init_model(Discriminator(input_dims=params.d_input_dims,
                                      hidden_dims=params.d_hidden_dims,
                                      output_dims=params.d_output_dims),
                        restore=params.d_model_restore)

    # Adapt target encoder by GAN
    print("=== Training encoder for target domain ===")
    print(">>> Target Encoder <<<")
    im, _ = next(iter(tgt_data_loader))
    summary(tgt_encoder, input_size=im[0].size())
    print(">>> Critic <<<")
    print(critic)

    # init weights of target encoder with those of source encoder
    if not tgt_encoder.restored:
        tgt_encoder.load_state_dict(src_encoder.state_dict())

    # Train target
    if not (tgt_encoder.restored and critic.restored
            and params.tgt_model_trained):
        tgt_encoder = train_tgt(src_encoder, tgt_encoder, critic,
                                src_data_loader, tgt_data_loader)
Example #3
def main():
    args = parse_arguments()
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    n_batch = args.n_batch
    temperature = params.temperature
    train_path = params.train_path
    assert torch.cuda.is_available()

    print("loading_data...")
    # load the preprocessed vocabulary at training time, if one has been saved
    if os.path.exists("vocab.json"):
        vocab = Vocabulary()
        with open('vocab.json', 'r') as fp:
            vocab.stoi = json.load(fp)

        for key, value in vocab.stoi.items():
            vocab.itos.append(key)
    else:
        vocab = build_vocab(train_path, n_vocab)
        # save vocab
        with open('vocab.json', 'w') as fp:
            json.dump(vocab.stoi, fp)

    train_X, train_y, train_K = load_data(train_path, vocab)
    train_loader = get_data_loader(train_X, train_y, train_K, n_batch)
    print("successfully loaded")

    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer,
                                vocab).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()

    if args.restore:
        encoder = init_model(encoder, restore=params.encoder_restore)
        Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
        manager = init_model(manager, restore=params.manager_restore)
        decoder = init_model(decoder, restore=params.decoder_restore)

    # TODO: all embeddings in the current code are independent; following the
    # Transformer reference implementation, parameters could be shared by direct
    # assignment:
    # if emb_src_trg_weight_sharing:
    #     self.encoder.src_word_emb.weight = self.decoder.trg_word_emb.weight

    model = [encoder, Kencoder, manager, decoder]
    parameters = list(encoder.parameters()) + list(Kencoder.parameters()) + \
                 list(manager.parameters()) + list(decoder.parameters())
    optimizer = optim.Adam(parameters, lr=args.lr)

    # pre_train knowledge manager
    print("start pre-training")
    pre_train(model, optimizer, train_loader, args)
    print("start training")
    train(model, optimizer, train_loader, args)

    # save final model
    save_models(model, params.all_restore)
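The vocabulary handling in the example above assumes a small Vocabulary container with a token-to-index map (stoi) and its inverse list (itos). A minimal sketch consistent with that usage (an assumption, not the project's own class):

class Vocabulary:
    def __init__(self):
        self.stoi = {}   # token -> index
        self.itos = []   # index -> token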
Example #4
def train(args):
    """Train."""
    start_time = time.time()
    if args.one_per_line:
        corpus: Corpus = ClassificationCorpus(
            args.data_dir,
            train_file=args.train_file,
            dev_file=args.dev_file,
        )
    else:
        assert args.label_symbol is not None
        corpus: Corpus = FlyClassificationCorpus(
            args.data_dir,
            train_file=args.train_file,
            dev_file=args.dev_file,
            comment_symbol=args.comment_symbol,
            label_symbol=args.label_symbol,
        )

    label_dict = corpus.make_label_dictionary()
    vocab = corpus.make_vocab_dictionary().get_items()
    embeddings = utils.init_embeddings(vocab, args)

    document_embeddings = DocumentRNNEmbeddings(
        [embeddings],
        hidden_size=args.hidden_size,
        use_attn=args.use_attn,
        num_heads=args.num_heads,
        scaling=args.scaling,
        pooling_operation=args.pooling_operation,
        use_sent_query=args.use_sent_query,
    )

    model = TextClassifier(document_embeddings, label_dictionary=label_dict)

    utils.init_model(model, args)

    trainer: ModelTrainer = ModelTrainer(model, corpus,
                                         utils.optim_method(args.optim))

    trainer.train(
        args.model_dir,
        mini_batch_size=args.mini_batch_size,
        max_epochs=args.max_epochs,
        anneal_factor=args.anneal_factor,
        learning_rate=args.learning_rate,
        patience=args.patience,
        min_learning_rate=args.min_learning_rate,
        embeddings_storage_mode=args.embeddings_storage_mode,
    )

    logger.info("End of training: time %.1f min",
                (time.time() - start_time) / 60)
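utils.optim_method(args.optim) above presumably maps an optimizer name to an optimizer class for the ModelTrainer. A hypothetical sketch of such a mapping (the helper name, supported keys, and default are assumptions for illustration):

import torch.optim as optim

def optim_method(name):
    # Map a lowercase optimizer name to its torch.optim class; fall back to SGD.
    return {"sgd": optim.SGD, "adam": optim.Adam, "adamw": optim.AdamW}.get(name.lower(), optim.SGD)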
Example #5
File: train_tagger.py Project: yyht/daga
def train(args):
    """Train."""
    start_time = time.time()
    column_format = {i: col for i, col in enumerate(args.data_columns)}
    corpus: Corpus = ColumnCorpus(
        args.data_dir,
        column_format,
        train_file=args.train_file,
        dev_file=args.dev_file,
        comment_symbol=args.comment_symbol,
    )

    tag_type = args.data_columns[-1]
    tag_dict = corpus.make_tag_dictionary(tag_type=tag_type)
    vocab = corpus.make_vocab_dictionary().get_items()
    embeddings = utils.init_embeddings(vocab, args)

    model: SequenceTagger = SequenceTagger(
        hidden_size=args.hidden_size,
        embeddings=embeddings,
        tag_dictionary=tag_dict,
        tag_type=tag_type,
        column_format=column_format,
        use_crf=True,
        use_attn=args.use_attn,
        attn_type=args.attn_type,
        num_heads=args.num_heads,
        scaling=args.scaling,
        pooling_operation=args.pooling_operation,
        use_sent_query=args.use_sent_query,
    )

    utils.init_model(model, args)

    trainer: ModelTrainer = ModelTrainer(model, corpus,
                                         utils.optim_method(args.optim))

    trainer.train(
        args.model_dir,
        mini_batch_size=args.mini_batch_size,
        max_epochs=args.max_epochs,
        anneal_factor=args.anneal_factor,
        learning_rate=args.learning_rate,
        patience=args.patience,
        min_learning_rate=args.min_learning_rate,
        embeddings_storage_mode=args.embeddings_storage_mode,
    )

    logger.info("End of training: time %.1f min",
                (time.time() - start_time) / 60)
Example #6
    def __init__(
        self,
        net_G=None,
        net_D=None,
        opt_G=None,
        opt_D=None,
        scaler_G=None,
        scaler_D=None,
        device=None,
        lambda_L1=100.0,
    ):
        super().__init__()

        self.device = device
        if net_G:
            self.net_G = net_G.to(self.device)
        else:
            self.net_G = init_model(Generator_Res_Unet().get_model(), self.device)
        if net_D:
            self.net_D = net_D.to(self.device)
        else:
            self.net_D = init_model(Discriminator(input_channels=3), self.device)

        if scaler_G:
            self.scaler_G = scaler_G
        else:
            self.scaler_G = amp.GradScaler()

        if scaler_D:
            self.scaler_D = scaler_D
        else:
            self.scaler_D = amp.GradScaler()

        if opt_G:
            self.opt_G = opt_G
        else:
            self.opt_G = optim.Adam(
                self.net_G.parameters(), lr=lr_G, betas=(beta1, beta2)
            )
        if opt_D:
            self.opt_D = opt_D
        else:
            self.opt_D = optim.Adam(
                self.net_D.parameters(), lr=lr_D, betas=(beta1, beta2)
            )

        self.GANcriterion = GANLoss(gan_mode="vanilla").to(self.device)
        self.L1criterion = nn.L1Loss().to(self.device)
        self.lambda_L1 = lambda_L1
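The constructor above only wires up the networks, optimizers, scalers, and criteria; an illustrative mixed-precision discriminator update using those attributes could look like the sketch below (assuming the common pix2pix-style GANLoss(prediction, target_is_real) call signature, not the original class's own method):

    def optimize_D(self, real_images, fake_images):
        # One AMP update of net_D with the components set up in __init__.
        self.opt_D.zero_grad()
        with amp.autocast():
            loss_real = self.GANcriterion(self.net_D(real_images), True)
            loss_fake = self.GANcriterion(self.net_D(fake_images.detach()), False)
            loss_D = 0.5 * (loss_real + loss_fake)
        self.scaler_D.scale(loss_D).backward()
        self.scaler_D.step(self.opt_D)
        self.scaler_D.update()
        return loss_D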
Example #7
def main():
    args = arguments()

    # init random seed
    init_random_seed(manual_seed)

    src_train_loader, src_test_loader, tgt_train_loader, tgt_test_loader = get_dataset(
        args)

    print("=== Datasets successfully loaded ===")
    src_encoder_restore = "snapshots/src-encoder-{}.pt".format(args.src)
    src_classifier_restore = "snapshots/src-classifier-{}.pt".format(args.src)
    # load models
    src_encoder = init_model(BERTEncoder(), restore=src_encoder_restore)
    src_classifier = init_model(BERTClassifier(),
                                restore=src_classifier_restore)

    # if torch.cuda.device_count() > 1:
    #     print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
    #     src_encoder = nn.DataParallel(src_encoder)
    #     src_classifier = nn.DataParallel(src_classifier)

    # argument setting
    print("=== Argument Setting ===")
    print("src: " + args.src)
    print("tgt: " + args.tgt)
    print("seqlen: " + str(args.seqlen))
    print("num_epochs: " + str(args.num_epochs))
    print("batch_size: " + str(args.batch_size))
    print("learning_rate: " + str(args.lr))

    if args.enc_train:
        for param in src_encoder.parameters():
            param.requires_grad = True

        # train source model
        print("=== Training classifier for source domain ===")
        src_encoder, src_classifier = train_no_da(args, src_encoder,
                                                  src_classifier,
                                                  src_train_loader,
                                                  src_test_loader)

    # eval source model
    print("Evaluate classifier for source domain: {}".format(args.src))
    eval_src(src_encoder, src_classifier, src_test_loader)

    # eval target encoder on test set of target dataset
    print("Evaluate classifier for encoded target domain: {}".format(args.tgt))
    eval_tgt(src_encoder, src_classifier, tgt_test_loader)
Example #8
File: main.py Project: namletien/mcda
def office():
    init_random_seed(params.manual_seed)


    # load dataset
    src_data_loader = get_data_loader(params.src_dataset)
    src_data_loader_eval = get_data_loader(params.src_dataset, train=False)
    tgt_data_loader = get_data_loader(params.tgt_dataset)
    tgt_data_loader_eval = get_data_loader(params.tgt_dataset, train=False)

    # load models
    src_encoder = init_model(net=LeNetEncoder(),
                             restore=params.src_encoder_restore)
    src_classifier = init_model(net=LeNetClassifier(),
                                restore=params.src_classifier_restore)
    tgt_encoder = init_model(net=LeNetEncoder(),
                             restore=params.tgt_encoder_restore)
    critic = init_model(Discriminator(input_dims=params.d_input_dims,
                                      hidden_dims=params.d_hidden_dims,
                                      output_dims=params.d_output_dims),
                        restore=params.d_model_restore)


    if not (src_encoder.restored and src_classifier.restored and
            params.src_model_trained):
        src_encoder, src_classifier = train_src(
            src_encoder, src_classifier, src_data_loader)

    # eval source model
    # print("=== Evaluating classifier for source domain ===")
    # eval_src(src_encoder, src_classifier, src_data_loader_eval)

    # train target encoder by GAN

    # init weights of target encoder with those of source encoder
    if not tgt_encoder.restored:
        tgt_encoder.load_state_dict(src_encoder.state_dict())

    if not (tgt_encoder.restored and critic.restored and
            params.tgt_model_trained):
        tgt_encoder = train_tgt(src_encoder, tgt_encoder, critic,
                                src_data_loader, tgt_data_loader)

    # eval target encoder on test set of target dataset
    print(">>> domain adaption <<<")
    acc = eval_tgt(tgt_encoder, src_classifier, tgt_data_loader_eval)
    return acc
Example #9
def main():
    args = parse_arguments()
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    n_batch = args.n_batch
    temperature = params.temperature
    train_path = params.train_path
    assert torch.cuda.is_available()

    print("loading_data...")
    vocab = build_vocab(train_path, n_vocab)

    # save vocab
    with open('vocab.json', 'w') as fp:
        json.dump(vocab.stoi, fp)

    train_X, train_y, train_K = load_data(train_path, vocab)
    train_loader = get_data_loader(train_X, train_y, train_K, n_batch)
    print("successfully loaded")

    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer,
                                vocab).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()

    if args.restore:
        encoder = init_model(encoder, restore=params.encoder_restore)
        Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
        manager = init_model(manager, restore=params.manager_restore)
        decoder = init_model(decoder, restore=params.decoder_restore)

    model = [encoder, Kencoder, manager, decoder]
    parameters = list(encoder.parameters()) + list(Kencoder.parameters()) + \
                 list(manager.parameters()) + list(decoder.parameters())
    optimizer = optim.Adam(parameters, lr=args.lr)

    # pre_train knowledge manager
    print("start pre-training")
    pre_train(model, optimizer, train_loader, args)
    print("start training")
    train(model, optimizer, train_loader, args)

    # save final model
    save_models(model, params.all_restore)
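save_models(model, params.all_restore) above stores the four sub-modules; a hypothetical helper consistent with that call (the checkpoint layout is an assumption):

import torch

def save_models(model, path):
    # Persist the state dict of each sub-module under a single checkpoint file.
    encoder, Kencoder, manager, decoder = model
    torch.save({
        'encoder': encoder.state_dict(),
        'Kencoder': Kencoder.state_dict(),
        'manager': manager.state_dict(),
        'decoder': decoder.state_dict(),
    }, path)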
Example #10
def trio(tgt_classifier_net, tgt_encoder_net, src_dataset, tgt_dataset, conv):
    print('loading pretrained trio after conv ' + str(conv) + '...')

    tgt_classifier = init_model(net=tgt_classifier_net,
                                restore=str(conv) + "_snapshots/" +
                                src_dataset + "-ADDA-target-classifier-final.pt")
    tgt_encoder = init_model(net=tgt_encoder_net,
                             restore=str(conv) + "_snapshots/" +
                             tgt_dataset + "-ADDA-target-classifier-final.pt")

    critic = init_model(Discriminator(input_dims=params.d_input_dims,
                                      hidden_dims=params.d_hidden_dims,
                                      output_dims=params.d_output_dims),
                        restore=str(conv) + "_snapshots/" +
                        tgt_dataset + "-ADDA-target-classifier-final.pt")

    return tgt_classifier, tgt_encoder, critic
Example #11
def main():
    args = parse_arguments()
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    n_batch = args.n_batch
    temperature = params.temperature

    test_path = params.test_path
    vocab_path = params.vocab_path
    assert torch.cuda.is_available()


    print("loading the vocab...")
    vocab = Vocabulary()
    with open(vocab_path, 'r', encoding='utf-8') as fp:
        vocab.stoi = json.load(fp)
    for key, value in vocab.stoi.items():
        vocab.itos.append(key)

    # load data and change to id
    print("loading_data...")
    test_X, test_y, test_K = load_data(test_path, vocab)

    test_loader = get_data_loader(test_X, test_y, test_K, n_batch, False)
    print("successfully loaded test data")

    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer).cuda()


    encoder = init_model(encoder, restore=params.encoder_restore)
    Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
    manager = init_model(manager, restore=params.manager_restore)
    decoder = init_model(decoder, restore=params.decoder_restore)
    print("models successfully loaded!\n")

    model = [encoder, Kencoder, manager, decoder]

    #evaluate_loss(model, 0, test_loader)
    evaluate_sample(model, vocab, test_X, test_y, test_K, test_loader)
Example #12
def run():

    # load dataset
    tgt_data_loader_eval = get_data_loader(params.tgt_dataset, train=False)

    # Load models
    src_encoder = init_model(net=LeNetEncoder(),
                             restore=params.src_encoder_restore)
    src_classifier = init_model(net=LeNetClassifier(),
                                restore=params.src_classifier_restore)
    tgt_encoder = init_model(net=LeNetEncoder(),
                             restore=params.tgt_encoder_restore)

    # Evalute target encoder on test set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> source only <<<")
    eval_tgt(src_encoder, src_classifier, tgt_data_loader_eval)
    print(">>> domain adaption <<<")
    eval_tgt(tgt_encoder, src_classifier, tgt_data_loader_eval)
Example #13
def main():
    logger.info('=> PyTorch Version: {}'.format(torch.__version__))

    # Environment initialization
    device, pin_memory = init_device(args.seed, args.cpu, args.gpu,
                                     args.cpu_affinity)

    # Create the data loader
    train_loader, val_loader, test_loader = Cost2100DataLoader(
        root=args.data_dir,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=pin_memory,
        scenario=args.scenario)()

    # Define model
    model = init_model(args)
    model.to(device)

    # Define loss function
    criterion = nn.MSELoss().to(device)

    # Inference mode
    if args.evaluate:
        Tester(model, device, criterion)(test_loader)
        return

    # Define optimizer and scheduler
    lr_init = 1e-3 if args.scheduler == 'const' else 2e-3
    optimizer = torch.optim.Adam(model.parameters(), lr_init)
    if args.scheduler == 'const':
        scheduler = FakeLR(optimizer=optimizer)
    else:
        scheduler = WarmUpCosineAnnealingLR(optimizer=optimizer,
                                            T_max=args.epochs *
                                            len(train_loader),
                                            T_warmup=30 * len(train_loader),
                                            eta_min=5e-5)

    # Define the training pipeline
    trainer = Trainer(model=model,
                      device=device,
                      optimizer=optimizer,
                      criterion=criterion,
                      scheduler=scheduler,
                      resume=args.resume)

    # Start training
    trainer.loop(args.epochs, train_loader, val_loader, test_loader)

    # Final testing
    loss, rho, nmse = Tester(model, device, criterion)(test_loader)
    print(f"\n=! Final test loss: {loss:.3e}"
          f"\n         test rho: {rho:.3e}"
          f"\n         test NMSE: {nmse:.3e}\n")
Example #14
def main():
    args = parse_arguments()
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    n_batch = args.n_batch
    temperature = params.temperature
    test_path = params.test_path
    assert torch.cuda.is_available()

    print("loading_data...")

    if os.path.exists("vocab.json"):
        vocab = Vocabulary()
        with open('vocab.json', 'r') as fp:
            vocab.stoi = json.load(fp)

        for key, value in vocab.stoi.items():
            vocab.itos.append(key)
    else:
        train_path = params.train_path
        vocab = build_vocab(train_path, n_vocab)

    test_X, test_y, test_K = load_data(test_path, vocab)
    test_loader = get_data_loader(test_X, test_y, test_K, n_batch)
    print("successfully loaded")

    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer).cuda()

    encoder = init_model(encoder, restore=params.encoder_restore)
    Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
    manager = init_model(manager, restore=params.manager_restore)
    decoder = init_model(decoder, restore=params.decoder_restore)

    model = [encoder, Kencoder, manager, decoder]
    print("start evaluating")
    evaluate(model, test_loader)
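The stoi-to-itos loop above relies on the JSON-loaded dictionary preserving insertion order; an order-independent rebuild, assuming the stored indices run from 0 to len(stoi) - 1, would be:

itos = [None] * len(vocab.stoi)
for word, idx in vocab.stoi.items():
    itos[idx] = word   # place each token at its stored index
vocab.itos = itos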
Example #15
def main():
    logger.info('=> PyTorch Version: {}'.format(torch.__version__))

    # Environment initialization
    device = init_device(args.seed, args.cpu, args.gpu, args.cpu_affinity)

    # Create the test data loader
    test_loader = Cost2100DataLoader(root=args.data_dir,
                                     batch_size=args.batch_size,
                                     num_workers=args.workers,
                                     scenario=args.scenario)()

    # Define model
    model = init_model(args)
    model.to(device)

    # Define loss function
    criterion = nn.MSELoss().to(device)

    # Inference
    Tester(model, device, criterion, print_freq=20)(test_loader)
Example #16
def main(args, f):
    # args = parse_arguments()
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        encoder = BertEncoder()
        src_encoder = BertEncoder()
        classifier = BertClassifier()
    elif args.model == 'distilbert':
        encoder = DistilBertEncoder()
        src_encoder = DistilBertEncoder()
        classifier = BertClassifier()
    elif args.model == 'roberta':
        encoder = RobertaEncoder()
        src_encoder = RobertaEncoder()
        classifier = RobertaClassifier()
    else:
        encoder = DistilRobertaEncoder()
        src_encoder = DistilRobertaEncoder()
        classifier = RobertaClassifier()

    # domain discriminator
    discriminator = AdversarialNetworkCdan(param.input_dim * param.num_labels,
                                           param.hidden_dim)

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        encoder = nn.DataParallel(encoder)
        src_encoder = nn.DataParallel(src_encoder)
        classifier = nn.DataParallel(classifier)
        discriminator = nn.DataParallel(discriminator)

    if args.load:
        encoder = init_model(args,
                             encoder,
                             restore_path=param.src_encoder_path)
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore_path=param.tgt_encoder_path)
        classifier = init_model(args,
                                classifier,
                                restore_path=param.src_classifier_path)
        # discriminator = init_model(args, discriminator, restore_path=param.d_model_path)
    else:
        encoder = init_model(args, encoder)
        src_encoder = init_model(args, src_encoder)
        classifier = init_model(args, classifier)

    discriminator = init_model(args, discriminator)

    # train source model
    print("=== Pretrain encoder for source domain ===")
    if args.pretrain:
        encoder, classifier = pretrain(args, encoder, classifier, src_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, encoder, classifier, src_loader)
    src_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt} no adapt acc on src data: {src_acc}\n')
    # x, y = save_features(args, encoder, src_loader)
    # np.savez(os.path.join(param.model_root, 's_feat_pretrain'), x, y)
    # x, y = save_features(args, encoder, tgt_all_loader)
    # np.savez(os.path.join(param.model_root, 't_feat_pretrain'), x, y)

    # adapt
    print("=== Adapt encoder for target domain ===")
    src_encoder.load_state_dict(encoder.state_dict())
    if args.src_free:
        # using the same encoder vs. copying the encoder into src_encoder gives different baseline results
        s_res_features = src_gmm(args, encoder, src_loader)
        src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
        encoder, classifier = cdan_adapt_src_free(args, encoder, src_encoder,
                                                  discriminator, classifier,
                                                  src_loader, tgt_train_loader,
                                                  tgt_all_loader)
    elif args.data_free:
        s_res_features = src_gmm(args, encoder, src_loader)
        t_res_features = tgt_gmm(encoder, tgt_all_loader, 1)
        src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
        tgt_train_loader = t_numpy_dataloader(t_res_features, args.batch_size)
        encoder, classifier = cdan_adapt_data_free(args, encoder,
                                                   discriminator, classifier,
                                                   src_loader,
                                                   tgt_train_loader,
                                                   tgt_all_loader)
    else:
        encoder, classifier = cdan_adapt(args, encoder, discriminator,
                                         classifier, src_loader,
                                         tgt_train_loader, tgt_all_loader)
    # x, y = save_features(args, encoder, src_loader)
    # np.savez(os.path.join(param.model_root, 's_feat_adapt_cdan'), x, y)
    # x, y = save_features(args, encoder, tgt_all_loader)
    # np.savez(os.path.join(param.model_root, 't_feat_adapt_cdan'), x, y)

    # argument setting
    print(
        f"model_type: {args.model}; batch_size: {args.batch_size}; data_free: {args.data_free}; "
        f"src_free: {args.src_free}; pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; "
        f"src: {args.src}; tgt: {args.tgt}; kd: {args.kd}; dp: {args.dp}; ent: {args.ent}"
    )

    # eval target encoder on lambda0.1 set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaption <<<")
    tgt_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; data_free: {args.data_free}; "
        f"src_free: {args.src_free}; pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; "
        f"src: {args.src}; tgt: {args.tgt}; kd: {args.kd}; dp: {args.dp}; ent: {args.ent}\n\n"
    )
Example #17
def experiments(exp):

    #print(exp, case, affine, num_epochs)

    # init random seed
    #params.d_learning_rate = lr_d
    #params.c_learning_rate = lr_c
    init_random_seed(params.manual_seed)

    # load dataset
    src_dataset, tgt_dataset = exp.split('_')
    src_data_loader = get_data_loader(src_dataset)
    src_data_loader_eval = get_data_loader(src_dataset, train=False)

    tgt_data_loader = get_data_loader(tgt_dataset)
    tgt_data_loader_eval = get_data_loader(tgt_dataset, train=False)

    # load models
    src_encoder = init_model(net=LeNetEncoder(),
                             restore=params.src_encoder_restore,
                             exp=exp)
    src_classifier = init_model(net=LeNetClassifier(),
                                restore=params.src_classifier_restore,
                                exp=exp)
    tgt_encoder = init_model(net=LeNetEncoder(),
                             restore=params.tgt_encoder_restore,
                             exp=exp)
    critic = init_model(Discriminator(input_dims=params.d_input_dims,
                                      hidden_dims=params.d_hidden_dims,
                                      output_dims=params.d_output_dims),
                        exp=exp,
                        restore=params.d_model_restore)

    # train source model
    print("=== Training classifier for source domain ===")
    print(">>> Source Encoder <<<")
    print(src_encoder)
    print(">>> Source Classifier <<<")
    print(src_classifier)

    if not (src_encoder.restored and src_classifier.restored
            and params.src_model_trained):
        src_encoder, src_classifier = train_src(exp, src_encoder,
                                                src_classifier,
                                                src_data_loader,
                                                src_data_loader_eval)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluation(src_encoder, src_classifier, src_data_loader_eval)

    # train target encoder by GAN
    print("=== Training encoder for target domain ===")
    print(">>> Target Encoder <<<")
    print(tgt_encoder)
    print(">>> Critic <<<")
    print(critic)

    # init weights of target encoder with those of source encoder
    if not tgt_encoder.restored:
        tgt_encoder.load_state_dict(src_encoder.state_dict())

    if not (tgt_encoder.restored and critic.restored
            and params.tgt_model_trained):
        tgt_encoder = train_tgt(exp, src_encoder, tgt_encoder, critic,
                                src_classifier, src_data_loader,
                                tgt_data_loader, tgt_data_loader_eval)

    # eval target encoder on test set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> source only <<<")
    evaluation(src_encoder, src_classifier, tgt_data_loader_eval)
    print(">>> domain adaption <<<")
    evaluation(tgt_encoder, src_classifier, tgt_data_loader_eval)
Example #18
os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'

if __name__ == '__main__':
    # init random seed
    init_random_seed(params.manual_seed)

    # load dataset
    src_data_loader = get_visda(root=params.data_root, sub_dir='train', split='train')
    src_data_loader_eval = get_visda(root=params.data_root, sub_dir='train', split='test')

    tgt_data_loader = get_visda(root=params.data_root, sub_dir='validation', split='train')
    tgt_data_loader_eval = get_visda(root=params.data_root, sub_dir='validation', split='test')

    # load models
    src_encoder = init_model(net=ResNet34Encoder(),
                             restore=params.src_encoder_restore)
    src_classifier = init_model(net=Classifier(),
                                restore=params.src_classifier_restore)

    # train source model
    # print("=== Training classifier for source domain ===")
    # print(">>> Source Encoder <<<")
    # print(src_encoder)
    # print(">>> Source Classifier <<<")
    # print(src_classifier)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    eval_src(src_encoder, src_classifier, src_data_loader_eval)

    # eval target encoder on test set of target dataset
Example #19
                "Target RUBV3D2: Verification   IOU Precision = {:.4f}%, F1 IOU= {:.4f}%"
                .format(iou_acc_v, f1_v))
            (self.store_learning).write_file(
                (self.store_learning).IOU_acc_file_verif, iou_acc_v)
            (self.store_learning).write_file(
                (self.store_learning).f1_IOU_file_verif, f1_v)

if __name__ == '__main__':

    ##Final Model Classification
    from RUBV3D2 import UNet

    src_encoder = init_model(net=UNet(params.input_channels, params.nb_classes,
                                      params.default_layers,
                                      params.default_features_root,
                                      params.default_filter_width,
                                      params.distance_unet, params.bins,
                                      params.default_bn),
                             restore=params.src_encoder_restore)

    tgt_encoder = init_model(net=UNet(params.input_channels, params.nb_classes,
                                      params.default_layers,
                                      params.default_features_root,
                                      params.default_filter_width,
                                      params.distance_unet, params.bins,
                                      params.default_bn),
                             restore=params.tgt_encoder_restore)
    discri_net = init_model(net=Discriminator(
        input_dims=params.d_input_dims,
        output_dims=params.d_output_dims,
        len_feature_map=params.d_len_feature_maps,
Example #20
def main(args, f):
    set_seed(args.train_seed)
    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader, tgt_te = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        # encoder = BertEncoder()
        classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        # encoder = DistilBertEncoder()
        classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        # encoder = RobertaEncoder()
        classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        # encoder = DistilRobertaEncoder()
        classifier = RobertaClassifier()
    discriminator = Discriminator()

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        src_encoder = nn.DataParallel(src_encoder)
        classifier = nn.DataParallel(classifier)
        # encoder = nn.DataParallel(encoder)
        discriminator = nn.DataParallel(discriminator)

    if args.load:
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore_path=param.src_encoder_path)
        classifier = init_model(args,
                                classifier,
                                restore_path=param.src_classifier_path)
        # encoder = init_model(args, encoder, restore_path=param.tgt_encoder_path)
        # discriminator = init_model(args, discriminator, restore_path=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        classifier = init_model(args, classifier)

    # encoder = init_model(args, encoder)
    discriminator = init_model(args, discriminator)

    # train source model
    if args.pretrain:
        print("=== Training classifier for source domain ===")
        src_encoder, classifier = pretrain(args, src_encoder, classifier,
                                           src_loader)

        # save pretrained model
        # save_model(args, src_encoder, param.src_encoder_path)
        # save_model(args, classifier, param.src_classifier_path)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, src_encoder, classifier, src_loader)
    src_acc = evaluate(args, src_encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: No adapt acc on src data: {src_acc}\n')

    # adapt
    print("=== Adapt tgt encoder ===")
    # encoder.load_state_dict(src_encoder.state_dict())
    # if args.src_free:
    # s_res_features = src_gmm(args, src_encoder, src_loader)
    # src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
    # encoder = aad_adapt_src_free(args, src_encoder, encoder, discriminator,
    #                                  classifier, src_loader, tgt_train_loader, tgt_all_loader)
    # else:
    if args.adapt:
        encoder, classifier = shot_adapt(args, src_encoder, classifier,
                                         tgt_train_loader, tgt_all_loader,
                                         tgt_te)

    # save_model(args, encoder, param.tgt_encoder_path)

    # argument setting
    # print("=== Argument Setting ===")
    print(
        f"model_type: {args.model}; max_seq_len: {args.max_seq_length}; batch_size: {args.batch_size}; "
        f"pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; src: {args.src}; tgt: {args.tgt}; "
        f'src_free: {args.src_free}; dp: {args.dp}')

    # eval target encoder on lambda0.1 set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaption <<<")
    tgt_acc = evaluate(args, encoder, classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; pre_epochs: {args.pre_epochs}; "
        f"num_epochs: {args.num_epochs}; src_free: {args.src_free}; src: {args.src}; "
        f"tgt: {args.tgt}; dp: {args.dp}\n\n")
Example #21
    parser.add_argument('--out_dir', type=str, default=None)
    parser.add_argument('--fig_save_path', type=str, default=None)
    parser.add_argument('--num', type=int, default=10)
    parser.add_argument('--sample_h', type=int, default=256)
    parser.add_argument('--sample_w', type=int, default=256)
    parser.add_argument('--lower', type=float, default=-3)
    parser.add_argument('--upper', type=float, default=3)
    parser.add_argument('--starter_im1_path', type=str)
    parser.add_argument('--starter_im2_path', type=str)

    args = parser.parse_args()
    print(args)
    print('----------')

    latent_dim = 512
    model = init_model(args.model_class, args.restore_path, restore_required=True, latent_dim=latent_dim)
    model.eval()

    lower         = args.lower
    upper         = args.upper
    num           = args.num
    sample_h      = args.sample_h
    sample_w      = args.sample_w
    fig_save_path = args.fig_save_path
    out_dir       = args.out_dir

    starter_im1_path = args.starter_im1_path
    starter_im2_path = args.starter_im2_path

    # generate samples
    with torch.no_grad():
Example #22
File: main_ris.py Project: marcwww/LL
    opts.train_opts(parser)
    opt = parser.parse_args()

    location = opt.gpu if torch.cuda.is_available() and opt.gpu != -1 else 'cpu'
    device = torch.device(location)

    TXT, train_iter, valid_iter = \
        preproc.build_iters(ftrain=opt.ftrain,
                            fvalid=opt.fvalid,
                            bsz=opt.bsz,
                            min_freq=opt.min_freq,
                            device=opt.gpu)

    model = nets.BaseRNN(voc_size=len(TXT.vocab.itos),
                         edim=opt.edim,
                         hdim=opt.hdim,
                         padding_idx=TXT.vocab.stoi[PAD]).to(device)

    utils.init_model(model)

    optimizer = optim.Adam(params=filter(lambda p: p.requires_grad,
                                         model.parameters()),
                           lr=opt.lr)

    criterion = nn.CrossEntropyLoss(reduce=False)
    train(model, {
        'train': train_iter,
        'valid': valid_iter
    }, opt, criterion, optimizer)
Example #23
    data_sequences = []

    for i in range(len(data.review)):
        tokenized_text = tokenizer.tokenize(data.review[i])
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        data_sequences.append(indexed_tokens)

    data_loader = get_data_loader(data_sequences, data.label,
                                  args.batch_size, args.seqlen)

    encoder = BERTEncoder()
    if torch.cuda.device_count() > 1:
        encoder = torch.nn.DataParallel(encoder)
    if args.tgt:
        encoder = init_model(encoder, restore=param.tgt_encoder_restore)
    else:
        encoder = init_model(encoder, restore=param.src_encoder_restore)

    feats = []
    labels = []
    encoder.eval()
    print("=== start encoding data ===")
    for step, (reviews, label) in enumerate(data_loader):
        mask = (reviews != 0).long()
        feat = encoder(reviews, mask)
        feats.extend(feat.cpu().detach().numpy())
        labels.extend(label.cpu().numpy())
        print("Step [%.2d/%.2d]" % (step + 1, len(data_loader)), feat.size())

    feats = np.array(feats)
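Since the loop above only extracts features, it can also run under torch.no_grad() so that no autograd graph is built; an illustrative variant of the same loop:

with torch.no_grad():
    for step, (reviews, label) in enumerate(data_loader):
        mask = (reviews != 0).long()
        feat = encoder(reviews, mask)
        feats.extend(feat.cpu().numpy())    # no detach() needed under no_grad
        labels.extend(label.cpu().numpy())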
Example #24
        edge = {}
        edge['data'] = result
        scipy.io.savemat(join(save_dir, "%s.mat" % filename), edge)
        #result = Image.fromarray((result * 255).astype(np.uint8))
        #result.save(join(save_dir, "%s.png" % filename))
        print("Running test [%d/%d]" % (idx + 1, len(test_loader)))


test_dataset = BSDS_Loader(root='../DATA/data/HED-BSDS/')
test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         num_workers=0,
                         drop_last=True,
                         shuffle=False)
with open('../DATA/data/HED-BSDS/test.lst', 'r') as f:
    test_list = f.readlines()
test_list = [split(i.rstrip())[1] for i in test_list]
assert len(test_list) == len(test_loader), "%d vs %d" % (len(test_list),
                                                         len(test_loader))
print('Test size : %d' % len(test_loader))

save_dir = 'bsds'

model = RCF()
init_model(model)
#print(state_dict['conv1_1.weight'].shape)
#quit()
model.cuda()

test_model(model, test_loader, test_list, save_dir)
Example #25
from utils import get_data_loader, init_model, init_random_seed
from test import eval_tgt
from adopt import train_target

if __name__ == '__main__':
    # init random seed
    init_random_seed(params.manual_seed)

    # load dataset
    src_data_loader = get_data_loader(params.src_dataset)
    src_data_loader_eval = get_data_loader(params.src_dataset, train=False)
    tgt_data_loader = get_data_loader(params.tgt_dataset)
    tgt_data_loader_eval = get_data_loader(params.tgt_dataset, train=False)

    # load models
    src_encoder = init_model(net=LeNetEncoder(),
                             restore=params.src_encoder_restore)
    src_classifier = init_model(net=LeNetClassifier(),
                                restore=params.src_classifier_restore)
    tgt_encoder = init_model(net=LeNetEncoder(),
                             restore=params.tgt_encoder_restore)
    critic = init_model(Discriminator(input_dim=params.d_input_dims,
                                      hidden_dim=params.d_hidden_dims,
                                      output_dim=params.d_output_dims),
                        restore=params.d_model_restore)

    # train source model
    print("=== Training classifier for source domain ===")
    print(">>> Source Encoder <<<")
    print(src_encoder)
    print(">>> Source Classifier <<<")
    print(src_classifier)
Example #26
def main(data, val_data, config):

    wandb.init(project="prescalenorm")
    config = load_config(config)
    dataset = init_dataset(data)
    dataset.train_percent = config.train_data_percent
    dataset.set_data_source(config.data_source)
    loader = init_dataloader(dataset, batch_size=config.batch_size)
    model = init_model(type_vocab_size=config.type_vocab_size)

    print(f'from_scratch: {config.from_scratch}', f'prenorm: {config.prenorm}',
          f'tie_qk: {config.tie_query_key}', f'norm_type: {config.norm_type}')

    model = configure_model(model, config)

    val_dataset = init_dataset(val_data)
    val_dataset.train_percent = config.val_data_percent
    val_dataset.to_val_mode('scientsbank', 'answer')
    val_loader = init_dataloader(val_dataset,
                                 batch_size=config.batch_size,
                                 random=False)

    wandb.watch(model)
    model.train()
    cuda = torch.cuda.is_available()
    if cuda:
        model.cuda()
    optimizer = init_optimizer(model, config)
    lr_scheduler = transformers.get_cosine_schedule_with_warmup(
        optimizer, config.warmup_steps, config.total_steps)
    # best_val_acc = 0.0
    # torch.save(config, os.path.join(wandb.run.dir, 'model.config'))
    json.dump(config.__dict__,
              open(os.path.join(wandb.run.dir, 'model_config.json'), 'w'))
    wandb.save('*.config')
    best_f1 = 0.0
    patience = 0
    try:
        while lr_scheduler.last_epoch <= config.total_steps:
            av_epoch_loss = train_epoch(loader, model, optimizer, lr_scheduler,
                                        config, cuda)
            #tidy stuff up every epoch
            gc.collect()
            torch.cuda.empty_cache()

            p, r, f1, val_acc = val_loop(model, val_loader, cuda)
            log_line = f'precision: {p:.5f} | recall: {r:.5f} | f1: {f1:.5f} | accuracy: {val_acc:.5f}\n'
            print(log_line[:-1])
            print('av_epoch_loss', av_epoch_loss)
            if f1 > best_f1:
                print(
                    "saving to: ",
                    os.path.join(wandb.run.dir,
                                 'full_bert_model_best_f1.pt'))
                torch.save([model.state_dict(), config.__dict__],
                           os.path.join(wandb.run.dir,
                                        'full_bert_model_best_f1.pt'))
                wandb.save('*.pt')
                best_f1 = f1
                patience = max((0, patience - 1))
            else:
                patience += 1
                if patience >= 3:
                    break
            if av_epoch_loss < .2:
                break
        torch.save([model.state_dict(), config.__dict__],
                   os.path.join(
                       wandb.run.dir,
                       f'full_bert_model_{lr_scheduler.last_epoch}_steps.pt'))
        #Move stuff off the gpu
        model.cpu()
        #This is for sure a kinda dumb way of doing it, but the least mentally taxing right now
        optimizer = init_optimizer(model, config)
        gc.collect()
        torch.cuda.empty_cache()
        return model

    except KeyboardInterrupt:
        wandb.save('*.pt')
        #Move stuff off the gpu
        model.cpu()
        #This is for sure a kinda dumb way of doing it, but the least mentally taxing right now
        optimizer = init_optimizer(model, config)
        gc.collect()
        torch.cuda.empty_cache()
        return model
Example #27
def main():
    max_len = 50
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    temperature = params.temperature
    assert torch.cuda.is_available()

    if os.path.exists("data/vocab.json"):
        vocab = Vocabulary()
        with open('data/vocab.json', 'r') as fp:
            vocab.stoi = json.load(fp)

        for key, value in vocab.stoi.items():
            vocab.itos.append(key)
    else:
        print("vocabulary doesn't exist!")
        return

    print("loading model...")
    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer).cuda()

    encoder = init_model(encoder, restore=params.encoder_restore)
    Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
    manager = init_model(manager, restore=params.manager_restore)
    decoder = init_model(decoder, restore=params.decoder_restore)
    print("successfully loaded!\n")

    utterance = ""
    while True:
        if utterance == "exit":
            break
        k1 = input("Type first Knowledge: ").lower()
        while not k1:
            print("Please type first Knowledge.\n")
            k1 = input("Type first Knowledge: ").lower()

        k2 = input("Type second Knowledge: ").lower()
        while not k2:
            print("Please type second Knowledge.\n")
            k2 = input("Type second Knowledge: ").lower()

        k3 = input("Type third Knowledge: ").lower()
        while not k3:
            print("Please type third Knowledge.\n")
            k3 = input("Type third Knowledge: ").lower()

        K = [k1, k2, k3]
        K = knowledgeToIndex(K, vocab)
        K = Kencoder(K)
        print()

        while True:
            utterance = input("you: ").lower()
            while not utterance:
                print("Please type utterance.\n")
                utterance = input("you: ")
            if utterance == "change knowledge" or utterance == "exit":
                print()
                break

            X = []
            tokens = nltk.word_tokenize(utterance)
            for word in tokens:
                if word in vocab.stoi:
                    X.append(vocab.stoi[word])
                else:
                    X.append(vocab.stoi["<UNK>"])
            X = torch.LongTensor(X).unsqueeze(0).cuda()  # X: [1, x_seq_len]

            encoder_outputs, hidden, x = encoder(X)
            k_i = manager(x, None, K)
            outputs = torch.zeros(
                max_len, 1, n_vocab).cuda()  # outputs: [max_len, 1, n_vocab]
            hidden = hidden[decoder.n_layer:]
            output = torch.LongTensor([params.SOS]).cuda()

            for t in range(max_len):
                output, hidden, attn_weights = decoder(output, k_i, hidden,
                                                       encoder_outputs)
                outputs[t] = output
                output = output.data.max(1)[1]

            outputs = outputs.max(2)[1]

            answer = ""
            for idx in outputs:
                if idx == params.EOS:
                    break
                answer += vocab.itos[idx] + " "

            print("bot:", answer[:-1], "\n")
Example #28
import gym
import world
import utils
from Buffer import ReplayBuffer
from models import DQN
from world import Print, ARGS
from wrapper import WrapIt
from procedure import train_DQN

# ------------------------------------------------
env = gym.make('RiverraidNoFrameskip-v4')
env = WrapIt(env)
Print('ENV action', env.unwrapped.get_action_meanings())
Print('ENV observation',
      f"Image: {ARGS.imgDIM} X {ARGS.imgDIM} X {1}")  # grayscale images are assumed
# ------------------------------------------------
Optimizer = utils.getOptimizer()
schedule = utils.LinearSchedule(1000000, 0.1)

Game_buffer = ReplayBuffer(ARGS.buffersize, ARGS.framelen)

Q = utils.init_model(env, DQN).train().to(world.DEVICE)
Q_target = utils.init_model(env, DQN).eval().to(world.DEVICE)
# ------------------------------------------------
train_DQN(env,
          Q=Q,
          Q_target=Q_target,
          optimizer=Optimizer,
          replay_buffer=Game_buffer,
          exploration=schedule)
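utils.init_model(env, DQN) above presumably sizes the network from the wrapped environment; a hypothetical sketch under that assumption (the constructor arguments are invented for illustration):

def init_model(env, model_cls):
    # Build the network from the environment: stacked grayscale frames in,
    # one Q-value per discrete action out.
    n_actions = env.action_space.n
    return model_cls(in_channels=ARGS.framelen, num_actions=n_actions)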
Example #29
def main():
    args = parse_arguments()
    # argument setting
    print("=== Argument Setting ===")
    print("src: " + args.src)
    print("tgt: " + args.tgt)
    print("seed: " + str(args.seed))
    print("train_seed: " + str(args.train_seed))
    print("model_type: " + str(args.model))
    print("max_seq_length: " + str(args.max_seq_length))
    print("batch_size: " + str(args.batch_size))
    print("pre_epochs: " + str(args.pre_epochs))
    print("num_epochs: " + str(args.num_epochs))
    print("AD weight: " + str(args.alpha))
    print("KD weight: " + str(args.beta))
    print("temperature: " + str(args.temperature))
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    print("=== Processing datasets ===")
    if args.src in ['blog', 'airline', 'imdb']:
        src_x, src_y = CSV2Array(
            os.path.join('data', args.src, args.src + '.csv'))
    else:
        src_x, src_y = XML2Array(
            os.path.join('data', args.src, 'negative.review'),
            os.path.join('data', args.src, 'positive.review'))

    src_x, src_test_x, src_y, src_test_y = train_test_split(
        src_x, src_y, test_size=0.2, stratify=src_y, random_state=args.seed)

    if args.tgt in ['blog', 'airline', 'imdb']:
        tgt_x, tgt_y = CSV2Array(
            os.path.join('data', args.tgt, args.tgt + '.csv'))
    else:
        tgt_x, tgt_y = XML2Array(
            os.path.join('data', args.tgt, 'negative.review'),
            os.path.join('data', args.tgt, 'positive.review'))

    tgt_train_x, tgt_test_x, tgt_train_y, tgt_test_y = train_test_split(
        tgt_x, tgt_y, test_size=0.2, stratify=tgt_y, random_state=args.seed)

    if args.model in ['roberta', 'distilroberta']:
        src_features = roberta_convert_examples_to_features(
            src_x, src_y, args.max_seq_length, tokenizer)
        src_test_features = roberta_convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = roberta_convert_examples_to_features(
            tgt_x, tgt_y, args.max_seq_length, tokenizer)
        tgt_train_features = roberta_convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)
    else:
        src_features = convert_examples_to_features(src_x, src_y,
                                                    args.max_seq_length,
                                                    tokenizer)
        src_test_features = convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = convert_examples_to_features(tgt_x, tgt_y,
                                                    args.max_seq_length,
                                                    tokenizer)
        tgt_train_features = convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)

    # load dataset

    src_data_loader = get_data_loader(src_features, args.batch_size)
    src_data_eval_loader = get_data_loader(src_test_features, args.batch_size)
    tgt_data_train_loader = get_data_loader(tgt_train_features,
                                            args.batch_size)
    tgt_data_all_loader = get_data_loader(tgt_features, args.batch_size)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        tgt_encoder = BertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        tgt_encoder = DistilBertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        tgt_encoder = RobertaEncoder()
        src_classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        tgt_encoder = DistilRobertaEncoder()
        src_classifier = RobertaClassifier()
    discriminator = Discriminator()

    if args.load:
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore=param.src_encoder_path)
        src_classifier = init_model(args,
                                    src_classifier,
                                    restore=param.src_classifier_path)
        tgt_encoder = init_model(args,
                                 tgt_encoder,
                                 restore=param.tgt_encoder_path)
        discriminator = init_model(args,
                                   discriminator,
                                   restore=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        src_classifier = init_model(args, src_classifier)
        tgt_encoder = init_model(args, tgt_encoder)
        discriminator = init_model(args, discriminator)

    # train source model
    print("=== Training classifier for source domain ===")
    if args.pretrain:
        src_encoder, src_classifier = pretrain(args, src_encoder,
                                               src_classifier, src_data_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(src_encoder, src_classifier, src_data_loader)
    evaluate(src_encoder, src_classifier, src_data_eval_loader)
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)

    for params in src_encoder.parameters():
        params.requires_grad = False

    for params in src_classifier.parameters():
        params.requires_grad = False

    # train target encoder by GAN
    print("=== Training encoder for target domain ===")
    if args.adapt:
        tgt_encoder.load_state_dict(src_encoder.state_dict())
        tgt_encoder = adapt(args, src_encoder, tgt_encoder, discriminator,
                            src_classifier, src_data_loader,
                            tgt_data_train_loader, tgt_data_all_loader)

    # eval target encoder on lambda0.1 set of target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> source only <<<")
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)
    print(">>> domain adaption <<<")
    evaluate(tgt_encoder, src_classifier, tgt_data_all_loader)
Example #30
                                      train=True)
    src_data_loader_eval = get_data_loader(params.src_dataset,
                                           dataset_root=params.dataset_root,
                                           batch_size=params.batch_size,
                                           train=False)
    tgt_data_loader = get_data_loader(params.tgt_dataset,
                                      dataset_root=params.dataset_root,
                                      batch_size=params.batch_size,
                                      train=True)
    tgt_data_loader_eval = get_data_loader(params.tgt_dataset,
                                           dataset_root=params.dataset_root,
                                           batch_size=params.batch_size,
                                           train=False)

    # load models
    src_encoder = init_model(net=LeNetEncoder(),
                             restore=params.src_encoder_restore)
    src_classifier = init_model(net=LeNetClassifier(),
                                restore=params.src_classifier_restore)
    tgt_encoder = init_model(net=LeNetEncoder(),
                             restore=params.tgt_encoder_restore)
    critic = init_model(Discriminator(), restore=params.d_model_restore)

    # train source model
    print("=== Training classifier for source domain ===")

    if not (src_encoder.restored and src_classifier.restored
            and params.src_model_trained):
        src_encoder, src_classifier = train_src(src_encoder, src_classifier,
                                                src_data_loader, params)

    # eval source model