示例#1
0
def load_generator(is_wgan=False):
    from model.gan import Generator
    generator = Generator()
    if is_wgan:
        generator.filename = "wgan-generator.to"
    generator.load()
    generator.eval()
    return generator
示例#2
0
def main():
    # hyper parameters
    z_dim = 100
    epochs = 3000000
    batch_size = 128
    lr = 0.0002
    is_training = True

    img_path = glob.glob(os.path.normpath(os.path.join(PROJ_PATH, "data/faces/*.jpg")))
    dataset, img_shape, _ = make_anime_dataset(img_path, batch_size)
    print(dataset, img_shape)
    sample = next(iter(dataset))
    print(sample)
    dataset = dataset.repeat()
    db_iter = iter(dataset)

    generator = Generator()
    generator.build(input_shape=(None, z_dim))
    discriminator = Discriminator()
    discriminator.build(input_shape=(None, 64, 64, 3))

    g_optimizer = tf.optimizers.Adam(lr=lr, beta_1=0.5)
    d_optimizer = tf.optimizers.Adam(lr=lr, beta_1=0.5)

    for epoch in range(epochs):
        batch_z = tf.random.uniform([batch_size, z_dim], minval=-1., maxval=1.)
        batch_x = next(db_iter)

        # train D
        with tf.GradientTape() as tape:
            d_loss = d_loss_fn(generator, discriminator, batch_z, batch_x, is_training)

        grads = tape.gradient(d_loss, discriminator.trainable_variables)
        d_optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))

        with tf.GradientTape() as tape:
            g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
        grads = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

        if epoch % 100 == 0:
            print("Epoch: {}, d_loss: {}, g_loss: {}"
                  .format(epoch, d_loss, g_loss))

            z = tf.random.uniform([100, z_dim])
            fake_image = generator(z, training=False)
            save_image_path = os.path.normpath(os.path.join(PROJ_PATH, "data/fake-faces/gan-%d.png" % epoch))
            save_result(fake_image.numpy(), 10, save_image_path, color_mode='RGB')
示例#3
0
from collections import deque

from model.gan import Generator, Discriminator
from util import device

from util import create_text_slice
from datasets import VoxelDataset
from torch.utils.data import DataLoader

show_viewer = "nogui" not in sys.argv

if show_viewer:
    from rendering import MeshRenderer
    viewer = MeshRenderer()

generator = Generator()
generator.filename = "wgan-generator.to"

critic = Discriminator()
critic.filename = "wgan-critic.to"
critic.use_sigmoid = False

if "continue" in sys.argv:
    generator.load()
    critic.load()

LEARN_RATE = 0.00005
BATCH_SIZE = 64
CRITIC_UPDATES_PER_GENERATOR_UPDATE = 5
CRITIC_WEIGHT_LIMIT = 0.01
示例#4
0
def main(args):
    logger.info('Checking...')
    SEED = args.seed
    check_manual_seed(SEED)
    check_args(args)
    gross_result['seed'] = args.seed

    logger.info('Loading config...')
    bert_config = Config('config/bert.ini')
    bert_config = bert_config(args.bert_type)

    # for oos-eval dataset
    data_config = Config('config/data.ini')
    data_config = data_config(args.dataset)

    # Prepare data processor
    data_path = os.path.join(data_config['DataDir'],
                             data_config[args.data_file])  # 把目录和文件名合成一个路径
    label_path = data_path.replace('.json', '.label')

    if args.dataset == 'oos-eval':
        processor = OOSProcessor(bert_config, maxlen=32)
    elif args.dataset == 'smp':
        processor = SMPProcessor(bert_config, maxlen=32)
    else:
        raise ValueError('The dataset {} is not supported.'.format(
            args.dataset))

    processor.load_label(
        label_path)  # Adding label_to_id and id_to_label ot processor.

    n_class = len(processor.id_to_label)
    config = vars(args)  # 返回参数字典
    config['gan_save_path'] = os.path.join(args.output_dir, 'save', 'gan.pt')
    config['bert_save_path'] = os.path.join(args.output_dir, 'save', 'bert.pt')
    config['n_class'] = n_class

    logger.info('config:')
    logger.info(config)

    D = Discriminator(config)
    G = Generator(config)
    E = BertModel.from_pretrained(
        bert_config['PreTrainModelDir'])  # Bert encoder

    if args.fine_tune:
        for param in E.parameters():
            param.requires_grad = True
    else:
        for param in E.parameters():
            param.requires_grad = False

    D.to(device)
    G.to(device)
    E.to(device)

    global_step = 0

    def train(train_dataset, dev_dataset):
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=args.train_batch_size,
                                      shuffle=True,
                                      num_workers=2)

        nonlocal global_step
        n_sample = len(train_dataloader)
        early_stopping = EarlyStopping(args.patience, logger=logger)
        # Loss function
        adversarial_loss = torch.nn.BCELoss().to(device)
        classified_loss = torch.nn.CrossEntropyLoss().to(device)

        # Optimizers
        optimizer_G = torch.optim.Adam(G.parameters(),
                                       lr=args.G_lr)  # optimizer for generator
        optimizer_D = torch.optim.Adam(
            D.parameters(), lr=args.D_lr)  # optimizer for discriminator
        optimizer_E = AdamW(E.parameters(), args.bert_lr)

        G_total_train_loss = []
        D_total_fake_loss = []
        D_total_real_loss = []
        FM_total_train_loss = []
        D_total_class_loss = []
        valid_detection_loss = []
        valid_oos_ind_recall = []
        valid_oos_ind_f_score = []

        train_loss = []
        iteration = 0

        for i in range(args.n_epoch):

            # Initialize model state
            G.train()
            D.train()
            E.train()

            G_train_loss = 0
            D_fake_loss = 0
            D_real_loss = 0
            FM_train_loss = 0
            D_class_loss = 0

            total_loss = 0

            for sample in tqdm.tqdm(train_dataloader):
                sample = (i.to(device) for i in sample)
                token, mask, type_ids, y = sample
                batch = len(token)

                # the label used to train generator and discriminator.
                valid_label = FloatTensor(batch, 1).fill_(1.0).detach()
                fake_label = FloatTensor(batch, 1).fill_(0.0).detach()

                optimizer_E.zero_grad()
                sequence_output, pooled_output = E(token,
                                                   mask,
                                                   type_ids,
                                                   return_dict=False)
                real_feature = pooled_output

                # train D on real
                optimizer_D.zero_grad()
                real_f_vector, discriminator_output, classification_output = D(
                    real_feature, return_feature=True)
                discriminator_output = discriminator_output.squeeze()
                real_loss = adversarial_loss(discriminator_output,
                                             (y != 0.0).float())
                if n_class > 2:  # 大于2表示除了训练判别器还要训练分类器
                    class_loss = classified_loss(classification_output,
                                                 y.long())
                    real_loss += class_loss
                    D_class_loss += class_loss.detach()
                real_loss.backward()

                # # train D on fake
                z = FloatTensor(np.random.normal(
                    0, 1, (batch, args.G_z_dim))).to(device)
                fake_feature = G(z).detach()
                fake_discriminator_output = D.detect_only(fake_feature)
                fake_loss = args.beta * adversarial_loss(
                    fake_discriminator_output, fake_label)
                fake_loss.backward()
                optimizer_D.step()

                if args.fine_tune:
                    optimizer_E.step()

                # train G
                optimizer_G.zero_grad()
                z = FloatTensor(np.random.normal(
                    0, 1, (batch, args.G_z_dim))).to(device)
                fake_f_vector, D_decision = D.detect_only(G(z),
                                                          return_feature=True)
                gd_loss = adversarial_loss(D_decision, valid_label)
                fm_loss = torch.abs(
                    torch.mean(real_f_vector.detach(), 0) -
                    torch.mean(fake_f_vector, 0)).mean()
                g_loss = gd_loss + 0 * fm_loss
                g_loss.backward()
                optimizer_G.step()

                global_step += 1

                D_fake_loss += fake_loss.detach()
                D_real_loss += real_loss.detach()
                G_train_loss += g_loss.detach() + fm_loss.detach()
                FM_train_loss += fm_loss.detach()

            # train_loss.append(total_loss / n_sample)
            # iteration += 1

            logger.info('[Epoch {}] Train: D_fake_loss: {}'.format(
                i, D_fake_loss / n_sample))
            logger.info('[Epoch {}] Train: D_real_loss: {}'.format(
                i, D_real_loss / n_sample))
            logger.info('[Epoch {}] Train: D_class_loss: {}'.format(
                i, D_class_loss / n_sample))
            logger.info('[Epoch {}] Train: G_train_loss: {}'.format(
                i, G_train_loss / n_sample))
            logger.info('[Epoch {}] Train: FM_train_loss: {}'.format(
                i, FM_train_loss / n_sample))
            logger.info(
                '---------------------------------------------------------------------------'
            )

            D_total_fake_loss.append(D_fake_loss / n_sample)
            D_total_real_loss.append(D_real_loss / n_sample)
            D_total_class_loss.append(D_class_loss / n_sample)
            G_total_train_loss.append(G_train_loss / n_sample)
            FM_total_train_loss.append(FM_train_loss / n_sample)

            if dev_dataset:
                logger.info(
                    '#################### eval result at step {} ####################'
                    .format(global_step))
                eval_result = eval(dev_dataset)

                valid_detection_loss.append(eval_result['detection_loss'])
                valid_oos_ind_recall.append(eval_result['oos_ind_recall'])
                valid_oos_ind_f_score.append(eval_result['oos_ind_f_score'])

                # 1 表示要保存模型
                # 0 表示不需要保存模型
                # -1 表示不需要模型,且超过了patience,需要early stop
                signal = early_stopping(-eval_result['eer'])
                if signal == -1:
                    break
                elif signal == 0:
                    pass
                elif signal == 1:
                    save_gan_model(D, G, config['gan_save_path'])
                    if args.fine_tune:
                        save_model(E,
                                   path=config['bert_save_path'],
                                   model_name='bert')

                logger.info(eval_result)
                logger.info('valid_eer: {}'.format(eval_result['eer']))
                logger.info('valid_oos_ind_precision: {}'.format(
                    eval_result['oos_ind_precision']))
                logger.info('valid_oos_ind_recall: {}'.format(
                    eval_result['oos_ind_recall']))
                logger.info('valid_oos_ind_f_score: {}'.format(
                    eval_result['oos_ind_f_score']))
                logger.info('valid_fpr95: {}'.format(
                    ErrorRateAt95Recall(eval_result['all_binary_y'],
                                        eval_result['y_score'])))

        if args.patience >= args.n_epoch:
            save_gan_model(D, G, config['gan_save_path'])
            if args.fine_tune:
                save_model(E, path=config['bert_save_path'], model_name='bert')

        freeze_data['D_total_fake_loss'] = D_total_fake_loss
        freeze_data['D_total_real_loss'] = D_total_real_loss
        freeze_data['D_total_class_loss'] = D_total_class_loss
        freeze_data['G_total_train_loss'] = G_total_train_loss
        freeze_data['FM_total_train_loss'] = FM_total_train_loss
        freeze_data['valid_real_loss'] = valid_detection_loss
        freeze_data['valid_oos_ind_recall'] = valid_oos_ind_recall
        freeze_data['valid_oos_ind_f_score'] = valid_oos_ind_f_score

        # from utils.visualization import draw_curve
        # draw_curve(train_loss, iteration, 'train_loss', args.output_dir)

    def eval(dataset):
        dev_dataloader = DataLoader(dataset,
                                    batch_size=args.predict_batch_size,
                                    shuffle=False,
                                    num_workers=2)
        n_sample = len(dev_dataloader)
        result = dict()

        # Loss function
        detection_loss = torch.nn.BCELoss().to(device)
        classified_loss = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)

        G.eval()
        D.eval()
        E.eval()

        all_detection_preds = []
        all_class_preds = []

        for sample in tqdm.tqdm(dev_dataloader):
            sample = (i.to(device) for i in sample)
            token, mask, type_ids, y = sample
            batch = len(token)

            # -------------------------evaluate D------------------------- #
            # BERT encode sentence to feature vector

            with torch.no_grad():
                sequence_output, pooled_output = E(token,
                                                   mask,
                                                   type_ids,
                                                   return_dict=False)
                real_feature = pooled_output

                # 大于2表示除了训练判别器还要训练分类器
                if n_class > 2:
                    f_vector, discriminator_output, classification_output = D(
                        real_feature, return_feature=True)
                    all_detection_preds.append(discriminator_output)
                    all_class_preds.append(classification_output)

                # 只预测判别器
                else:
                    f_vector, discriminator_output = D.detect_only(
                        real_feature, return_feature=True)
                    all_detection_preds.append(discriminator_output)

        all_y = LongTensor(
            dataset.dataset[:, -1].astype(int)).cpu()  # [length, n_class]
        all_binary_y = (all_y != 0).long()  # [length, 1] label 0 is oos
        all_detection_preds = torch.cat(all_detection_preds,
                                        0).cpu()  # [length, 1]
        all_detection_binary_preds = convert_to_int_by_threshold(
            all_detection_preds.squeeze())  # [length, 1]

        # 计算损失
        detection_loss = detection_loss(all_detection_preds,
                                        all_binary_y.float())
        result['detection_loss'] = detection_loss

        if n_class > 2:
            class_one_hot_preds = torch.cat(all_class_preds,
                                            0).detach().cpu()  # one hot label
            class_loss = classified_loss(class_one_hot_preds,
                                         all_y)  # compute loss
            all_class_preds = torch.argmax(class_one_hot_preds, 1)  # label
            class_acc = metrics.ind_class_accuracy(
                all_class_preds, all_y, oos_index=0)  # accuracy for ind class
            logger.info(
                metrics.classification_report(
                    all_y, all_class_preds,
                    target_names=processor.id_to_label))

        logger.info(
            metrics.classification_report(all_binary_y,
                                          all_detection_binary_preds,
                                          target_names=['oos', 'in']))

        # report
        oos_ind_precision, oos_ind_recall, oos_ind_fscore, _ = metrics.binary_recall_fscore(
            all_detection_binary_preds, all_binary_y)
        detection_acc = metrics.accuracy(all_detection_binary_preds,
                                         all_binary_y)

        y_score = all_detection_preds.squeeze().tolist()
        eer = metrics.cal_eer(all_binary_y, y_score)

        result['eer'] = eer
        result['all_detection_binary_preds'] = all_detection_binary_preds
        result['detection_acc'] = detection_acc
        result['all_binary_y'] = all_binary_y
        result['oos_ind_precision'] = oos_ind_precision
        result['oos_ind_recall'] = oos_ind_recall
        result['oos_ind_f_score'] = oos_ind_fscore
        result['y_score'] = y_score
        result['auc'] = roc_auc_score(all_binary_y, y_score)
        result['all_binary_y'] = all_binary_y
        if n_class > 2:
            result['class_loss'] = class_loss
            result['class_acc'] = class_acc

        freeze_data['valid_all_y'] = all_y
        freeze_data['vaild_all_pred'] = all_detection_binary_preds
        freeze_data['valid_score'] = y_score

        return result

    def test(dataset):
        # load BERT and GAN
        load_gan_model(D, G, config['gan_save_path'])
        if args.fine_tune:
            load_model(E, path=config['bert_save_path'], model_name='bert')

        test_dataloader = DataLoader(dataset,
                                     batch_size=args.predict_batch_size,
                                     shuffle=False,
                                     num_workers=2)
        n_sample = len(test_dataloader)
        result = dict()

        # Loss function
        detection_loss = torch.nn.BCELoss().to(device)
        classified_loss = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)

        G.eval()
        D.eval()
        E.eval()

        all_detection_preds = []
        all_class_preds = []
        all_features = []

        for sample in tqdm.tqdm(test_dataloader):
            sample = (i.to(device) for i in sample)
            token, mask, type_ids, y = sample
            batch = len(token)

            # -------------------------evaluate D------------------------- #
            # BERT encode sentence to feature vector

            with torch.no_grad():
                sequence_output, pooled_output = E(token,
                                                   mask,
                                                   type_ids,
                                                   return_dict=False)
                real_feature = pooled_output

                # 大于2表示除了训练判别器还要训练分类器
                if n_class > 2:
                    f_vector, discriminator_output, classification_output = D(
                        real_feature, return_feature=True)
                    all_detection_preds.append(discriminator_output)
                    all_class_preds.append(classification_output)

                # 只预测判别器
                else:
                    f_vector, discriminator_output = D.detect_only(
                        real_feature, return_feature=True)
                    all_detection_preds.append(discriminator_output)
                if args.do_vis:
                    all_features.append(f_vector)

        all_y = LongTensor(
            dataset.dataset[:, -1].astype(int)).cpu()  # [length, n_class]
        all_binary_y = (all_y != 0).long()  # [length, 1] label 0 is oos
        all_detection_preds = torch.cat(all_detection_preds,
                                        0).cpu()  # [length, 1]
        all_detection_binary_preds = convert_to_int_by_threshold(
            all_detection_preds.squeeze())  # [length, 1]

        # 计算损失
        detection_loss = detection_loss(all_detection_preds,
                                        all_binary_y.float())
        result['detection_loss'] = detection_loss

        if n_class > 2:
            class_one_hot_preds = torch.cat(all_class_preds,
                                            0).detach().cpu()  # one hot label
            class_loss = classified_loss(class_one_hot_preds,
                                         all_y)  # compute loss
            all_class_preds = torch.argmax(class_one_hot_preds, 1)  # label
            class_acc = metrics.ind_class_accuracy(
                all_class_preds, all_y, oos_index=0)  # accuracy for ind class
            logger.info(
                metrics.classification_report(
                    all_y, all_class_preds,
                    target_names=processor.id_to_label))

        logger.info(
            metrics.classification_report(all_binary_y,
                                          all_detection_binary_preds,
                                          target_names=['oos', 'in']))

        # report
        oos_ind_precision, oos_ind_recall, oos_ind_fscore, _ = metrics.binary_recall_fscore(
            all_detection_binary_preds, all_binary_y)
        detection_acc = metrics.accuracy(all_detection_binary_preds,
                                         all_binary_y)

        y_score = all_detection_preds.squeeze().tolist()
        eer = metrics.cal_eer(all_binary_y, y_score)

        result['eer'] = eer
        result['all_detection_binary_preds'] = all_detection_binary_preds
        result['detection_acc'] = detection_acc
        result['all_binary_y'] = all_binary_y
        result['all_y'] = all_y
        result['oos_ind_precision'] = oos_ind_precision
        result['oos_ind_recall'] = oos_ind_recall
        result['oos_ind_f_score'] = oos_ind_fscore
        result['score'] = y_score
        result['y_score'] = y_score
        result['auc'] = roc_auc_score(all_binary_y, y_score)
        if n_class > 2:
            result['class_loss'] = class_loss
            result['class_acc'] = class_acc
        if args.do_vis:
            all_features = torch.cat(all_features, 0).cpu().numpy()
            result['all_features'] = all_features

        freeze_data['test_all_y'] = all_y.tolist()
        freeze_data['test_all_pred'] = all_detection_binary_preds.tolist()
        freeze_data['test_score'] = y_score

        return result

    def get_fake_feature(num_output):
        """
        生成一定数量的假特征
        """
        G.eval()
        fake_features = []
        start = 0
        batch = args.predict_batch_size
        with torch.no_grad():
            while start < num_output:
                end = min(num_output, start + batch)
                z = FloatTensor(
                    np.random.normal(0, 1, size=(end - start, args.G_z_dim)))
                fake_feature = G(z)
                f_vector, _ = D.detect_only(fake_feature, return_feature=True)
                fake_features.append(f_vector)
                start += batch
            return torch.cat(fake_features, 0).cpu().numpy()

    if args.do_train:
        if config['data_file'].startswith('binary'):
            text_train_set = processor.read_dataset(data_path, ['train'])
            text_dev_set = processor.read_dataset(data_path, ['val'])
        elif config['dataset'] == 'oos-eval':
            text_train_set = processor.read_dataset(data_path,
                                                    ['train', 'oos_train'])
            text_dev_set = processor.read_dataset(data_path,
                                                  ['val', 'oos_val'])
        elif config['dataset'] == 'smp':
            text_train_set = processor.read_dataset(data_path, ['train'])
            text_dev_set = processor.read_dataset(data_path, ['val'])

        # 去除训练集中的ood数据
        if config['dataset'] == 'smp':
            text_train_set = [
                sample for sample in text_train_set
                if sample['domain'] != 'chat'
            ]
        else:
            text_train_set = [
                sample for sample in text_train_set if sample[-1] != 'oos'
            ]
        train_features = processor.convert_to_ids(text_train_set)
        train_dataset = OOSDataset(train_features)
        dev_features = processor.convert_to_ids(text_dev_set)
        dev_dataset = OOSDataset(dev_features)

        train(train_dataset, dev_dataset)

    if args.do_eval:
        logger.info(
            '#################### eval result at step {} ####################'.
            format(global_step))
        if config['data_file'].startswith('binary'):
            text_dev_set = processor.read_dataset(data_path, ['val'])
        elif config['dataset'] == 'oos-eval':
            text_dev_set = processor.read_dataset(data_path,
                                                  ['val', 'oos_val'])
        elif config['dataset'] == 'smp':
            text_dev_set = processor.read_dataset(data_path, ['val'])

        dev_features = processor.convert_to_ids(text_dev_set)
        dev_dataset = OOSDataset(dev_features)
        eval_result = eval(dev_dataset)
        logger.info(eval_result)
        logger.info('eval_eer: {}'.format(eval_result['eer']))
        logger.info('eval_oos_ind_precision: {}'.format(
            eval_result['oos_ind_precision']))
        logger.info('eval_oos_ind_recall: {}'.format(
            eval_result['oos_ind_recall']))
        logger.info('eval_oos_ind_f_score: {}'.format(
            eval_result['oos_ind_f_score']))
        logger.info('eval_auc: {}'.format(eval_result['auc']))
        logger.info('eval_fpr95: {}'.format(
            ErrorRateAt95Recall(eval_result['all_binary_y'],
                                eval_result['y_score'])))
        gross_result['eval_oos_ind_precision'] = eval_result[
            'oos_ind_precision']
        gross_result['eval_oos_ind_recall'] = eval_result['oos_ind_recall']
        gross_result['eval_oos_ind_f_score'] = eval_result['oos_ind_f_score']
        gross_result['eval_eer'] = eval_result['eer']
        gross_result['eval_fpr95'] = ErrorRateAt95Recall(
            eval_result['all_binary_y'], eval_result['y_score'])
        gross_result['eval_auc'] = eval_result['auc']

    if args.do_test:
        logger.info(
            '#################### test result at step {} ####################'.
            format(global_step))
        if config['data_file'].startswith('binary'):
            text_test_set = processor.read_dataset(data_path, ['test'])
        elif config['dataset'] == 'oos-eval':
            text_test_set = processor.read_dataset(data_path,
                                                   ['test', 'oos_test'])
        elif config['dataset'] == 'smp':
            text_test_set = processor.read_dataset(data_path, ['test'])

        test_features = processor.convert_to_ids(text_test_set)
        test_dataset = OOSDataset(test_features)
        test_result = test(test_dataset)
        logger.info(test_result)
        logger.info('test_eer: {}'.format(test_result['eer']))
        logger.info('test_ood_ind_precision: {}'.format(
            test_result['oos_ind_precision']))
        logger.info('test_ood_ind_recall: {}'.format(
            test_result['oos_ind_recall']))
        logger.info('test_ood_ind_f_score: {}'.format(
            test_result['oos_ind_f_score']))
        logger.info('test_auc: {}'.format(test_result['auc']))
        logger.info('test_fpr95: {}'.format(
            ErrorRateAt95Recall(test_result['all_binary_y'],
                                test_result['y_score'])))
        gross_result['test_oos_ind_precision'] = test_result[
            'oos_ind_precision']
        gross_result['test_oos_ind_recall'] = test_result['oos_ind_recall']
        gross_result['test_oos_ind_f_score'] = test_result['oos_ind_f_score']
        gross_result['test_eer'] = test_result['eer']
        gross_result['test_fpr95'] = ErrorRateAt95Recall(
            test_result['all_binary_y'], test_result['y_score'])
        gross_result['test_auc'] = test_result['auc']
        my_plot_roc(test_result['all_binary_y'], test_result['y_score'],
                    os.path.join(args.output_dir, 'roc_curve.png'))
        save_result(test_result, os.path.join(args.output_dir, 'test_result'))

        # 输出错误cases
        if config['dataset'] == 'oos-eval':
            texts = [line[0] for line in text_test_set]
        elif config['dataset'] == 'smp':
            texts = [line['text'] for line in text_test_set]
        else:
            raise ValueError('The dataset {} is not supported.'.format(
                args.dataset))

        output_cases(texts, test_result['all_binary_y'],
                     test_result['all_detection_binary_preds'],
                     os.path.join(args.output_dir,
                                  'test_cases.csv'), processor)

        # confusion matrix
        plot_confusion_matrix(test_result['all_binary_y'],
                              test_result['all_detection_binary_preds'],
                              args.output_dir)

        if args.do_vis:
            # [2 * length, feature_fim]
            features = np.concatenate([
                test_result['all_features'],
                get_fake_feature(len(test_dataset) // 2)
            ],
                                      axis=0)
            features = TSNE(n_components=2, verbose=1,
                            n_jobs=-1).fit_transform(
                                features)  # [2 * length, 2]
            # [2 * length, 1]
            if n_class > 2:
                labels = np.concatenate([
                    test_result['all_y'],
                    np.array([-1] * (len(test_dataset) // 2))
                ], 0).reshape((-1, 1))
            else:
                labels = np.concatenate([
                    test_result['all_binary_y'],
                    np.array([-1] * (len(test_dataset) // 2))
                ], 0).reshape((-1, 1))
            # [2 * length, 3]
            data = np.concatenate([features, labels], 1)
            fig = scatter_plot(data, processor)
            fig.savefig(os.path.join(args.output_dir, 'plot.png'))
            fig.show()
            freeze_data['feature_label'] = data

    with open(os.path.join(config['output_dir'], 'freeze_data.pkl'),
              'wb') as f:
        pickle.dump(freeze_data, f)
    df = pd.DataFrame(
        data={
            'valid_y': freeze_data['valid_all_y'],
            'valid_score': freeze_data['valid_score'],
        })
    df.to_csv(os.path.join(config['output_dir'], 'valid_score.csv'))

    df = pd.DataFrame(
        data={
            'test_y': freeze_data['test_all_y'],
            'test_score': freeze_data['test_score']
        })
    df.to_csv(os.path.join(config['output_dir'], 'test_score.csv'))

    if args.result != 'no':
        pd_result = pd.DataFrame(gross_result)
        if args.seed == 16:
            pd_result.to_csv(args.result + '_gross_result.csv', index=False)
        else:
            pd_result.to_csv(args.result + '_gross_result.csv',
                             index=False,
                             mode='a',
                             header=False)
示例#5
0
from model.gan import Generator
from model.gan import Discriminator
import torch

dummy_x = torch.ones(size=(1, 100, 1, 1))
generator = Generator(latent_size=100)
discriminator = Discriminator(feature_map_size=64)

generator = generator.to("cpu")
discriminator = discriminator.to("cpu")

gen_y = generator(dummy_x)
gen_y.shape

dis_y = discriminator(gen_y)
print(dis_y)