Example #1
    def __init__(self, _vae: VaeWrapper, _classifier: LatentSpaceLEC,
                 _model_under_test: LECUnderTest, dataset: str,
                 output_dir: str, pso_options: dict, n_iter: int):
        """ Initialize a test generator that synthesizes new
        high-uncertainty image inputs for a given pair of VAE and a classifier.

        :param _vae: A VAE model
        :param _classifier: A classifier model attached to the latent layer
                            of the VAE
        :param _model_under_test: Model under test
        :param dataset: name of a dataset
        :param output_dir: (str) Output directory path
        :param pso_options: a dictionary containing PSO hyper-parameters,
        which are {c1, c2, w, k, p}.
        :param n_iter: PSO iteration
        """
        self.threshold = 1.0
        self.vae = _vae
        self.classifier = _classifier
        self.model_under_test = _model_under_test
        if not (os.path.exists(output_dir) and os.path.isdir(output_dir)):
            os.mkdir(output_dir)
        self.output_dir = output_dir
        self.total_cnt = 0  # total number of test generation attempted

        self.xs, self.dim = load_dataset(dataset, 'train', normalize=True)
        self.ys, self.n_classes = load_dataset(dataset, 'train', label=True)

        # self.n_particle = testgen_config["optimizer"]["n_particle"]
        self.n_iter = n_iter
        self.options = pso_options
        self.topology = Ring(static=False)
        min_bound = np.array([-1.0] * self.vae.latent_dim)
        max_bound = np.array([1.0] * self.vae.latent_dim)
        self.bounds = (min_bound, max_bound)
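The options {c1, c2, w, k, p}, the Ring topology, the latent-space bounds, and the iteration count set up above are the ingredients a pyswarms local-best PSO expects. The sketch below shows, under that assumption, how such a search over the latent space could be driven; latent_cost and latent_dim are hypothetical placeholders for the generator's actual objective and latent dimensionality.

import numpy as np
import pyswarms as ps

def latent_cost(z_batch):
    # pyswarms expects one cost per particle: shape (n_particles,).
    # Placeholder objective; the real one queries the VAE and the LEC.
    return np.linalg.norm(z_batch, axis=1)

latent_dim = 8  # hypothetical latent dimensionality
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9, 'k': 5, 'p': 2}
bounds = (np.full(latent_dim, -1.0), np.full(latent_dim, 1.0))
optimizer = ps.single.LocalBestPSO(n_particles=20, dimensions=latent_dim,
                                   options=options, bounds=bounds)
best_cost, best_z = optimizer.optimize(latent_cost, iters=50)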
Example #2
def exp_train_ood_classifier(args):
    """ How well does each metric separate the in-distribution and
    out-of-distribution samples?
    """
    x_train, __ = data.load_dataset(args.dataset, 'train', normalize=True)
    y_train, __ = data.load_dataset(args.dataset, 'train', label=True)
    # x_train, y_train = data.get_test_dataset(args.dataset, val=False)
    if args.drop:
        x_train, y_train = data.drop_class_except(x_train, y_train, args.drop)

    x_val, y_val = data.get_test_dataset(args.dataset, val=True)
    if args.drop:
        x_val, y_val = data.drop_class_except(x_val, y_val, args.drop)

    signal_train = get_signals(args, x_train)
    signal_val = get_signals(args, x_val)

    lec = load_model(args.lec_path)
    y_pred_train = lec.predict(x_train)
    y_pred_val = lec.predict(x_val)
    ood_train = (y_train != np.argmax(y_pred_train, axis=1)).astype(int)
    ood_val = (y_val != np.argmax(y_pred_val, axis=1)).astype(int)

    model = get_ood_model((signal_train.shape[1], ))
    model.fit(signal_train,
              ood_train,
              validation_data=(signal_val, ood_val),
              epochs=100,
              batch_size=16,
              shuffle=True)
    print('weights', model.layers[-1].get_weights())
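get_ood_model is not defined in this snippet. Given the one-dimensional signal vectors and the binary OOD labels above, a plausible minimal form is a single-unit logistic-regression head in Keras; the sketch below is an assumption, not the project's actual definition.

import tensorflow as tf

def get_ood_model(input_shape):
    # One sigmoid unit over the signal vector; its weights are easy to
    # inspect, matching the `model.layers[-1].get_weights()` print above.
    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=input_shape),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model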
Example #3
def plot_latent(vae: VaeWrapper, sess: tf1.Session, dataset: str,
                output_path: str, stage: int, posterior: bool = False):
    """ Scatter-plot the encoded test set in the latent space, colored by
    class label, and save the figure to `output_path`. """
    xs, dim = load_dataset(dataset, 'test')
    ys, n_classes = load_dataset(dataset, 'test', label=True)
    us = vae.encode(xs, stage)
    if posterior:
        assert stage == 1
        us = vae.decode(us)

    plt.figure(figsize=(12, 10))
    plt.scatter(us[:, 0], us[:, 1], c=ys, cmap=plt.cm.get_cmap('jet', 10))
    plt.colorbar()
    plt.xlabel("u[0]")
    plt.ylabel("u[1]")
    plt.savefig(output_path)
    plt.show()

    return

    # NOTE: the code below is disabled by the early return above.
    # It renders a 30x30 2D manifold of decoded digits.
    n = 30
    digit_size = 28
    figure = np.zeros((digit_size * n, digit_size * n))
    # linearly spaced coordinates corresponding to the 2D plot
    # of digit classes in the latent space
    grid_x = np.linspace(-3, 3, n)
    grid_y = np.linspace(-3, 3, n)[::-1]

    for i, yi in enumerate(grid_y):
        for j, xi in enumerate(grid_x):
            # print("xi: {}, yi: {}".format(xi, yi))
            z_sample = np.array([[xi, yi]])
            if vae.is_conditional_decoder:
                # TODO: FIX conditional decoding
                label = np.array([random.randint(0, 9) / 10.])
                x_decoded = vae.decoder._get_discriminator([z_sample, label])
            else:
                x_decoded = vae.decoder._get_discriminator(z_sample)
            digit = x_decoded[0].reshape(digit_size, digit_size)
            figure[i * digit_size:(i + 1) * digit_size,
                   j * digit_size:(j + 1) * digit_size] = digit

    plt.figure(figsize=(10, 10))
    start_range = digit_size // 2
    end_range = n * digit_size + start_range + 1
    pixel_range = np.arange(start_range, end_range, digit_size)
    sample_range_x = np.round(grid_x, 1)
    sample_range_y = np.round(grid_y, 1)
    plt.xticks(pixel_range, sample_range_x)
    plt.yticks(pixel_range, sample_range_y)
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.imshow(figure, cmap='Greys_r')
    plt.savefig(output_path)
    plt.show()
Example #4
def train_latent_lec(args):
    print('exp_folder: {}, dataset: {}'.format(args.exp_dir, args.dataset))

    tf1.reset_default_graph()
    tf_cfg = tf1.ConfigProto(device_count={'GPU': 0}) if args.cpu else None
    with tf1.Session(config=tf_cfg) as sess_vae:
        vae_config = vae_util.get_training_config(args.exp_dir)
        vae = vae_util.load_vae_model(sess_vae, args.exp_dir, args.dataset)

        x, _ = data.load_dataset(args.dataset,
                                 'train',
                                 vae_config["root_dir"],
                                 normalize=True)
        c, n_class = data.load_dataset(args.dataset,
                                       'train',
                                       vae_config["root_dir"],
                                       label=True)
        y, __ = data.load_dataset(args.dataset,
                                  'train',
                                  vae_config["root_dir"],
                                  label=True)
        n_condition = n_class if vae_config["conditional"] else 0
        if args.drop:
            print("Dropping class {}".format(args.drop))
            # x, c = data.drop_class(x, c, args.drop)
            x, y = data.drop_class(x, y, args.drop)

        lec = tf.keras.models.load_model(args.lec_path)
        y_lec = lec.predict(x)

        # Train the latent space LEC
        with tf1.Session(config=tf_cfg) as sess_classifier:
            latent_lec = LatentSpaceLEC(sess_classifier,
                                        n_class,
                                        n_condition,
                                        latent_dim=vae_config["latent_dim1"],
                                        batch_size=64)

            c_one_hot = utility.one_hot(c, n_condition) \
                if vae_config["conditional"] else None
            y_one_hot = utility.one_hot(y, n_class)
            encode = get_encode(sess_vae,
                                vae,
                                x,
                                c=c_one_hot,
                                stage=args.stage)
            latent_lec.train(encode,
                             y_one_hot,
                             c_one_hot,
                             epochs=args.epochs,
                             lr=args.lr,
                             lr_epochs=args.lr_epochs,
                             encode_frequency=10)
            latent_lec.save(args.exp_dir)
Example #5
def get_latent_plot(sess: tf1.Session, vae: VaeInterface, dataset: str):
    """ Return a scatter-plot figure of the encoded test set, colored by
    class label """
    xs, dim = load_dataset(dataset, 'test')
    ys, n_classes = load_dataset(dataset, 'test', label=True)
    # chain of encoding
    us = vae.encode(xs, ys)

    fig = plt.figure(figsize=(12, 10))
    plt.scatter(us[:, 0], us[:, 1], c=ys, cmap=plt.cm.get_cmap('jet', 10))
    plt.colorbar()
    plt.xlabel("u[0]")
    plt.ylabel("u[1]")
    # plt.savefig(output_path); plt.show()

    return fig
Example #6
def exp_proximity(args):
    """ Pick clusters of samples by latent space proximity and visualize """
    n_repeat, n_sample = 10, 10

    sess = tf1.Session()
    vae = vae_util.load_vae_model(sess,
                                  args.exp_dir,
                                  args.dataset,
                                  batch_size=batch_size)
    x_train, _ = data.load_dataset(args.dataset, 'train', normalize=True)
    xs, ys = data.get_test_dataset(args.dataset, val=False)
    zs = vae.outer.encode(xs)

    inds = []
    for _ in range(n_repeat):
        z_ind = np.random.randint(len(xs))
        z_picked = zs[z_ind]
        dist = [(j, np.linalg.norm(z_picked - z)) for j, z in enumerate(zs)]
        dist.sort(key=lambda tup: tup[1])
        # pick the closest `n_sample` samples and extract their indices
        inds += [tup[0] for tup in dist[:n_sample]]

    figure = vae_util.create_image_grid(xs[inds], n_repeat, n_sample)
    plt.imshow(figure)
    plt.savefig(get_figure_path(args, 'proximity'))
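For reference, the anchor-and-sort loop above reduces to one vectorized distance computation per anchor. A self-contained sketch with synthetic latent codes (the real zs come from vae.outer.encode):

import numpy as np

def closest_indices(zs, anchor_idx, n_sample):
    # Euclidean distance from the anchor to every latent code, then the
    # indices of the n_sample nearest codes (the anchor itself included).
    dists = np.linalg.norm(zs - zs[anchor_idx], axis=1)
    return np.argsort(dists)[:n_sample]

zs = np.random.randn(1000, 8)  # synthetic stand-in for encoded test inputs
inds = closest_indices(zs, anchor_idx=np.random.randint(len(zs)), n_sample=10)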
Example #7
def get_outer_vae(args: argparse.Namespace, sess: tf1.Session) -> OuterVaeModel:
    """ Create a first-stage (outer) VAE model """
    _, dim = load_dataset(args.dataset, 'train', args.root_dir)
    _, n_classes = load_dataset(args.dataset, 'train', args.root_dir,
                                label=True)
    x_holder = tf1.placeholder(tf.float32,
                               [args.batch_size, dim[0], dim[1], dim[2]], 'x')
    n_conditions = n_classes if args.conditional else 0
    if 'taxinet' == args.dataset.lower():
        model1 = TaxiNet('outer',
                         sess,
                         args.exp_dir,
                         x_holder,
                         args.batch_size,
                         args.latent_dim1,
                         n_conditions=n_conditions,
                         acai=args.acai1,
                         beta=args.beta,
                         block_per_scale=args.block_per_scale,
                         depth_per_block=args.depth_per_block,
                         kernel_size=args.kernel_size, fc_dim=args.fc_dim
                         )
    elif 'infogan2' in args.network_structure.lower():
        model1 = InfoGan2('outer', sess, args.exp_dir, x_holder,
                          args.batch_size, args.latent_dim1,
                          n_conditions=n_conditions, acai=args.acai1
                          )
    elif 'infogan' in args.network_structure.lower():
        model1 = InfoGan('outer', sess, args.exp_dir, x_holder,
                         args.batch_size, args.latent_dim1,
                         n_conditions=n_conditions, acai=args.acai1
                         )
    elif 'resnet' in args.network_structure.lower():
        model1 = ResNet('outer', sess, args.exp_dir, x_holder, args.batch_size,
                        args.latent_dim1, n_conditions=n_conditions,
                        num_scale=args.num_scale,
                        block_per_scale=args.block_per_scale,
                        depth_per_block=args.depth_per_block,
                        kernel_size=args.kernel_size, base_dim=args.base_dim,
                        fc_dim=args.fc_dim
                        )
    else:
        raise Exception("Invalid model type")

    return model1
Example #8
def analyze_manifold(sess: tf1.Session, model: VaeWrapper, dataset: str):
    # TODO (3/17)
    # fig = get_latent_plot(sess, model, dataset)
    # plt.savefig('manifold.png')
    # plt.show()
    xs, _ = load_dataset(dataset)
    visualize_2d_manifold(sess, model, xs.shape[1:3], cnt_per_row=10,
                          bound=3.0, label=5, n_class=10)
Example #9
def evaluate_models(sess: tf1.Session, outer: OuterVaeModel,
                    inner: InnerVaeModel, dataset: str, root_dir='.') \
        -> Tuple[float, float]:
    """ Evaluate inner and outer VAE for the MAE of reconstruction

    :param sess: tf Session
    :param outer: outer VAE
    :param inner: inner VAE
    :param dataset: Dataset
    :param root_dir: dataset root folder
    :return: (mae1, mae2)
    """
    x, dim = load_dataset(dataset, 'test', root_dir, normalize=True)
    y, n_classes = load_dataset(dataset, 'test', root_dir, label=True)
    y_encoded = one_hot(y, n_classes)

    encoded = outer.encode(x, c=y_encoded)
    mae1 = outer.evaluate(x, c=y_encoded)
    mae2 = inner.evaluate(encoded, c=y_encoded)
    return mae1, mae2
Example #10
    def __init__(self, _vae: VaeWrapper, _model_under_test: LECUnderTest,
                 dataset: str, latent_dim: int, output_dir: str):
        """ Initialize a latent-space test generator for a given VAE and
        model under test. """
        self.plaus_lower_bound = 0.1
        self.distance_lower_bound = 4.0
        self.uncertainty_upper_bound = 999.

        self.vae = _vae
        self.model_under_test = _model_under_test
        self.latent_dim = latent_dim
        if not (os.path.exists(output_dir) and os.path.isdir(output_dir)):
            os.mkdir(output_dir)
        self.output_dir = output_dir
        self.total_cnt = 0  # total number of test generation attempted

        self.xs, self.dim = load_dataset(dataset, 'train', normalize=True)
        self.ys, self.n_classes = load_dataset(dataset, 'train', label=True)

        min_bound = np.array([-1.0] * self.latent_dim)
        max_bound = np.array([1.0] * self.latent_dim)
        self.bounds = (min_bound, max_bound)
Example #11
    def get_uncertainty_threshold(self, cut: float = 0.7) -> float:
        """ Compute the uncertainty threshold based on the test dataset. The
        threshold is determined to find `cut` * 100 percentage of the "bugs"
        in the test dataset.

        :param cut: 0.0 < cut <= 1.0, percentage of bugs to be caught by the
                    threshold to determine.
        :return: threshold uncertainty value
        """
        assert 0.0 < cut <= 1.0
        # Get uncertainty (sigma) value for the test dataset
        x_test, _ = load_dataset(self.dataset, 'test', normalize=True)
        sigmas = self.measure_uncertainty(x_test, repeat=dropout_repeat)

        # sort the indices by sigma in a descending order
        sigma_tup_list = [(i, sig) for i, sig in enumerate(sigmas)]
        sigma_tup_list.sort(key=lambda x: x[1], reverse=True)
        sorted_indices = [tup[0] for tup in sigma_tup_list]

        # check if each input is fault-finding
        y_pred = np.argmax(self.model.predict(x_test), axis=-1)
        y_test, _ = load_dataset(self.dataset, 'test', label=True)
        y_buggy = (y_pred != y_test)[sorted_indices]

        # Get sigma threshold
        total_bug_cnt = np.count_nonzero(y_buggy)
        thres_buggy_cnt = int(total_bug_cnt * cut)
        i, bug_cnt = 0, 0
        for i in range(len(sigmas)):
            bug_cnt += 1 if y_buggy[i] else 0
            if bug_cnt >= thres_buggy_cnt:
                break
        sigma_thres = sigmas[sorted_indices][i]
        logger.debug(sigmas[sorted_indices])
        logger.info("Sigma threshold {:.4f} at sorted index {} catches {} of "
                    "{} buggy inputs (target {:.2f}%), out of {} test inputs"
                    .format(sigma_thres, i, bug_cnt, total_bug_cnt, cut * 100,
                            len(sigmas)))
        return sigma_thres
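The sort-and-walk logic above can be condensed into a few NumPy lines. The sketch below illustrates the same algorithm on synthetic data; it is an illustration, not a drop-in replacement for the method.

import numpy as np

def uncertainty_threshold(sigmas, is_buggy, cut=0.7):
    # Sort by uncertainty (descending) and walk down until `cut` of the
    # buggy inputs are covered; return the sigma at that position.
    order = np.argsort(sigmas)[::-1]
    covered = np.cumsum(is_buggy[order])
    target = max(int(np.count_nonzero(is_buggy) * cut), 1)
    idx = int(np.searchsorted(covered, target))
    return float(sigmas[order][min(idx, len(sigmas) - 1)])

sigmas = np.random.rand(1000)          # synthetic uncertainties
is_buggy = np.random.rand(1000) < 0.1  # synthetic misclassification flags
print(uncertainty_threshold(sigmas, is_buggy, cut=0.7))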
Example #12
def main():
    batch_size = 64
    xs, n_channel = load_dataset('celeba', normalize=True)
    attrs, labels = load_attribute('celeba', category='train')
    assert len(xs) == len(attrs), \
        "len(xs): {}, len(attrs): {}".format(len(xs), len(attrs))

    model = get_model(xs.shape[-3:], len(labels))
    model.compile(keras.optimizers.Adam(),
                  loss='categorical_crossentropy',
                  metrics=[get_mae(i) for i in range(len(labels))])
    model.summary()
    print('xs', xs.shape, 'ys', attrs.shape)
    model.fit(x=xs, y=attrs, batch_size=batch_size)
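get_mae(i) is not shown here. Since one metric is compiled per attribute, a plausible minimal reading is a per-column mean-absolute-error closure; the sketch below is an assumption about its shape, not the project's definition.

import tensorflow as tf

def get_mae(i):
    # Hypothetical per-attribute metric: mean absolute error on column i.
    def mae_i(y_true, y_pred):
        return tf.reduce_mean(tf.abs(y_true[:, i] - y_pred[:, i]))
    mae_i.__name__ = 'mae_{}'.format(i)
    return mae_i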
Example #13
def get_semantically_partial_dataset(dataset: str, exp_dir: str,
                                     root_dir: str = os.getcwd()) \
        -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """ Get partial dataset, separated by "semantics", or semantic
    similarity captured by VAE. The VAE to use is loaded from `exp_dir`.

    :param dataset: Name of the dataset
    :param exp_dir: VAE directory
    :param root_dir: Project root directory
    :return: (x1, x2, y1, y2)
    """
    ratio = .5
    x, __ = load_dataset(dataset, 'train', root_dir, normalize=True)
    y, n_class = load_dataset(dataset, 'train', root_dir, label=True)
    c = one_hot(y, n_class)

    with tf1.Session() as sess:
        vae = load_vae_model(sess, exp_dir, dataset)
        z, __ = vae.extract_posterior(x, c)
        # avoid shadowing `z`; each row is a 1-D latent vector
        inds1 = [i for i, z_i in enumerate(z) if z_i[0] >= norm.ppf(ratio)]
        inds2 = [i for i, z_i in enumerate(z) if z_i[0] < norm.ppf(ratio)]
    return x[inds1], x[inds2], y[inds1], y[inds2]
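The split criterion comes down to a quantile test on the first latent coordinate. A self-contained sketch on synthetic posterior means (the real z comes from vae.extract_posterior):

import numpy as np
from scipy.stats import norm

z_mu = np.random.randn(1000, 8)       # synthetic stand-in for posterior means
ratio = 0.5
mask = z_mu[:, 0] >= norm.ppf(ratio)  # norm.ppf(0.5) == 0.0, i.e. a median split
inds1, inds2 = np.where(mask)[0], np.where(~mask)[0]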
Example #14
def get_inner_vae(args: argparse.Namespace,
                  sess: tf1.Session,
                  model1: OuterVaeModel) -> InnerVaeModel:
    """ Create a second-stage VAE """
    __, n_classes = load_dataset(args.dataset, 'train', args.root_dir,
                                 label=True)
    n_conditions = n_classes if args.conditional else 0
    z_holder = tf1.placeholder(tf.float32, [args.batch_size, args.latent_dim1],
                               'z')
    model2 = InnerVaeModel('inner', sess, args.exp_dir, z_holder,
                           args.batch_size, args.latent_dim2,
                           n_conditions=n_conditions, acai=args.acai2,
                           beta=args.beta, depth=args.second_depth,
                           fc_dim=args.second_dim, outer_vaes=[model1]
                           )
    return model2
Example #15
def main_search(args):
    # TODO(200624): Fix and refactor!
    setup_globals(args)
    # Load the VAE and VAE classifier
    sess_vae, sess_classifier = tf1.Session(), tf1.Session()
    vae_model = load_vae_model(sess_vae,
                               args.exp_dir,
                               args.dataset,
                               batch_size=get_batch_size(args.cnt))
    try:
        latent_lec = load_latent_lec(sess_classifier,
                                     args.exp_dir,
                                     batch_size=get_batch_size(args.cnt))
    except Exception:  # fall back when no latent-space LEC is available
        latent_lec = None

    # Load test model
    logger.info("Testing {} model at {}".format(args.dataset, args.lec_path))
    target_model = LECUnderTest(args.dataset, args.lec_path)

    # Test generator
    testset_dir = os.path.join(args.exp_dir, args.name)
    # sigma_threshold = target_model.get_uncertainty_threshold()

    gtor = VaeTestGenerator(vae_model, latent_lec, target_model, args.dataset,
                            testset_dir, get_pso_option(args), args.n_iter)
    cost_factory = CostFunctionFactory(latent_lec, sess_classifier, vae_model,
                                       sess_vae, target_model)
    us, xs, ys = gtor.optimize_conditional(cost_factory,
                                           args.cnt,
                                           reshape=True)

    logger.info("intended labels: " + str(ys))
    calculate_cost = cost_factory.get_conditional_cost(ys,
                                                       plaus_weight=args.plaus)
    print_cost(calculate_cost, us)
    __, dim = load_dataset(args.dataset, 'train', root_dir=get_root_dir())
    gtor.save_to_npy(xs, ys, us, dim)

    # log(testset_dir, str(datetime.datetime.now()))
    # log(testset_dir, 'Total: {}, bug finding: {}\n'.format(gtor.total_cnt,
    #                                                        args.n_test))

    sess_vae.close()
    sess_classifier.close()
Example #16
def get_fid(fake_images, dataset, root_folder, n, num_batches=1, parallel=1):
    real_images, _ = datasets.load_dataset(dataset,
                                           'train',
                                           root_folder,
                                           normalize=False)
    np.random.shuffle(real_images)
    real_images = preprocess_real_images(real_images[:n])
    fake_images = preprocess_fake_images(fake_images[:n])

    with tf1.Session() as sess:
        sess.run(tf1.global_variables_initializer())
        at1, at2 = get_inception_activations(real_images,
                                             fake_images,
                                             num_batches=num_batches,
                                             parallel=parallel)
        score = measure_fid(at1.eval(), at2.eval())

    return float(score)
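measure_fid is not shown in this snippet. For reference, the standard Frechet Inception Distance between two sets of Inception activations is the Frechet distance between their Gaussian fits; the sketch below implements that textbook formula and may differ from the project's measure_fid.

import numpy as np
from scipy import linalg

def fid_from_activations(act1, act2):
    # Fit a Gaussian to each activation set and compute
    # ||mu1 - mu2||^2 + Tr(S1 + S2 - 2 * sqrtm(S1 @ S2)).
    mu1, mu2 = act1.mean(axis=0), act2.mean(axis=0)
    sigma1 = np.cov(act1, rowvar=False)
    sigma2 = np.cov(act2, rowvar=False)
    covmean, _ = linalg.sqrtm(sigma1 @ sigma2, disp=False)
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    diff = mu1 - mu2
    return float(diff @ diff + np.trace(sigma1 + sigma2 - 2.0 * covmean))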
Example #17
def main_random(args):
    setup_globals(args)

    testset_dir = os.path.join(args.exp_dir, args.name)

    # Load the VAE and VAE classifier
    sess_vae = tf1.Session()
    vae_model = load_vae_model(sess_vae,
                               args.exp_dir,
                               args.dataset,
                               batch_size=get_batch_size(args.cnt))

    # Load test model
    logger.info("Testing {} model at {}".format(args.dataset, args.lec_path))
    target_model = LECUnderTest(args.dataset, args.lec_path)

    # Test generator
    fuzzer = VaeFuzzer(vae_model, target_model, args.dataset,
                       vae_model.latent_dim, testset_dir)
    us, xs, ys = fuzzer.generate(args.cnt)

    __, dim = load_dataset(args.dataset, 'train', root_dir=get_root_dir())
    fuzzer.save_to_npy(xs, ys, us, dim)
Example #18
def load_latent_lec(sess: tf1.Session, exp_dir: str, batch_size: int = 64) \
        -> LatentSpaceLEC:
    """ Load a classifier model in the given experiment folder

    :param sess:
    :param exp_dir: The folder in which the model checkpoint is stored
    :param batch_size: Batch size
    :return: A VaeClassifier
    """
    vae_config = vae_util.get_training_config(exp_dir)
    dataset = vae_config["dataset"]
    __, n_class = data.load_dataset(dataset,
                                    'train',
                                    vae_config["root_dir"],
                                    label=True)
    n_condition = n_class if vae_config["conditional"] else 0
    classifier = LatentSpaceLEC(sess,
                                n_class,
                                n_condition,
                                latent_dim=vae_config["latent_dim1"],
                                batch_size=batch_size)
    model_path = os.path.join(exp_dir, 'model')
    classifier.load(model_path)
    return classifier
Example #19
def evaluate(args, model1: OuterVaeModel, model2: InnerVaeModel,
             sess: tf1.Session):
    """ Evaluate the two-stage VAE: reconstruction MAE, sampled image grids,
    and FID scores for reconstruction and both generation stages. """
    maes = vae_util.evaluate_models(sess, model1, model2, args.dataset,
                                    args.root_dir)
    logger.info(maes)
    total_params = vae_util.get_trainable_parameters('outer')
    logger.info("stage1 trainable params: {}".format(total_params))
    total_params = vae_util.get_trainable_parameters('inner')
    logger.info("stage2 trainable params: {}".format(total_params))

    # test dataset
    x, dim = load_dataset(args.dataset, 'test', args.root_dir,
                          normalize=True)
    y, n_class = load_dataset(args.dataset, 'test', args.root_dir,
                              label=True)
    inds = np.arange(len(x))
    np.random.shuffle(inds)
    x = x[inds][0:args.fid_cnt]
    y = y[inds][0:args.fid_cnt]
    y_encoded = utility.one_hot(y, n_class) if args.conditional else None

    # reconstruction and generation
    def generate_label(cnt):
        return utility.one_hot(np.random.randint(0, n_class, cnt), n_class)

    def decode(_v):
        return np.array([np.where(__v == 1)[0][0] for __v in _v])

    img_recons = model1.reconstruct(x, c=y_encoded)
    print('recon.shape', img_recons.shape)

    y = None  # stitch the originals without labels; y1, y2 are assigned below
    img_gens1, y1 = model1.generate(args.fid_cnt, generate_label)
    img_gens2, y2 = model2.generate(args.fid_cnt, generate_label)
    logger.debug('recon.shape: {}, img1.shape: {}, img2.shape: {}'
                 ''.format(img_recons.shape, img_gens1.shape, img_gens2.shape))
    y1 = decode(y1) if y1 is not None else None
    y2 = decode(y2) if y2 is not None else None

    col = 5 if args.dataset == 'taxinet' else 10
    img_recons_sample, recon_inds = vae_util.stitch_imgs(img_recons, None,
                                                         row_size=n_class,
                                                         col_size=col)
    print('img_recons_sample: {}, recon_inds: {}'.format(
        img_recons_sample.shape, recon_inds))
    # x = np.rint(x[recon_inds] * 255.0)
    img_originals, _ = vae_util.stitch_imgs(x[recon_inds], y,
                                            row_size=n_class, col_size=col)
    print('img_originals', img_originals.shape)
    img_originals = cv2.cvtColor(img_originals.astype(np.uint8),
                                 cv2.COLOR_BGR2RGB)
    # y1, y2
    img_gens1_sample, _ = vae_util.stitch_imgs(img_gens1, y1,
                                               row_size=n_class,
                                               col_size=col)
    img_gens2_sample, _ = vae_util.stitch_imgs(img_gens2, y2,
                                               row_size=n_class,
                                               col_size=col)
    cv2.imwrite(os.path.join(args.exp_dir, 'recon_original.png'),
                img_originals)
    cv2.imwrite(os.path.join(args.exp_dir, 'recon_sample.png'),
                vae_util.scale_up(img_recons_sample))
    cv2.imwrite(os.path.join(args.exp_dir, 'gen1_sample.png'),
                vae_util.scale_up(img_gens1_sample))
    cv2.imwrite(os.path.join(args.exp_dir, 'gen2_sample.png'),
                vae_util.scale_up(img_gens2_sample))

    # calculating FID score
    batches, parallel = 100, 4
    tf1.reset_default_graph()
    fid_recon = get_fid(img_recons, args.dataset, args.root_dir,
                        args.fid_cnt, num_batches=batches, parallel=parallel)
    logger.info('FID = {:.2f}\n'.format(fid_recon))
    fid_gen1 = get_fid(img_gens1, args.dataset, args.root_dir, args.fid_cnt,
                       num_batches=batches, parallel=parallel)
    logger.info('FID = {:.2f}\n'.format(fid_gen1))
    fid_gen2 = get_fid(img_gens2, args.dataset, args.root_dir, args.fid_cnt,
                       num_batches=batches, parallel=parallel)
    logger.info('FID = {:.2f}\n'.format(fid_gen2))

    logger.info('Reconstruction Results: FID = {:.2f}'.format(fid_recon))
    logger.info('Generation Results (Stage 1): FID = {:.2f}'.format(fid_gen1))
    logger.info('Generation Results (Stage 2): FID = {:.2f}'.format(fid_gen2))

    with open(os.path.join(args.exp_dir, 'fid.txt'), 'w') as f:
        f.write("recon: {:.2f}, 1st: {:.2f}, 2nd: {:.2f}\n".format(
            fid_recon, fid_gen1, fid_gen2))
    if args.train1 and args.wandb:
        # wandb is initialized only when train1 is True
        wandb.log({
            'fid_recon': fid_recon,
            'fid_gen1': fid_gen1,
            'fid_gen2': fid_gen2,
        })
Example #20
def load_vae_model(sess: tf1.Session, exp_dir: str, dataset: str,
                   batch_size: int = 0) -> VaeWrapper:
    """ Load the two-stage VAE models

    :param sess: A tf.Session
    :param exp_dir: An experiment folder
    :param dataset: The name of the dataset
    :param batch_size: Batch size
    :param conditional: conditional VAE
    :return: a pair Two-stage VAE models
    """
    config = get_training_config(exp_dir)
    root_dir = config["root_dir"] \
        if "root_dir" in config else config["root_folder"]
    x, dim = load_dataset(dataset, 'train', root_dir)
    y, n_classes = load_dataset(dataset, 'train', root_dir,
                                label=True)
    if batch_size > 0:
        config["batch_size"] = batch_size
    n_conditions = n_classes if config["conditional"] else 0
    if 'beta' not in config:
        config['beta'] = 1.0

    input_x = tf1.placeholder(tf.float32,
                              [config['batch_size'], dim[0], dim[1], dim[2]],
                              'x')
    if dataset == 'taxinet':
        model1 = TaxiNet('outer',
                         sess,
                         exp_dir,
                         input_x,
                         config["batch_size"],
                         config["latent_dim1"],
                         n_conditions=n_conditions,
                         acai=config["acai1"],
                         beta=config["beta"],
                         block_per_scale=config["block_per_scale"],
                         depth_per_block=config["depth_per_block"],
                         kernel_size=config["kernel_size"],
                         fc_dim=config["fc_dim"]
                         )
    else:
        if config["network_structure"].lower() == 'infogan':
            model1 = InfoGan('outer',
                             sess,
                             exp_dir,
                             input_x,
                             config["batch_size"],
                             config["latent_dim1"],
                             n_conditions=n_conditions,
                             acai=config["acai1"],
                             beta=config["beta"],
                             )
        elif config["network_structure"].lower() == 'infogan2':
            model1 = InfoGan2('outer',
                              sess,
                              exp_dir,
                              input_x,
                              config["batch_size"],
                              config["latent_dim1"],
                              n_conditions=n_conditions,
                              acai=config["acai1"],
                              beta=config["beta"],
                              )
        elif config["network_structure"].lower() == 'resnset':
            model1 = ResNet('outer',
                            sess,
                            exp_dir,
                            input_x,
                            config["batch_size"],
                            config["latent_dim1"],
                            n_conditions=n_conditions,
                            acai=config["acai1"],
                            beta=config["beta"],
                            num_scale=config["num_scale"],
                            block_per_scale=config["block_per_scale"],
                            depth_per_block=config["depth_per_block"],
                            kernel_size=config["kernel_size"],
                            base_dim=config["base_dim"],
                            fc_dim=config["fc_dim"]
                            )
        else:
            raise Exception("Failed to load VAE model")
    z_holder = tf1.placeholder(tf.float32,
                               [config["batch_size"], config["latent_dim1"]],
                               'z')

    model2 = InnerVaeModel('inner',
                           sess,
                           exp_dir,
                           z_holder,
                           config["batch_size"],
                           config["latent_dim2"],
                           n_conditions=n_conditions,
                           acai=config["acai2"],
                           beta=config["beta"],
                           depth=config["second_depth"],
                           fc_dim=config["second_dim"],
                           outer_vaes=[model1]
                           )

    sess.run(tf1.global_variables_initializer())
    saver = tf1.train.Saver()
    saver.restore(sess, os.path.join(exp_dir, 'model', 'stage2'))

    vae = VaeWrapper(model1, model2)
    vae.latent_weights = load_latent_var_weights(exp_dir)
    return vae
Example #21
def main(args):
    global logger
    tf1.reset_default_graph()

    if not os.path.exists(args.exp_dir):
        os.makedirs(args.exp_dir)
    model_path = os.path.join(args.exp_dir, 'model')
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    logger = vae_util.setup_logger(os.path.join(args.exp_dir, 'training.log'),
                                   args.debug)
    logger.info("Experiment at {}".format(args.exp_dir))
    logger.info(vars(args))

    # dataset
    xs, dim = load_dataset(args.dataset, 'train', args.root_dir,
                           normalize=True)
    ys, n_class = load_dataset(args.dataset, 'train', args.root_dir,
                               label=True)
    if args.limit:
        xs, ys = xs[:6400], ys[:6400]
    logger.info('Train data len: {}, dim: {}, classes: {}'.format(len(xs),
                                                                  dim, n_class))

    xs_val, _ = load_dataset(args.dataset, 'test', args.root_dir,
                             normalize=True)
    ys_val, _ = load_dataset(args.dataset, 'test', args.root_dir, label=True)

    if args.drop >= 0:
        logger.info("Dropping class {}".format(args.drop))
        xs, ys = drop_class(xs, ys, args.drop)
        xs_val, ys_val = drop_class(xs_val, ys_val, args.drop)
    cs = utility.one_hot(ys)

    n_sample = np.shape(xs)[0]
    logger.info('Num Sample = {}.'.format(n_sample))

    # Load from configuration
    config_filename = os.path.join(args.exp_dir, 'config.yml')
    load_configuration(args, config_filename)
    pprinter = pprint.PrettyPrinter(indent=4)
    logger.info("Configuration: {}".format(pprinter.pformat(vars(args))))

    # Save/update the config only when any of the VAE gets trained
    if args.train1 or args.train2:
        logger.info("Saving configuration to " + config_filename)
        save_configuration(args, config_filename, n_sample)

    # session
    config = tf1.ConfigProto(device_count={'GPU': 0}) if args.cpu else None
    sess = tf1.Session(config=config)

    # model
    outer_vae = vae_util.get_outer_vae(args, sess)
    outer_params = vae_util.get_trainable_parameters('outer')
    logger.info("Created VAE models:")
    logger.info("{}, {} params".format(outer_vae, outer_params))

    # train model
    if args.train1:
        if args.wandb:
            wandb.init(project=args.dataset, name=args.exp_name,
                       sync_tensorboard=True, config=args)
        mae = outer_vae.train(lambda: (xs, cs), args.epochs1, args.lr1,
                              os.path.join(model_path, 'stage1'),
                              log_epoch=log_epoch)
        logger.info("Finished training stage 1 VAE. Mae: {:.2%}".format(mae))

    if args.train2:
        inner_vae = vae_util.get_inner_vae(args, sess, outer_vae)
        sess.run(tf1.global_variables_initializer())
        outer_vae.restore(os.path.join(model_path, 'stage1'))

        mu_z, sd_z = outer_vae.extract_posterior(xs, cs)

        def get_data():
            zs = mu_z + sd_z * np.random.normal(0, 1,
                                                [len(mu_z), args.latent_dim1])
            return zs, cs

        mae = inner_vae.train(get_data, args.epochs2, args.lr2,
                              os.path.join(model_path, 'stage2'),
                              log_epoch=log_epoch)
        logger.info("Finished training stage 2 VAE. Mae: {:.2%}".format(mae))

    # load
    if not (args.train1 or args.train2):
        # saver.restore(sess, os.path.join(model_path, 'stage1'))
        if os.path.exists(os.path.join(model_path, 'stage2.index')):
            inner_vae = vae_util.get_inner_vae(args, sess, outer_vae)
            inner_vae.restore(os.path.join(model_path, 'stage2'))
            logger.info("Loaded Stage 2 VAE")
        elif os.path.exists(os.path.join(model_path, 'stage1.index')):
            outer_vae.restore(os.path.join(model_path, 'stage1'))
            logger.info("Loaded Stage 1 VAE")
        else:
            raise Exception("No checkpoint found!")

    if args.eval:
        logger.info("Evaluating...")
        evaluate(args, outer_vae, inner_vae, sess)

    if args.interpolate:
        interpolate(VaeWrapper(outer_vae, inner_vae), sess, args.exp_dir,
                    xs, ys, 20)

    if args.manifold:
        logger.info("Analyze manifold")
        vae_util.analyze_manifold(sess, VaeWrapper(outer_vae, inner_vae),
                                  args.dataset)
Example #22
    def xyc(self):
        x, __ = data.load_dataset(dataset, 'test', __root_dir)
        y, __ = data.load_dataset(dataset, 'test', __root_dir, label=True)
        c = utility.one_hot(y)
        return x, y, c
Example #23
def exp_scatter(args):
    """ Draw scatter plot of the 2D manifold. color-code samples by
    category--train (green), normal (blue), fault-finding (red).
    """
    # configuration
    figsize = (20, 20)
    vmin, vmax = -3., 3.
    plot_manifold = True
    plot_train = True
    plot_normal = False
    plot_ff = True
    is_val = False
    uniform = True
    if uniform:
        vmin, vmax = 0., 1.

    sess = tf1.Session()
    vae = vae_util.load_vae_model(sess,
                                  args.exp_dir,
                                  args.dataset,
                                  batch_size=batch_size)
    x_train, _ = data.load_dataset(args.dataset, 'train', normalize=True)
    y_train, _ = data.load_dataset(args.dataset, 'train', label=True)
    if args.drop:
        logger.info("Dropping class {}".format(args.drop))
        x_train, y_train = data.drop_class(x_train, y_train, args.drop)
    model = load_model(args.lec_path)
    xs, ys = data.get_test_dataset(args.dataset, val=is_val)
    if args.drop:
        xs, ys = data.drop_class_except(xs, ys, args.drop)
    preds = model.predict(xs)
    preds = np.argmax(preds, axis=1)

    # divide the test dataset into normal vs. fault-revealing subsets
    ff_inputs = [x for x, y, _y in zip(xs, ys, preds) if y != _y]
    normal_inputs = [x for x, y, _y in zip(xs, ys, preds) if y == _y]
    print("ff: {}, normal: {}".format(len(ff_inputs), len(normal_inputs)))

    def encode(_x: np.ndarray):
        _z, _ = vae.outer.extract_posterior(_x)
        return stats.norm.cdf(_z) if uniform else _z

    z_train = encode(x_train)
    z_ff = encode(np.array(ff_inputs))
    z_normal = encode(np.array(normal_inputs))

    def per_axis(_z: np.ndarray):
        return (_z[:, i] for i in range(_z.shape[1]))

    plt.figure(figsize=figsize)
    if plot_manifold and vae.latent_dim == 2:
        save_path = get_figure_path(args, 'manifold')
        img = vae_util.visualize_2d_manifold(sess,
                                             vae,
                                             bound=vmax,
                                             cnt_per_row=40,
                                             save_path=save_path)
        plt.clf()

    if vae.latent_dim == 2:
        ax = plt
    elif vae.latent_dim == 3:
        fig = plt.figure()
        ax = Axes3D(fig)
        ax.set_zlim(vmin, vmax)
    else:
        raise Exception("Invalid latent dimension {}".format(vae.latent_dim))

    if plot_normal:
        ax.scatter(*per_axis(z_normal),
                   vmin=vmin,
                   vmax=vmax,
                   s=1,
                   c='deepskyblue')
    if plot_train:
        ax.scatter(*per_axis(z_train),
                   vmin=vmin,
                   vmax=vmax,
                   s=1,
                   c='lightgreen')
    if plot_ff:
        ax.scatter(*per_axis(z_ff), vmin=vmin, vmax=vmax, s=1, c='r')

    plt.xlim(vmin, vmax)
    plt.ylim(vmin, vmax)
    plt.savefig(get_figure_path(args, 'scatter'), dpi=100)

    # Save the figure handle to disk
    with open(get_figure_path(args, 'scatter') + '.pickle', 'wb') as f:
        pickle.dump(plt.gcf(), f)

    # show last so the saved figure is not cleared by the GUI backend
    if args.show:
        plt.show()
Example #24
parser.add_argument('--epochs', type=int, default=100, help="Epochs")
parser.add_argument('--batch-size', type=int, default=32, help="Batch size")
parser.add_argument('--lr', type=float, default=0.0001, help="Learning rate")
parser.add_argument('--drop', type=int,
                    help="Drop a specified class in training dataset")
parser.add_argument("--limit-gpu", type=float, default=0.7,
                    help='Limit GPU mem usage by percentage 0 < f <= 1')
args = parser.parse_args()

limit_keras_gpu_usage(args.limit_gpu)
model_dir = os.path.join('models', args.dataset)
if not os.path.exists(model_dir):
    os.mkdir(model_dir)
model_path = os.path.join(model_dir, '{}.h5'.format(args.filename))

x_train, input_shape = dataset.load_dataset(args.dataset, 'train',
                                            normalize=True)
y_train, n_class = dataset.load_dataset(args.dataset, 'train', label=True)
if args.drop:
    x_train, y_train = dataset.drop_class(x_train, y_train, args.drop)
x_val, _ = dataset.load_dataset(args.dataset, 'test', normalize=True)
y_val, _ = dataset.load_dataset(args.dataset, 'test', label=True)

# model = efn.EfficientNetB0(classes=n_class)
if args.arch == 'xception':
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    model.add(tf.keras.layers.Lambda(
        lambda image: tf.image.resize(
            image,
            (96, 96),
            method=tf.image.ResizeMethod.BILINEAR,