Example #1
def main(args):
    # load the train and test splits
    dataset_train = load_data.Load(args.dataset, args.train_on, shuffle=False, batch_size=1000, img_size=args.img_size)
    next_element_train = dataset_train.get_imgs_next()
    dataset_test = load_data.Load(args.dataset, args.test_on, shuffle=False, batch_size=1000, img_size=args.img_size)
    next_element_test = dataset_test.get_imgs_next()
    init = tf.global_variables_initializer()
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    with tf.Session(config=config) as sess:
        sess.run(init)
        # training images: used to fit the FID statistics or the NDB bins
        dataset_train.init_dataset(sess)
        train_imgs = []
        while True:
            try:
                train_imgs.append(sess.run(next_element_train))
            except tf.errors.OutOfRangeError:
                break
        train_imgs = np.concatenate(train_imgs, 0)
        if args.eval == 'ndb':
            ndb_model = ndb.NDB(np.random.permutation(train_imgs)[:80000],
                                max_dims=2000,
                                semi_whitening=True)
        else:
            inception_score.update_fid_mean(train_imgs)

        # test images: used to evaluate IS / FID / NDB
        dataset_test.init_dataset(sess)
        test_imgs = []
        while True:
            try:
                test_imgs.append(sess.run(next_element_test))
            except tf.errors.OutOfRangeError:
                break
        test_imgs = np.concatenate(test_imgs, 0)
        if args.eval == 'is':
            inception_score_mean, inception_score_std, fid = inception_score.calc_scores(test_imgs)
            print('Inception score mean: ', inception_score_mean)
            print('Inception score std: ', inception_score_std)
        if args.eval == 'fid':
            inception_score_mean, inception_score_std, fid = inception_score.calc_scores(test_imgs)
            print('FID: ', fid)
        if args.eval == 'ndb':
            results_train = ndb_model.evaluate(train_imgs)
            results_test = ndb_model.evaluate(test_imgs)
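The init_dataset / run-until-OutOfRangeError loop above is the same pattern used for both splits (and again in the later examples). A minimal helper sketch of that pattern, assuming the same `Load` interface (an `init_dataset(sess)` method plus a get-next tensor for a single pass over the data):

import numpy as np
import tensorflow as tf

def drain_dataset(sess, dataset, next_element):
    # Run one full pass over the dataset and stack all batches into one array.
    dataset.init_dataset(sess)
    batches = []
    while True:
        try:
            batches.append(sess.run(next_element))
        except tf.errors.OutOfRangeError:
            break
    return np.concatenate(batches, 0)

# e.g. train_imgs = drain_dataset(sess, dataset_train, next_element_train)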
Example #2
    def test_load_data(self):
        self.raised = False
        self.load = ld.Load()
        self.frame = pd.DataFrame()

        try:
            # The '.cs' extension (rather than '.csv') is expected to make
            # load_data fail and raise one of the errors caught below.
            self.frame = self.load.load_data(
                'DOHMH_New_York_City_Restaurant_Inspection_Results.cs')

        except (IOError, SystemExit):
            self.raised = True
            self.assertTrue(self.raised)
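For reference, a test method like this would normally sit on a unittest.TestCase subclass; a minimal harness sketch (the class name here is an assumption, not part of the original module):

import unittest
import pandas as pd
import load_data as ld

class LoadDataTest(unittest.TestCase):
    # the test_load_data method shown above goes here
    pass

if __name__ == '__main__':
    unittest.main()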
Example #3
denorm_character_data_1 = transform.denormalizing_data_list(
    'Characters', characters_data, 'titles', 'name', 'gender', 'playedBy',
    'url', log)
denorm_character_data_2 = transform.denormalizing_data_list(
    'Characters', denorm_character_data_1, 'playedBy', 'name', 'gender',
    'titles', 'url', log)
denorm_character_data = transform.dict_to_list(denorm_character_data_2)

trf_char = transform.Transform(denorm_character_data)
character_actor_dictionary = dict(trf_char.key_values(3))
#print(character_actor_dictionary)
character_to_actor = trf_char.relate_entities(character_actor_dictionary, 3)
#print(character_to_actor)
character_title_dictionary = dict(trf_char.key_values(2))
character_to_title = trf_char.relate_entities(character_title_dictionary, 2)

character_load = load_data.Load(cur, conn, log)
character_load.load_three_fields('characters', 'character_name', 'gender',
                                 'character_id', characters_list)
character_load.load_two_fields('actors', 'actor_name', 'actor_id',
                               trf_char.key_values(3))
character_load.load_two_fields('character_to_actor', 'character_id',
                               'actor_id', character_to_actor)
character_load.load_two_fields('titles', 'title', 'title_id',
                               trf_char.key_values(2))
character_load.load_two_fields('character_to_title', 'character_id',
                               'title_id', character_to_title)

#Book entity
book_data = ext_books.api_data(log)

denorm_book_data_1 = transform.denormalizing_data_list('Books', book_data,
Example #4
def main(args, logging):
    # frequently used arguments
    batch_size = args.batch_size
    log_iter = args.log_iter
    max_iter = args.max_iter
    tensorboard_log = args.tensorboard_log
    N_CRITIC = args.N_CRITIC
    CALC_INCEPTION = args.calc_is_and_fid
    CALC_NDB = args.calc_ndb
    INCEPTION_FREQUENCY = args.INCEPTION_FREQUENCY

    # choose model
    all_models = {'model2': model2}
    model = all_models.get(args.architecture)

    dataset_train = load_data.Load(args.dataset,
                                   args.train_on,
                                   shuffle=True,
                                   batch_size=batch_size,
                                   img_size=args.img_size)
    next_element_train = dataset_train.get_full_next()
    image_size = [
        dataset_train.img_size, dataset_train.img_size,
        next_element_train[0].shape.as_list()[-1]
    ]

    if args.label == 'unsup':
        n_labels = None
    elif args.label == 'clustering':
        n_labels = args.n_clusters
        dataset_train.set_clustring(args.clustering_path, n_labels)
    elif args.label == 'sup':
        n_labels = dataset_train.num_classes

    dataset_train._init_dataset()
    TrainData = dataset_train.load_sub_imgs(80000)
    if CALC_INCEPTION:
        inception_score.update_fid_mean(TrainData)
    if CALC_NDB:
        ndb_model = ndb.NDB(TrainData, max_dims=2000, semi_whitening=True)

    _iteration = tf.placeholder(tf.int32, shape=None)
    input_x = tf.placeholder(tf.float32, shape=[batch_size] + image_size)
    all_real_labels = tf.placeholder(tf.int32, shape=[batch_size])

    # data augmentation
    all_real_data = input_x + tf.random_uniform(
        shape=[batch_size] + image_size, minval=0., maxval=1.)  # dequantize
    all_real_data = all_real_data / 128. - 1

    disc_costs = []
    disc_acgan_costs = []
    disc_acgan_accs = []
    disc_acgan_fake_accs = []
    gen_costs = []
    gen_acgan_costs = []

    all_fake_data = model.Generator(batch_size,
                                    args.arch,
                                    args.DIM_G,
                                    args.z_len, [all_real_labels, n_labels],
                                    is_training=True,
                                    image_size=image_size,
                                    reuse=False)

    # Discriminator
    real_and_fake_data = tf.concat([all_real_data, all_fake_data], axis=0)
    real_and_fake_labels = tf.concat([all_real_labels, all_real_labels],
                                     axis=0)
    disc_all, disc_all_2, disc_all_acgan = model.Discriminator(
        real_and_fake_data,
        args.DIM_D, [real_and_fake_labels, n_labels],
        is_training=True,
        image_size=image_size,
        reuse=False)

    disc_real, disc_fake = tf.split(disc_all, 2)
    disc_real_2, disc_fake_2 = tf.split(disc_all_2, 2)

    # Discriminator for Consistency Term (CT)
    disc_all_, disc_all_2_, disc_all_acgan_ = model.Discriminator(
        real_and_fake_data,
        args.DIM_D, [real_and_fake_labels, n_labels],
        is_training=True,
        image_size=image_size,
        reuse=True)
    disc_real_, disc_fake_ = tf.split(disc_all_, 2)
    disc_real_2_, disc_fake_2_ = tf.split(disc_all_2_, 2)

    # wasserstein distance
    disc_costs.append(tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real))
    # gradient penalty
    alpha = tf.random_uniform(shape=[batch_size, 1, 1, 1],
                              minval=0.,
                              maxval=1.)
    differences = all_fake_data - all_real_data
    interpolates = all_real_data + (alpha * differences)
    gp_disc = model.Discriminator(interpolates,
                                  args.DIM_D, [all_real_labels, n_labels],
                                  is_training=True,
                                  image_size=image_size,
                                  reuse=True)[0]
    gradients = tf.gradients(gp_disc, [interpolates])[0]  # same dropout rate
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
    gradient_penalty = 10.0 * tf.reduce_mean((slopes - 1.)**2)
    disc_costs.append(gradient_penalty)
    # consistency term (CT): penalize the gap between two stochastic
    # discriminator passes on the same real inputs, hinged at Factor_M
    CT = args.LAMBDA_2 * tf.square(disc_real - disc_real_)
    CT += args.LAMBDA_2 * 0.1 * tf.reduce_mean(
        tf.square(disc_real_2 - disc_real_2_), axis=[1])
    CT_ = tf.maximum(CT - args.Factor_M, 0.0 * (CT - args.Factor_M))
    CT_ = tf.reduce_mean(CT_)
    disc_costs.append(CT_)

    # train the generator
    n_samples = args.GEN_BS_MULTIPLE * batch_size
    if n_labels is None:
        fake_labels = None
    else:
        fake_labels = tf.cast(
            tf.random_uniform([n_samples]) * n_labels, tf.int32)
    x = model.Generator(n_samples,
                        args.arch,
                        args.DIM_G,
                        args.z_len, [fake_labels, n_labels],
                        is_training=True,
                        image_size=image_size,
                        reuse=True)
    disc_fake, disc_fake_2, disc_fake_acgan = model.Discriminator(
        x,
        args.DIM_D, [fake_labels, n_labels],
        is_training=True,
        image_size=image_size,
        reuse=True)
    gen_costs.append(-tf.reduce_mean(disc_fake))

    # build the loss function
    disc_wgan = tf.add_n(disc_costs)
    if n_labels is not None:
        disc_all_acgan_real, disc_all_acgan_fake = tf.split(disc_all_acgan, 2)
        disc_acgan_costs.append(
            tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=disc_all_acgan_real, labels=all_real_labels)))

        disc_acgan_accs.append(
            tf.reduce_mean(
                tf.cast(
                    tf.equal(
                        tf.to_int32(tf.argmax(disc_all_acgan_real, axis=1)),
                        all_real_labels), tf.float32)))

        disc_acgan_fake_accs.append(
            tf.reduce_mean(
                tf.cast(
                    tf.equal(
                        tf.to_int32(tf.argmax(disc_all_acgan_fake, axis=1)),
                        all_real_labels), tf.float32)))
        gen_acgan_costs.append(
            tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=disc_fake_acgan, labels=fake_labels)))
        disc_acgan = tf.add_n(disc_acgan_costs)
        disc_acgan_acc = tf.add_n(disc_acgan_accs)
        disc_acgan_fake_acc = tf.add_n(disc_acgan_fake_accs)
        disc_cost = disc_wgan + (args.ACGAN_SCALE * disc_acgan)
        gen_cost = tf.add_n(gen_costs) + (args.ACGAN_SCALE_G *
                                          (tf.add_n(gen_acgan_costs)))
    else:
        disc_acgan = tf.constant(0.)
        disc_acgan_acc = tf.constant(0.)
        disc_acgan_fake_acc = tf.constant(0.)
        disc_cost = disc_wgan
        gen_cost = tf.add_n(gen_costs)

    if args.DECAY:
        decay = tf.maximum(0.,
                           1. - (tf.cast(_iteration, tf.float32) / max_iter))
    else:
        decay = 1.

    var = tf.trainable_variables()
    gen_var = [v for v in var if 'Generator' in v.name]
    disc_var = [v for v in var if 'Discriminator' in v.name]

    gen_opt = tf.train.AdamOptimizer(learning_rate=args.lr * decay,
                                     beta1=0.0,
                                     beta2=0.9)
    disc_opt = tf.train.AdamOptimizer(learning_rate=args.lr * decay,
                                      beta1=0.0,
                                      beta2=0.9)
    gen_gv = gen_opt.compute_gradients(gen_cost, var_list=gen_var)
    disc_gv = disc_opt.compute_gradients(disc_cost, var_list=disc_var)
    gen_train_op = gen_opt.apply_gradients(gen_gv)
    disc_train_op = disc_opt.apply_gradients(disc_gv)

    if tensorboard_log:
        tf_inception_m1 = tf.placeholder(tf.float32, shape=None)
        tf_inception_std1 = tf.placeholder(tf.float32, shape=None)
        tf_inception_m2 = tf.placeholder(tf.float32, shape=None)
        tf_inception_std2 = tf.placeholder(tf.float32, shape=None)
        tf_fid = tf.placeholder(tf.float32, shape=None)
        tf_ndb = tf.placeholder(tf.float32, shape=None)
        tf_ndb_js = tf.placeholder(tf.float32, shape=None)
        summary1 = utils_summary.summary_collection('col1')
        summary2 = utils_summary.summary_collection('col2')
        summary3 = utils_summary.summary_collection('col3')
        summary4 = utils_summary.summary_collection('col4')
        with tf.name_scope('disc'):
            summary1.add_summary_scalar(disc_cost, 'disc_cost')
            summary1.add_summary_scalar(disc_wgan, 'disc_wgan')
            summary1.add_summary_scalar(disc_acgan, 'disc_acgan')
        with tf.name_scope('ACGAN'):
            summary1.add_summary_scalar(disc_acgan_acc, 'acc_real')
            summary1.add_summary_scalar(disc_acgan_fake_acc, 'acc_fake')
        with tf.name_scope('gen'):
            summary1.add_summary_scalar(gen_cost, 'gen_cost')
        with tf.name_scope('inception'):
            summary3.add_summary_scalar(tf_inception_m1, 'incep_mean')
            summary3.add_summary_scalar(tf_inception_std1, 'incep_std')
            summary3.add_summary_scalar(tf_inception_m2, 'incep_mean')
            summary3.add_summary_scalar(tf_inception_std2, 'incep_std')
            summary3.add_summary_scalar(tf_fid, 'fid')
            summary4.add_summary_scalar(tf_ndb, 'ndb')
            summary4.add_summary_scalar(tf_ndb_js, 'ndb_js')

        # Function for generating samples
        if n_labels:
            fixed_noise = tf.constant(
                np.random.normal(size=(n_labels**2,
                                       args.z_len)).astype('float32'))
            fixed_labels = tf.constant(
                np.array(list(range(0, n_labels)) * n_labels, dtype='int32'))
            fixed_noise_samples = model.Generator(n_labels**2,
                                                  args.arch,
                                                  args.DIM_G,
                                                  args.z_len,
                                                  [fixed_labels, n_labels],
                                                  is_training=True,
                                                  image_size=image_size,
                                                  reuse=True,
                                                  noise=fixed_noise)
            fixed_noise_samples = (fixed_noise_samples + 1) / 2
            images = tf.contrib.gan.eval.image_grid(
                fixed_noise_samples, [10, 10],
                image_shape=(image_size[0], image_size[1]))
            images = tf.cast(tf.image.convert_image_dtype(images, tf.uint8),
                             tf.uint8)
            images = tf.reshape(images,
                                [image_size[0] * 10, image_size[1] * 10, 3])
            images_encode = tf.image.encode_jpeg(images)
            fname = tf.constant(str(datetime.now()) + ".jpeg")
            fwrite = tf.write_file(save + '/samples/' + fname, images_encode)

            summary2.add_summary_image1(fixed_noise_samples, n_labels**2,
                                        'Sam')
        else:
            fixed_noise = tf.constant(
                np.random.normal(size=(10**2, args.z_len)).astype('float32'))
            fixed_noise_samples = model.Generator(10**2,
                                                  args.arch,
                                                  args.DIM_G,
                                                  args.z_len, [None, None],
                                                  is_training=True,
                                                  image_size=image_size,
                                                  reuse=True,
                                                  noise=fixed_noise)
            fixed_noise_samples = (fixed_noise_samples + 1) / 2
            images = tf.contrib.gan.eval.image_grid(
                fixed_noise_samples, [10, 10],
                image_shape=(image_size[0], image_size[1]))
            images = tf.cast(tf.image.convert_image_dtype(images, tf.uint8),
                             tf.uint8)
            images = tf.reshape(images,
                                [image_size[0] * 10, image_size[1] * 10, 3])
            images = tf.image.encode_png(images)
            fname = tf.constant(str(datetime.now()) + ".png")
            fwrite = tf.write_file(save + '/samples/' + fname, images)

            summary2.add_summary_image1(fixed_noise_samples, 10**2, 'Sam')

        summary_op_1 = tf.summary.merge(summary1.get_summary())
        summary_op_2 = tf.summary.merge(summary2.get_summary())
        summary_op_3 = tf.summary.merge(summary3.get_summary())
        summary_op_4 = tf.summary.merge(summary4.get_summary())

    # Function for calculating inception score
    if n_labels:
        # fake_labels_100 = tf.cast(tf.random_uniform([100])*n_labels, tf.int32)
        prob = dataset_train.get_label_dist()
        fake_labels_100 = tf.py_func(np.random.choice,
                                     [np.arange(n_labels), 100, True, prob],
                                     tf.int64)
        fake_labels_100.set_shape(100)
        samples_100 = model.Generator(100,
                                      args.arch,
                                      args.DIM_G,
                                      args.z_len, [fake_labels_100, n_labels],
                                      is_training=True,
                                      image_size=image_size,
                                      reuse=True)
    else:
        samples_100 = model.Generator(100,
                                      args.arch,
                                      args.DIM_G,
                                      args.z_len, [None, None],
                                      is_training=True,
                                      image_size=image_size,
                                      reuse=True)

    with tf.Session() as sess:

        def get_samples(n):
            all_samples = []
            for i in range(int(n / 100)):
                all_samples.append(sess.run(samples_100))
            all_samples = np.concatenate(all_samples, axis=0)
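            # map generator output from [-1, 1] to integer pixel values in [0, 255]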
            all_samples = ((all_samples + 1.) * (255.99 / 2)).astype('int32')
            return all_samples

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        # ckpt_state = tf.train.get_checkpoint_state(os.path.join(log_dir,'ckpt'))
        # if ckpt_state and ckpt_state.model_checkpoint_path:
        #     print("Loading file %s" % ckpt_state.model_checkpoint_path)
        #     saver.restore(sess, ckpt_state.model_checkpoint_path)

        summary_writer = tf.summary.FileWriter(os.path.join(
            args.log_dir, 'summary'),
                                               graph=sess.graph)
        os.makedirs(os.path.join(args.log_dir, 'data'), exist_ok=True)
        _disc_cost = 0
        _disc_wgan = 0
        it = -1
        ep = -1
        global_step = -1
        best_IS1 = -1
        while global_step < max_iter:  # for epoch
            dataset_train.init_dataset(sess)
            ep += 1
            it_per_epoch = it_in_epoch if it != -1 else -1
            it_in_epoch = 0
            while True:  # for iter in epoch
                try:
                    x, y = sess.run(next_element_train)
                    start_time = time.time()

                    _ = sess.run([gen_train_op],
                                 feed_dict={_iteration: global_step})
                    for i in range(N_CRITIC):
                        x, y = sess.run(next_element_train)
                        y = np.squeeze(y)
                        if n_labels is not None:
                            _disc_cost, _disc_wgan, _disc_acgan, _disc_acgan_acc, _disc_acgan_fake_acc, _ = sess.run(
                                [
                                    disc_cost, disc_wgan, disc_acgan,
                                    disc_acgan_acc, disc_acgan_fake_acc,
                                    disc_train_op
                                ],
                                feed_dict={
                                    input_x: x,
                                    all_real_labels: y,
                                    _iteration: global_step
                                })
                        else:
                            _disc_cost, _ = sess.run(
                                [disc_cost, disc_train_op],
                                feed_dict={
                                    input_x: x,
                                    all_real_labels: y,
                                    _iteration: global_step
                                })
                    duration = time.time() - start_time

                    if global_step % log_iter == 0:
                        examples_per_sec = batch_size / float(duration)
                        info_str = (
                            '{}: Epoch: {:3d} ({:5d}/{:5d}), global_step {:5d}, disc_cost = {:.2f}, disc_wgan = {:.2f} '
                            '({:.1f} examples/sec; {:.3f} '
                            'sec/batch)').format(datetime.now(), ep,
                                                 it_in_epoch, it_per_epoch,
                                                 global_step, _disc_cost,
                                                 _disc_wgan, examples_per_sec,
                                                 duration)
                        logging.info(info_str)
                        print('\r', info_str, end='', flush=True)

                        if tensorboard_log:
                            if global_step % 1000 == 0:
                                summary_str = sess.run(summary_op_2, {
                                    input_x: x,
                                    all_real_labels: y
                                })
                            else:
                                summary_str = sess.run(summary_op_1, {
                                    input_x: x,
                                    all_real_labels: y
                                })
                            summary_writer.add_summary(summary_str, global_step)
                            summary_writer.flush()

                    if tensorboard_log:
                        # fwrite (the sample-grid file writer) is only defined
                        # when tensorboard logging is enabled
                        sess.run(fwrite)
                    if tensorboard_log and CALC_INCEPTION and global_step % INCEPTION_FREQUENCY == INCEPTION_FREQUENCY - 1:
                        samples1 = get_samples(50000)
                        inception_score1_m, inception_score1_s, fid1 = inception_score.calc_scores(
                            samples1)
                        info_str = 'IS_mean: {:6.3f} , IS_std: {:6.3f} , fid: {:6.3f}'.format(
                            inception_score1_m, inception_score1_s, fid1)
                        logging.info(info_str)
                        if inception_score1_m > best_IS1:
                            best_IS1 = inception_score1_m
                            samples1 = get_samples(50000)
                            inception_score2_m, inception_score2_s, fid2 = inception_score.calc_scores(
                                samples1)
                            info_str = 'IS_mean2: {:6.3f} , IS_std2: {:6.3f} , fid2: {:6.3f}'.format(
                                inception_score2_m, inception_score2_s, fid2)
                            logging.info(info_str)
                        else:
                            inception_score2_m, inception_score2_s = 0, 0
                        summary_str = sess.run(
                            summary_op_3, {
                                tf_inception_m1: inception_score1_m,
                                tf_inception_std1: inception_score1_s,
                                tf_inception_m2: inception_score2_m,
                                tf_inception_std2: inception_score2_s,
                                tf_fid: fid1
                            })
                        summary_writer.add_summary(summary_str, global_step)
                        summary_writer.flush()
                    if tensorboard_log and CALC_NDB and global_step % INCEPTION_FREQUENCY == INCEPTION_FREQUENCY - 1:
                        samples = get_samples(20000)
                        results = ndb_model.evaluate(samples)
                        info_str = 'ndb: {:6.3f} , ndb_js: {:6.3f}'.format(
                            results['NDB'], results['JS'])
                        logging.info(info_str)
                        summary_str = sess.run(summary_op_4, {
                            tf_ndb: results['NDB'],
                            tf_ndb_js: results['JS']
                        })
                        summary_writer.add_summary(summary_str, global_step)
                        summary_writer.flush()

                    # # Save the model checkpoint periodically.
                    # if global_step % checkpoint_iter == 0 and checkpoint_save:
                    #     checkpoint_path = os.path.join(log_dir,'ckpt', 'model')
                    #     saver.save(
                    #         sess,
                    #         checkpoint_path,
                    #         global_step=global_step)
                    global_step += 1
                    it += 1
                    it_in_epoch += 1
                except tf.errors.OutOfRangeError:
                    break
Example #5
"""This main module is intended to run the whole program making use of all needed modules"""

import load_data as ld
import graph_nyc as gn
import graph_borough as gb
import pandas as pd
import sys
import clean_data as cd
import tendency as td

if __name__ == '__main__':
    pass

#Variables
load_data = ld.Load()  #Call instance of class within load_data module
frame = pd.DataFrame()  #Instantiate a data frame variable
clean_data = cd.Clean()  #Call instance of class within clean_data module
iteration = True
tendency = td.Tendency()  #Call instance of class within tendency module
graph_nyc = gn.Graph()  #Call instance of class within graph_nyc module
graph_borough = gb.Graph()  #Call instance of class within graph_borough module
result = []  #Create array for storing the outcomes from test_restaurant_grades(camis_id)
camis_set = []  #Create an array for storing a list with unique CAMIS

try:
    #Read dataset which can be found using this link: https://data.cityofnewyork.us/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/xx67-kt59
    frame = load_data.load_data(
        'DOHMH_New_York_City_Restaurant_Inspection_Results.csv')
Example #6
import load_data
import tensorflow as tf
from numpy import *

l = load_data.Load('data.csv')

x, y = l.load_data()
scaler = l.get_scaler()

step = l.step
input_size = l.input_size
batch_size = 30
output_size = 1
rnn_unit = 5

num = 1000

index = int((5.0 / 7.0) * len(x))  # use the first 5/7 of the series for training
train_x, train_y = x[:index], y[:index]

###################################### RNN variables ##############################
weight = {
    'in': tf.Variable(tf.random_normal([input_size, rnn_unit]), name='w_in'),
    'out': tf.Variable(tf.random_normal([rnn_unit, output_size]), name='w_out')
}

baies = {
    'in': tf.Variable(tf.random_normal([
        rnn_unit,
    ]), name='b_in'),
    'out': tf.Variable(tf.random_normal([
Example #7
            if 'Argument dataset' in line:
                args.dataset = line.strip().split()[-1]
                i += 1
            elif 'Argument max_iter' in line:
                args.max_iter = float(line.strip().split()[-1])
                i += 1
            elif 'Argument save_latent_iter' in line:
                args.latent_iter = float(line.strip().split()[-1])
                i += 1
            if i == 3:
                break

    # get all images and labels
    dataset_eval = load_data.Load(args.dataset,
                                  'all',
                                  shuffle=False,
                                  batch_size=5000,
                                  img_size=None)
    next_element_eval = dataset_eval.get_full_next()
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    with tf.Session(config=config) as sess:
        x_test = []
        y_test = []
        dataset_eval.init_dataset(sess)
        while True:
            try:
                t_x, t_y = sess.run(next_element_eval)
                x_test.append(t_x)
                y_test.append(t_y)
            except tf.errors.OutOfRangeError:
                break
def main(args, logging):
    # frequently used arguments
    batch_size = args.batch_size
    pad_size = args.pad_size
    z_len = args.z_len
    log_iter = args.log_iter
    max_iter = args.max_iter
    tensorboard_log = args.tensorboard_log
    save_image_iter = args.save_image_iter
    calc_cluster_flag = args.calc_cluster
    cluster_sample = args.cluster_sample

    # prepare for saving latent space
    os.makedirs(os.path.join(args.log_dir, 'latent'), exist_ok=True)
    max_hist = max(cluster_sz)
    latent_samples_iter = np.arange(0, max_iter + 1, args.cluster_sample)[:, np.newaxis] - \
                          np.arange(0, max_hist * args.cluster_gap, args.cluster_gap)[np.newaxis, :]
    latent_samples_iter = np.unique(latent_samples_iter)
    latent_samples_iter = latent_samples_iter[latent_samples_iter >= 0]
    latent_samples_iter = list(latent_samples_iter)
    latent_samples_iter.append(-1)  # add dummy iteration
    latent_queue = collections.deque(maxlen=max_hist)

    # choose model
    all_models = {
        'model1': model1,
        'model2': model2,
        'ALI_orig_celeba': ALI_orig_celeba,
        'ALI_orig_cifar10': ALI_orig_cifar10
    }
    model = all_models.get(args.architecture)

    # prepare data
    dataset_train = load_data.Load(args.dataset,
                                   args.train_on,
                                   shuffle=True,
                                   batch_size=batch_size,
                                   pad_size=pad_size,
                                   img_size=args.img_size)
    next_element_train = dataset_train.get_imgs_next()
    dataset_eval = load_data.Load(args.dataset,
                                  'all',
                                  shuffle=False,
                                  batch_size=500,
                                  pad_size=0,
                                  img_size=args.img_size)
    next_element_eval = dataset_eval.get_full_next()
    image_size = [
        dataset_train.img_size, dataset_train.img_size,
        next_element_train.shape.as_list()[-1]
    ]

    # define inputs
    input_x = tf.placeholder(tf.float32, [
        batch_size,
    ] + image_size)
    input_x_eval = tf.placeholder(tf.float32, [None] + image_size)
    sam_z = tf.placeholder(tf.float32, [args.batch_size, z_len])

    # data augmentation
    imgs_real = input_x + tf.random_uniform(
        shape=[batch_size] + image_size, minval=0., maxval=1.)  # dequantize
    imgs_real = imgs_real / 128. - 1
    imgs_real = tf.image.random_flip_left_right(imgs_real)

    # network
    x_gen = model.x_generator(sam_z,
                              args.dim_decoder,
                              is_training=True,
                              image_size=image_size,
                              reuse=False)
    z_gen = model.z_generator(imgs_real,
                              z_len,
                              args.dim_encoder,
                              is_training=True,
                              image_size=image_size,
                              reuse=False)

    # left-right flip prior: encourage the encoder to assign similar codes
    # to an image and its horizontal mirror (used for z1_loss below)
    imgs_real_lr = tf.image.flip_left_right(imgs_real)
    z_gen_lr = model.z_generator(imgs_real_lr,
                                 z_len,
                                 args.dim_encoder,
                                 is_training=True,
                                 image_size=image_size,
                                 reuse=True)

    imgs_concat = tf.concat([imgs_real, x_gen], 0)
    z_concat = tf.concat([z_gen, sam_z], 0)
    t_d = model.discriminator(imgs_concat,
                              z_concat,
                              args.dim_discriminator,
                              is_training=True,
                              image_size=image_size,
                              reuse=False)
    p1, q1 = tf.split(t_d, 2)

    t_d = model.discriminator(imgs_concat,
                              z_concat,
                              args.dim_discriminator,
                              is_training=False,
                              image_size=image_size,
                              reuse=True)
    p2, q2 = tf.split(t_d, 2)

    # cost function
    disc_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=p1,
                                                labels=tf.ones_like(p1)))
    disc_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=q1,
                                                labels=tf.zeros_like(q1)))
    disc_loss = (disc_real + disc_fake) / 2
    gen_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=q2,
                                                labels=tf.ones_like(q2)))
    enc_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=p2,
                                                labels=tf.zeros_like(p2)))
    enc_gen_loss = (enc_loss + gen_loss) / 2

    z1_loss = -1
    if args.aug > 0:
        z1_loss = tf.reduce_mean(tf.reduce_sum((z_gen - z_gen_lr)**2, 1))
        enc_gen_loss += args.aug * z1_loss

    if args.alice:
        lamb = 1e-3  # weight of the reconstruction terms
        x_rec = model.x_generator(z_gen,
                                  args.dim_decoder,
                                  is_training=True,
                                  image_size=image_size,
                                  reuse=True)
        z_rec = model.z_generator(x_gen,
                                  z_len,
                                  args.dim_encoder,
                                  is_training=True,
                                  image_size=image_size,
                                  reuse=True)
        x_rec_loss = tf.reduce_mean(tf.abs(imgs_real - x_rec))
        z_rec_loss = tf.reduce_mean(tf.abs(sam_z - z_rec))
        enc_gen_loss += lamb * x_rec_loss + lamb * z_rec_loss

    # optimizer
    var = tf.trainable_variables()
    x_gen_var = [v for v in var if 'Decoder' in v.name]
    z_gen_var = [v for v in var if 'Encoder' in v.name]
    disc_var = [v for v in var if 'Discriminator' in v.name]
    gen_save = tf.train.Saver(
        [v for v in tf.global_variables() if 'Decoder' in v.name])
    enc_save = tf.train.Saver(
        [v for v in tf.global_variables() if 'Encoder' in v.name])
    gen_opt = tf.train.AdamOptimizer(learning_rate=args.lr * 5,
                                     beta1=0.5,
                                     beta2=0.999)
    disc_opt = tf.train.AdamOptimizer(learning_rate=args.lr,
                                      beta1=0.5,
                                      beta2=0.999)
    gen_gv = gen_opt.compute_gradients(enc_gen_loss,
                                       var_list=x_gen_var + z_gen_var)
    disc_gv = disc_opt.compute_gradients(disc_loss, var_list=disc_var)
    gen_train_op = gen_opt.apply_gradients(gen_gv)
    disc_train_op = disc_opt.apply_gradients(disc_gv)

    # for saving latent space
    t_input_x_eval = input_x_eval / 128. - 1
    z_eval = model.z_generator(t_input_x_eval,
                               z_len,
                               args.dim_encoder,
                               is_training=False,
                               image_size=image_size,
                               reuse=True)

    # save images
    x_rec = model.x_generator(z_gen,
                              args.dim_decoder,
                              is_training=True,
                              image_size=image_size,
                              reuse=True)
    z = np.random.normal(size=(100, z_len)).astype(np.float32)
    z = tf.Variable(z, False)
    x_gen_fix = model.x_generator(z,
                                  args.dim_decoder,
                                  is_training=True,
                                  image_size=image_size,
                                  reuse=True)

    if tensorboard_log:
        summary1 = utils_summary.summary_collection('col1')
        summary2 = utils_summary.summary_collection('col2')
        summary_cluster = utils_summary.summary_collection('col3')
        if calc_cluster_flag:
            ph_ACC = tf.placeholder(tf.float32)
            ph_NMI = tf.placeholder(tf.float32)
            ph_ARI = tf.placeholder(tf.float32)
            clustering_algo = KMeans(n_clusters=dataset_eval.num_classes,
                                     precompute_distances=True,
                                     n_jobs=1)
            with tf.name_scope('cluster'):
                summary_cluster.add_summary_scalar(ph_ACC, 'ACC')
                summary_cluster.add_summary_scalar(ph_NMI, 'NMI')
                summary_cluster.add_summary_scalar(ph_ARI, 'ARI')
        with tf.name_scope('losses'):
            summary1.add_summary_scalar(disc_real, 'disc_real')
            summary1.add_summary_scalar(disc_fake, 'disc_fake')
            summary1.add_summary_scalar(disc_loss, 'disc_loss')
            summary1.add_summary_scalar(enc_gen_loss, 'enc_gen_loss')
            summary1.add_summary_scalar(gen_loss, 'gen_loss')
            summary1.add_summary_scalar(enc_loss, 'enc_loss')
            summary1.add_summary_scalar(z1_loss, 'z1_loss')
            summary1.add_summary_scalar(
                tf.math.sqrt(tf.reduce_mean(gen_gv[len(x_gen_var) - 2][0]**2)),
                'gen_grad')
            summary1.add_summary_scalar(
                tf.math.sqrt(tf.reduce_mean(gen_gv[len(gen_gv) - 2][0]**2)),
                'enc_grad')
            summary1.add_summary_scalar(
                tf.math.sqrt(tf.reduce_mean(disc_gv[0][0]**2)), 'disc_grad')
        summary2.add_summary_image2(imgs_real, x_rec, 12**2, 'Input')
        summary2.add_summary_image1(x_gen_fix, args.batch_size, 'Sam')
        summary2.add_collection(summary1)
        summary_op_1 = tf.summary.merge(summary1.get_summary())
        summary_op_2 = tf.summary.merge(summary2.get_summary())
        summary_op_cluster = tf.summary.merge(summary_cluster.get_summary())

    # moving-average update ops (e.g. batch norm); run together with the
    # generator step below
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    init = tf.global_variables_initializer()
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    with tf.Session(config=config) as sess:
        sess.run(init)
        # enc_save.restore(sess, tf.train.latest_checkpoint('/root/Documents/Cluster-With-GAN/results/clustering30/cifar10'))
        # gen_save.restore(sess, tf.train.latest_checkpoint('/root/Documents/Cluster-With-GAN/results/clustering30/cifar10'))
        # -- create tensorboard summary writers -- #
        if tensorboard_log:
            summary_writer = tf.summary.FileWriter(os.path.join(
                args.log_dir, 'tb_summary'),
                                                   graph=sess.graph)
            summary_writer_cluster = []
            if calc_cluster_flag:
                for Len in cluster_sz:
                    summary_writer_cluster.append(
                        tf.summary.FileWriter(
                            os.path.join(args.log_dir,
                                         'tb_Cluster' + str(Len))))
        save_latent_iter = latent_samples_iter[0]
        latent_ind = 0
        it = -1
        ep = -1
        global_step = -1
        cluster_thread = None
        while global_step <= max_iter:  # for epoch
            dataset_train.init_dataset(sess)
            ep += 1
            it_per_epoch = it_in_epoch if it != -1 else -1
            it_in_epoch = 0
            while global_step <= max_iter:  # for iter in epoch
                try:
                    global_step += 1
                    it += 1
                    it_in_epoch += 1

                    # -- train network -- #
                    x = sess.run(next_element_train)
                    z = np.random.normal(size=(batch_size, z_len))
                    start_time = time.time()
                    d_loss, _ = sess.run([disc_loss, disc_train_op], {
                        input_x: x,
                        sam_z: z
                    })
                    for i in range(1):
                        g_loss, _, _ = sess.run(
                            [enc_gen_loss, gen_train_op, update_ops], {
                                input_x: x,
                                sam_z: z
                            })
                    duration = time.time() - start_time

                    # -- save log -- #
                    if global_step % log_iter == 0:
                        examples_per_sec = batch_size / float(duration)
                        info_str = '{}: Epoch: {:3d} ({:5d}/{:5d}), global_step {:6d}, d_loss = {:.2f}, g_loss = {:.2f} ({:.1f} examples/sec; {:.3f} sec/batch)'.format(
                            datetime.now(), ep, it_in_epoch, it_per_epoch,
                            global_step, d_loss, g_loss, examples_per_sec,
                            duration)
                        logging.info(info_str)
                        print('\r', info_str, end='', flush=True)
                        if tensorboard_log:
                            summary_str = sess.run(summary_op_1, {
                                input_x: x,
                                sam_z: z
                            })
                            summary_writer.add_summary(summary_str,
                                                       global_step)

                    # -- save latent space to queue-- #
                    if global_step == save_latent_iter:
                        dataset_eval.init_dataset(sess)
                        info_str = 'saving latent iter: ' + str(global_step)
                        logging.info(info_str)
                        print('\r', info_str, end='', flush=True)
                        latent_eval = []
                        label_eval = []
                        while True:
                            try:
                                t_x, t_l = sess.run(next_element_eval)
                                latent_eval.append(
                                    z_eval.eval({input_x_eval: t_x}))
                                label_eval.append(t_l)
                            except tf.errors.OutOfRangeError:
                                break
                        latent_eval = np.concatenate(latent_eval, 0)
                        label_eval = np.concatenate(label_eval, 0)
                        latent_queue.append(latent_eval)
                        latent_ind += 1
                        save_latent_iter = latent_samples_iter[latent_ind]

                    # -- calc clustering -- #
                    if global_step % cluster_sample == 0 and calc_cluster_flag:
                        if cluster_thread is not None:
                            cluster_thread.join()
                        latent_list = list(latent_queue)
                        cluster_args = (sess, latent_list, label_eval,
                                        clustering_algo, global_step,
                                        summary_writer_cluster,
                                        summary_op_cluster, ph_ACC, ph_NMI,
                                        ph_ARI, logging)
                        cluster_thread = threading.Thread(target=calc_cluster,
                                                          args=cluster_args)
                        cluster_thread.start()

                    # -- save images -- #
                    if tensorboard_log and global_step % save_image_iter == 0:
                        summary_str = sess.run(summary_op_2, {
                            input_x: x,
                            sam_z: z
                        })
                        summary_writer.add_summary(summary_str, global_step)
                        summary_writer.flush()

                    if global_step % 100000 == 0 or (global_step >= 450000 and
                                                     global_step % 5000 == 0):
                        gen_save.save(sess,
                                      os.path.join(args.log_dir, 'gen-model'),
                                      global_step=global_step)
                        enc_save.save(sess,
                                      os.path.join(args.log_dir, 'enc-model'),
                                      global_step=global_step)

                except tf.errors.OutOfRangeError:
                    break
        # save last latent space to disk
        for latents, step in zip(latent_queue,
                                 latent_samples_iter[-max_hist - 1:-1]):
            np.savez(os.path.join(args.log_dir, 'latent',
                                  'latent' + str(step) + '.npz'),
                     latent=latents)
        if cluster_thread is not None:
            cluster_thread.join()
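The calc_cluster worker started above is defined elsewhere in the project. The ph_ACC / ph_NMI / ph_ARI placeholders suggest the usual clustering metrics: NMI and ARI are available directly from scikit-learn, while clustering accuracy (ACC) typically requires a best one-to-one matching between cluster ids and class labels. A sketch of that matching step (the helper names here are illustrative, not the project's own):

import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

def clustering_accuracy(y_true, y_pred):
    # Count co-occurrences of predicted cluster ids and true labels, then
    # pick the one-to-one assignment that maximizes the matched count.
    n = int(max(y_pred.max(), y_true.max())) + 1
    counts = np.zeros((n, n), dtype=np.int64)
    for p, t in zip(y_pred, y_true):
        counts[p, t] += 1
    rows, cols = linear_sum_assignment(-counts)
    return counts[rows, cols].sum() / float(y_pred.size)

# pred = clustering_algo.fit_predict(latent_eval)
# acc = clustering_accuracy(label_eval, pred)
# nmi = normalized_mutual_info_score(label_eval, pred)
# ari = adjusted_rand_score(label_eval, pred)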