Exemplo n.º 1
0
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, config.x_dim),
                             name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)

    # build the posterior flow
    with tf.variable_scope('posterior_flow'):
        flows = []
        for i in range(config.n_flows):
            flows.append(spt.layers.ActNorm())
            flows.append(
                spt.layers.CouplingLayer(tf.make_template(
                    'coupling',
                    coupling_layer_shift_and_scale,
                    create_scope_now_=True),
                                         scale_type='exp'))
            flows.append(spt.layers.InvertibleDense())
        posterior_flow = spt.layers.SequentialFlow(flows=flows)

    # derive the initialization op
    with tf.name_scope('initialization'), \
            arg_scope([spt.layers.act_norm], initializing=True):
        init_q_net = q_net(input_x, posterior_flow)
        init_chain = init_q_net.chain(p_net,
                                      latent_axis=0,
                                      observed={'x': input_x})
        init_loss = tf.reduce_mean(init_chain.vi.training.sgvb())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, posterior_flow)
        train_chain = train_q_net.chain(p_net,
                                        latent_axis=0,
                                        observed={'x': input_x})

        vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_z)
        test_chain = test_q_net.chain(p_net,
                                      latent_axis=0,
                                      observed={'x': input_x})
        test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(n_z=100)
        x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(
                                       loop.epoch),
                                   grid_size=(10, 10))

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = spt.datasets.load_mnist()
    train_flow = bernoulli_flow(x_train,
                                config.batch_size,
                                shuffle=True,
                                skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # initialize the network
        spt.utils.ensure_variables_initialized()
        for [batch_x] in train_flow:
            print('Network initialization loss: {:.6g}'.format(
                session.run(init_loss, {input_x: batch_x})))
            print('')
            break

        # train the network
        with spt.TrainLoop(params,
                           var_groups=['p_net', 'q_net', 'posterior_flow'],
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop,
                                  train_op, [input_x],
                                  train_flow,
                                  metrics={'loss': loss})
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={
                                          'test_nll': test_nll,
                                          'test_lb': test_lb
                                      },
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                plot_samples, loop),
                                          freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Exemplo n.º 2
0
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, ) + config.x_shape,
                             name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
    multi_gpu = MultiGPU()

    # build the model
    grads = []
    losses = []
    y_list = []
    acc_list = []
    batch_size = spt.utils.get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x, dev_input_y
                         ] in multi_gpu.data_parallel(batch_size,
                                                      [input_x, input_y]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                _ = model(dev_input_x, is_training, channels_last=True)

            else:
                # derive the loss, output and accuracy
                dev_logits = model(dev_input_x,
                                   is_training=is_training,
                                   channels_last=multi_gpu.channels_last(dev))
                dev_cls_loss = tf.losses.sparse_softmax_cross_entropy(
                    dev_input_y, dev_logits)
                dev_loss = dev_cls_loss + tf.losses.get_regularization_loss()
                dev_y = spt.ops.softmax_classification_output(dev_logits)
                dev_acc = spt.ops.classification_accuracy(dev_y, dev_input_y)
                losses.append(dev_loss)
                y_list.append(dev_y)
                acc_list.append(dev_acc)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(
                    optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, acc] = multi_gpu.average([losses, acc_list], batch_size)
    [y] = multi_gpu.concat([y_list])
    train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(grads),
                                     optimizer=optimizer,
                                     control_inputs=tf.get_collection(
                                         tf.GraphKeys.UPDATE_OPS))

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_cifar10(x_shape=config.x_shape, normalize_x=True)
    train_flow = spt.DataFlow.arrays([x_train, y_train],
                                     config.batch_size,
                                     shuffle=True,
                                     skip_incomplete=True)
    test_flow = spt.DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with spt.utils.create_session().as_default():
        # train the network
        with spt.TrainLoop(params,
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop,
                                  train_op, [input_x, input_y],
                                  train_flow,
                                  feed_dict={is_training: True},
                                  metrics={
                                      'loss': loss,
                                      'acc': acc
                                  })
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_acc': acc},
                                      inputs=[input_x, input_y],
                                      data_flow=test_flow,
                                      feed_dict={is_training: False},
                                      time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Exemplo n.º 3
0
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, config.x_dim),
                             name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)

    # derive the output for initialization
    with tf.name_scope('initialization'), \
            spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_q_net = q_net(input_x, is_initializing=True)
        init_chain = init_q_net.chain(p_net,
                                      observed={'x': input_x},
                                      is_initializing=True)
        init_lb = tf.reduce_mean(init_chain.vi.lower_bound.elbo())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x)
        train_chain = train_q_net.chain(p_net, observed={'x': input_x})
        vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, n_z=config.test_n_z)
        test_chain = test_q_net.chain(p_net,
                                      latent_axis=0,
                                      observed={'x': input_x})
        test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(n_z=100)
        x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(
                                       loop.epoch),
                                   grid_size=(10, 10),
                                   results=results)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(x_shape=[784])
    train_flow = bernoulli_flow(x_train,
                                config.batch_size,
                                shuffle=True,
                                skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        for [x] in train_flow:
            print('Network initialized, first-batch loss is {:.6g}.\n'.format(
                session.run(init_lb, feed_dict={input_x: x})))
            break

        # train the network
        with spt.TrainLoop(params,
                           var_groups=['q_net', 'p_net'],
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop,
                                  train_op, [input_x],
                                  train_flow,
                                  metrics={'loss': loss},
                                  summaries=tf.summary.merge_all(
                                      spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={
                                          'test_nll': test_nll,
                                          'test_lb': test_lb
                                      },
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                plot_samples, loop),
                                          freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Exemplo n.º 4
0
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, config.x_dim),
                             name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
    multi_gpu = MultiGPU(disable_prebuild=False)

    # build the model
    grads = []
    losses = []
    test_nlls = []
    test_lbs = []
    batch_size = spt.utils.get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x
                         ] in multi_gpu.data_parallel(batch_size, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                with arg_scope([p_net, q_net],
                               is_training=is_training,
                               channels_last=True):
                    _ = q_net(dev_input_x).chain(p_net,
                                                 observed={'x': dev_input_x})

            else:
                with arg_scope([p_net, q_net],
                               is_training=is_training,
                               channels_last=multi_gpu.channels_last(dev)):
                    # derive the loss and lower-bound for training
                    with tf.name_scope('training'):
                        train_q_net = q_net(dev_input_x)
                        train_chain = train_q_net.chain(
                            p_net, latent_axis=0, observed={'x': dev_input_x})

                        dev_vae_loss = tf.reduce_mean(
                            train_chain.vi.training.sgvb())
                        dev_loss = dev_vae_loss + \
                            tf.losses.get_regularization_loss()
                        losses.append(dev_loss)

                    # derive the nll and logits output for testing
                    with tf.name_scope('testing'):
                        test_q_net = q_net(dev_input_x, n_z=config.test_n_z)
                        test_chain = test_q_net.chain(
                            p_net, latent_axis=0, observed={'x': dev_input_x})
                        dev_test_nll = -tf.reduce_mean(
                            test_chain.vi.evaluation.is_loglikelihood())
                        dev_test_lb = tf.reduce_mean(
                            test_chain.vi.lower_bound.elbo())
                        test_nlls.append(dev_test_nll)
                        test_lbs.append(dev_test_lb)

                    # derive the optimizer
                    with tf.name_scope('optimizing'):
                        params = tf.trainable_variables()
                        grads.append(
                            optimizer.compute_gradients(dev_loss,
                                                        var_list=params))

    # merge multi-gpu outputs and operations
    with tf.name_scope('optimizing'):
        [loss, test_lb, test_nll] = \
            multi_gpu.average([losses, test_lbs, test_nlls], batch_size)
        train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(grads),
                                         optimizer=optimizer,
                                         control_inputs=tf.get_collection(
                                             tf.GraphKeys.UPDATE_OPS))

    # derive the plotting function
    work_dev = multi_gpu.work_devices[0]
    with tf.device(work_dev), tf.name_scope('plotting'):
        plot_p_net = p_net(n_z=100,
                           is_training=is_training,
                           channels_last=multi_gpu.channels_last(work_dev))
        x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(
                                       loop.epoch),
                                   grid_size=(10, 10),
                                   results=results)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = spt.datasets.load_mnist()
    train_flow = bernoulli_flow(x_train,
                                config.batch_size,
                                shuffle=True,
                                skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with spt.TrainLoop(params,
                           var_groups=['q_net', 'p_net'],
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop,
                                  train_op, [input_x],
                                  train_flow,
                                  feed_dict={is_training: True},
                                  metrics={'loss': loss})
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={
                                          'test_nll': test_nll,
                                          'test_lb': test_lb
                                      },
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      feed_dict={is_training: False},
                                      time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                plot_samples, loop),
                                          freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Exemplo n.º 5
0
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, config.x_dim),
                             name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, n_samples=config.train_n_samples)
        train_chain = train_q_net.chain(p_net,
                                        latent_axis=0,
                                        observed={'x': input_x})

        if config.vi_algorithm == 'reinforce':
            baseline = reinforce_baseline_net(input_x)
            vae_loss = tf.reduce_mean(
                train_chain.vi.training.reinforce(baseline=baseline))
        else:
            assert (config.vi_algorithm == 'vimco')
            vae_loss = tf.reduce_mean(train_chain.vi.training.vimco())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, n_samples=config.test_n_samples)
        test_chain = test_q_net.chain(p_net,
                                      latent_axis=0,
                                      observed={'x': input_x})
        test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())

        # derive the classifier via q(y|x)
        q_y_given_x = tf.argmax(test_q_net['y'].distribution.logits,
                                axis=-1,
                                name='q_y_given_x')

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(
            observed={'y': tf.range(config.n_clusters, dtype=tf.int32)},
            n_z=10)
        x_plots = tf.reshape(
            tf.transpose(bernoulli_as_pixel(plot_p_net['x']), (1, 0, 2)),
            (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(
                                       loop.epoch),
                                   grid_size=(config.n_clusters, 10),
                                   results=results)

    # derive the final un-supervised classifier
    c_classifier = ClusteringClassifier(config.n_clusters, 10)

    def train_classifier(loop):
        df = bernoulli_flow(x_train,
                            config.batch_size,
                            shuffle=False,
                            skip_incomplete=False)
        with loop.timeit('cls_train_time'):
            [c_pred] = collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=df,
            )
            c_classifier.fit(c_pred, y_train)
            print(c_classifier.describe())

    def evaluate_classifier(loop):
        with loop.timeit('cls_test_time'):
            [c_pred] = collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=test_flow,
            )
            y_pred = c_classifier.predict(c_pred)
            cls_metrics = {'test_acc': accuracy_score(y_test, y_pred)}
            loop.collect_metrics(cls_metrics)
            results.update_metrics(cls_metrics)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(x_shape=[784])
    train_flow = bernoulli_flow(x_train,
                                config.batch_size,
                                shuffle=True,
                                skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['p_net', 'q_net', 'gaussian_mixture_prior'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(loop,
                                  train_op, [input_x],
                                  train_flow,
                                  metrics={'loss': loss},
                                  summaries=tf.summary.merge_all(
                                      spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_nll': test_nll},
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                plot_samples, loop),
                                          freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                train_classifier, loop),
                                          freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                evaluate_classifier, loop),
                                          freq=10)

            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    with codecs.open('cluster_classifier.txt', 'wb', 'utf-8') as f:
        f.write(c_classifier.describe())
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Exemplo n.º 6
0
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    model_file = config.result_dir + "/" + os.path.basename(__file__).split(".py")[0] + "_" + \
                 str(config.noExp) + ".model"
    dirName = os.path.basename(__file__).split(".py")[0] + "_" + str(
        config.noExp)
    results = MLResults(os.path.join(config.result_dir, dirName))
    results.save_config(config)  # save experiment settings
    results.make_dirs('train_summary', exist_ok=True)
    results.make_dirs('result_summary', exist_ok=True)
    results.make_dirs('mid_summary', exist_ok=True)

    # os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_number

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, ) + config.x_shape,
                             name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate',
                                          config.initial_lr,
                                          config.lr_anneal_factor,
                                          min_value=1e-6)
    multi_gpu = MultiGPU(disable_prebuild=True)
    # multi_gpu = MultiGPU()

    # derive the training operation
    gradses = []
    grad_vars = []
    train_losses = []
    BATCH_SIZE = get_batch_size(input_x)

    for dev, pre_build, [dev_input_x
                         ] in multi_gpu.data_parallel(BATCH_SIZE, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            # derive the loss for initializing
            with tf.name_scope('initialization'), \
                    arg_scope([p_net, q_net], is_initializing=True), \
                    spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
                init_q_net = q_net(dev_input_x, n_z=config.train_n_samples)
                init_chain = init_q_net.chain(p_net,
                                              latent_axis=0,
                                              observed={'x': dev_input_x})
                init_loss = tf.reduce_mean(init_chain.vi.training.vimco())

            # derive the loss and lower-bound for training
            with tf.name_scope('training'), \
                    arg_scope([p_net, q_net], is_training=True):
                train_q_net = q_net(dev_input_x, n_z=config.train_n_samples)
                train_chain = train_q_net.chain(p_net,
                                                latent_axis=0,
                                                observed={'x': dev_input_x})
                train_loss = (tf.reduce_mean(train_chain.vi.training.vimco()) +
                              tf.losses.get_regularization_loss())
                train_losses.append(train_loss)

            # derive the logits output for testing
            with tf.name_scope('testing'):
                test_q_net = q_net(dev_input_x, n_z=config.test_n_z)
                test_chain = test_q_net.chain(p_net,
                                              latent_axis=0,
                                              observed={'x': dev_input_x})
                # log_prob of X and each univariate time series of X
                log_prob = tf.reduce_mean(
                    test_chain.model['x'].distribution.log_prob(dev_input_x),
                    0)
                log_prob_per_element = tf.reduce_sum(log_prob)
                log_prob_per_element_univariate_TS = tf.reduce_sum(
                    log_prob, [0, 1, 3])
                log_prob_per_element_univariate_TS_All = tf.reduce_sum(
                    log_prob, [1, 3])

            # derive the optimizer
            with tf.name_scope('optimizing'):
                params = tf.trainable_variables()
                optimizer = tf.train.AdamOptimizer(learning_rate)
                grads = optimizer.compute_gradients(train_loss, params)
                for grad, var in grads:
                    if grad is not None and var is not None:
                        if config.grad_clip_norm:
                            grad = tf.clip_by_norm(grad, config.grad_clip_norm)
                        if config.check_numerics:
                            grad = tf.check_numerics(
                                grad,
                                'gradient for {} has numeric issue'.format(
                                    var.name))
                        grad_vars.append((grad, var))
                gradses.append(grad_vars)

    # merge multi-gpu outputs and operations
    [train_loss] = multi_gpu.average([train_losses], BATCH_SIZE)
    train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(gradses),
                                     optimizer=optimizer,
                                     control_inputs=tf.get_collection(
                                         tf.GraphKeys.UPDATE_OPS))

    # sort the contribution of each univariate_TS of input
    SORT_UNIVARIATE_TS_INPUT = tf.placeholder(dtype=tf.float32,
                                              shape=(None, None),
                                              name='SORT_UNIVARIATE_TS_INPUT')
    SORT_UNIVARIATE_TS = tf.nn.top_k(SORT_UNIVARIATE_TS_INPUT,
                                     k=config.metricNumber).indices + 1

    # load the training and testing data
    print("=" * 10 + "Shape of Input data" + "=" * 10)
    x, time_indexs, x_test, time_indexs2 = load_matrix_allData(
        config.dataReadformat, config.datapathForTrain, config.datapathForTest,
        config.timeLength, config.metricNumber, "TrainFileNameList.txt",
        "TestFileNameList.txt", results, config.norm)

    x_test = x_test.reshape([-1, config.timeLength, config.metricNumber, 1])
    print("Test:", x_test.shape)
    if config.batchTest:
        test_flow = DataFlow.arrays(
            [x_test], config.test_batch_size)  # DataFlow is iterator
        del x_test
    x_train, x_val = split_numpy_array(x, portion=config.VALID_PORTION)
    x_train = x_train.reshape([-1, config.timeLength, config.metricNumber, 1])
    x_val = x_val.reshape([-1, config.timeLength, config.metricNumber, 1])
    train_flow = DataFlow.arrays([x_train],
                                 config.batch_size,
                                 shuffle=False,
                                 skip_incomplete=True)
    val_flow = DataFlow.arrays([x_val], config.test_batch_size)
    print("Note:", config.x_dim,
          ", x_dim = size of datapoint = timeLength * metricNumber")
    print("Input data shape:", x.shape, "Train data shape:", x_train.shape,
          "Validation data shape:", x_val.shape)
    del x_train, x_val, x

    # training part
    with spt.utils.create_session().as_default() as session:
        spt.utils.ensure_variables_initialized()
        saver = CheckpointSaver(tf.trainable_variables(), model_file)
        if os.path.exists(model_file):
            # load the parameters of trained model
            saver.restore_latest()
        else:
            # initialize the network
            while True:
                breakFlag = 0
                for [x] in train_flow:
                    INITLOSS = session.run(init_loss, feed_dict={input_x: x})
                    print('Network initialized, first-batch loss is {:.6g}.'.
                          format(INITLOSS))
                    if np.isnan(INITLOSS) or np.isinf(
                            INITLOSS) or INITLOSS > 10**5:
                        pass
                    else:
                        breakFlag = 1
                        break
                if breakFlag:
                    break

            # train the network
            with train_flow.threaded(10) as train_flow:
                with spt.TrainLoop(
                        params,
                        var_groups=['q_net', 'p_net'],
                        max_epoch=config.max_epoch,
                        max_step=config.max_step,
                        summary_dir=(results.system_path('train_summary')
                                     if config.write_summary else None),
                        summary_graph=tf.get_default_graph(),
                        early_stopping=True) as loop:
                    trainer = spt.Trainer(loop,
                                          train_op, [input_x],
                                          train_flow,
                                          metrics={'loss': train_loss},
                                          summaries=tf.summary.merge_all(
                                              spt.GraphKeys.AUTO_HISTOGRAM))
                    # anneal the learning rate
                    trainer.anneal_after(learning_rate,
                                         epochs=config.lr_anneal_epoch_freq,
                                         steps=config.lr_anneal_step_freq)
                    validator = spt.Validator(
                        loop,
                        train_loss,
                        [input_x],
                        val_flow,
                    )
                    trainer.evaluate_after_epochs(validator, freq=10)
                    trainer.log_after_epochs(freq=1)
                    trainer.run()
                saver.save()

            # save the training infomation
            firWrite = True
            num = 0
            time0 = time.time()
            for [x_train] in train_flow:
                if config.savetrainDS:
                    # log prob of each metric of each instance
                    log_prob_per_element_univariate_TS_list_item_Train = (
                        session.run(log_prob_per_element_univariate_TS_All,
                                    feed_dict={input_x: x_train}))
                    log_prob_per_element_univariate_TS_list_Train = log_prob_per_element_univariate_TS_list_item_Train
                    log_prob_per_element_list_Train = np.sum(np.array(
                        log_prob_per_element_univariate_TS_list_item_Train),
                                                             axis=1).tolist()
                    if firWrite:
                        save_file(
                            results.system_path("train_summary"),
                            "OutlierScores_metric.txt",
                            log_prob_per_element_univariate_TS_list_Train)
                        save_file(results.system_path("train_summary"),
                                  "OutlierScores.txt",
                                  log_prob_per_element_list_Train)
                    else:
                        save_file(
                            results.system_path("train_summary"),
                            "OutlierScores_metric.txt",
                            log_prob_per_element_univariate_TS_list_Train,
                            "\n", "a")
                        save_file(results.system_path("train_summary"),
                                  "OutlierScores.txt",
                                  log_prob_per_element_list_Train, "\n", "a")

                firWrite = False
                num += 1
                if num % 1000 == 0:
                    print(
                        "-----Train %s >>>>>:Sum time of batch instances:%s" %
                        (num, float(time.time() - time0) / float(num)))
            del train_flow, val_flow

        # online test
        time2 = time.time()
        log_prob_per_element_list, log_prob_per_element_univariate_TS_list = [], []
        if config.batchTest:
            num = 0
            for [x_test] in test_flow:
                if config.savetestDS:
                    # log prob of each metric of each instance
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(log_prob_per_element_univariate_TS_All,
                                    feed_dict={input_x: x_test}))
                    log_prob_per_element_univariate_TS_list += log_prob_per_element_univariate_TS_list_item.tolist(
                    )
                    log_prob_per_element_list += np.sum(
                        np.array(log_prob_per_element_univariate_TS_list_item),
                        axis=1).tolist()

                num += 1
                if num % 200 == 0:
                    print("-----Test %s >>>>>:Sum time of batch instances:%s" %
                          (num, float(time.time() - time2) / float(num)))
        else:
            num = 1
            for batch_x in x_test:
                if config.savetestTS:
                    log_prob_per_element_list_item = (session.run(
                        log_prob_per_element, feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_list.append(
                        log_prob_per_element_list_item)

                if config.savetestDS:
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(log_prob_per_element_univariate_TS,
                                    feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_univariate_TS_list.append(
                        log_prob_per_element_univariate_TS_list_item)
                    log_prob_per_element_list.append(
                        sum(log_prob_per_element_univariate_TS_list_item))

                if num % 200 == 0:
                    print(
                        "-----Test>>>>>:%d, average time of each instance:%s" %
                        (num, float(time.time() - time2) / float(num)))
                num += 1

        # get the lable file name and its line cnt number
        allLabelFileNameLineCntList = get_machineID(results, config.labelpath)

        print("No of OutlierScores for all dataPoint:(%s):" %
              len(log_prob_per_element_list))
        if config.savetestDS:
            save_file(
                results.system_path("result_summary"),
                "OutlierScores_metric.txt",
                cat_List(allLabelFileNameLineCntList,
                         log_prob_per_element_univariate_TS_list))
        save_file(
            results.system_path("result_summary"), "OutlierScores.txt",
            cat_List(allLabelFileNameLineCntList, log_prob_per_element_list))

        if config.evaluation:
            # Prepraration for the hitory two-metric results
            twoMetricScore = read_file(results.system_path("train_summary"),
                                       "OutlierScores_metric.txt")
            ave_twoMetricScore = np.mean(np.array(twoMetricScore),
                                         axis=0).tolist()
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ["Average score of each univariate time series", "\n"],
                      ",")
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ave_twoMetricScore + ["\n"], ",", "a")
            save_file(results.system_path("result_summary"), "PRF.txt", [
                "Threshold", "F", "Precision", "Recall", "TP", "FP", "FN", "\n"
            ], ",", "a")

            # get the sorted item each metric by change score
            twoMetricScoreList = cal_scoreChanges(
                log_prob_per_element_list, ave_twoMetricScore,
                log_prob_per_element_univariate_TS_list)
            MetricResult = session.run(
                SORT_UNIVARIATE_TS,
                feed_dict={SORT_UNIVARIATE_TS_INPUT: twoMetricScoreList})
            save_file(results.system_path("result_summary"),
                      "MetricResult.txt",
                      cat_List(allLabelFileNameLineCntList, MetricResult))

            # POT evalution
            POT_TH = pot_eval(
                read_file(results.system_path("train_summary"),
                          "OutlierScores.txt", "float"), config.q,
                config.level)
            resultArray, outlierLabelfileNameLineCntList = cal_binaryResult(
                log_prob_per_element_list, POT_TH, time_indexs2,
                config.saveMetricInfo, allLabelFileNameLineCntList)
            evaluate(results, config.labelpath, resultArray, time_indexs2,
                     POT_TH)

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()

    interpretation_hit_ratio(truth_filepath=config.interpret_filepath,
                             prediction_filepath=os.path.join(
                                 config.result_dir, dirName, "result_summary",
                                 "MetricResult.txt"))
Exemplo n.º 7
0
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, config.x_dim),
                             name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)

    # derive the loss, output and accuracy
    logits = model(input_x)
    cls_loss = tf.losses.sparse_softmax_cross_entropy(input_y, logits)
    loss = cls_loss + tf.losses.get_regularization_loss()
    y = spt.ops.softmax_classification_output(logits)
    acc = spt.ops.classification_accuracy(y, input_y)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_cifar10(x_shape=(config.x_dim,), normalize_x=True)
    train_flow = spt.DataFlow.arrays([x_train, y_train],
                                     config.batch_size,
                                     shuffle=True,
                                     skip_incomplete=True)
    test_flow = spt.DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with spt.utils.create_session().as_default():
        # train the network
        with spt.TrainLoop(params,
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop,
                                  train_op, [input_x, input_y],
                                  train_flow,
                                  metrics={
                                      'loss': loss,
                                      'acc': acc
                                  },
                                  summaries=tf.summary.merge_all(
                                      spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_acc': acc},
                                      inputs=[input_x, input_y],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Exemplo n.º 8
0
def main(trainpath, normalpath, abnormalpath, outputpath):
    if config.debug_level == -1:
        spt.utils.set_assertion_enabled(False)
    elif config.debug_level == 1:
        spt.utils.set_check_numerics(True)

    #spt.utils.set_assertion_enabled(False)
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # input and output file
    train_file = trainpath
    normal_file = normalpath
    abnormal_file = abnormalpath
    output_file = os.path.join('webankdata',
                               '{}_{}.csv'.format(config.flow_type or 'vae',
                                                  outputpath))
    valid_file = os.path.join('webankdata',
                              'v{}_{}.csv'.format(config.flow_type or 'vae',
                                                  outputpath))
    # you can change it by yourself

    # read data
    (x_train, y_train), (x_test, y_test), flows_test = \
        get_data_vae(train_file, normal_file, abnormal_file)
    config.x_dim = x_train.shape[1]
    #config.z_dim = get_z_dim(x_train.shape[1])

    all_len = x_train.shape[0]
    print('origin data: %s' % all_len)
    for i in range(30):
        print(list(x_train[i]))
    
    valid_rate = 0.1
    x_train, x_valid = train_test_split(x_train, test_size=valid_rate)
    
    
    # x_valid = x_train
    print('%s for validation, %s for training v2' % (x_valid.shape[0], x_train.shape[0]))
    print('%s for test' % x_test.shape[0])

    print('x_dim: %s z_dim: %s' % (config.x_dim, config.z_dim))
    # change it by yourself

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None, config.x_dim), name='input_x')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)

    # build the posterior flow
    if config.flow_type is None:
        posterior_flow = None
    elif config.flow_type == 'planar_nf':
        posterior_flow = \
            spt.layers.planar_normalizing_flows(config.n_planar_nf_layers)
    else:
        assert(config.flow_type == 'rnvp')
        with tf.variable_scope('posterior_flow'):
            flows = []
            for i in range(config.n_rnvp_layers):
                flows.append(spt.layers.ActNorm())
                flows.append(spt.layers.CouplingLayer(
                    tf.make_template(
                        'coupling',
                        coupling_layer_shift_and_scale,
                        create_scope_now_=True
                    ),
                    scale_type='sigmoid'
                ))
                flows.append(spt.layers.InvertibleDense(strict_invertible=True))
            posterior_flow = spt.layers.SequentialFlow(flows=flows)

    # derive the initialization op
    with tf.name_scope('initialization'), \
            arg_scope([spt.layers.act_norm], initializing=True):
        init_q_net = q_net(input_x, posterior_flow)
        init_chain = init_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        init_loss = tf.reduce_mean(init_chain.vi.training.sgvb())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, posterior_flow)
        train_chain = train_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})

        vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_z)
        test_chain = test_q_net.chain(
            p_net, latent_axis=0, observed={'x': input_x})
        test_logp = test_chain.vi.evaluation.is_loglikelihood()
        test_nll = -tf.reduce_mean(test_logp)
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)

        cliped_grad = []
        for grad, var in grads:
            if grad is not None and var is not None:
                if config.norm_clip is not None:
                    grad = tf.clip_by_norm(grad, config.norm_clip)
                cliped_grad.append((grad, var))

        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(cliped_grad)

    train_flow = spt.DataFlow.arrays([x_train],
                                     config.batch_size,
                                     shuffle=True,
                                     skip_incomplete=True)
    valid_flow = spt.DataFlow.arrays([x_valid],
                                     config.test_batch_size)
    test_flow = spt.DataFlow.arrays([x_test],
                                    config.test_batch_size)

    # model_file
    #model_name = ''
    model_name = os.path.join(
        'webankdata',
        'md_{}_{}.model'.format(
            config.flow_type or 'vae',
            outputpath.split('.')[0]
        )
    )

    with spt.utils.create_session().as_default() as session:
        var_dict = spt.utils.get_variables_as_dict()
        saver = spt.VariableSaver(var_dict, model_name)
        #if os.path.exists(model_name):
        if False:
            print('%s exists' % model_name)
            saver.restore()
        else:
            print('no model here, and start training')
            # initialize the network
            spt.utils.ensure_variables_initialized()
            for [batch_x] in train_flow:
                print('Network initialization loss: {:.6g}'.
                      format(session.run(init_loss, {input_x: batch_x})))
                print('')
                break

            # train the network
            with spt.TrainLoop(params,
                               var_groups=['p_net', 'q_net', 'posterior_flow'],
                               max_epoch=config.max_epoch,
                               max_step=config.max_step,
                               early_stopping=True,
                               valid_metric_name='valid_loss',
                               valid_metric_smaller_is_better=True) as loop:
                trainer = spt.Trainer(
                    loop, train_op, [input_x], train_flow,
                    metrics={'loss': loss}
                )
                trainer.anneal_after(
                    learning_rate,
                    epochs=config.lr_anneal_epoch_freq,
                    steps=config.lr_anneal_step_freq
                )
                evaluator = spt.Evaluator(
                    loop,
                    metrics={'valid_loss': test_nll},
                    inputs=[input_x],
                    data_flow=valid_flow,
                    time_metric_name='valid_time'
                )

                trainer.evaluate_after_epochs(evaluator, freq=10)
                trainer.log_after_epochs(freq=1)
                trainer.run()
            saver.save()

        # get the answer
        print('start testing')
        start = time.time()
        test_ans = collect_outputs([test_logp], [input_x], test_flow)[0] \
            / config.x_dim
        end = time.time()
        print("test time: ", end-start)
        
        pd.DataFrame(
            {'id': flows_test, 'label': y_test, 'score': test_ans}) \
            .to_csv(output_file, index=False)
        valid_ans = collect_outputs([test_logp], [input_x], valid_flow)[0] \
            / config.x_dim
        pd.DataFrame({'score': valid_ans}).to_csv(valid_file, index=False)