Example No. 1
def test_builder_methods():
    c = Config()
    c.option('option1', 1)
    assert c.section is None
    assert c['option1'] == 1
    c.section = 'SECTION1'
    c.option('option2', 2)
    opt = c.get_option('option2', 'SECTION1')
    assert opt.section == 'SECTION1'
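
The test above implies a small builder-style API: options registered through Config.option() inherit the currently active section. Below is a minimal sketch of a Config/Option pair that satisfies this test (a hypothetical reconstruction; the real library surely does more):

class Option:
    def __init__(self, name, value=None, section=None):
        self.name = name
        self.value = value
        self.section = section


class Config:
    def __init__(self):
        self.section = None  # options added before a section is set get None
        self._options = {}

    def option(self, name, value=None):
        # register an option under the currently active section
        self._options[name] = Option(name, value, self.section)

    def get_option(self, name, section=None):
        return self._options[name]

    def __getitem__(self, name):
        return self._options[name].value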
Example No. 2
def main():
    # config for training
    config = Config()

    # config for validation
    valid_config = Config()
    valid_config.keep_prob = 1.0
    valid_config.dec_keep_prob = 1.0
    valid_config.batch_size = 60

    # configuration for testing
    test_config = Config()
    test_config.keep_prob = 1.0
    test_config.dec_keep_prob = 1.0
    test_config.batch_size = 1

    pp(config)

    # get data set
Example No. 3
def test_cache():
    os.environ['OPTION2'] = '33'
    os.environ['OPTION3'] = 'spam'

    config = Config(options=[
        Option('option1', 1),
        Option('User'),
        Option('ForwardX11'),
        Option('Port'),
    ],
                    readers=[
                        EnvConfigReader(),
                        IniConfigReader(
                            'tests/config.ini',
                            sections=['bitbucket.org', 'topsecret.server.com'])
                    ])

    config.section = "OTHER"
    config.option('option4', 'yes')
Example No. 4
def test_addition():
    os.environ['OPTION2'] = '33'
    os.environ['OPTION3'] = 'spam'
    os.environ['USER'] = '******'

    config1 = Config(options=[
        Option('option1', 1),
        Option('ForwardX11'),
        Option('Port'),
    ],
                     readers=[
                         EnvConfigReader(),
                     ])

    with pytest.raises(ConfigError):
        assert config1['User'] == 'EnvironUser'

    config2 = Config(
        options=[
            Option('User'),
        ],
        readers=[
            IniConfigReader('tests/config.ini',
                            sections=['bitbucket.org', 'topsecret.server.com'])
        ])

    config1 = Config(options=[
        Option('option1', 1),
        Option('ForwardX11'),
        Option('Port'),
        Option('User')
    ],
                     readers=[
                         EnvConfigReader(),
                     ])

    config = config1 + config2
    assert config['User'] == "hg"

    config = config2 + config1
    assert config['User'] == "EnvironUser"
Example No. 5
def test_one(caplog):
    os.environ['OPTION2'] = '33'
    os.environ['OPTION3'] = 'spam'

    config1 = Config(
        options=[
            # Option with a default value. Found nowhere else
            Option('option1', 1),

            # Option with a specified value, overridden in environment.
            # Should return the specified value
            Option('option2', value=2, processor=int),

            # this one has a default value and an environment value
            # it should return the environment value
            Option('option3', 3)
        ],
        readers=[
            # WARNING: When searching in environment, option names
            # are uppercased
            EnvConfigReader()
        ])

    assert config1['option1'] == 1, \
        'You only had 1 job: return the default value of the option'
    assert config1['option2'] == 2, \
        'This item had a hardcoded value. Where is it?'
    assert config1['option3'] == 'spam', \
        'We should have received the environment value'

    config2 = Config(
        options=[
            # we are overwriting a config option from the previous config
            Option('option3', value='cat'),
            Option('option4', 4)
        ],
        readers=[
            IniConfigReader('tests/config.ini',
                            sections=['bitbucket.org', 'topsecret.server.com'])
        ])

    # we are merging 2 configs:
    # if an option is defined in both,
    # config2's value overwrites config1's
    config3 = config1 + config2

    assert config2['option3'] == 'cat', \
        'This option should have been overridden after the merge'
    assert config3['option4'] == 4

    # should raise UnassignedOptionError because we tried to access
    # an option that is not defined in the config
    with pytest.raises(UnassignedOptionError):
        assert config3['User'] == 'hg'

    # make a new config defining the User option
    config4 = config3 + Config([
        Option('User'),

        # test if configparser picks up stuff in the DEFAULT
        # config section
        Option('ForwardX11'),

        # define an option that is in the second section
        # from our scanned sections list
        Option('Port'),
        Option('Undefined')
    ])

    with pytest.raises(ConfigError):
        assert config4['User'] == 'hg'

    config4.flatten()
    assert config4['User'] == 'hg'

    # Careful! Even if you configure searching in multiple sections,
    # a value that is not found in the first section is looked up
    # in the DEFAULT section next
    assert config4['ForwardX11'] == 'yes'

    # This item lives in the second section of our search list.
    # It is only returned if the first section does not have it
    # AND it is not defined in the DEFAULT section
    assert config4['Port'] == '50022'

    # ConfigError is raised if the option is defined
    # but its value cannot be found anywhere
    with pytest.raises(ConfigError):
        assert config4['Undefined']
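
The ForwardX11 and Port assertions above lean on configparser's own fallback rule: a key missing from the queried section is looked up in the DEFAULT section before the lookup fails. That part is standard-library behavior and easy to verify in isolation:

import configparser

cp = configparser.ConfigParser()
cp.read_string("""
[DEFAULT]
ForwardX11 = yes

[bitbucket.org]
User = hg
""")

# absent from [bitbucket.org], inherited from [DEFAULT]
assert cp['bitbucket.org']['ForwardX11'] == 'yes'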
Example No. 6
def main():
    # config for training
    config = Config()

    # config for validation
    valid_config = Config()
    valid_config.keep_prob = 1.0
    valid_config.dec_keep_prob = 1.0
    valid_config.batch_size = 60

    # configuration for testing
    test_config = Config()
    test_config.keep_prob = 1.0
    test_config.dec_keep_prob = 1.0
    test_config.batch_size = 1

    pp(config)

    # get data set
    api = SWDADialogCorpus(FLAGS.data_dir,
                           word2vec=FLAGS.word2vec_path,
                           word2vec_dim=config.embed_size)
    dial_corpus = api.get_dialog_corpus()
    meta_corpus = api.get_meta_corpus()

    train_meta, valid_meta, test_meta = (meta_corpus.get("train"),
                                         meta_corpus.get("valid"),
                                         meta_corpus.get("test"))
    train_dial, valid_dial, test_dial = (dial_corpus.get("train"),
                                         dial_corpus.get("valid"),
                                         dial_corpus.get("test"))

    # convert to numeric inputs/outputs that fit into the TF models
    train_feed = SWDADataLoader("Train", train_dial, train_meta, config)
    valid_feed = SWDADataLoader("Valid", valid_dial, valid_meta, config)
    test_feed = SWDADataLoader("Test", test_dial, test_meta, config)

    if FLAGS.forward_only or FLAGS.resume:
        log_dir = os.path.join(FLAGS.work_dir, FLAGS.test_path)
    else:
        log_dir = os.path.join(FLAGS.work_dir, "run" + str(int(time.time())))

    # begin training
    with tf.Session() as sess:
        initializer = tf.random_uniform_initializer(-1.0 * config.init_w,
                                                    config.init_w)
        scope = "model"
        with tf.variable_scope(scope, reuse=None, initializer=initializer):
            model = KgRnnCVAE(sess,
                              config,
                              api,
                              log_dir=None if FLAGS.forward_only else log_dir,
                              forward=False,
                              scope=scope)
        with tf.variable_scope(scope, reuse=True, initializer=initializer):
            valid_model = KgRnnCVAE(sess,
                                    valid_config,
                                    api,
                                    log_dir=None,
                                    forward=False,
                                    scope=scope)
        with tf.variable_scope(scope, reuse=True, initializer=initializer):
            test_model = KgRnnCVAE(sess,
                                   test_config,
                                   api,
                                   log_dir=None,
                                   forward=True,
                                   scope=scope)

        print("Created computation graphs")
        if api.word2vec is not None and not FLAGS.forward_only:
            print("Loaded word2vec")
            sess.run(model.embedding.assign(np.array(api.word2vec)))

        # write config to a file for logging
        if not FLAGS.forward_only:
            with open(os.path.join(log_dir, "run.log"), "w") as f:
                f.write(pp(config, output=False))

        # create a folder by force
        ckp_dir = os.path.join(log_dir, "checkpoints")
        if not os.path.exists(ckp_dir):
            os.mkdir(ckp_dir)

        ckpt = tf.train.get_checkpoint_state(ckp_dir)
        print("Created models with fresh parameters.")
        sess.run(tf.global_variables_initializer())

        if ckpt:
            print("Reading dm models parameters from %s" %
                  ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)

        if not FLAGS.forward_only:
            dm_checkpoint_path = os.path.join(
                ckp_dir, model.__class__.__name__ + ".ckpt")
            global_t = 1
            patience = 10  # wait for at least 10 epochs before stopping
            dev_loss_threshold = np.inf
            best_dev_loss = np.inf
            for epoch in range(config.max_epoch):
                print(">> Epoch %d with lr %f" %
                      (epoch, model.learning_rate.eval()))

                # begin training
                if train_feed.num_batch is None or train_feed.ptr >= train_feed.num_batch:
                    train_feed.epoch_init(config.batch_size,
                                          config.backward_size,
                                          config.step_size,
                                          shuffle=True)
                global_t, train_loss = model.train(
                    global_t,
                    sess,
                    train_feed,
                    update_limit=config.update_limit)
                print("Training Loss : {}".format(train_loss))

                # begin validation
                valid_feed.epoch_init(valid_config.batch_size,
                                      valid_config.backward_size,
                                      valid_config.step_size,
                                      shuffle=False,
                                      intra_shuffle=False)
                valid_loss = valid_model.valid("ELBO_VALID", sess, valid_feed)

                test_feed.epoch_init(test_config.batch_size,
                                     test_config.backward_size,
                                     test_config.step_size,
                                     shuffle=True,
                                     intra_shuffle=False)
                test_model.test(sess, test_feed, num_batch=5)

                done_epoch = epoch + 1
                # only save a model if the dev loss is smaller
                # Decrease learning rate if no improvement was seen over the last 3 epochs.
                if config.op == "sgd" and done_epoch > config.lr_hold:
                    sess.run(model.learning_rate_decay_op)

                if valid_loss < best_dev_loss:
                    if valid_loss <= dev_loss_threshold * config.improve_threshold:
                        patience = max(patience,
                                       done_epoch * config.patient_increase)
                        dev_loss_threshold = valid_loss

                    # still save the best train model
                    if FLAGS.save_model:
                        print("Save model!!")
                        model.saver.save(sess,
                                         dm_checkpoint_path,
                                         global_step=epoch)
                    best_dev_loss = valid_loss

                if config.early_stop and patience <= done_epoch:
                    print("!!Early stop due to run out of patience!!")
                    break

            print("Best validation loss %f" % best_dev_loss)
            print("Done training")
        else:
            # begin validation
            valid_feed.epoch_init(valid_config.batch_size,
                                  valid_config.backward_size,
                                  valid_config.step_size,
                                  shuffle=False,
                                  intra_shuffle=False)
            valid_model.valid("ELBO_VALID", sess, valid_feed)

            test_feed.epoch_init(valid_config.batch_size,
                                 valid_config.backward_size,
                                 valid_config.step_size,
                                 shuffle=False,
                                 intra_shuffle=False)
            valid_model.valid("ELBO_TEST", sess, test_feed)

            dest_f = open(os.path.join(log_dir, "test.txt"), "wb")
            test_feed.epoch_init(test_config.batch_size,
                                 test_config.backward_size,
                                 test_config.step_size,
                                 shuffle=False,
                                 intra_shuffle=False)
            test_model.test(sess,
                            test_feed,
                            num_batch=None,
                            repeat=10,
                            dest=dest_f)
            dest_f.close()
Example No. 7
def main():
    ## random seeds
    seed = FLAGS.seed
    # tf.random.set_seed(seed)
    np.random.seed(seed)

    ## config for training
    config = Config()
    pid = PIDControl(FLAGS.exp_KL)
    
    # config for validation
    valid_config = Config()
    valid_config.keep_prob = 1.0
    valid_config.dec_keep_prob = 1.0
    valid_config.batch_size = 60

    # configuration for testing
    test_config = Config()
    test_config.keep_prob = 1.0
    test_config.dec_keep_prob = 1.0
    test_config.batch_size = 1

    pp(config)

    # get data set
    api = SWDADialogCorpus(FLAGS.data_dir, word2vec=FLAGS.word2vec_path, word2vec_dim=config.embed_size)
    dial_corpus = api.get_dialog_corpus()
    meta_corpus = api.get_meta_corpus()

    train_meta, valid_meta, test_meta = meta_corpus.get("train"), meta_corpus.get("valid"), meta_corpus.get("test")
    train_dial, valid_dial, test_dial = dial_corpus.get("train"), dial_corpus.get("valid"), dial_corpus.get("test")
    
    # convert to numeric inputs/outputs that fit into the TF models
    train_feed = SWDADataLoader("Train", train_dial, train_meta, config)
    valid_feed = SWDADataLoader("Valid", valid_dial, valid_meta, config)
    test_feed = SWDADataLoader("Test", test_dial, test_meta, config)

    if FLAGS.forward_only or FLAGS.resume:
        # log_dir = os.path.join(FLAGS.work_dir, FLAGS.test_path)
        log_dir = os.path.join(FLAGS.work_dir, FLAGS.model_name)
    else:
        log_dir = os.path.join(FLAGS.work_dir, FLAGS.model_name)

    
    ## begin training
    with tf.Session() as sess:
        initializer = tf.random_uniform_initializer(-1.0 * config.init_w, config.init_w)
        scope = "model"
        with tf.variable_scope(scope, reuse=None, initializer=initializer):
            model = KgRnnCVAE(sess, config, api, log_dir=None if FLAGS.forward_only else log_dir, forward=False, pid_control=pid, scope=scope)
        with tf.variable_scope(scope, reuse=True, initializer=initializer):
            valid_model = KgRnnCVAE(sess, valid_config, api, log_dir=None, forward=False, pid_control=pid, scope=scope)
        with tf.variable_scope(scope, reuse=True, initializer=initializer):
            test_model = KgRnnCVAE(sess, test_config, api, log_dir=None, forward=True, pid_control=pid, scope=scope)

        print("Created computation graphs")
        if api.word2vec is not None and not FLAGS.forward_only:
            print("Loaded word2vec")
            sess.run(model.embedding.assign(np.array(api.word2vec)))

        # write config to a file for logging
        if not FLAGS.forward_only:
            with open(os.path.join(log_dir, "configure.log"), "wb") as f:
                f.write(pp(config, output=False))
        
        # create a folder by force
        ckp_dir = os.path.join(log_dir, "checkpoints")
        print("*******checkpoint path: ", ckp_dir)
        if not os.path.exists(ckp_dir):
            os.mkdir(ckp_dir)

        ckpt = tf.train.get_checkpoint_state(ckp_dir)
        print("Created models with fresh parameters.")
        sess.run(tf.global_variables_initializer())

        if ckpt:
            print("Reading dm models parameters from %s" % ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        ### save log when running
        if not FLAGS.forward_only:
            logfileName = "train.log"
        else:
            logfileName = "test.log"
        fw_log = open(os.path.join(log_dir, logfileName), "w")
        print("log directory >>> : ", os.path.join(log_dir, "run.log"))
        if not FLAGS.forward_only:
            print('--start training now---')
            dm_checkpoint_path = os.path.join(ckp_dir, model.__class__.__name__+ ".ckpt")
            global_t = 1
            patience = 20  # wait for at least 20 epochs before stopping
            dev_loss_threshold = np.inf
            best_dev_loss = np.inf
            pbar = tqdm(total=config.max_epoch)
            ## epoch start training
            for epoch in range(config.max_epoch):
                pbar.update(1)
                print(">> Epoch %d with lr %f" % (epoch, model.learning_rate.eval()))

                ## begin training
                FLAGS.mode = 'train'
                if train_feed.num_batch is None or train_feed.ptr >= train_feed.num_batch:
                    train_feed.epoch_init(config.batch_size, config.backward_size,
                                          config.step_size, shuffle=True)
                global_t, train_loss = model.train(global_t, sess, train_feed, update_limit=config.update_limit)
                
                FLAGS.mode = 'valid'
                valid_feed.epoch_init(valid_config.batch_size, valid_config.backward_size,
                                  valid_config.step_size, shuffle=False, intra_shuffle=False)
                test_feed.epoch_init(valid_config.batch_size, valid_config.backward_size,
                                  valid_config.step_size, shuffle=False, intra_shuffle=False)
                elbo, nll, ppl, au_count, kl_loss = valid_model.valid("ELBO_TEST", sess, valid_feed, test_feed)
                print('middle test nll: {} ppl: {} ActiveUnit: {} kl_loss: {}\n'.format(nll, ppl, au_count, kl_loss))
                fw_log.write('epoch:{} testing nll:{} ppl:{} ActiveUnit:{} kl_loss:{} elbo:{}\n'.\
                            format(epoch, nll, ppl, au_count, kl_loss, elbo))
                fw_log.flush()
                
                '''
                ## begin validation
                FLAGS.mode = 'valid'
                valid_feed.epoch_init(valid_config.batch_size, valid_config.backward_size,
                                      valid_config.step_size, shuffle=False, intra_shuffle=False)
                valid_loss = valid_model.valid("ELBO_VALID", sess, valid_feed)

                ## test model
                FLAGS.mode = 'test'
                test_feed.epoch_init(test_config.batch_size, test_config.backward_size,
                                     test_config.step_size, shuffle=True, intra_shuffle=False)
                test_model.test(sess, test_feed, num_batch=5)

                done_epoch = epoch + 1
                # only save a models if the dev loss is smaller
                # Decrease learning rate if no improvement was seen over last 3 times.
                if config.op == "sgd" and done_epoch > config.lr_hold:
                    sess.run(model.learning_rate_decay_op)

                if valid_loss < best_dev_loss:
                    if valid_loss <= dev_loss_threshold * config.improve_threshold:
                        patience = max(patience, done_epoch * config.patient_increase)
                        dev_loss_threshold = valid_loss

                    # still save the best train model
                    if FLAGS.save_model:
                        print("Save model!!")
                        model.saver.save(sess, dm_checkpoint_path, global_step=epoch)
                    best_dev_loss = valid_loss

                if config.early_stop and patience <= done_epoch:
                    print("!!Early stop due to run out of patience!!")
                    break
                    ## print("Best validation loss %f" % best_dev_loss)
                 '''
            print("Done training and save checkpoint")

            if FLAGS.save_model:
                print("Save model!!")
                model.saver.save(sess, dm_checkpoint_path, global_step=epoch)
            # begin validation
            print('--------after training, testing now-----')
            FLAGS.mode = 'test'
            # valid_feed.epoch_init(valid_config.batch_size, valid_config.backward_size,
                                #   valid_config.step_size, shuffle=False, intra_shuffle=False)
            # valid_model.valid("ELBO_VALID", sess, valid_feed)
            valid_feed.epoch_init(valid_config.batch_size, valid_config.backward_size,
                                  valid_config.step_size, shuffle=False, intra_shuffle=False)
            
            test_feed.epoch_init(valid_config.batch_size, valid_config.backward_size,
                                  valid_config.step_size, shuffle=False, intra_shuffle=False)
            elbo, nll, ppl, au_count, kl_loss = valid_model.valid("ELBO_TEST", sess, valid_feed, test_feed)

            print('final test nll: {} ppl: {} ActiveUnit: {} kl_loss: {}\n'.format(nll, ppl, au_count, kl_loss))
            fw_log.write('Final testing nll:{} ppl:{} ActiveUnit:{} kl_loss:{} elbo:{}\n'.\
                            format(nll, ppl, au_count, kl_loss, elbo))
            
            dest_f = open(os.path.join(log_dir, FLAGS.test_res), "wb")
            test_feed.epoch_init(test_config.batch_size, test_config.backward_size,
                                 test_config.step_size, shuffle=False, intra_shuffle=False)
            test_model.test(sess, test_feed, num_batch=None, repeat=10, dest=dest_f)
            dest_f.close()
            print("****testing done****")
        else:
            # begin validation
            print('*'*89)
            print('--------testing now-----')
            print('*'*89)
            FLAGS.mode = 'test'
            valid_feed.epoch_init(valid_config.batch_size, valid_config.backward_size,
                                  valid_config.step_size, shuffle=False, intra_shuffle=False)
            # valid_model.valid("ELBO_VALID", sess, valid_feed)

            test_feed.epoch_init(valid_config.batch_size, valid_config.backward_size,
                                  valid_config.step_size, shuffle=False, intra_shuffle=False)
            elbo, nll, ppl, au_count, kl_loss = valid_model.valid("ELBO_TEST", sess, valid_feed, test_feed)

            print('final test nll: {} ppl: {} ActiveUnit: {} kl_loss: {}\n'.format(nll, ppl, au_count, kl_loss))
            fw_log.write('Final testing nll:{} ppl:{} ActiveUnit:{} kl_loss:{} elbo:{}\n'.\
                            format(nll, ppl, au_count, kl_loss, elbo))
            # dest_f = open(os.path.join(log_dir, FLAGS.test_res), "wb")
            # test_feed.epoch_init(test_config.batch_size, test_config.backward_size,
            #                      test_config.step_size, shuffle=False, intra_shuffle=False)
            # test_model.test(sess, test_feed, num_batch=None, repeat=10, dest=dest_f)
            # dest_f.close()
            print("****testing done****")
        fw_log.close()
Example No. 8
                           "D:/workspace/数据/glove.6B/glove.6B.200d.txt",
                           "The path to word2vec. Can be None.")
tf.app.flags.DEFINE_string("data_dir", "data", "Raw data directory.")
tf.app.flags.DEFINE_string("dataset", "ATIS", "dataset.")
tf.app.flags.DEFINE_string("work_dir", "working",
                           "Experiment results directory.")
tf.app.flags.DEFINE_bool("equal_batch", True,
                         "Make each batch has similar length.")
tf.app.flags.DEFINE_bool("resume", False, "Resume from previous")
tf.app.flags.DEFINE_bool("forward_only", False, "Only do decoding")
tf.app.flags.DEFINE_bool("save_model", True, "Create checkpoints")
tf.app.flags.DEFINE_string("test_path", "",
                           "the dir to load checkpoint for forward only")
FLAGS = tf.app.flags.FLAGS

config = Config()

# get data set
api = Corpus(FLAGS.data_dir,
             FLAGS.dataset,
             word2vec=FLAGS.word2vec_path,
             word2vec_dim=config.embed_size)

# convert the word to ids
corpus = api.get_corpus()
train_corpus, valid_corpus, test_corpus = (corpus['train'],
                                           corpus['valid'],
                                           corpus['test'])

print('train_label', set(api.train_corpus['label'].tolist()))
print(Counter(api.train_corpus['label'].tolist()).most_common())
print('valid_label', set(api.valid_corpus['label'].tolist()))
Example No. 9
from sensor import Sensor
from socket_client import Client
from config_utils import Config
from apscheduler.schedulers.blocking import BlockingScheduler



def update(sensor, client):
    client.send(sensor.get_environment())


if __name__ == "__main__":
    config = Config()

    interval = config.get_value("interval")
    sensor_id = config.get_value("sensor_id")
    host, port = config.get_data_center_config()

    sensor = Sensor(sensor_id)

    scheduler = BlockingScheduler()
    client = Client(host, port, scheduler)
    client.connect()

    scheduler.add_job(update, 'interval', seconds=interval, max_instances=100, id='update', args=[sensor, client])
    client.update_job_started = True
    scheduler.start()
Example No. 10
def main(model_type):
    # config for training
    config = Config()

    # config for validation
    valid_config = Config()
    valid_config.keep_prob = 1.0
    valid_config.dec_keep_prob = 1.0
    valid_config.batch_size = 60

    # configuration for testing
    test_config = Config()
    test_config.keep_prob = 1.0
    test_config.dec_keep_prob = 1.0
    test_config.batch_size = 1

    pp(config)

    # which model to run
    if model_type == "kgcvae":
        model_class = KgRnnCVAE
        backward_size = config.backward_size
        config.use_hcf = valid_config.use_hcf = test_config.use_hcf = True
    elif model_type == "cvae":
        model_class = KgRnnCVAE
        backward_size = config.backward_size
        config.use_hcf = valid_config.use_hcf = test_config.use_hcf = False
    elif model_type == 'hierbaseline':
        model_class = HierBaseline
        backward_size = config.backward_size
    else:
        raise ValueError("This shouldn't happen.")

    # LDA Model
    ldamodel = LDAModel(config,
                        trained_model_path=FLAGS.lda_model_path,
                        id2word_path=FLAGS.id2word_path)

    # get data set
    api = SWDADialogCorpus(FLAGS.data_dir,
                           word2vec=FLAGS.word2vec_path,
                           word2vec_dim=config.embed_size,
                           vocab_dict_path=FLAGS.vocab_dict_path,
                           lda_model=ldamodel,
                           imdb=FLAGS.use_imdb)

    dial_corpus = api.get_dialog_corpus()
    meta_corpus = api.get_meta_corpus()

    train_meta, valid_meta, test_meta = (meta_corpus.get("train"),
                                         meta_corpus.get("valid"),
                                         meta_corpus.get("test"))
    train_dial, valid_dial, test_dial = (dial_corpus.get("train"),
                                         dial_corpus.get("valid"),
                                         dial_corpus.get("test"))

    # convert to numeric inputs/outputs that fit into the TF models
    train_feed = SWDADataLoader("Train", train_dial, train_meta, config)
    valid_feed = SWDADataLoader("Valid", valid_dial, valid_meta, config)
    test_feed = SWDADataLoader("Test", test_dial, test_meta, config)

    # if you're testing an existing implementation or resuming training
    if FLAGS.forward_only or FLAGS.resume:
        log_dir = os.path.join(FLAGS.work_dir + "_" + FLAGS.model_type,
                               FLAGS.test_path)
    else:
        log_dir = os.path.join(FLAGS.work_dir + "_" + FLAGS.model_type,
                               "run" + str(int(time.time())))

    # begin training
    with tf.Session() as sess:
        initializer = tf.random_uniform_initializer(-1.0 * config.init_w,
                                                    config.init_w)
        scope = "model"
        with tf.variable_scope(scope, reuse=None, initializer=initializer):
            model = model_class(
                sess,
                config,
                api,
                log_dir=None if FLAGS.forward_only else log_dir,
                forward=False,
                scope=scope)
        with tf.variable_scope(scope, reuse=True, initializer=initializer):
            valid_model = model_class(sess,
                                      valid_config,
                                      api,
                                      log_dir=None,
                                      forward=False,
                                      scope=scope)
        with tf.variable_scope(scope, reuse=True, initializer=initializer):
            test_model = model_class(sess,
                                     test_config,
                                     api,
                                     log_dir=None,
                                     forward=True,
                                     scope=scope)

        print("Created computation graphs")
        if api.word2vec is not None and not FLAGS.forward_only:
            print("Loaded word2vec")
            sess.run(model.embedding.assign(np.array(api.word2vec)))

        # write config to a file for logging
        if not FLAGS.forward_only:
            with open(os.path.join(log_dir, "run.log"), "wb") as f:
                f.write(pp(config, output=False))

        # create a folder by force
        ckp_dir = os.path.join(log_dir, "checkpoints")
        if not os.path.exists(ckp_dir):
            os.mkdir(ckp_dir)

        ckpt = tf.train.get_checkpoint_state(ckp_dir)
        print("Created models with fresh parameters.")
        sess.run(tf.global_variables_initializer())

        if ckpt:
            print("Reading dm models parameters from %s" %
                  ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)

        # if you're training a model
        if not FLAGS.forward_only:

            dm_checkpoint_path = os.path.join(
                ckp_dir, model.__class__.__name__ + ".ckpt")
            global_t = 1
            patience = 10  # wait for at least 10 epochs before stopping
            dev_loss_threshold = np.inf
            best_dev_loss = np.inf

            # train for at most max_epoch epochs; save the model after an
            # epoch if it improves enough on the current best
            for epoch in range(config.max_epoch):
                print(">> Epoch %d with lr %f" %
                      (epoch, model.learning_rate.eval()))

                # begin training
                if train_feed.num_batch is None or train_feed.ptr >= train_feed.num_batch:
                    train_feed.epoch_init(config.batch_size,
                                          backward_size,
                                          config.step_size,
                                          shuffle=True)

                global_t, train_loss = model.train(
                    global_t,
                    sess,
                    train_feed,
                    update_limit=config.update_limit)

                # begin validation and testing
                valid_feed.epoch_init(valid_config.batch_size,
                                      valid_config.backward_size,
                                      valid_config.step_size,
                                      shuffle=False,
                                      intra_shuffle=False)
                valid_loss = valid_model.valid("ELBO_VALID", sess, valid_feed)

                test_feed.epoch_init(test_config.batch_size,
                                     test_config.backward_size,
                                     test_config.step_size,
                                     shuffle=True,
                                     intra_shuffle=False)
                test_model.test(
                    sess, test_feed, num_batch=1
                )  #TODO change this batch size back to a reasonably large number

                done_epoch = epoch + 1
                # only save a model if the dev loss is smaller
                # Decrease learning rate if no improvement was seen over the last 3 epochs.
                if config.op == "sgd" and done_epoch > config.lr_hold:
                    sess.run(model.learning_rate_decay_op)

                if True:  #valid_loss < best_dev_loss: # TODO this change makes the model always save. Change this back when corpus not trivial
                    if True:  #valid_loss <= dev_loss_threshold * config.improve_threshold:
                        patience = max(patience,
                                       done_epoch * config.patient_increase)
                        # dev_loss_threshold = valid_loss

                    # still save the best train model
                    if FLAGS.save_model:
                        print("Save model!!")
                        model.saver.save(sess,
                                         dm_checkpoint_path,
                                         global_step=epoch)
                    # best_dev_loss = valid_loss

                if config.early_stop and patience <= done_epoch:
                    print("!!Early stop due to run out of patience!!")
                    break
            # print("Best validation loss %f" % best_dev_loss)
            print("Done training")

        # else if you're just testing an existing model
        else:
            # begin validation
            valid_feed.epoch_init(valid_config.batch_size,
                                  valid_config.backward_size,
                                  valid_config.step_size,
                                  shuffle=False,
                                  intra_shuffle=False)
            valid_model.valid("ELBO_VALID", sess, valid_feed)

            test_feed.epoch_init(valid_config.batch_size,
                                 valid_config.backward_size,
                                 valid_config.step_size,
                                 shuffle=False,
                                 intra_shuffle=False)
            valid_model.valid("ELBO_TEST", sess, test_feed)

            # begin testing
            dest_f = open(os.path.join(log_dir, "test.txt"), "wb")
            test_feed.epoch_init(test_config.batch_size,
                                 test_config.backward_size,
                                 test_config.step_size,
                                 shuffle=False,
                                 intra_shuffle=False)
            test_model.test(sess,
                            test_feed,
                            num_batch=None,
                            repeat=10,
                            dest=dest_f)
            dest_f.close()
Example No. 11
import cv2
from config_utils import Config
from socket_client import Client
from json import loads
from random import randint

config = Config()
video_server_host, video_server_port = config.get_video_server_config()
data_center_host, data_center_port = config.get_data_center_config()

camera_host = config.get_value("camera_host")

image_quality = config.get_value("image_quality")

client_of_data_center = Client(data_center_host, data_center_port)
client_of_data_center.connect()
client_of_video_server = Client(video_server_host, video_server_port)
client_of_video_server.connect()


def capture(camera_id, camera_host, image_quality):
    if camera_host == "0":
        camera_host = 0
    cap = cv2.VideoCapture(camera_host)
    while True:
        ret, frame = cap.read()
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), image_quality]
        image = cv2.imencode('.jpg', frame, encode_param)[1]
        image_byte = image.tobytes()
        client_of_video_server.send_to_video_server(camera_id, 50, image_byte)
Example No. 12
def main():
    # Load configuration
    config = Config()
    # for perplexity evaluation
    valid_config = Config()
    valid_config.keep_prob = 1.0
    valid_config.batch_size = 200

    # for forward only decoding
    test_config = Config()
    test_config.keep_prob = 1.0
    test_config.batch_size = 10
    pp(config)

    # load corpus
    api = WordSeqCorpus(FLAGS.data_dir, FLAGS.data_file, [99, 0.5, 0.5],
                        FLAGS.max_vocab_size, config.max_enc_len,
                        config.max_dec_len, config.line_thres)
    corpus_data = api.get_corpus()

    # convert to numeric inputs/outputs that fit into the TF models
    train_feed = WordSeqDataFeed("Train", config, corpus_data["train"],
                                 api.vocab)
    valid_feed = WordSeqDataFeed("Valid", config, corpus_data["valid"],
                                 api.vocab)
    test_feed = WordSeqDataFeed("Test", config, corpus_data["test"], api.vocab)

    if not os.path.exists(FLAGS.work_dir):
        os.mkdir(FLAGS.work_dir)

    if FLAGS.forward or FLAGS.resume:
        log_dir = os.path.join(FLAGS.work_dir, FLAGS.test_path)
    else:
        log_dir = os.path.join(FLAGS.work_dir, "run" + str(int(time.time())))
        os.mkdir(log_dir)

    # begin training
    with tf.Session() as sess:
        initializer = tf.random_uniform_initializer(-1 * config.init_w,
                                                    config.init_w)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            model = Word2SeqAutoEncoder(
                sess,
                config,
                len(train_feed.vocab),
                train_feed.EOS_ID,
                log_dir=None if FLAGS.forward else log_dir,
                forward=False)

        with tf.variable_scope("model", reuse=True, initializer=initializer):
            valid_model = Word2SeqAutoEncoder(sess,
                                              valid_config,
                                              len(train_feed.vocab),
                                              train_feed.EOS_ID,
                                              None,
                                              forward=False)

        # get a random batch and do forward decoding. Print the most likely response
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            test_model = Word2SeqAutoEncoder(sess,
                                             test_config,
                                             len(train_feed.vocab),
                                             train_feed.EOS_ID,
                                             None,
                                             forward=True)

        ckp_dir = os.path.join(log_dir, "checkpoints")

        global_t = 0
        patience = 10  # wait for at least 10 epochs before considering early stop
        valid_loss_threshold = np.inf
        best_valid_loss = np.inf
        checkpoint_path = os.path.join(ckp_dir, "word2seq.ckpt")

        if not os.path.exists(ckp_dir):
            os.mkdir(ckp_dir)

        ckpt = tf.train.get_checkpoint_state(ckp_dir)
        base_epoch = 0

        if ckpt:
            print("Reading models parameters from %s" %
                  ckpt.model_checkpoint_path)
            sess.run(tf.initialize_all_variables())
            model.saver.restore(sess, ckpt.model_checkpoint_path)
            base_epoch = int(ckpt.model_checkpoint_path.split("-")[1]) + 1
            print("Resume from epoch %d" % base_epoch)
        else:
            print("Created models with fresh parameters.")
            sess.run(tf.initialize_all_variables())

        if not FLAGS.forward:
            for epoch in range(base_epoch, config.max_epoch):
                print(">> Epoch %d with lr %f" %
                      (epoch, model.learning_rate.eval()))

                train_feed.epoch_init(config.batch_size, shuffle=True)
                global_t, train_loss = model.train(global_t, sess, train_feed)

                # begin validation
                valid_feed.epoch_init(valid_config.batch_size, shuffle=False)
                valid_loss = valid_model.valid("VALID", sess, valid_feed)

                test_feed.epoch_init(valid_config.batch_size, shuffle=False)
                valid_model.valid("TEST", sess, test_feed)

                # do sampling to see what kinds of sentences are generated
                test_feed.epoch_init(test_config.batch_size, shuffle=True)
                test_model.test("TEST", sess, test_feed, num_batch=2)

                done_epoch = epoch + 1

                # only save a model if the dev loss is smaller
                # Decrease learning rate if no improvement was seen over the last 3 epochs.
                if config.op == "sgd" and done_epoch > config.lr_hold:
                    sess.run(model.learning_rate_decay_op)

                if valid_loss < best_valid_loss:
                    if valid_loss <= valid_loss_threshold * config.improve_threshold:
                        patience = max(patience,
                                       done_epoch * config.patient_increase)
                        valid_loss_threshold = valid_loss

                    # still save the best train model
                    if FLAGS.save_model:
                        print("Saving model!")
                        model.saver.save(sess,
                                         checkpoint_path,
                                         global_step=epoch)
                    best_valid_loss = valid_loss

                if config.early_stop and patience <= done_epoch:
                    print("!!Early stop due to run out of patience!!")
                    break

            print("Best valid loss %f and perpleixyt %f" %
                  (best_valid_loss, np.exp(best_valid_loss)))
            print("Done training")
        else:
            # do sampling to see what kinds of sentences are generated
            test_feed.epoch_init(test_config.batch_size, shuffle=False)
            test_model.test("TEST", sess, test_feed, num_batch=20)

            # begin validation
            valid_feed.epoch_init(valid_config.batch_size, shuffle=False)
            valid_model.valid("VALID", sess, valid_feed)

            test_feed.epoch_init(valid_config.batch_size, shuffle=False)
            valid_model.valid("TEST", sess, test_feed)
Example No. 13
    def filter(self):
        info, f_type_in, f_type_not_in, f_size_start, f_size_end = \
            self.cfg._read_files_filter()

        filtered_files = []

        for f in self.files_to_filter:
            # get the file extension
            if os.path.exists(f):
                file_type = os.path.splitext(f)[1]
                file_type = file_type.replace('.', '')

                if file_type not in f_type_not_in or (file_type in f_type_in
                                                      and file_type):
                    # a zero bound means "no size filtering"
                    if f_size_start == 0 or f_size_end == 0:
                        filtered_files.append(f)
                    elif get_file_mb_size(f) > f_size_start \
                            and get_file_mb_size(f) < f_size_end:
                        filtered_files.append(f)
        return filtered_files

if __name__ == '__main__':
    from config_utils import Config

    files = ['2015-09-28 10-03-26.log', 'test.txt']

    f = FileFilter(files, Config("config_test.cfg"))
    print(f.filter())
Example No. 14
def main():
    # config for training
    config = Config()

    # config for validation
    valid_config = Config()
    valid_config.keep_prob = 1.0
    valid_config.dec_keep_prob = 1.0
    valid_config.batch_size = 60

    # configuration for testing
    test_config = Config()
    test_config.keep_prob = 1.0
    test_config.dec_keep_prob = 1.0
    test_config.batch_size = 1

    pp(config)

    # get data set
    api = SWDADialogCorpus(FLAGS.data_dir,
                           word2vec=FLAGS.word2vec_path,
                           word2vec_dim=config.embed_size)
    dial_corpus = api.get_dialog_corpus()
    meta_corpus = api.get_meta_corpus()

    train_meta, valid_meta, test_meta = (meta_corpus.get("train"),
                                         meta_corpus.get("valid"),
                                         meta_corpus.get("test"))
    train_dial, valid_dial, test_dial = (dial_corpus.get("train"),
                                         dial_corpus.get("valid"),
                                         dial_corpus.get("test"))

    # convert to numeric inputs/outputs that fit into the TF models
    train_feed = SWDADataLoader("Train", train_dial, train_meta, config)
    valid_feed = SWDADataLoader("Valid", valid_dial, valid_meta, config)
    test_feed = SWDADataLoader("Test", test_dial, test_meta, config)

    if FLAGS.forward_only or FLAGS.resume:
        log_dir = os.path.join(FLAGS.work_dir, FLAGS.test_path)
    else:
        log_dir = os.path.join(FLAGS.work_dir, "run" + str(int(time.time())))

    # begin training
    if True:
        scope = "model"
        model = KgRnnCVAE(config,
                          api,
                          log_dir=None if FLAGS.forward_only else log_dir,
                          scope=scope)

        print("Created computation graphs")
        # write config to a file for logging
        if not FLAGS.forward_only:
            with open(os.path.join(log_dir, "run.log"), "wb") as f:
                f.write(pp(config, output=False))

        # create a folder by force
        ckp_dir = os.path.join(log_dir, "checkpoints")
        if not os.path.exists(ckp_dir):
            os.mkdir(ckp_dir)

        ckpt = get_checkpoint_state(ckp_dir)
        print("Created models with fresh parameters.")
        model.apply(lambda m: [
            torch.nn.init.uniform_(p.data, -1.0 * config.init_w, config.init_w)
            for p in m.parameters()
        ])

        # Load word2vec weight
        if api.word2vec is not None and not FLAGS.forward_only:
            print("Loaded word2vec")
            model.embedding.weight.data.copy_(
                torch.from_numpy(np.array(api.word2vec)))
        model.embedding.weight.data[0].fill_(0)

        if ckpt:
            print("Reading dm models parameters from %s" % ckpt)
            model.load_state_dict(torch.load(ckpt))

        # turn to cuda
        model.cuda()

        if not FLAGS.forward_only:
            dm_checkpoint_path = os.path.join(
                ckp_dir, model.__class__.__name__ + "-%d.pth")
            global_t = 1
            patience = 10  # wait for at least 10 epochs before stopping
            dev_loss_threshold = np.inf
            best_dev_loss = np.inf
            for epoch in range(config.max_epoch):
                print(">> Epoch %d with lr %f" % (epoch, model.learning_rate))

                # begin training
                if train_feed.num_batch is None or train_feed.ptr >= train_feed.num_batch:
                    train_feed.epoch_init(config.batch_size,
                                          config.backward_size,
                                          config.step_size,
                                          shuffle=True)
                global_t, train_loss = model.train_model(
                    global_t, train_feed, update_limit=config.update_limit)

                # begin validation
                valid_feed.epoch_init(valid_config.batch_size,
                                      valid_config.backward_size,
                                      valid_config.step_size,
                                      shuffle=False,
                                      intra_shuffle=False)
                model.eval()
                valid_loss = model.valid_model("ELBO_VALID", valid_feed)

                test_feed.epoch_init(test_config.batch_size,
                                     test_config.backward_size,
                                     test_config.step_size,
                                     shuffle=True,
                                     intra_shuffle=False)
                model.test_model(test_feed, num_batch=5)
                model.train()

                done_epoch = epoch + 1
                # only save a model if the dev loss is smaller
                # Decrease learning rate if no improvement was seen over the last 3 epochs.
                if config.op == "sgd" and done_epoch > config.lr_hold:
                    model.learning_rate_decay()

                if valid_loss < best_dev_loss:
                    if valid_loss <= dev_loss_threshold * config.improve_threshold:
                        patience = max(patience,
                                       done_epoch * config.patient_increase)
                        dev_loss_threshold = valid_loss

                    # still save the best train model
                    if FLAGS.save_model:
                        print("Save model!!")
                        torch.save(model.state_dict(),
                                   dm_checkpoint_path % (epoch))
                    best_dev_loss = valid_loss

                if config.early_stop and patience <= done_epoch:
                    print("!!Early stop due to run out of patience!!")
                    break
            print("Best validation loss %f" % best_dev_loss)
            print("Done training")
        else:
            # begin validation
            valid_feed.epoch_init(valid_config.batch_size,
                                  valid_config.backward_size,
                                  valid_config.step_size,
                                  shuffle=False,
                                  intra_shuffle=False)
            model.eval()
            model.valid_model("ELBO_VALID", valid_feed)

            test_feed.epoch_init(valid_config.batch_size,
                                 valid_config.backward_size,
                                 valid_config.step_size,
                                 shuffle=False,
                                 intra_shuffle=False)
            model.valid_model("ELBO_TEST", test_feed)

            dest_f = open(os.path.join(log_dir, "test.txt"), "wb")
            test_feed.epoch_init(test_config.batch_size,
                                 test_config.backward_size,
                                 test_config.step_size,
                                 shuffle=False,
                                 intra_shuffle=False)
            model.test_model(test_feed, num_batch=None, repeat=10, dest=dest_f)
            model.train()
            dest_f.close()
Example No. 15
def main():
    # config for training
    config = Config()
    config.batch_size = 1

    # config for validation
    valid_config = Config()
    valid_config.keep_prob = 1.0
    valid_config.dec_keep_prob = 1.0
    valid_config.batch_size = 1

    # configuration for testing
    test_config = Config()
    test_config.keep_prob = 1.0
    test_config.dec_keep_prob = 1.0
    test_config.batch_size = 1
    test_config.max_length = 135

    pp(config)

    best_test = np.inf

    # get data set
    train_feed = Data_helper(FLAGS.task + '_input.txt',
                             FLAGS.task + '_output.txt', config.batch_size,
                             config.position_len)
    test_feed = Data_helper(FLAGS.task + '_input_test.txt',
                            FLAGS.task + '_output_test.txt',
                            test_config.batch_size, config.position_len)

    if FLAGS.resume:
        log_dir = os.path.join(FLAGS.work_dir, FLAGS.test_path)
    else:
        log_dir = os.path.join(FLAGS.work_dir, "run" + str(int(time.time())))

    # begin training
    with tf.Session() as sess:
        initializer = tf.random_uniform_initializer(-1.0 * config.init_w,
                                                    config.init_w)
        scope = "model"
        with tf.variable_scope(scope, reuse=None, initializer=initializer):
            model = SensorRNN(sess,
                              config,
                              None,
                              log_dir=log_dir,
                              forward=False,
                              scope=scope)
        with tf.variable_scope(scope, reuse=True, initializer=initializer):
            test_model = SensorRNN(sess,
                                   test_config,
                                   None,
                                   log_dir=None,
                                   forward=True,
                                   scope=scope)

        # write config to a file for logging
        if not FLAGS.resume:
            with open(os.path.join(log_dir, "run.log"), "wb") as f:
                f.write(pp(config, output=False).encode())

        # create a folder by force
        ckp_dir = os.path.join(log_dir, "checkpoints")
        if not os.path.exists(ckp_dir):
            os.mkdir(ckp_dir)

        ckpt = tf.train.get_checkpoint_state(ckp_dir)
        print("Created models with fresh parameters.")
        sess.run(tf.global_variables_initializer())

        if FLAGS.resume:
            print(("Reading dm models parameters from %s" % FLAGS.test_path))
            model_checkpoint_path = FLAGS.test_path
            model.saver.restore(sess, model_checkpoint_path)

        if FLAGS.test:
            #test_feed = train_feed
            test_label, test_prediction, test_loss, weights = test_model.test(
                sess, test_feed)
            evaluate(test_feed.label, test_prediction)
            print(test_loss)

            with open(FLAGS.test_path + '.csv', mode='w') as file:
                file_writer = csv.writer(file,
                                         delimiter=',',
                                         quotechar='"',
                                         quoting=csv.QUOTE_MINIMAL)
                for i in range(len(test_label)):
                    file_writer.writerow([test_label[i], test_prediction[i]])

        else:
            dm_checkpoint_path = os.path.join(
                ckp_dir, model.__class__.__name__ + ".ckpt")

            global_t = 1

            for epoch in range(config.max_epoch):
                print((">> Epoch %d with lr %f" %
                       (epoch, model.learning_rate.eval())))
                global_t, loss = model.train(global_t, sess, train_feed)
                test_sensors, test_prediction, test_loss, weights = test_model.test(
                    sess, test_feed)
                print(("Epoch ", epoch + 1, " average loss is ", loss,
                       " test loss is ", test_loss))
                #if test_loss < best_test:
                print("Save model!!")
                model.saver.save(sess, dm_checkpoint_path, global_step=epoch)
                best_test = test_loss
Example No. 16
def main():
    # config for training
    config = Config()
    if FLAGS.data == 'none':
        config.use_profile = False

    # config for validation
    valid_config = Config()
    valid_config.keep_prob = 1.0
    valid_config.dec_keep_prob = 1.0
    valid_config.batch_size = 60

    # configuration for testing
    test_config = Config()
    test_config.keep_prob = 1.0
    test_config.dec_keep_prob = 1.0
    test_config.batch_size = 1

    train_persona = None
    valid_persona = None

    pp(config)
    data_path = "data/convai2/train_" + FLAGS.data + "_original_no_cands.txt"
    # get data set
    api = PERSONADialogCorpus(data_path,
                              FLAGS.data,
                              word2vec=FLAGS.word2vec_path,
                              word2vec_dim=config.embed_size)
    print("dataset loaded")

    dial_corpus = api.get_dialog_corpus()
    train_dial, valid_dial = dial_corpus.get("train"), dial_corpus.get("valid")
    if config.use_profile:
        persona_corpus = api.get_persona_corpus()
        train_persona, valid_persona = persona_corpus.get(
            "train"), persona_corpus.get("valid")

    # convert to numeric inputs/outputs that fit into the TF models
    train_feed = PERSONAataLoader("Train", train_dial, train_persona, config)
    valid_feed = PERSONAataLoader("Valid", valid_dial, valid_persona, config)

    if FLAGS.forward_only or FLAGS.resume:
        log_dir = os.path.join(FLAGS.work_dir, FLAGS.test_path)
    else:
        log_dir = os.path.join(FLAGS.work_dir, FLAGS.model)

    # begin training
    if True:
        scope = "model"
        if FLAGS.model == 'cvae':
            model = DirVAE(config,
                           api,
                           log_dir=None if FLAGS.forward_only else log_dir,
                           scope=scope)
        elif FLAGS.model == 's2s':
            model = S2S(config,
                        api,
                        log_dir=None if FLAGS.forward_only else log_dir,
                        scope=scope)

        print("Created computation graphs")
        # write config to a file for logging
        if not FLAGS.forward_only:
            with open(os.path.join(log_dir, "run.log"), "w") as f:
                f.write(pp(config, output=False))

        # create a folder by force
        ckp_dir = os.path.join(log_dir, "checkpoints")
        if not os.path.exists(ckp_dir):
            os.mkdir(ckp_dir)

        ckpt = get_checkpoint_state(ckp_dir)
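        # `get_checkpoint_state` is not a torch API; presumably a small
        # project helper that returns the newest checkpoint file in `ckp_dir`
        # (or None). A minimal sketch of such a helper:
        #
        #     import glob
        #     def get_checkpoint_state(ckp_dir):
        #         paths = glob.glob(os.path.join(ckp_dir, "*.pth"))
        #         return max(paths, key=os.path.getmtime) if paths else None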
        print("Created models with fresh parameters.")
        model.apply(lambda m: [
            torch.nn.init.uniform_(p.data, -1.0 * config.init_w, config.init_w)
            for p in m.parameters()
        ])
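        # `Module.apply` visits every submodule and `m.parameters()` is
        # recursive, so each weight is re-drawn more than once; the net effect
        # is still a single uniform init in [-init_w, init_w]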

        # Load word2vec weight
        if api.word2vec is not None and not FLAGS.forward_only:
            print("Loaded word2vec")
            model.embedding.weight.data.copy_(
                torch.from_numpy(np.array(api.word2vec)))
            model.embedding.weight.requires_grad = False
        model.embedding.weight.data[0].fill_(0)
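        # row 0 of the embedding matrix is zeroed, presumably the padding token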
        #model.idxembedding.weight.data.copy_(torch.from_numpy(np.array(api.word2idx, dtype='float32')).unsqueeze(-1))
        #model.idxembedding.weight.require_grad = False

        if ckpt:
            print("Reading dm models parameters from %s" % ckpt)
            model.load_state_dict(torch.load(ckpt))

        if torch.cuda.is_available():
            model.cuda()

        if not FLAGS.forward_only:
            dm_checkpoint_path = os.path.join(
                ckp_dir, model.__class__.__name__ + "-%d.pth")
            global_t = 1
            patience = 10  # wait at least 10 epochs before stopping
            dev_loss_threshold = np.inf
            best_dev_loss = np.inf
            best_dev_losses = None
            loss_names = None
            for epoch in range(config.max_epoch):
                print(">> Epoch %d with lr %f" % (epoch, model.learning_rate))

                # begin training
                if train_feed.num_batch is None or train_feed.ptr >= train_feed.num_batch:
                    train_feed.epoch_init(config.batch_size,
                                          config.backward_size,
                                          config.step_size,
                                          shuffle=True)
                global_t, train_loss = model.train_model(
                    global_t,
                    train_feed,
                    update_limit=config.update_limit,
                    use_profile=config.use_profile)

                # begin validation
                # valid_feed.epoch_init(valid_config.batch_size, valid_config.backward_size,
                #                       valid_config.step_size, shuffle=False, intra_shuffle=False)
                # model.eval()
                # model.test_model(valid_feed, num_batch=50, repeat=1)

                valid_feed.epoch_init(valid_config.batch_size,
                                      valid_config.backward_size,
                                      valid_config.step_size,
                                      shuffle=False,
                                      intra_shuffle=False)
                model.eval()
                valid_loss, loss_names = model.valid_model(
                    "ELBO_VALID", valid_feed, use_profile=config.use_profile)
                model.train()

                done_epoch = epoch + 1
                # only save a models if the dev loss is smaller
                # Decrease learning rate if no improvement was seen over last 3 times.
                if config.op == "sgd" and done_epoch > config.lr_hold:
                    model.learning_rate_decay()
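                # (decay applies only to plain SGD, once done_epoch exceeds config.lr_hold)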

                if valid_loss[0] < best_dev_loss:
                    if valid_loss[0] <= dev_loss_threshold * config.improve_threshold:
                        patience = max(patience,
                                       done_epoch * config.patient_increase)
                        dev_loss_threshold = valid_loss[0]
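                        # patience grows multiplicatively with the epoch of the
                        # last significant improvement (`patient_increase`)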

                    # still save the best train model
                    if FLAGS.save_model:
                        print("Save model!!")
                        torch.save(model.state_dict(),
                                   dm_checkpoint_path % (epoch))
                    best_dev_loss = valid_loss[0]
                    best_dev_losses = valid_loss

                if config.early_stop and patience <= done_epoch:
                    print("!!Early stop due to run out of patience!!")
                    break
            print("Best validation loss %f" % best_dev_loss)
            model.print_loss("ELBO_BEST", loss_names, best_dev_losses, "")
            print("Done training")
        else:
            valid_feed.epoch_init(valid_config.batch_size,
                                  valid_config.backward_size,
                                  valid_config.step_size,
                                  shuffle=False,
                                  intra_shuffle=False)
            model.eval()
            dest_f = open(os.path.join(log_dir, "test.txt"), "w")
            model.test_model(valid_feed,
                             repeat=10,
                             dest=dest_f,
                             use_profile=config.use_profile)

            model.train()
            dest_f.close()
Example No. 17
0
def main():
    # config for training
    config = Config()

    # config for validation
    valid_config = Config()
    # valid_config.keep_prob = 1.0
    # valid_config.dec_keep_prob = 1.0
    # valid_config.batch_size = 60

    # configuration for testing
    test_config = Config()
    test_config.keep_prob = 1.0
    test_config.dec_keep_prob = 1.0
    test_config.batch_size = 1

    for cfg in (config, valid_config, test_config):
        cfg.n_state = FLAGS.n_state
        cfg.with_direct_transition = FLAGS.with_direct_transition
        cfg.with_word_weights = FLAGS.with_word_weights

    pp(config)

    print(config.n_state)
    print(config.with_direct_transition)
    print(config.with_word_weights)
    # get data set
    # api = SWDADialogCorpus(FLAGS.data_dir, word2vec=FLAGS.word2vec_path, word2vec_dim=config.embed_size)
    with open(config.api_dir, "rb") as fh:  # pickle requires binary mode
        api = pkl.load(fh)
    dial_corpus = api.get_dialog_corpus()
    if config.with_label_loss:
        labeled_dial_labels = api.get_state_corpus(
            config.max_dialog_len)['labeled']
    # meta_corpus = api.get_meta_corpus()

    # train_meta, valid_meta, test_meta = meta_corpus.get("train"), meta_corpus.get("valid"), meta_corpus.get("test")
    train_dial, labeled_dial, test_dial = (dial_corpus.get("train"),
                                           dial_corpus.get("labeled"),
                                           dial_corpus.get("test"))

    # convert to numeric input outputs that fits into TF models
    train_feed = SWDADataLoader("Train", train_dial, config)
    # valid_feed = SWDADataLoader("Valid", valid_dial, valid_meta, config)
    test_feed = SWDADataLoader("Test", test_dial, config)
    if config.with_label_loss:
        labeled_feed = SWDADataLoader("Labeled",
                                      labeled_dial,
                                      config,
                                      labeled=True)
    valid_feed = test_feed
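    # NOTE: there is no separate validation split here; the test feed doubles
    # as the validation feed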

    if FLAGS.forward_only or FLAGS.resume:
        log_dir = os.path.join(FLAGS.work_dir, FLAGS.test_path)
    else:
        log_dir = os.path.join(FLAGS.work_dir, "run" + str(int(time.time())))

    # begin training
    with tf.Session(config=tf.ConfigProto(log_device_placement=True,
                                          allow_soft_placement=True)) as sess:
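        # log_device_placement logs which device each op lands on;
        # allow_soft_placement lets TF fall back to CPU when an op has no
        # GPU kernel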
        initializer = tf.random_uniform_initializer(-1.0 * config.init_w,
                                                    config.init_w)
        scope = "model"
        with tf.variable_scope(scope, reuse=None, initializer=initializer):
            model = VRNN(sess,
                         config,
                         api,
                         log_dir=None if FLAGS.forward_only else log_dir,
                         scope=scope)
        with tf.variable_scope(scope, reuse=True, initializer=initializer):
            valid_model = VRNN(sess,
                               valid_config,
                               api,
                               log_dir=None,
                               scope=scope)
        #with tf.variable_scope(scope, reuse=True, initializer=initializer):
        #    test_model = KgRnnCVAE(sess, test_config, api, log_dir=None, forward=True, scope=scope)

        print("Created computation graphs")
        if api.word2vec is not None and not FLAGS.forward_only:
            print("Loaded word2vec")
            sess.run(model.W_embedding.assign(np.array(api.word2vec)))

        # write config to a file for logging
        if not FLAGS.forward_only:
            with open(os.path.join(log_dir, "run.log"), "w") as f:
                f.write(pp(config, output=False))

        # create a folder by force
        ckp_dir = os.path.join(log_dir, "checkpoints")
        if not os.path.exists(ckp_dir):
            os.mkdir(ckp_dir)

        ckpt = tf.train.get_checkpoint_state(ckp_dir)
        print("Created models with fresh parameters.")
        sess.run(tf.global_variables_initializer())

        if ckpt:
            print("Reading dm models parameters from %s" %
                  ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
            #print([str(op.name) for op in tf.get_default_graph().get_operations()])
            print([str(v.name) for v in tf.global_variables()])

        if not FLAGS.forward_only:
            dm_checkpoint_path = os.path.join(
                ckp_dir, model.__class__.__name__ + ".ckpt")
            global_t = 1
            patience = config.n_epoch  # wait at least n_epoch epochs before stopping
            dev_loss_threshold = np.inf
            best_dev_loss = np.inf
            for epoch in range(config.max_epoch):
                print(">> Epoch %d with lr %f" %
                      (epoch, model.learning_rate.eval()))

                # begin training
                if train_feed.num_batch is None or train_feed.ptr >= train_feed.num_batch:
                    train_feed.epoch_init(config.batch_size, shuffle=True)

                if config.with_label_loss:
                    labeled_feed.epoch_init(len(labeled_dial), shuffle=False)
                else:
                    labeled_feed = None
                    labeled_dial_labels = None
                global_t, train_loss = model.train(
                    global_t,
                    sess,
                    train_feed,
                    labeled_feed,
                    labeled_dial_labels,
                    update_limit=config.update_limit)

                # begin validation
                valid_feed.epoch_init(config.batch_size, shuffle=False)
                valid_loss = valid_model.valid("ELBO_VALID", sess, valid_feed,
                                               labeled_feed,
                                               labeled_dial_labels)
                """
                test_feed.epoch_init(test_config.batch_size, test_config.backward_size,
                                     test_config.step_size, shuffle=True, intra_shuffle=False)
                test_model.test(sess, test_feed, num_batch=5)
                """

                done_epoch = epoch + 1
                # only save a models if the dev loss is smaller
                # Decrease learning rate if no improvement was seen over last 3 times.
                if config.op == "sgd" and done_epoch > config.lr_hold:
                    sess.run(model.learning_rate_decay_op)
                """
                if valid_loss < best_dev_loss:
                    if valid_loss <= dev_loss_threshold * config.improve_threshold:
                        patience = max(patience, done_epoch * config.patient_increase)
                        dev_loss_threshold = valid_loss

                    # still save the best train model
                    if FLAGS.save_model:
                        print("Save model!!")
                        model.saver.save(sess, dm_checkpoint_path, global_step=epoch)
                    best_dev_loss = valid_loss
                """
                # the best-loss gate above is commented out, so the model is
                # saved every epoch instead of only on improvement
                if FLAGS.save_model:
                    print("Save model!!")
                    model.saver.save(sess,
                                     dm_checkpoint_path,
                                     global_step=epoch)

                if config.early_stop and patience <= done_epoch:
                    print("!!Early stop due to run out of patience!!")
                    break
            print("Best validation loss %f" % best_dev_loss)
            print("Done training")
        else:
            # begin validation
            global_t = 1
            print("test-----------")
            print(">> Epoch 0 with lr %f" % model.learning_rate.eval())

            if not FLAGS.use_test_batch:
                # run over the training feed to extract latent states
                if train_feed.num_batch is None or train_feed.ptr >= train_feed.num_batch:
                    train_feed.epoch_init(config.batch_size, shuffle=False)
                if config.with_label_loss:
                    labeled_feed.epoch_init(len(labeled_dial), shuffle=False)
                else:
                    labeled_feed = None
                    labeled_dial_labels = None
                results, fetch_results = model.get_zt(
                    global_t,
                    sess,
                    train_feed,
                    update_limit=config.update_limit,
                    labeled_feed=labeled_feed,
                    labeled_labels=labeled_dial_labels)
                with open(FLAGS.result_path, "wb") as fh:  # pickle requires binary mode
                    pkl.dump(results, fh)
                with open(FLAGS.result_path + ".param.pkl", "wb") as fh:
                    pkl.dump(fetch_results, fh)
            else:
                print("use_test_batch")
                # run over the validation/test feed to extract latent states
                valid_feed.epoch_init(config.batch_size, shuffle=False)

                if config.with_label_loss:
                    labeled_feed.epoch_init(len(labeled_dial), shuffle=False)
                else:
                    labeled_feed = None
                    labeled_dial_labels = None
                results, fetch_results = model.get_zt(
                    global_t,
                    sess,
                    valid_feed,
                    update_limit=config.update_limit,
                    labeled_feed=labeled_feed,
                    labeled_labels=labeled_dial_labels)
                with open(FLAGS.result_path, "wb") as fh:  # pickle requires binary mode
                    pkl.dump(results, fh)
                with open(FLAGS.result_path + ".param.pkl", "wb") as fh:
                    pkl.dump(fetch_results, fh)
            """