Example #1
    def train(self, text_train, label_train, text_test, label_test, model_dir):

        label_test = np.reshape(label_test, [-1, 1])
        label_test = labels_smooth(label_test, self._config.class_nums, self._config.label_smooth_eps)
        with self._graph.as_default():
            dataset = tf.data.Dataset.from_tensor_slices((text_train, label_train))
            batch_dataset = dataset.batch(batch_size=self._config.batch_size)
            repeat_dataset = batch_dataset.repeat(self._config.epoch)
            data_iterator = repeat_dataset.make_one_shot_iterator()
            next_batch_text, next_batch_label = data_iterator.get_next()

            global_step = tf.Variable(0, trainable=False)
            data_len = len(label_train)
            batch_nums = data_len // self._config.batch_size
            learning_rate = tf.train.exponential_decay(self._config.init_learning_rate, global_step=global_step,
                                                       decay_steps=batch_nums, decay_rate=self._config.learning_rate_decay)
            train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss, global_step=global_step, name="adam-textcnn")
            init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
            with tf.Session(config=gpu_config()) as sess:
                sess.run(init_op)
                counter = 0
                while True:
                    try:
                        counter += 1
                        self._mode = "train"
                        x, y = sess.run([next_batch_text, next_batch_label])
                        y = np.reshape(y, [-1, 1]).astype(np.int32)
                        y = labels_smooth(y, self._config.class_nums, self._config.label_smooth_eps)
                        loss, _ = sess.run([self.loss, train_op], feed_dict={self._input: x, self._target: y})
                        if counter % batch_nums == 0:
                            print("Epoch %d loss: %lf" % ((counter // batch_nums), loss))
                            self._mode = "test"
                            accurcy = sess.run(self.accurcy, feed_dict={self._input: text_test, self._target: label_test})
                            print("Test accurcy:", accurcy)
                    except tf.errors.OutOfRangeError:
                        break
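
Every example on this page configures its tf.Session with a gpu_config() helper that is not shown. A minimal sketch, assuming the helper only returns a tf.ConfigProto with on-demand GPU memory growth (and, as in Example #3, an optional GPU index; this signature is hypothetical):

import tensorflow as tf

def gpu_config(gpu=None):
    # Hypothetical reconstruction of utils.gpu_config; the real helper is not shown on this page.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # allocate GPU memory as needed rather than all at once
    if gpu is not None:
        config.gpu_options.visible_device_list = str(gpu)  # restrict TensorFlow to the chosen GPU
    return config
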
Example #2
    def __init__(self, model, batcher, vocab, ckpt_id=None, fw_sess=None, bw_model=None, bw_sess=None, bidi_ckpt_path=None):
        self.model = model
        self.batcher = batcher
        self.vocab = vocab
        self.sess = tf.Session(config=utils.gpu_config()) if fw_sess is None else fw_sess
        self.sess2 = bw_sess
        self.bw_model = bw_model

        if bw_model is None:
            ckpt_path = utils.load_ckpt(self.model.hps, self.model.saver, self.sess)
            print('Checkpoint path name: {}'.format(ckpt_path))
            ckpt_name = 'ckpt-' + ckpt_path.split('-')[-1]
        else:
            ckpt_name = 'ckpt-' + bidi_ckpt_path.split('-')[-1]
        self.decode_dir = os.path.join(model.hps.model_path, make_decode_dir_name(ckpt_name, model.hps))

        if not os.path.exists(self.decode_dir):
            os.makedirs(self.decode_dir)
Example #3
                args.out_path + '/y/' + args.gain + '/' + args.test_fnames[j] +
                '.wav', args.fs, y_out)

        print("Inference (%s): %3.2f%%.       " %
              (args.out_type, 100 * ((j + 1) / len(args.test_x_len))),
              end="\r")
    print('\nInference complete.')


if __name__ == '__main__':
    ## GET COMMAND LINE ARGUMENTS
    args = utils.args()

    ## ARGUMENTS
    args.ver = '3a'
    args.blocks = ['C3'] + ['B5'] * 40 + ['O1']
    args.epoch = 175  # for inference.

    ## TRAINING AND TESTING SET ARGUMENTS
    args = deepxi_args(args)

    ## MAKE DEEP XI NNET
    net = deepxi_net(args)

    ## GPU CONFIGURATION
    config = utils.gpu_config(args.gpu)

    with tf.Session(config=config) as sess:
        if args.train: train(sess, net, args)
        if args.infer: infer(sess, net, args)
Example #4
        else:
            ## DEEP XI FOR IBM ESTIMATION
            deepxi_args = utils.args()
            deepxi_args.ver = '3a'
            deepxi_args.blocks = ['C3'] + ['B5'] * 40 + ['O1']
            deepxi_args.epoch = 175
            deepxi_args.stats_path = './DeepXi/stats'
            deepxi_args.model_path = './DeepXi/model'
            deepxi_args.train = False
            deepxi_args = deepxi.deepxi_args(deepxi_args)
            deepxi_args.infer = True
            deepxi_graph = tf.Graph()
            with deepxi_graph.as_default():
                deepxi_net = deepxi.deepxi_net(deepxi_args)
            config = utils.gpu_config(deepxi_args.gpu)
            deepxi_sess = tf.Session(config=config, graph=deepxi_graph)
            deepxi_net.saver.restore(
                deepxi_sess, deepxi_args.model_path + '/epoch-' +
                str(deepxi_args.epoch))  # load model for epoch.

            ## MARGINALISATION
            if spn_args.mft == 'marg':
                test_noisy_speech(deepxi_sess, deepxi_net, spn_args)

            ## BOUNDED MARGINALISATION
            if spn_args.mft == 'bmarg':
                test_noisy_speech(deepxi_sess, deepxi_net, spn_args)

            # CLOSE TF SESSION
            deepxi_sess.close()
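
Example #4 builds the Deep Xi network inside its own tf.Graph and binds a dedicated tf.Session to that graph, so the restored model can run alongside the caller's existing graph. A stripped-down sketch of that pattern, with a placeholder layer standing in for the real deepxi_net and an illustrative checkpoint path:

import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    x = tf.placeholder(tf.float32, [None, 257], name='x')  # placeholder input
    y = tf.layers.dense(x, 257)                             # stand-in for the real network
    saver = tf.train.Saver()

sess = tf.Session(graph=graph)              # this session only sees ops from `graph`
# saver.restore(sess, './model/epoch-175')  # illustrative path; restore the desired checkpoint
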
Example #5
def main():

    utils.print_config(args)

    if 'train' not in args.mode:
        args.keep_rate = 1.0
    args.use_pretrain = (args.use_pretrain == 'True')
    args.use_aux_task = (args.use_aux_task == 'True')

    if args.mode == 'lm_train':
        args.model = 'lm'
        args.data_path = "./data/wikitext/wikitext-103/processed_wiki_train.bin"
        args.use_pretrain = False

    args.model_path = os.path.join(args.model_path, args.exp_name).format(
        args.model)  # model_path default: "data/log/{}"

    if not os.path.exists(args.model_path):
        if 'train' not in args.mode:
            raise ValueError('model path does not exist: {}'.format(args.model_path))
        os.makedirs(args.model_path)
    with open(os.path.join(args.model_path, 'config.json'),
              'w',
              encoding='utf8') as f:
        json.dump(vars(args), f)

    print("Default models path: {}".format(args.model_path))

    print('code start/ {} mode / {} models'.format(args.mode, args.model))
    utils.assign_specific_gpu(args.gpu_nums)

    vocab = utils.Vocab()

    vardicts = utils.get_pretrain_weights(
        args.true_pretrain_ckpt_path
    ) if args.use_pretrain and args.mode == 'train' else None

    if args.mode == 'decode':
        if args.model == 'mmi_bidi': args.beam_size = args.mmi_bsize
        args.batch_size = args.beam_size

    modelhps = deepcopy(args)
    if modelhps.mode == 'decode':
        modelhps.max_dec_len = 1

    if args.model == 'vanilla':
        model = BaseModel(vocab, modelhps)
    elif args.model == 'mmi_bidi':
        if args.mode == 'decode':
            bw_graph = tf.Graph()
            with bw_graph.as_default():
                bw_model = BaseModel(vocab, args)

            bw_sess = tf.Session(graph=bw_graph, config=utils.gpu_config())

            with bw_sess.as_default():
                with bw_graph.as_default():
                    bidi_ckpt_path = utils.load_ckpt(bw_model.hps,
                                                     bw_model.saver, bw_sess)

            fw_graph = tf.Graph()
            with fw_graph.as_default():
                modelhps.model_path = modelhps.model_path.replace(
                    'mmi_bidi', 'vanilla')
                modelhps.model = 'vanilla'
                fw_model = BaseModel(vocab, modelhps)
            fw_sess = tf.Session(graph=fw_graph)
            with fw_sess.as_default():
                with fw_graph.as_default():
                    ckpt_path = utils.load_ckpt(fw_model.hps, fw_model.saver,
                                                fw_sess)
        else:
            model = BaseModel(vocab, modelhps)

    elif args.model == 'lm':
        model = LMModel(vocab, modelhps)
    elif args.model == 'embmin':
        model = DiverEmbMin(vocab, modelhps)
    else:
        raise ValueError('unknown model: {}'.format(args.model))
    print('models load end')

    if args.mode in ['train', 'lm_train']:
        train(model, vocab, vardicts)
    elif args.mode == 'decode':
        import time

        if args.model == 'mmi_bidi':
            batcher = Batcher(
                vocab, bw_model.hps.data_path.replace('train_', 'test_'), args)
            decoder = BeamsearchDecoder(fw_model,
                                        batcher,
                                        vocab,
                                        fw_sess=fw_sess,
                                        bw_model=bw_model,
                                        bw_sess=bw_sess,
                                        bidi_ckpt_path=bidi_ckpt_path)
        else:
            batcher = Batcher(vocab,
                              model.hps.data_path.replace('train_', 'test_'),
                              args)
            decoder = BeamsearchDecoder(model, batcher, vocab)
        decoder.decode()
    elif args.mode == 'eval':
        pass
Example #6
def train(model, vocab, pretrain_vardicts=None):

    train_data_loader = Batcher_(vocab, model.hps.data_path, args)
    valid_data_loader = Batcher_(vocab,
                                 model.hps.data_path.replace('train_', 'dev_'),
                                 args)
    all_id, claim_id, claim_pers = train_data_loader.get_claim_id()
    all_val_id, claim_val_id, claim_val_pers = valid_data_loader.get_claim_id()

    train_logdir, dev_logdir = os.path.join(args.model_path,
                                            'logdir/train'), os.path.join(
                                                args.model_path, 'logdir/dev')
    train_savedir = os.path.join(args.model_path, 'train/')
    print("[*] Train save directory is: {}".format(train_savedir))
    if not os.path.exists(train_logdir): os.makedirs(train_logdir)
    if not os.path.exists(dev_logdir): os.makedirs(dev_logdir)
    if not os.path.exists(train_savedir): os.makedirs(train_savedir)
    # print(all_id)

    los = model.get_loss()

    optim = tf.train.MomentumOptimizer(model.hps.meta_learning_rate, 0.9)
    grads_vars = optim.compute_gradients(
        los,
        tf.trainable_variables(),
        aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
    grads_vars = deal_gradient(grads_vars)
    # Cache gradients in non-trainable variables so they can be accumulated
    # across the meta-batch and applied in a single optimizer step below.
    grads_cache = [
        tf.Variable(np.zeros(t[0].shape.as_list(), np.float32),
                    trainable=False) for t in grads_vars[1:]
    ]
    clear_grads_cache_op = tf.group(
        [gc.assign(tf.zeros_like(gc)) for gc in grads_cache])
    accumulate_grad_op = tf.group(
        [gc.assign_add(gv[0]) for gc, gv in zip(grads_cache, grads_vars[1:])])
    new_grads_vars = [(g, gv[1]) for g, gv in zip(grads_cache, grads_vars[1:])]
    apply_grad_op = optim.apply_gradients(new_grads_vars)
    print("ready done!")

    with tf.Session(config=utils.gpu_config()) as sess:

        if model.hps.use_pretrain:
            assign_ops, uninitialized_varlist = utils.assign_pretrain_weights(
                pretrain_vardicts)
            sess.run(assign_ops)
            sess.run(tf.variables_initializer(uninitialized_varlist))
        else:
            sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()
        model.saver.save(sess, './save/model')
        with tf.device('/cpu:0'):
            saver_ = tf.train.import_meta_graph('./save/model.meta')

        # Early-stopping state: initialized once so it persists across epochs.
        best_loss = 30
        patience = 5
        stop_count = 0
        for meta_iteration in range(model.hps.max_epoch):
            train_loss_before = []
            train_loss_meta = []
            epoch_val_loss = 0
            for epoch_bs in range(epoch_batch_size):
                batch_loss = 0
                val_all_loss = tf.zeros((), dtype=tf.float32)
                val_all_dia = []
                batch_grad_list = []
                sess.run(clear_grads_cache_op)
                for b_size in range(model.hps.meta_batch_size):
                    # print(b_size)
                    with tf.device('/cpu:0'):
                        shuffle(all_id)
                        cid_list = all_id[:model.hps.batch_size]
                        train_iter, val_iter = train_data_loader.get_data_loader(
                            cid_list, claim_id, claim_pers,
                            model.hps.batch_size)

                    model.try_run(val_iter, sess, accumulate_grad_op)

                    res = model.run_step(val_iter,
                                         sess,
                                         is_train=False,
                                         freeze_layer=model.hps.use_pretrain)
                    v_loss, summaries, step = res['loss'], res[
                        'summaries'], res['global_step']

                    train_loss_before.append(v_loss)

                    #update
                    res_val, val_batch_loss = do_learning_fix_step(
                        model, train_iter, val_iter, sess)
                    print("do learning is done")
                    val_all_loss = tf.add(val_all_loss, val_batch_loss)

                    val_loss, summaries_val, step_val = res_val[
                        'loss'], res_val['summaries'], res_val['global_step']
                    print("val_loss:", val_loss)
                    train_loss_meta.append(val_loss)
                    batch_loss += val_loss

                    #reset
                    saver_.restore(sess, tf.train.latest_checkpoint('./save'))
                    print("reset")

                print("one batch is done")
                sess.run(apply_grad_op)
                the_name_model = './save/' + 'model' + str(
                    meta_iteration) + str(epoch_bs)
                model.saver.save(sess, the_name_model, write_meta_graph=False)

            print("epoch: {}, before loss:{} ".format(
                meta_iteration, np.mean(train_loss_before)))
            print("epoch: {}, after loss:{} ".format(meta_iteration,
                                                     np.mean(train_loss_meta)))

            if meta_iteration % 2 == 0:
                num_claim_val = len(all_val_id)
                val_loss_before = []
                val_loss_meta = []
                shuffle(all_val_id)

                for i in range(0, 80, model.hps.batch_size):
                    with tf.device('/cpu:0'):
                        val_cid_list = all_val_id[i:i + model.hps.batch_size]
                        valid_train_iter, valid_val_iter = valid_data_loader.get_data_loader(
                            val_cid_list, claim_val_id, claim_val_pers,
                            model.hps.batch_size)

                    res = model.run_step(valid_val_iter,
                                         sess,
                                         is_train=False,
                                         freeze_layer=model.hps.use_pretrain)
                    loss = res['loss']
                    val_loss_before.append(loss)

                    #meta tuning
                    res_val_, val_batch_loss = do_learning_fix_step(
                        model, valid_train_iter, valid_val_iter, sess)
                    val_loss_meta.append(res_val_['loss'])
                    saver_.restore(sess, tf.train.latest_checkpoint('./save'))

                print("epoch: {}, fine tuning loss:{} ".format(
                    meta_iteration, np.mean(val_loss_meta)))

                if np.mean(val_loss_meta) < best_loss:
                    best_loss = np.mean(val_loss_meta)
                    the_meta_model = train_savedir + 'MetaModel' + str(
                        meta_iteration)
                    model.saver.save(sess, the_meta_model)
                    print("save fine tuning model in {}".format(train_savedir))
                else:
                    stop_count += 1
                    if stop_count > patience:
                        print("loss has been rising, stop training")
                        break


def train(model, vocab, pretrain_vardicts=None):
    train_data_loader = Batcher(vocab, model.hps.data_path, args)
    valid_data_loader = Batcher(vocab,
                                model.hps.data_path.replace('train_', 'dev_'),
                                args)
    if model.hps.mode == 'lm_train':
        valid_data_loader = Batcher(
            vocab, model.hps.data_path.replace('train_', 'valid_'), args)

    with tf.Session(config=utils.gpu_config()) as sess:
        train_logdir, dev_logdir = os.path.join(args.model_path,
                                                'logdir/train'), os.path.join(
                                                    args.model_path,
                                                    'logdir/dev')
        train_savedir = os.path.join(args.model_path, 'train/')
        print("[*] Train save directory is: {}".format(train_savedir))
        if not os.path.exists(train_logdir): os.makedirs(train_logdir)
        if not os.path.exists(dev_logdir): os.makedirs(dev_logdir)
        if not os.path.exists(train_savedir): os.makedirs(train_savedir)

        summary_writer1 = tf.summary.FileWriter(train_logdir, sess.graph)
        summary_writer2 = tf.summary.FileWriter(dev_logdir, sess.graph)
        """
        Initialize with pretrain variables
        """
        if model.hps.use_pretrain:
            assign_ops, uninitialized_varlist = utils.assign_pretrain_weights(
                pretrain_vardicts)
            sess.run(assign_ops)
            sess.run(tf.variables_initializer(uninitialized_varlist))
        else:
            sess.run(tf.global_variables_initializer())
        posterior = [0 for _ in range(model.hps.matrix_num)]
        prior = [0 for _ in range(model.hps.matrix_num)]
        step = 0
        while True:  # 6978 sample for one epoch
            beg_time = time()

            batch = train_data_loader.next_batch()
            sample_per_epoch = 857899 if 'lm' in model.hps.mode else 6978

            if model.hps.mode == 'lm_train':
                res = model.run_step(batch, sess, is_train=True)
            else:
                res = model.run_step(
                    batch,
                    sess,
                    is_train=True,
                    freeze_layer=(
                        model.hps.use_pretrain
                        and step < sample_per_epoch / model.hps.batch_size))
            loss, summaries, step = res['loss'], res['summaries'], res[
                'global_step']
            if model.hps.model == 'posterior':
                gumbel = res['posterior']
                gumbel_prior = res['prior']
                selected = np.argsort(-gumbel)
                selected_poste = [int(el[0]) for el in selected]
                selected_prior = [
                    int(el[0]) for el in np.argsort(-gumbel_prior)
                ]
                posterior = [
                    el1 + el2 for el1, el2 in zip(posterior, selected_poste)
                ]
                prior = [el1 + el2 for el1, el2 in zip(prior, selected_prior)]
                print("prior: {}  posterior: {}".format(prior, posterior))
            elif model.hps.model == 'embmin':
                dist = res['selected_emb_idx']
                for tmp in dist:
                    prior[tmp] += 1
                print(prior)

            end_time = time()
            print("{} epoch, {} step, {}sec, {} loss".format(
                int(step * model.hps.batch_size / sample_per_epoch), step,
                round(end_time - beg_time, 3), round(loss, 3)))
            summary_writer1.add_summary(summaries, step)

            if step % 5 == 0:
                dev_batch = valid_data_loader.next_batch()
                res = model.run_step(dev_batch, sess, is_train=False)
                loss, summaries, step = res['loss'], res['summaries'], res[
                    'global_step']
                assert step % 5 == 0
                print("[VALID] {} loss".format(round(loss, 3)))
                summary_writer2.add_summary(summaries, step)

            if step == 10 or step % 2000 == 0:
                model.saver.save(sess, train_savedir, global_step=step)

            if int(step * model.hps.batch_size /
                   sample_per_epoch) > model.hps.max_epoch:
                model.saver.save(sess, train_savedir, global_step=step)
                print("training end")
                break
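
The first train() in Example #6 accumulates gradients in non-trainable cache variables across a meta-batch and applies them in a single optimizer step. A self-contained sketch of that clear/accumulate/apply pattern, using an illustrative scalar loss in place of the real model:

import tensorflow as tf

x = tf.Variable(1.0)
loss = tf.square(x - 3.0)                     # illustrative scalar loss
optim = tf.train.MomentumOptimizer(0.1, 0.9)

grads_vars = optim.compute_gradients(loss, [x])
grads_cache = [tf.Variable(tf.zeros_like(v), trainable=False) for _, v in grads_vars]

clear_op = tf.group([c.assign(tf.zeros_like(c)) for c in grads_cache])
accum_op = tf.group([c.assign_add(g) for c, (g, _) in zip(grads_cache, grads_vars)])
apply_op = optim.apply_gradients([(c, v) for c, (_, v) in zip(grads_cache, grads_vars)])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(clear_op)                        # zero the gradient caches
    for _ in range(4):                        # accumulate gradients from several batches
        sess.run(accum_op)
    sess.run(apply_op)                        # one update with the summed gradients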