Example #1
def tower_ensemble_graph(eval_features, total_graphs, total_params):
    default_params = total_params[0]

    # define multi-gpu inference graph
    def _tower_infer_graph(features):
        infer_fns = []
        for midx, (graph, params) in enumerate(zip(total_graphs,
                                                   total_params)):
            params = copy.copy(params)
            params.scope_name = params.scope_name + "_ensembler_%d" % midx
            infer_fns.append(graph.infer_fn(params))

        total_encoding_fns, total_decoding_fns = list(zip(*infer_fns))

        def _encoding_fn(source):
            model_state = {}
            for _midx in range(len(total_encoding_fns)):
                current_model_state = total_encoding_fns[_midx](source)
                model_state['ensembler_%d' % _midx] = current_model_state
            return model_state

        def _decoding_fn(target, model_state, time):
            pred_logits = []

            for _midx in range(len(total_decoding_fns)):
                state_describ = "ensembler_%d" % _midx
                if default_params.search_mode == "cache":
                    current_output = total_decoding_fns[_midx](
                        target, model_state[state_describ], time)
                else:
                    current_output = total_decoding_fns[_midx](target,
                                                               model_state,
                                                               time)
                step_logits, step_state = current_output

                pred_logits.append(step_logits)

                if default_params.search_mode == "cache":
                    model_state[state_describ] = step_state

            model_logits = tf.add_n(
                [tf.nn.softmax(logits)
                 for logits in pred_logits]) / len(pred_logits)

            return tf.log(model_logits), model_state

        beam_output = beam_search(features, _encoding_fn, _decoding_fn,
                                  default_params)

        return beam_output

    # feed model to multiple gpus
    eval_outputs = parallel.parallel_model(
        _tower_infer_graph,
        eval_features,
        default_params.gpus,
        use_cpu=(len(default_params.gpus) == 0))
    eval_seqs, eval_scores = eval_outputs['seq'], eval_outputs['score']

    return eval_seqs, eval_scores
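
In `_decoding_fn` above, each ensemble member's step logits are turned into probabilities with softmax, the probabilities are averaged across members, and the log of the average is handed back to `beam_search`. Below is a minimal NumPy sketch of that probability-space averaging; the two logit vectors are hypothetical and exist only to illustrate the arithmetic.

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

# Hypothetical per-token logits from two ensemble members.
logits_a = np.array([2.0, 0.5, -1.0])
logits_b = np.array([1.0, 1.5, -0.5])

# Average in probability space, then take the log -- this mirrors
# tf.add_n([tf.nn.softmax(...)]) / len(...) followed by tf.log above.
avg_prob = (softmax(logits_a) + softmax(logits_b)) / 2
ensemble_log_prob = np.log(avg_prob)
print(ensemble_log_prob)
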
Example #2
def tower_train_graph(train_features, optimizer, graph, params):
    # define multi-gpu training graph
    def _tower_train_graph(features):
        train_output = graph.train_fn(features,
                                      params,
                                      initializer=initializer.get_initializer(
                                          params.initializer,
                                          params.initializer_gain))

        tower_gradients = optimizer.compute_gradients(
            train_output["loss"] * tf.cast(params.loss_scale, tf.float32),
            colocate_gradients_with_ops=True)
        tower_gradients = [(g / tf.cast(params.loss_scale, tf.float32), v)
                           for g, v in tower_gradients]

        return {"loss": train_output["loss"], "gradient": tower_gradients}

    # feed model to multiple gpus
    tower_outputs = parallel.parallel_model(_tower_train_graph,
                                            train_features,
                                            params.gpus,
                                            use_cpu=(len(params.gpus) == 0))

    loss = tf.add_n(tower_outputs['loss']) / len(tower_outputs['loss'])
    gradients = parallel.average_gradients(tower_outputs['gradient'])

    return loss, gradients
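
A hedged usage sketch of `tower_train_graph`, assuming that `graph`, `params`, and `train_features` come from the surrounding training script and that `parallel.average_gradients` returns (gradient, variable) pairs; the learning rate below is a placeholder.

# Hypothetical wiring inside a training script.
global_step = tf.train.get_or_create_global_step()
opt = tf.train.AdamOptimizer(learning_rate=1e-4)
loss, gradients = tower_train_graph(train_features, opt, graph, params)
train_op = opt.apply_gradients(gradients, global_step=global_step)
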
Example #3
def tower_infer_graph(eval_features, graph, params):
    # define multi-gpu inference graph
    def _tower_infer_graph(features):
        return graph.infer_fn(params, features)

    # feed model to multiple gpus
    eval_outputs = parallel.parallel_model(_tower_infer_graph,
                                           eval_features,
                                           params.gpus,
                                           use_cpu=(len(params.gpus) == 0))

    return eval_outputs
Example #4
def tower_score_graph(eval_features, graph, params):
    # define multi-gpu inference graph
    def _tower_infer_graph(features):
        scores = graph.score_fn(features, params)
        return scores

    # feed model to multiple gpus
    eval_outputs = parallel.parallel_model(
        _tower_infer_graph, eval_features,
        params.gpus, use_cpu=(len(params.gpus) == 0))
    eval_scores = eval_outputs['score']

    return eval_scores
Example #5
def tower_infer_graph(eval_features, graph, params):
    # define multi-gpu inference graph
    def _tower_infer_graph(features):
        encoding_fn, decoding_fn = graph.infer_fn(params)
        beam_output = beam_search(features, encoding_fn, decoding_fn, params)

        return beam_output

    # feed model to multiple gpus
    eval_outputs = parallel.parallel_model(
        _tower_infer_graph, eval_features,
        params.gpus, use_cpu=(len(params.gpus) == 0))
    eval_seqs, eval_scores = eval_outputs['seq'], eval_outputs['score']

    return eval_seqs, eval_scores
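
A hedged usage sketch of the single-model beam-search variant above; `graph`, `params`, and `eval_features` are assumed to come from the surrounding evaluation script, which would normally restore a trained checkpoint rather than initialize variables.

# Hypothetical wiring inside an evaluation script.
eval_seqs, eval_scores = tower_infer_graph(eval_features, graph, params)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # real code restores a checkpoint
    decoded_ids, decoded_scores = sess.run([eval_seqs, eval_scores])
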
Example #6
def tower_train_graph(train_features, optimizer, params):
    # define multi-gpu training graph
    def _tower_train_graph(features):
        # NOTE: `graph` is not a parameter of this example; it is assumed to be
        # available from the enclosing module scope.
        train_output = graph.train_fn(
            features,
            params,
            initializer=tf.random_uniform_initializer(-0.08, 0.08))
        tower_gradients = optimizer.compute_gradients(
            train_output["loss"], colocate_gradients_with_ops=True)
        return {"loss": train_output["loss"], "gradient": tower_gradients}

    # feed model to multiple gpus
    tower_outputs, tower_mask = parallel.parallel_model(
        _tower_train_graph,
        train_features,
        params.gpus,
        use_cpu=(len(params.gpus) == 0))

    loss = parallel.fusion_with_mask(tower_outputs['loss'], tower_mask)
    gradients = parallel.average_gradients(tower_outputs['gradient'],
                                           mask=tower_mask)

    return loss, gradients
Example #7
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    model_cls = transformer.Transformer
    args.model = model_cls.get_name()
    params = default_parameters()

    # Import and override parameters
    # Priorities (low -> high):
    # default -> saved -> command
    params = merge_parameters(params, model_cls.get_parameters())
    params = import_params(args.output, args.model, params)
    override_parameters(params, args)

    # Export all parameters and model specific parameters
    export_params(params.output, "params.json", params)
    export_params(params.output, "%s.json" % args.model,
                  collect_params(params, model_cls.get_parameters()))

    #tf.set_random_seed(params.seed)

    # Build Graph
    with tf.Graph().as_default():
        # Build input queue
        features = dataset.get_training_input(params.input, params)

        # features, init_op = cache.cache_features(features, params.update_cycle)
        # Add pre_trained_embedding:
        if params.use_pretrained_embedding:
            _, src_embs = dataset.get_pre_embeddings(params.embeddings[0])
            _, trg_embs = dataset.get_pre_embeddings(params.embeddings[1])
            features['src_embs'] = src_embs
            features['trg_embs'] = trg_embs
            print('Loaded Embeddings!', src_embs.shape, trg_embs.shape)

        # Build model
        initializer = get_initializer(params)
        model = model_cls(params, args.model)

        # Multi-GPU setting
        sharded_losses = parallel.parallel_model(
            model.get_training_func(initializer), features, params.device_list)
        loss = tf.add_n(sharded_losses) / len(sharded_losses)

        # Create global step
        global_step = tf.train.get_or_create_global_step()
        initial_global_step = global_step.assign(0)

        # Print parameters
        all_weights = {v.name: v for v in tf.trainable_variables()}
        total_size = 0

        for v_name in sorted(list(all_weights)):
            v = all_weights[v_name]
            tf.logging.info("%s\tshape    %s", v.name[:-2].ljust(80),
                            str(v.shape).ljust(20))
            v_size = np.prod(np.array(v.shape.as_list())).tolist()
            total_size += v_size
        tf.logging.info("Total trainable variables size: %d", total_size)

        learning_rate = get_learning_rate_decay(params.learning_rate,
                                                global_step, params)
        if params.learning_rate_minimum:
            lr_min = float(params.learning_rate_minimum)
            learning_rate = tf.maximum(learning_rate, tf.to_float(lr_min))

        learning_rate = tf.convert_to_tensor(learning_rate, dtype=tf.float32)
        tf.summary.scalar("learning_rate", learning_rate)

        # Create optimizer
        if params.optimizer == "Adam":
            opt = tf.train.AdamOptimizer(learning_rate,
                                         beta1=params.adam_beta1,
                                         beta2=params.adam_beta2,
                                         epsilon=params.adam_epsilon)
        elif params.optimizer == "LazyAdam":
            opt = tf.contrib.opt.LazyAdamOptimizer(learning_rate,
                                                   beta1=params.adam_beta1,
                                                   beta2=params.adam_beta2,
                                                   epsilon=params.adam_epsilon)
        else:
            raise RuntimeError("Optimizer %s not supported" % params.optimizer)

        loss, ops = optimize.create_train_op(loss, opt, global_step, params)

        restore_op = restore_variables(args.output)

        # Validation
        if params.validation and params.references[0]:
            files = [params.validation] + list(params.references)
            eval_inputs = dataset.sort_and_zip_files(files)
            eval_input_fn = dataset.get_evaluation_input
        else:
            eval_input_fn = None

        # Add hooks
        save_vars = tf.trainable_variables() + [global_step]
        saver = tf.train.Saver(
            var_list=save_vars if params.only_save_trainable else None,
            max_to_keep=params.keep_checkpoint_max,
            sharded=False)
        tf.add_to_collection(tf.GraphKeys.SAVERS, saver)

        train_hooks = [
            tf.train.StopAtStepHook(last_step=params.train_steps),
            #tf.train.StopAtStepHook(num_steps=params.train_steps),
            tf.train.NanTensorHook(loss),
            tf.train.LoggingTensorHook(
                {
                    "step": global_step,
                    "loss": loss,
                },
                every_n_iter=params.print_steps),
            tf.train.CheckpointSaverHook(
                checkpoint_dir=params.output,
                save_secs=params.save_checkpoint_secs or None,
                save_steps=params.save_checkpoint_steps or None,
                saver=saver)
        ]

        config = session_config(params)

        if eval_input_fn is not None:
            train_hooks.append(
                hooks.EvaluationHook(
                    lambda f: beamsearch.create_inference_graph(
                        [model.get_inference_func()], f, params),
                    lambda: eval_input_fn(eval_inputs, params),
                    lambda x: decode_target_ids(x, params),
                    params.output,
                    config,
                    params.keep_top_checkpoint_max,
                    eval_steps_begin=params.eval_steps_begin,
                    eval_secs=params.eval_secs,
                    eval_steps=params.eval_steps))

        def restore_fn(step_context):
            step_context.session.run(restore_op)

        def step_fn(step_context):
            # Bypass hook calls
            return step_context.run_with_hooks(ops)

        # Create session, do not use default CheckpointSaverHook
        with tf.train.MonitoredTrainingSession(checkpoint_dir=params.output,
                                               hooks=train_hooks,
                                               save_checkpoint_secs=None,
                                               config=config) as sess:
            #sess.run(features['source'].eval())
            #sess.run(features['target'].eval())
            # Restore pre-trained variables
            sess.run_step_fn(restore_fn)
            if params.renew_lr:
                sess.run(initial_global_step)

            while not sess.should_stop():
                sess.run_step_fn(step_fn)
Example #8
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    model_cls = models.get_model(args.model)

    params = default_parameters()

    params = merge_parameters(params, model_cls.get_parameters())

    params = import_params(args.output, args.model, params)
    override_parameters(params, args)

    export_params(params.output, "params.json", params)
    export_params(params.output, "%s.json" % args.model,
                  collect_params(params, model_cls.get_parameters()))

    with tf.Graph().as_default():
        features = dataset.get_training_input(params.input, params)

        update_cycle = params.update_cycle
        features, init_op = cache.cache_features(features, update_cycle)

        initializer = get_initializer(params)
        regularizer = tf.contrib.layers.l1_l2_regularizer(
            scale_l1=params.scale_l1, scale_l2=params.scale_l2)
        model = model_cls(params)
        global_step = tf.train.get_or_create_global_step()

        sharded_losses = parallel.parallel_model(
            model.get_training_func(initializer, regularizer), features,
            params.device_list)

        loss = tf.add_n(sharded_losses) / len(sharded_losses)
        loss = loss + tf.losses.get_regularization_loss()

        all_weights = {v.name: v for v in tf.trainable_variables()}
        total_size = 0

        for v_name in sorted(list(all_weights)):
            v = all_weights[v_name]
            tf.logging.info("%s\tshape    %s", v.name[:-2].ljust(80),
                            str(v.shape).ljust(20))
            v_size = np.prod(np.array(v.shape.as_list())).tolist()
            total_size += v_size
        tf.logging.info("Total trainable variables size: %d", total_size)

        learning_rate = get_learning_rate_decay(params.learning_rate,
                                                global_step, params)
        learning_rate = tf.convert_to_tensor(learning_rate, dtype=tf.float32)
        tf.summary.scalar("learning_rate", learning_rate)

        if params.optimizer == "Adam":
            opt = tf.train.AdamOptimizer(learning_rate,
                                         beta1=params.adam_beta1,
                                         beta2=params.adam_beta2,
                                         epsilon=params.adam_epsilon)
        elif params.optimizer == "LazyAdam":
            opt = tf.contrib.opt.LazyAdamOptimizer(learning_rate,
                                                   beta1=params.adam_beta1,
                                                   beta2=params.adam_beta2,
                                                   epsilon=params.adam_epsilon)
        elif params.optimizer == "SGD":
            opt = tf.train.GradientDescentOptimizer(learning_rate)
        else:
            raise RuntimeError("Optimizer %s not supported" % params.optimizer)

        loss, ops = optimize.create_train_op(loss, opt, global_step, params)
        restore_op = restore_variables(args.checkpoint)

        if params.validation:
            eval_sorted_keys, eval_inputs = dataset.read_eval_input_file(
                params.validation)
            eval_input_fn = dataset.get_predict_input
        else:
            eval_input_fn = None

        save_vars = tf.trainable_variables() + [global_step]
        saver = tf.train.Saver(
            var_list=save_vars if params.only_save_trainable else None,
            max_to_keep=params.keep_checkpoint_max,
            sharded=False)
        tf.add_to_collection(tf.GraphKeys.SAVERS, saver)

        multiplier = tf.convert_to_tensor([update_cycle, 1])

        train_hooks = [
            tf.train.StopAtStepHook(last_step=params.train_steps),
            tf.train.NanTensorHook(loss),
            tf.train.LoggingTensorHook(
                {
                    "step": global_step,
                    "loss": loss,
                    "text": tf.shape(features["text"]) * multiplier,
                    "aspect": tf.shape(features["aspect"]) * multiplier,
                    "polarity": tf.shape(features["polarity"]) * multiplier
                },
                every_n_iter=1),
            tf.train.CheckpointSaverHook(
                checkpoint_dir=params.output,
                save_secs=params.save_checkpoint_secs or None,
                save_steps=params.save_checkpoint_steps or None,
                saver=saver)
        ]

        config = session_config(params)

        if eval_input_fn is not None:
            train_hooks.append(
                hooks.EvaluationHook(
                    lambda f: inference.create_predict_graph(
                        [model], f, params),
                    lambda: eval_input_fn(eval_inputs, params),
                    params.output,
                    config,
                    params.keep_top_checkpoint_max,
                    eval_secs=params.eval_secs,
                    eval_steps=params.eval_steps))

        def restore_fn(step_context):
            step_context.session.run(restore_op)

        def step_fn(step_context):
            step_context.session.run([init_op, ops["zero_op"]])
            for i in range(update_cycle - 1):
                step_context.session.run(ops["collect_op"])

            return step_context.run_with_hooks(ops["train_op"])

        with tf.train.MonitoredTrainingSession(checkpoint_dir=params.output,
                                               hooks=train_hooks,
                                               save_checkpoint_secs=None,
                                               config=config) as sess:
            sess.run_step_fn(restore_fn)

            while not sess.should_stop():
                sess.run_step_fn(step_fn)
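
The `step_fn` above drives gradient accumulation over `update_cycle` cached micro-batches: `zero_op` plausibly resets the accumulators, each `collect_op` adds one micro-batch's gradients, and `train_op` applies the accumulated update. A toy plain-Python sketch of that cycle follows; the numeric gradients are hypothetical, and whether the real `optimize` helper sums or averages them is not shown here.

# Toy stand-ins for ops["zero_op"], ops["collect_op"], and ops["train_op"];
# real gradients are tensors rather than floats.
class ToyAccumulator(object):
    def __init__(self):
        self.acc = 0.0

    def zero_op(self):
        self.acc = 0.0

    def collect_op(self, grad):
        self.acc += grad

    def train_op(self, grad):
        self.collect_op(grad)      # last micro-batch
        update = self.acc          # apply the accumulated gradient
        self.zero_op()
        return update

acc = ToyAccumulator()
micro_batch_grads = [0.25, 0.5, 0.75]       # hypothetical per-micro-batch grads
acc.zero_op()
for g in micro_batch_grads[:-1]:
    acc.collect_op(g)
print(acc.train_op(micro_batch_grads[-1]))  # 1.5, one accumulated update
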
Example #9
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    params = default_parameters()
    override_parameters(params, args)
    export_params(params.output, "params.json", params)

    # Build Graph
    with tf.Graph().as_default():
        dataset.start_queue(params)
        features = dataset.get_train_input(params)
        print(features)
        # Build model
        initializer = get_initializer(params)
        # model = LineBased.Model(params)
        model = pixellink.PixelLinkNetwork(params)

        # Multi-GPU setting
        sharded_losses, ((sum_img, sum_loss), *_) = parallel.parallel_model(
            model.get_training_func(initializer), features, params.device_list)
        loss = tf.add_n(sharded_losses) / len(sharded_losses)

        # Create global step
        global_step = tf.train.get_or_create_global_step()

        # Print parameters
        all_weights = {v.name: v for v in tf.trainable_variables()}
        total_size = 0

        for v_name in sorted(list(all_weights)):
            v = all_weights[v_name]
            tf.logging.info("%s\tshape    %s", v.name[:-2].ljust(80),
                            str(v.shape).ljust(20))
            v_size = np.prod(np.array(v.shape.as_list())).tolist()
            total_size += v_size
        tf.logging.info("Total trainable variables size: %d", total_size)

        learning_rate = get_learning_rate_decay(params.learning_rate,
                                                global_step, params)
        learning_rate = tf.convert_to_tensor(learning_rate, dtype=tf.float32)

        # weight decay
        weights = tf.trainable_variables()
        with tf.variable_scope('weights_norm') as scope:
            weights_norm = tf.reduce_sum(
                input_tensor=params.weight_decay *
                tf.stack([tf.nn.l2_loss(v) for v in weights]),
                name='weights_norm')
        loss = loss + weights_norm
        tf.summary.scalar('total_loss', loss)

        print('create opt')
        if params.optimizer == 'adam':
            # Create optimizer
            opt = tf.train.AdamOptimizer(learning_rate,
                                         beta1=params.adam_beta1,
                                         beta2=params.adam_beta2,
                                         epsilon=params.adam_epsilon)
        elif params.optimizer == 'sgd_momentum':
            opt = tf.train.MomentumOptimizer(learning_rate,
                                             momentum=params.momentum)
        else:
            raise NotImplementedError()

        train_op = tf.contrib.layers.optimize_loss(
            name="training",
            loss=loss,
            global_step=global_step,
            learning_rate=learning_rate,
            clip_gradients=params.clip_grad_norm or None,
            optimizer=opt,
            colocate_gradients_with_ops=True)

        print('create hooks')
        # Add hooks
        train_hooks = [
            tf.train.StopAtStepHook(last_step=params.train_steps),
            tf.train.NanTensorHook(loss),
            tf.train.LoggingTensorHook(
                {
                    "step": global_step,
                    "loss": loss,
                },
                every_n_iter=1),
            tf.train.CheckpointSaverHook(
                checkpoint_dir=params.output,
                save_secs=params.save_checkpoint_secs or None,
                save_steps=params.save_checkpoint_steps or None,
                saver=tf.train.Saver(max_to_keep=params.keep_checkpoint_max,
                                     sharded=False)),
            tf.train.SummarySaverHook(save_steps=20,
                                      save_secs=None,
                                      output_dir=os.path.join(
                                          params.output, "sumimg"),
                                      summary_op=sum_img),
            tf.train.SummarySaverHook(save_steps=1,
                                      save_secs=None,
                                      output_dir=os.path.join(
                                          params.output, "sumloss"),
                                      summary_op=sum_loss)
        ]

        config = session_config(params)

        train_hooks.append(
            hooks.EvaluationHook(model.get_evaluation_func(),
                                 dataset.get_eval_input,
                                 params.output,
                                 config,
                                 params.keep_top_checkpoint_max,
                                 eval_secs=params.eval_secs,
                                 eval_steps=params.eval_steps))

        print('create session')
        # Create session, do not use default CheckpointSaverHook
        with tf.train.MonitoredTrainingSession(checkpoint_dir=params.output,
                                               hooks=train_hooks,
                                               save_checkpoint_secs=None,
                                               config=config) as sess:
            # coord = tf.train.Coordinator()
            # threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            while not sess.should_stop():
                sess.run(train_op)
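
Example #9 adds weight decay explicitly as an L2 penalty over all trainable variables instead of delegating it to the optimizer. Below is a small self-contained TF1 sketch of that term; the two variables and the decay factor are hypothetical.

import tensorflow as tf

# Hypothetical trainables and decay factor, mirroring the weights_norm term above.
w1 = tf.Variable([[1.0, 2.0]], name="w1")
w2 = tf.Variable([3.0], name="w2")
weight_decay = 1e-4

# tf.nn.l2_loss(v) computes sum(v ** 2) / 2, so this adds
# weight_decay * 0.5 * (sum of squared weights) to the task loss.
l2_term = weight_decay * tf.reduce_sum(
    tf.stack([tf.nn.l2_loss(v) for v in [w1, w2]]))
task_loss = tf.constant(0.5)   # stands in for the model loss
total_loss = task_loss + l2_term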