Example 1
def build_graph(opts, is_training=True):
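    """Build the BERT pre-training graph, savers and IPU configuration.

    Returns a GraphOps tuple with the graph, session, init op, training op,
    placeholders, infeed iterator, outfeed, savers and trainable variables.
    """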
    train_graph = tf.Graph()
    strategy = None

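    # When launched via poprun, build the graph inside a popdist
    # distribution strategy so outputs can be reduced across workers.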
    if opts['use_popdist']:
        strategy = create_popdist_strategy()

    with train_graph.as_default(), ExitStack() as stack:
        if strategy:
            stack.enter_context(strategy.scope())

        if opts["groupbert"]:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.GroupBertConfig(vocab_size=None))
        else:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.BertConfig(vocab_size=None))

        bert_config.dtype = tf.float32 if opts[
            "precision"] == '32' else tf.float16

        # define placeholders
        placeholders = {
            'learning_rate': tf.placeholder(tf.float32, shape=[]),
            'loss_scaling': tf.placeholder(tf.float32, shape=[])
        }
        learning_rate = placeholders['learning_rate']
        loss_scaling = placeholders['loss_scaling']

        # Define the input; datasets must be created outside the IPU device scope.
        train_iterator = ipu.ipu_infeed_queue.IPUInfeedQueue(
            data_loader.load(opts, is_training=is_training))
        # define output
        outfeed_queue = ipu.ipu_outfeed_queue.IPUOutfeedQueue()

        # Build the network with pipelining
        def bert_net():
            return build_network(train_iterator, outfeed_queue, bert_config,
                                 opts, learning_rate, loss_scaling,
                                 is_training)

        with ipu.scopes.ipu_scope('/device:IPU:0'):
            train = training_step_with_infeeds_and_outfeeds(
                train_iterator, outfeed_queue, bert_config, opts,
                learning_rate, loss_scaling, is_training)

        # get result from outfeed queue
        outfeed = outfeed_queue.dequeue()

        if strategy:
            # Take the mean of all the outputs across the distributed workers
            outfeed = [
                strategy.reduce(tf.distribute.ReduceOp.MEAN, v)
                for v in outfeed
            ]

        if opts['distributed_worker_index'] == 0 or opts['log_all_workers']:
            log.print_trainable_variables(opts)

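        # Two savers: 'restore' optionally includes the optimiser state when
        # restoring, 'train_saver' writes checkpoints, keeping the last five.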
        model_and_optimiser_variables = tf.global_variables()
        model_variables = tf.trainable_variables() + tf.get_collection(
            tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)
        restore = tf.train.Saver(
            var_list=model_and_optimiser_variables
            if opts['restore_optimiser_from_checkpoint'] else model_variables)

        train_saver = tf.train.Saver(
            var_list=model_and_optimiser_variables
            if opts['save_optimiser_to_checkpoint'] else model_variables,
            max_to_keep=5)

        ipu.utils.move_variable_initialization_to_cpu()
        train_init = tf.global_variables_initializer()
        tvars = tf.trainable_variables()

    # Calculate the number of required IPUs, rounded up to a power of two.
    num_ipus = (max(opts['device_mapping']) + 1) * opts['replicas']
    num_ipus = ipu_utils.next_power_of_two(num_ipus)

    ipu_config = ipu_utils.get_config(
        fp_exceptions=opts["fp_exceptions"],
        enable_recomputation=opts["enable_recomputation"],
        disable_graph_outlining=False,
        num_required_ipus=num_ipus,
        enable_stochastic_rounding=opts['stochastic_rounding'],
        minimum_remote_tensor_size=opts['min_remote_tensor_size'],
        max_cross_replica_sum_buffer_size=opts[
            'max_cross_replica_sum_buffer_size'],
        max_reduce_scatter_buffer_size=opts['max_reduce_scatter_buffer_size'],
        scheduler_selection=opts['scheduler'],
        compile_only=opts['compile_only'],
        ipu_id=opts['select_ipu'])

    if opts['use_popdist']:
        ipu_config = popdist.tensorflow.set_ipu_config(ipu_config,
                                                       opts['shards'],
                                                       configure_device=False)

    # Do not acquire a device, compile only.
    if opts["compile_only"]:
        ipu_config.device_connection.version = "ipu2"
        ipu_config.device_connection.enable_remote_buffers = True
        # PRE_COMPILE compiles executables without an attached IPU device
        ipu_config.device_connection.type = DeviceConnectionType.PRE_COMPILE

        # Enforce an executable cache directory, defaulting if not given
        if "TF_POPLAR_FLAGS" in os.environ:
            if ("--executable_cache_path"
                    not in os.environ["TF_POPLAR_FLAGS"]):
                print(
                    "Warning: --executable_cache_path in TF_POPLAR_FLAGS "
                    "(for 'poprun --mpi_local_args') not set. Setting to "
                    "default path: /tmp/tf_cache")
                # Append so any existing Poplar flags are preserved.
                os.environ["TF_POPLAR_FLAGS"] += \
                    " --executable_cache_path=/tmp/tf_cache"

        # TF_POPLAR_FLAGS might not be set at all
        else:
            print(
                "Warning: TF_POPLAR_FLAGS environment variable (for 'poprun "
                "--mpi_local_args') not set. --executable_cache_path must be "
                "defined when using --compile-only. Setting to default path: "
                "/tmp/tf_cache")
            os.environ[
                "TF_POPLAR_FLAGS"] = "--executable_cache_path=/tmp/tf_cache"

    ipu_config.configure_ipu_system()

    train_sess = tf.Session(graph=train_graph)

    return GraphOps(train_graph, train_sess, train_init, [train], placeholders,
                    train_iterator, outfeed, train_saver, restore, tvars)
Example 2
def build_graph(opts, iterations_per_step=1, is_training=True):
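    """Build the SQuAD inference graph and, when opts['embedded_runtime'] is
    set, a compile op that writes a Poplar executable to disk.

    Returns a (GraphOps, ipu_config) pair.
    """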

    train_graph = tf.Graph()
    with train_graph.as_default():
        bert_config = bert_ipu.BertConfig.from_dict(
            opts, config=bert_ipu.BertConfig(vocab_size=None))
        bert_config.dtype = tf.float32 if opts[
            "precision"] == '32' else tf.float16
        placeholders = dict()

        learning_rate = None
        opts['version_2_with_negative'] = False
        train_iterator = ipu_infeed_queue.IPUInfeedQueue(
            data_loader.load(opts, is_training=is_training))
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue()

        # Build the network, with or without pipelining depending on the config
        if not should_be_pipeline_when_inference(opts):

            def bert_net():
                return build_infer_network_without_pipeline(
                    train_iterator,
                    outfeed_queue,
                    iterations_per_step,
                    bert_config=bert_config,
                    opts=opts)
        else:

            def bert_net():
                return build_network(train_iterator, outfeed_queue,
                                     iterations_per_step, bert_config, opts,
                                     learning_rate, is_training)

        with ipu_scope('/device:IPU:0'):
            embedded = opts["embedded_runtime"]

            if embedded and is_training:
                raise ValueError(
                    "embedded_runtime is only to be used for inference.")

            train = ipu.ipu_compiler.compile(bert_net,
                                             []) if not embedded else None

        # For the embedded runtime, pre-compile the network into a Poplar
        # executable on disk instead of running it through the session.
        exec_path = None
        compile_op = None
        if embedded:
            exec_path = get_exec_path(
                opts['seq_length'], opts['micro_batch_size'],
                opts['device_mapping'],
                should_be_pipeline_when_inference(opts))
            compile_op = application_compile_op.experimental_application_compile_op(
                bert_net, output_path=exec_path, freeze_variables=True)

        outfeed = outfeed_queue.dequeue()

        restore = tf.train.Saver(var_list=tf.global_variables())

        ipu.utils.move_variable_initialization_to_cpu()
        train_init = tf.global_variables_initializer()
        tvars = tf.trainable_variables()

    # Calculate the number of required IPUs.
    num_ipus = (max(opts['device_mapping']) + 1) * int(opts['replicas'])
    # The number of acquired IPUs must be a power of two.
    if num_ipus & (num_ipus - 1) != 0:
        num_ipus = 2**int(math.ceil(math.log(num_ipus) / math.log(2)))
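    # e.g. a 6-IPU pipeline with one replica rounds up to 8 acquired IPUs.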
    ipu_config = get_config(
        fp_exceptions=opts["fp_exceptions"],
        enable_recomputation=opts["enable_recomputation"],
        disable_graph_outlining=False,
        num_required_ipus=num_ipus,
        enable_stochastic_rounding=opts['stochastic_rounding'],
        max_cross_replica_sum_buffer_size=opts[
            'max_cross_replica_sum_buffer_size'],
        max_reduce_scatter_buffer_size=opts['max_reduce_scatter_buffer_size'],
        scheduler_selection='CLUSTERING',
        compile_only=False,
        ipu_id=None,
        partials_type=opts["partials_type"],
        available_memory_proportion=opts['available_memory_proportion'])

    ipu_config.configure_ipu_system()

    train_sess = tf.Session(graph=train_graph)
    train_sess.run(train_init)
    # -----------------
    # Checkpoint restore and save
    init_checkpoint_path = opts['init_checkpoint']
    logger.info(f"At the checkpoint location {init_checkpoint_path}")
    if init_checkpoint_path:
        logger.info("Loading checkpoint...")
        if os.path.isfile(init_checkpoint_path):
            init_checkpoint_path = os.path.splitext(init_checkpoint_path)[0]
            logger.info(f"checkpoint path: {init_checkpoint_path}")

        (assignment_map, initialized_variable_names
         ) = bert_ipu.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint_path)

        for var in tvars:
            if var.name in initialized_variable_names:
                mark = "*"
            else:
                mark = " "
            logger.info("%-60s [%s]\t%s (%s)", var.name, mark, var.shape,
                        var.dtype.name)

        reader = tf.train.NewCheckpointReader(init_checkpoint_path)
        load_vars = reader.get_variable_to_shape_map()

        saver_restore = tf.train.Saver(assignment_map)
        saver_restore.restore(train_sess, init_checkpoint_path)
    # -----------------
    if compile_op is not None:
        logger.info(
            f"Compiling and saving Poplar executable to {exec_path}")
        train_sess.run(compile_op)
    return GraphOps(train_graph, train_sess, train_init, [train], placeholders,
                    train_iterator, outfeed, restore, tvars,
                    exec_path), ipu_config
Example 3
def build_graph(opts, iterations_per_step=1, is_training=True):
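    """Build the GLUE fine-tuning graph for the current mode
    (train / eval / predict) and return it as a GraphOps tuple."""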

    train_graph = tf.Graph()
    with train_graph.as_default():
        if opts["groupbert"]:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.GroupBertConfig(vocab_size=None))
        else:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.BertConfig(vocab_size=None))
        bert_config.dtype = tf.float32 if opts[
            "precision"] == '32' else tf.float16
        placeholders = dict()

        if is_training:
            placeholders['learning_rate'] = tf.placeholder(bert_config.dtype,
                                                           shape=[])
            learning_rate = placeholders['learning_rate']
        else:
            learning_rate = None

        # Load the GLUE label list (passed in via opts["pass_in"])
        label_list = opts["pass_in"][1]
        bert_config.num_lables = len(label_list)
        if opts['do_training'] and opts['current_mode'] == 'train':
            input_file = os.path.join(opts["output_dir"],
                                      f"train_{opts['task_type']}.tf_record")
        elif opts['do_eval'] and opts['current_mode'] == 'eval':
            input_file = os.path.join(opts["output_dir"],
                                      f"eval_{opts['task_type']}.tf_record")
        elif opts['do_predict'] and opts['current_mode'] == 'predict':
            input_file = os.path.join(
                opts["output_dir"], f"predict_{opts['task_type']}.tf_record")
        else:
            raise NotImplementedError()

        opts['input_file'] = input_file
        opts['drop_remainder'] = True

        train_iterator = ipu_infeed_queue.IPUInfeedQueue(
            data_loader.load(opts, is_training=is_training))
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue()

        def bert_net():
            return build_network(train_iterator, outfeed_queue,
                                 iterations_per_step, bert_config, opts,
                                 learning_rate, is_training)

        with ipu_scope('/device:IPU:0'):
            train = ipu.ipu_compiler.compile(bert_net, [])

        outfeed = outfeed_queue.dequeue()

        log.print_trainable_variables(opts)

        restore = tf.train.Saver(var_list=tf.global_variables())
        train_saver = tf.train.Saver(max_to_keep=5)

        ipu.utils.move_variable_initialization_to_cpu()
        train_init = tf.global_variables_initializer()
        tvars = tf.trainable_variables()
    """calculate the number of required IPU"""
    num_ipus = (max(opts['device_mapping']) + 1) * int(opts['replicas'])
    # The number of acquired IPUs must be the power of 2.
    if num_ipus & (num_ipus - 1) != 0:
        num_ipus = 2**int(math.ceil(math.log(num_ipus) / math.log(2)))
    ipu_config = get_config(
        fp_exceptions=opts["fp_exceptions"],
        enable_recomputation=opts["enable_recomputation"],
        disable_graph_outlining=False,
        num_required_ipus=num_ipus,
        enable_stochastic_rounding=opts['stochastic_rounding'],
        max_cross_replica_sum_buffer_size=opts[
            'max_cross_replica_sum_buffer_size'],
        max_reduce_scatter_buffer_size=opts['max_reduce_scatter_buffer_size'],
        scheduler_selection='CLUSTERING',
        compile_only=False,
        ipu_id=None,
        available_memory_proportion=opts["available_memory_proportion"])

    ipu_config.configure_ipu_system()

    train_sess = tf.Session(graph=train_graph)

    return GraphOps(train_graph, train_sess, train_init, [train], placeholders,
                    train_iterator, outfeed, train_saver, restore, tvars)
Example 4
def predict_loop(opts):
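    """Run SQuAD inference end to end: convert the examples to features,
    build and compile the graph, then write out the predictions."""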
    dataset_list = None
    if not opts['generated_data']:
        eval_examples = squad_data.read_squad_examples(opts["predict_file"],
                                                       opts,
                                                       is_training=False)
        tfrecord_dir = opts['tfrecord_dir']
        if not os.path.exists(tfrecord_dir):
            os.makedirs(tfrecord_dir)

        eval_writer = squad_data.FeatureWriter(filename=os.path.join(
            tfrecord_dir, "eval.tf_record"),
                                               is_training=False)
        eval_features = []

        tokenizer = tokenization.FullTokenizer(
            vocab_file=opts['vocab_file'], do_lower_case=opts['do_lower_case'])

        def append_feature(feature):
            eval_features.append(feature)
            eval_writer.process_feature(feature)

        # Create eval.tfrecord
        num_features = squad_data.convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=opts["seq_length"],
            doc_stride=opts["doc_stride"],
            max_query_length=opts["max_query_length"],
            is_training=False,
            output_fn=append_feature)

        eval_writer.close()

        squad_dataset = data_loader.load(opts, is_training=False)
        squad_dataset = squad_dataset.make_one_shot_iterator()
        _input_mask_array = []
        _segment_ids_array = []
        _input_ids_array = []
        _unique_ids_array = []

        # Call get_next() once, outside the loop, so the ops are created only once.
        with tf.Session() as sess:
            next_element = squad_dataset.get_next()
            is_data = True
            while is_data:
                try:
                    output = sess.run(next_element)
                    _input_mask_array.extend(output['input_mask'])
                    _segment_ids_array.extend(output['segment_ids'])
                    _input_ids_array.extend(output['input_ids'])
                    _unique_ids_array.extend(output['unique_ids'])
                except tf.errors.OutOfRangeError:
                    print("end of training dataset")
                    is_data = False

        dataset_list = [
            _input_ids_array, _input_mask_array, _segment_ids_array,
            _unique_ids_array
        ]

    iterations_per_step = 1
    predict, ipu_config = build_graph(opts,
                                      iterations_per_step,
                                      is_training=False)

    if predict.exec_path is not None:
        all_results = run_time(opts, dataset_list)
        if opts['do_predict']:
            logger.info("Writing out the predictions")
            output_dir = opts['output_dir']
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            output_prediction_file = os.path.join(output_dir,
                                                  "predictions.json")
            output_nbest_file = os.path.join(output_dir,
                                             "best_predictions.json")
            output_null_log_odds_file = os.path.join(output_dir,
                                                     "null_odds.json")
            # Keep only the features actually written to eval.tf_record.
            eval_features = eval_features[:num_features]
            squad_results.write_predictions(
                eval_examples, eval_features, all_results, opts["n_best_size"],
                opts["max_answer_length"], opts["do_lower_case"],
                output_prediction_file, output_nbest_file,
                output_null_log_odds_file, opts["version_2_with_negative"],
                opts["null_score_diff_threshold"], opts["verbose_logging"])

            predict.session.close()

            if opts['do_evaluation']:
                evaluate_squad(output_prediction_file, opts)
Example 5
def build_graph(opts, iterations_per_step=1, is_training=True):
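    """Build the graph, IPU config and session; at inference time the
    network is built with or without pipelining depending on the config."""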

    train_graph = tf.Graph()
    with train_graph.as_default():
        if opts["groupbert"]:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.GroupBertConfig(vocab_size=None))
        else:
            bert_config = bert_ipu.BertConfig.from_dict(
                opts, config=bert_ipu.BertConfig(vocab_size=None))
        bert_config.dtype = tf.float32 if opts[
            "precision"] == '32' else tf.float16
        placeholders = dict()

        if is_training:
            placeholders['learning_rate'] = tf.placeholder(bert_config.dtype,
                                                           shape=[])
            learning_rate = placeholders['learning_rate']
        else:
            learning_rate = None

        train_iterator = ipu_infeed_queue.IPUInfeedQueue(
            data_loader.load(opts, is_training=is_training))
        outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue()

        # Build the network, with or without pipelining depending on the config
        if not should_be_pipeline_when_inference(opts):

            def bert_net():
                return build_infer_network_without_pipeline(
                    train_iterator,
                    outfeed_queue,
                    iterations_per_step,
                    bert_config=bert_config,
                    opts=opts)
        else:

            def bert_net():
                return build_network(train_iterator, outfeed_queue,
                                     iterations_per_step, bert_config, opts,
                                     learning_rate, is_training)

        with ipu_scope('/device:IPU:0'):
            train = ipu.ipu_compiler.compile(bert_net, [])

        outfeed = outfeed_queue.dequeue()

        restore = tf.train.Saver(var_list=tf.global_variables())
        train_saver = tf.train.Saver(max_to_keep=5)

        ipu.utils.move_variable_initialization_to_cpu()
        train_init = tf.global_variables_initializer()
        tvars = tf.trainable_variables()
    """calculate the number of required IPU"""
    num_ipus = (max(opts['device_mapping']) + 1) * int(opts['replicas'])
    # The number of acquired IPUs must be the power of 2.
    if num_ipus & (num_ipus - 1) != 0:
        num_ipus = 2**int(math.ceil(math.log(num_ipus) / math.log(2)))
    ipu_config = get_config(
        fp_exceptions=opts["fp_exceptions"],
        enable_recomputation=opts["enable_recomputation"],
        disable_graph_outlining=False,
        num_required_ipus=num_ipus,
        enable_stochastic_rounding=opts['stochastic_rounding'],
        max_cross_replica_sum_buffer_size=opts[
            'max_cross_replica_sum_buffer_size'],
        max_reduce_scatter_buffer_size=opts['max_reduce_scatter_buffer_size'],
        scheduler_selection='CLUSTERING',
        compile_only=False,
        ipu_id=None,
        partials_type=opts["partials_type"])

    ipu_config.configure_ipu_system()

    train_sess = tf.Session(graph=train_graph)

    return GraphOps(train_graph, train_sess, train_init, [train], placeholders,
                    train_iterator, outfeed, train_saver, restore, tvars)
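
Each build_graph above returns a GraphOps tuple defined outside these
snippets. A minimal driver sketch for Example 5, assuming GraphOps is a
namedtuple whose fields follow the constructor order (graph, session, init,
ops, placeholders, iterator, outfeed, saver, restore, tvars):

graph_ops = build_graph(opts, iterations_per_step=1, is_training=True)
with graph_ops.graph.as_default():
    # Initialise the variables and the IPU infeed before the first step.
    graph_ops.session.run(graph_ops.init)
    graph_ops.session.run(graph_ops.iterator.initializer)
    # Run one training step, then drain the outfeed for the results.
    feed = {graph_ops.placeholders['learning_rate']: 1e-4}
    graph_ops.session.run(graph_ops.ops, feed_dict=feed)
    results = graph_ops.session.run(graph_ops.outfeed)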