示例#1
0
def get_config(opts):
    """Build the IpuOptions configuration described by ``opts``."""
    do_profile = opts.report

    cfg = utils.create_ipu_config(profiling=do_profile,
                                  profile_execution=do_profile,
                                  report_every_nth_execution=1)

    # A specific device id pins the config to that IPU; -1 auto-selects
    # enough IPUs for every replica of every shard.
    if opts.device_id != -1:
        cfg = utils.select_ipus(cfg, [opts.device_id])
    else:
        cfg = utils.auto_select_ipus(cfg, opts.shards * opts.replicas)

    # Optional JSON-encoded Poplar option strings from the command line.
    if opts.convolution_options:
        cfg = utils.set_convolution_options(
            cfg, json.loads(opts.convolution_options))

    if opts.matmul_options:
        cfg = utils.set_matmul_options(cfg, json.loads(opts.matmul_options))

    # Half-precision partials for both matmuls and convolutions.
    if opts.enable_half_partials:
        cfg = utils.set_matmul_options(cfg, {"partialsType": 'half'})
        cfg = utils.set_convolution_options(cfg, {"partialsType": 'half'})
    return cfg
示例#2
0
def get_ipu_config(fp_exceptions=True,
                   stochastic_rounding=True,
                   xla_recompute=False,
                   available_memory_proportion=None,
                   disable_graph_outlining=False,
                   num_ipus_required=0,
                   max_cross_replica_sum_buffer_size=0,
                   scheduler_selection='',
                   compile_only=False,
                   partials_type="half"):
    """Build the IpuOptions configuration for this model."""
    # Base configuration: report size, infeed copy merging and scheduling.
    cfg = utils.create_ipu_config(
        max_report_size=3001819596000,
        merge_infeed_io_copies=True,
        always_rearrange_copies_on_the_host=False,
        selection_order=utils.SelectionOrder.AUTO,
        disable_graph_outlining=disable_graph_outlining,
        max_cross_replica_sum_buffer_size=max_cross_replica_sum_buffer_size,
        scheduler_selection=scheduler_selection)

    cfg = utils.auto_select_ipus(cfg, num_ipus_required)

    cfg = utils.set_matmul_options(cfg, clear_pass_type=True)

    # Optional temporary-memory budget shared by matmuls and convolutions.
    if available_memory_proportion is not None:
        amp_options = {
            "availableMemoryProportion": str(available_memory_proportion),
            "partialsType": partials_type,
        }
        cfg = utils.set_convolution_options(cfg, dict(amp_options))
        cfg = utils.set_matmul_options(cfg, dict(amp_options))

    cfg = utils.set_norm_options(cfg, use_stable_statistics=True)

    cfg = utils.set_recomputation_options(cfg, allow_recompute=xla_recompute)

    # Compile-only mode builds the executable without attaching to a device.
    if compile_only:
        cfg = utils.set_ipu_connection_type(cfg,
                                            utils.DeviceConnectionType.NEVER,
                                            ipu_version=2,
                                            enable_remote_buffers=True)

    # Floating point exception traps plus stochastic rounding control.
    cfg = utils.set_floating_point_behaviour_options(cfg,
                                                     inv=fp_exceptions,
                                                     div0=fp_exceptions,
                                                     oflo=fp_exceptions,
                                                     esr=stochastic_rounding,
                                                     nanoo=fp_exceptions)
    return cfg
示例#3
0
def get_config(prng=False,
               ipu_id=-1,
               shards=1,
               number_of_replicas=1,
               max_cross_replica_buffer_size=10*1024*1024,
               merge_infeed_io_copies=True,
               fp_exceptions=True,
               xla_recompute=False,
               seed=None,
               profile=None,
               availableMemoryProportion=None,
               stable_norm=False):
    """Builds ipu_options.

    Args:
        prng: Enable the hardware PRNG (also enables stochastic rounding).
        ipu_id: Specific IPU device id to use, or -1 to auto-select.
        shards: Number of shards the graph is split over.
        number_of_replicas: Number of data-parallel replicas.
        max_cross_replica_buffer_size: Cross-replica sum buffer limit (bytes).
        merge_infeed_io_copies: Merge infeed host-to-device copies.
        fp_exceptions: Trap invalid/div0/overflow floating point exceptions.
        xla_recompute: Allow recomputation of activations.
        seed: If not None, request deterministic workers.
        profile: One of the ``profile_exec_modes`` keys, or None to disable.
        availableMemoryProportion: Temporary-memory budget for convolutions.
        stable_norm: Use numerically stable statistics in norm layers.

    Returns:
        The configured IpuOptions object.
    """

    profile_exec_modes = {"NO_PROFILE": ExecutionProfileType.NO_PROFILE,
                          "TILE_PROFILE": ExecutionProfileType.TILE_PROFILE,
                          "DEVICE_PROFILE": ExecutionProfileType.DEVICE_PROFILE,
                          "IPU_PROFILE": ExecutionProfileType.IPU_PROFILE}

    config = utils.create_ipu_config(max_cross_replica_sum_buffer_size=max_cross_replica_buffer_size,
                                     merge_infeed_io_copies=merge_infeed_io_copies,
                                     always_rearrange_copies_on_the_host=False,
                                     profiling=profile is not None,
                                     profile_execution=profile_exec_modes[profile] if profile else None)

    # Opt into GCL collectives with dedicated IO tiles when requested.
    if "GCL_REAL_COLLECTIVES" in os.environ:
        config = utils.set_gcl_options(config, num_io_tiles=128, gcl_options={"useGclCollectives": "true", })

    if ipu_id == -1:
        config = utils.auto_select_ipus(config, number_of_replicas*shards)
    else:
        config = utils.select_ipus(config, [ipu_id])
    config = utils.set_compilation_options(config, {
        "device.clearAtomicFlagAfterExchange": "false",
        "prng.enable": "true" if prng else "false",
        "target.deterministicWorkers": "false" if seed is None else "true",
    })

    if availableMemoryProportion is not None:
        config = utils.set_convolution_options(config, {
            "availableMemoryProportion": str(availableMemoryProportion)
        })

    if stable_norm:
        config = utils.set_norm_options(config, use_stable_statistics=True)

    if xla_recompute:
        # Fix: capture the returned config (was discarded), matching the
        # assignment style of every other utils.* call in this function.
        config = utils.set_recomputation_options(config, allow_recompute=True)

    config = utils.set_floating_point_behaviour_options(config, inv=fp_exceptions, div0=fp_exceptions,
                                                        oflo=fp_exceptions, esr=prng, nanoo=True)

    return config
示例#4
0
def get_report(loop_op: tf.Operation,
               infeed_queue_initializer: tf.Operation,
               outfeed_op: tf.Operation,
               report_dest: str,
               available_memory_proportion: Optional[float] = 0.6) -> None:
    """Generate report from running model on IPU and save to disk.

    Args:
        loop_op: Inference op to generate report on.
        infeed_queue_initializer: Initializer for the infeed queue
        outfeed_op: Outfeed operator.
        report_dest: Location to store report.
        available_memory_proportion: Proportion of tile memory available as temporary memory
        for matmul and convolution execution

    """
    # Set compile and device options
    # Run on the IPU simulator rather than real hardware.
    os.environ["TF_POPLAR_FLAGS"] += " --use_ipu_model"
    # NOTE(review): report_mode is read from enclosing module scope — confirm
    # it is defined before this function is called.
    use_poplar_text_report = report_mode == 'text'
    opts = ipu_utils.create_ipu_config(
        profiling=True,
        use_poplar_text_report=use_poplar_text_report,
        profile_execution=True)
    # Apply the same memory proportion to matmuls and convolutions.
    opts = ipu_utils.set_matmul_options(opts,
                                        matmul_options={
                                            "availableMemoryProportion":
                                            str(available_memory_proportion)
                                        })
    opts = ipu_utils.set_convolution_options(
        opts,
        convolution_options={
            "availableMemoryProportion": str(available_memory_proportion)
        })
    # NOTE(review): a one-element list is passed here; other call sites pass a
    # plain int count to auto_select_ipus — confirm intended. The return value
    # is also discarded (the utils mutate opts in place, presumably).
    ipu_utils.auto_select_ipus(opts, [1])
    ipu_utils.configure_ipu_system(opts)

    # The event trace op must live on the CPU device.
    with tf.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
    session = tf.Session()
    session.run(infeed_queue_initializer)
    # Execute the model once so the trace contains execution events.
    session.run(loop_op, options=run_options)
    session.run(outfeed_op, options=run_options)
    out = session.run(report)
    if report_mode == 'text':
        # extract the report
        rep = ipu_utils.extract_all_strings_from_event_trace(out)
        logging.info("Writing profiling report to %s" % report_dest)
        with open(report_dest, "w") as f:
            f.write(rep)
    else:
        save_tf_report(out)
示例#5
0
def get_config(opts):
    """Create the IPU device configuration requested by ``opts``."""
    want_profile = opts.cycle_report

    cfg = utils.create_ipu_config(profiling=want_profile,
                                  profile_execution=want_profile,
                                  report_every_nth_execution=1)

    # Pin to an explicit device id when given; otherwise auto-select
    # enough IPUs to cover the requested number of shards.
    if opts.device_id != -1:
        cfg = utils.select_ipus(cfg, [opts.device_id])
    else:
        cfg = utils.auto_select_ipus(cfg, [opts.shards or 1])

    # Optional JSON-encoded convolution options from the command line.
    if opts.convolution_options:
        conv_opts = json.loads(opts.convolution_options)
        cfg = utils.set_convolution_options(cfg, conv_opts)
    return cfg
示例#6
0
def get_config(prng=False,
               ipu_id=-1,
               shards=1,
               number_of_replicas=1,
               max_cross_replica_buffer_size=10 * 1024 * 1024,
               merge_infeed_io_copies=True,
               fp_exceptions=True,
               xla_recompute=False,
               seed=None,
               profile=False,
               availableMemoryProportion=None):
    """Builds ipu_options.

    Args:
        prng: Enable the hardware PRNG (also drives stochastic rounding).
        ipu_id: Specific IPU device id to use, or -1 to auto-select.
        shards: Number of shards the graph is split over.
        number_of_replicas: Number of data-parallel replicas.
        max_cross_replica_buffer_size: Cross-replica sum buffer limit (bytes).
        merge_infeed_io_copies: Merge infeed host-to-device copies.
        fp_exceptions: Trap invalid/div0/overflow floating point exceptions.
        xla_recompute: Allow recomputation of activations.
        seed: If not None, request deterministic workers.
        profile: Enable compilation and execution profiling.
        availableMemoryProportion: Temporary-memory budget for convolutions.

    Returns:
        The configured IpuOptions object.
    """
    config = utils.create_ipu_config(
        max_cross_replica_sum_buffer_size=max_cross_replica_buffer_size,
        merge_infeed_io_copies=merge_infeed_io_copies,
        always_rearrange_copies_on_the_host=False,
        profiling=profile,
        profile_execution=profile)
    if ipu_id == -1:
        config = utils.auto_select_ipus(config, number_of_replicas * shards)
    else:
        config = utils.select_ipus(config, [ipu_id])
    config = utils.set_compilation_options(
        config, {
            "device.clearAtomicFlagAfterExchange": "false",
            "prng.enable": "true" if prng else "false",
            "target.deterministicWorkers": "false" if seed is None else "true",
        })

    if availableMemoryProportion is not None:
        config = utils.set_convolution_options(
            config,
            {"availableMemoryProportion": str(availableMemoryProportion)})

    if xla_recompute:
        # Fix: capture the returned config (was discarded), matching the
        # assignment style of every other utils.* call in this function.
        config = utils.set_recomputation_options(config, allow_recompute=True)

    config = utils.set_floating_point_behaviour_options(config,
                                                        inv=fp_exceptions,
                                                        div0=fp_exceptions,
                                                        oflo=fp_exceptions,
                                                        esr=prng,
                                                        nanoo=True)

    return config
示例#7
0
def get_config(prng=False,
               ipu_id=-1,
               shards=1,
               number_of_replicas=1,
               max_cross_replica_buffer_size=10 * 1024 * 1024,
               merge_infeed_io_copies=True,
               fp_exceptions=True,
               half_partials=False,
               conv_dithering=False,
               xla_recompute=False,
               seed=None,
               profile=None,
               availableMemoryProportion=None,
               stable_norm=False,
               internalExchangeOptimisationTarget=None,
               limitVertexState=None):
    """Builds ipu_options.

    Args:
        prng: Enable the hardware PRNG (also enables stochastic rounding).
        ipu_id: Specific IPU device id to use, or -1 to auto-select.
        shards: Number of shards the graph is split over.
        number_of_replicas: Number of data-parallel replicas.
        max_cross_replica_buffer_size: Cross-replica sum buffer limit (bytes).
        merge_infeed_io_copies: Merge infeed host-to-device copies.
        fp_exceptions: Trap invalid/div0/overflow floating point exceptions.
        half_partials: Use half-precision partials in matmuls/convolutions.
        conv_dithering: Enable convolution dithering.
        xla_recompute: Allow recomputation of activations.
        seed: If not None, request portable deterministic workers.
        profile: One of the ``profile_exec_modes`` keys, or None to disable.
        availableMemoryProportion: Temporary-memory budget for convolutions.
        stable_norm: Use numerically stable statistics in norm layers.
        internalExchangeOptimisationTarget: Poplar exchange optimisation goal.
        limitVertexState: If set, limit vertex state to the lower 256K.

    Returns:
        The configured IpuOptions object.
    """

    profile_exec_modes = {
        "NO_PROFILE": ExecutionProfileType.NO_PROFILE,
        "TILE_PROFILE": ExecutionProfileType.TILE_PROFILE,
        "DEVICE_PROFILE": ExecutionProfileType.DEVICE_PROFILE,
        "IPU_PROFILE": ExecutionProfileType.IPU_PROFILE
    }

    config = utils.create_ipu_config(
        merge_infeed_io_copies=merge_infeed_io_copies,
        always_rearrange_copies_on_the_host=False,
        profiling=profile is not None,
        profile_execution=profile_exec_modes[profile] if profile else None)

    config = utils.set_optimization_options(
        config,
        max_cross_replica_sum_buffer_size=max_cross_replica_buffer_size)

    if ipu_id == -1:
        config = utils.auto_select_ipus(config, number_of_replicas * shards)
    else:
        config = utils.select_ipus(config, [ipu_id])
    config = utils.set_compilation_options(
        config, {
            "device.clearAtomicFlagAfterExchange": "false",
            "prng.enable": "true" if prng else "false",
            "target.deterministicWorkers":
            "false" if seed is None else "portable",
        })

    if internalExchangeOptimisationTarget is not None:
        # Fix: capture the returned config (was discarded), matching the
        # limitVertexState branch below and every other utils.* call here.
        config = utils.set_compilation_options(
            config, {
                "opt.internalExchangeOptimisationTarget":
                internalExchangeOptimisationTarget
            })

    if limitVertexState is not None:
        config = utils.set_compilation_options(
            config, {
                "opt.limitVertexStateToLower256K":
                "true" if limitVertexState else "false"
            })

    if availableMemoryProportion is not None:
        config = utils.set_convolution_options(
            config,
            {"availableMemoryProportion": str(availableMemoryProportion)})

    if half_partials:
        config = utils.set_convolution_options(config,
                                               {"partialsType": 'half'})
        config = utils.set_matmul_options(config, {"partialsType": 'half'})

    if conv_dithering:
        config = utils.set_convolution_options(config,
                                               {"enableConvDithering": "true"})

    if stable_norm:
        config = utils.set_norm_options(config, use_stable_statistics=True)

    if xla_recompute:
        # Fix: capture the returned config (was discarded).
        config = utils.set_recomputation_options(config, allow_recompute=True)

    config = utils.set_floating_point_behaviour_options(config,
                                                        inv=fp_exceptions,
                                                        div0=fp_exceptions,
                                                        oflo=fp_exceptions,
                                                        esr=prng,
                                                        nanoo=True)

    return config
示例#8
0
# Build a (assign_op, placeholder) pair per trainable variable so parameter
# values can be fed in from the host later.
for var in tf.trainable_variables():
    placeholder = tf.placeholder(var.dtype, var.shape,
                                 var.name.split(':')[0] + '_setter')
    param_setters[var.name] = (tf.assign(var, placeholder), placeholder)

# Capture IPU event trace for reporting
if REPORT:
    with tf.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

# Setup IPU configuration and build session
cfg = ipu.utils.create_ipu_config(profiling=REPORT,
                                  use_poplar_text_report=False,
                                  profile_execution=REPORT)
cfg = ipu.utils.auto_select_ipus(cfg, num_ipus=NUM_IPUS)
# NOTE(review): bare `utils` is used here while the surrounding calls use
# `ipu.utils` — confirm both names refer to the same module at import time.
cfg = utils.set_convolution_options(
    cfg, convolution_options={"availableMemoryProportion": "0.4"})
ipu.utils.configure_ipu_system(cfg)
ipu.utils.move_variable_initialization_to_cpu()
outfeed = outfeed_queue.dequeue()

# Calculate total flops for graph (experimental)
run_meta = tf.RunMetadata()
opts = tf.profiler.ProfileOptionBuilder.float_operation()
flops = tf.profiler.profile(tf.get_default_graph(),
                            run_meta=run_meta,
                            cmd='op',
                            options=opts)
print("Total FLOPs reported by TF is: ", flops.total_float_ops)
with tf.Session() as sess:
示例#9
0
def run_inference(loop_op: tf.Operation,
                  infeed_queue_initializer: tf.Operation,
                  outfeed_op: tf.Operation,
                  batch_size: int,
                  batches_per_step: int,
                  network_name: str,
                  decode_predictions: Callable,
                  ground_truth: Tuple[str],
                  num_iterations: Optional[int] = 500,
                  num_ipus: Optional[int] = 1,
                  mode: Optional[str] = "single_ipu",
                  data: Optional[str] = "real",
                  available_memory_proportion: Optional[float] = 0.6) -> None:
    """Run inference on device and decode predictions.

    Args:
        loop_op: Inference op.
        infeed_queue_initializer: Initializer for the infeed queue.
        outfeed_op: Outfeed operator to extract results.
        batch_size: Batch size per forward pass.
        batches_per_step: Number of forward passes per step.
        network_name: Name of this network, to use in frames_per_second plot filename.
        decode_predictions: Function to decode predictions with.
        ground_truth: Ground-truth labels.
        num_iterations: Number of iterations to run the inference, if running in a loop.
        num_ipus: Number of ipus to run the inference on.
        mode: Mode of inference - {"single_ipu", "replicated", "sharded"}
        data: Run on real data transferred from host or on random synthetic data generated on device.
        available_memory_proportion: Proportion of tile memory available as temporary memory for
        matmul and convolution execution

    """
    # Set compile and device options (profiling disabled for benchmarking)
    opts = ipu_utils.create_ipu_config(profiling=False,
                                       profile_execution=False,
                                       use_poplar_text_report=False)
    # Apply the same temporary-memory budget to matmuls and convolutions.
    opts = ipu_utils.set_matmul_options(opts,
                                        matmul_options={
                                            "availableMemoryProportion":
                                            str(available_memory_proportion)
                                        })
    opts = ipu_utils.set_convolution_options(
        opts,
        convolution_options={
            "availableMemoryProportion": str(available_memory_proportion)
        })

    # In replicated mode every IPU runs a replica; throughput scales by
    # num_replicas in the fps computation below.
    if mode == 'replicated':
        num_replicas = num_ipus
        os.environ["TF_POPLAR_FLAGS"] += " --force_replicated_mode"
    else:
        num_replicas = 1
    cfg = ipu_utils.auto_select_ipus(opts, num_ipus)
    ipu_utils.configure_ipu_system(cfg)
    with tf.Session() as session:
        session.run(infeed_queue_initializer)
        fps = []
        for iter_count in range(num_iterations):
            start = time.time()
            session.run(loop_op)
            predictions = session.run(outfeed_op)
            stop = time.time()
            # Images per second for this step, across all replicas.
            fps.append(batch_size * batches_per_step * num_replicas /
                       (stop - start))
            logging.info(
                "Iter {4}: {0} Throughput using {1} data = {2:.1f} imgs/sec at batch size = {3}"
                .format(network_name, data, fps[-1], batch_size, iter_count))
            duration = stop - start
            report_string = "{:<7.3} sec/itr.".format(duration)
            report_string += "   {:5f} images/sec.".format(fps[-1])
            print(report_string)
            print("Total time: {}".format(duration))

            # Decode a random prediction per step to check functional correctness.
            if data == 'real':
                predictions = np.reshape(predictions,
                                         (-1, predictions.shape[-1]))
                index = np.random.randint(0, len(predictions))
                if network_name in ("inceptionv1", "efficientnet-s",
                                    "efficientnet-m", "efficientnet-l"):
                    # These models encode background in 0th index.
                    decoded_predictions = decode_predictions(
                        predictions[index:index + 1, 1:], top=3)
                else:
                    decoded_predictions = decode_predictions(
                        predictions[index:index + 1, :], top=3)
                labels_and_probs = [
                    (label, prob) for _, label, prob in decoded_predictions[0]
                ]
                # Map the sampled prediction back to its ground-truth label,
                # accounting for all images consumed in earlier iterations.
                print(
                    'Actual: ',
                    ground_truth[(index + num_replicas * iter_count *
                                  batches_per_step * batch_size) %
                                 len(ground_truth)])
                print('Predicted: ', labels_and_probs)

    # Drop the first 20 iterations (warm-up/compilation) from the summary.
    print("Average statistics excluding the 1st 20 iterations.")
    print(
        "-------------------------------------------------------------------------------------------"
    )
    fps = fps[20:]
    print("Throughput at bs={}, data_mode={}, data_type={}, mode={},"
          " num_ipus={}, of {}: min={}, max={}, mean={}, std={}.".format(
              batch_size, data, predictions.dtype, mode, num_ipus,
              network_name, min(fps), max(fps), np.mean(fps), np.std(fps)))
示例#10
0
def get_config(prng=False,
               ipu_id=-1,
               shards=1,
               number_of_replicas=1,
               max_cross_replica_buffer_size=10*1024*1024,
               merge_infeed_io_copies=True,
               fp_exceptions=True,
               half_partials=False,
               conv_dithering=False,
               xla_recompute=False,
               seed=None,
               profile=None,
               availableMemoryProportion=None,
               stable_norm=False,
               internalExchangeOptimisationTarget=None):
    """Builds ipu_options.

    Args:
        prng: Enable the hardware PRNG (also enables stochastic rounding).
        ipu_id: Specific IPU device id to use, or -1 to auto-select.
        shards: Number of shards the graph is split over.
        number_of_replicas: Number of data-parallel replicas.
        max_cross_replica_buffer_size: Cross-replica sum buffer limit (bytes).
        merge_infeed_io_copies: Merge infeed host-to-device copies.
        fp_exceptions: Trap invalid/div0/overflow floating point exceptions.
        half_partials: Use half-precision partials in matmuls/convolutions.
        conv_dithering: Enable convolution dithering.
        xla_recompute: Allow recomputation of activations.
        seed: If not None, request portable deterministic workers.
        profile: One of the ``profile_exec_modes`` keys, or None to disable.
        availableMemoryProportion: Temporary-memory budget for convolutions.
        stable_norm: Use numerically stable statistics in norm layers.
        internalExchangeOptimisationTarget: Poplar exchange optimisation goal.

    Returns:
        The configured IpuOptions object.

    Raises:
        ValueError: If GCL_NUM_IO_TILES is not an even value in [32, 192].
    """

    profile_exec_modes = {"NO_PROFILE": ExecutionProfileType.NO_PROFILE,
                          "TILE_PROFILE": ExecutionProfileType.TILE_PROFILE,
                          "DEVICE_PROFILE": ExecutionProfileType.DEVICE_PROFILE,
                          "IPU_PROFILE": ExecutionProfileType.IPU_PROFILE}

    config = utils.create_ipu_config(merge_infeed_io_copies=merge_infeed_io_copies,
                                     always_rearrange_copies_on_the_host=False,
                                     profiling=profile is not None,
                                     profile_execution=profile_exec_modes[profile] if profile else None)

    config = utils.set_optimization_options(config,
                                            max_cross_replica_sum_buffer_size=max_cross_replica_buffer_size)

    if "GCL_REAL_COLLECTIVES" in os.environ:
        # The GCL_NUM_IO_TILES environment variable sets how many tiles in the IPU are reserved for Graphcore Communication Library (GCL) collectives.
        iotiles = int(os.environ['GCL_NUM_IO_TILES'])
        if iotiles % 2 or iotiles < 32 or iotiles > 192:
            # Fix: the message had no placeholder, so .format(iotiles) silently
            # dropped the offending value from the error text.
            raise ValueError(
                'GCL IO Tiles ({}) must be a multiple of 2 in between 32 and 192.'.format(iotiles))

        config = utils.set_gcl_options(config, num_io_tiles=iotiles, gcl_options={
                                       "useGclCollectives": "true", })

    if ipu_id == -1:
        config = utils.auto_select_ipus(config, number_of_replicas*shards)
    else:
        config = utils.select_ipus(config, [ipu_id])
    config = utils.set_compilation_options(config, {
        "device.clearAtomicFlagAfterExchange": "false",
        "prng.enable": "true" if prng else "false",
        "target.deterministicWorkers": "false" if seed is None else "portable",
    })

    if internalExchangeOptimisationTarget is not None:
        # Fix: capture the returned config (was discarded), matching the
        # assignment style of every other utils.* call in this function.
        config = utils.set_compilation_options(config, {
            "opt.internalExchangeOptimisationTarget": internalExchangeOptimisationTarget
        })

    if availableMemoryProportion is not None:
        config = utils.set_convolution_options(config, {
            "availableMemoryProportion": str(availableMemoryProportion)
        })

    if half_partials:
        config = utils.set_convolution_options(config, {
            "partialsType": 'half'
        })
        config = utils.set_matmul_options(config, {
            "partialsType": 'half'
        })

    if conv_dithering:
        config = utils.set_convolution_options(config, {
            "enableConvDithering": "true"
        })

    if stable_norm:
        config = utils.set_norm_options(config, use_stable_statistics=True)

    if xla_recompute:
        # Fix: capture the returned config (was discarded).
        config = utils.set_recomputation_options(config, allow_recompute=True)

    config = utils.set_floating_point_behaviour_options(config, inv=fp_exceptions, div0=fp_exceptions,
                                                        oflo=fp_exceptions, esr=prng, nanoo=True)

    return config