Example #1
def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s',
                    level=log.INFO,
                    stream=sys.stdout)

    # ---------------------------Initialize inference engine core----------------------------------------------------------
    log.info('Creating Inference Engine')
    ie = IECore()

    # ---------------------------Get metrics of available devices----------------------------------------------------------
    log.info('Available devices:')
    for device in ie.available_devices:
        log.info(f'{device} :')
        log.info('\tSUPPORTED_METRICS:')
        for metric in ie.get_metric(device, 'SUPPORTED_METRICS'):
            if metric not in ('SUPPORTED_METRICS', 'SUPPORTED_CONFIG_KEYS'):
                try:
                    metric_val = ie.get_metric(device, metric)
                except TypeError:
                    metric_val = 'UNSUPPORTED TYPE'
                log.info(f'\t\t{metric}: {param_to_string(metric_val)}')
        log.info('')

        log.info('\tSUPPORTED_CONFIG_KEYS (default values):')
        for config_key in ie.get_metric(device, 'SUPPORTED_CONFIG_KEYS'):
            try:
                config_val = ie.get_config(device, config_key)
            except TypeError:
                config_val = 'UNSUPPORTED TYPE'
            log.info(f'\t\t{config_key}: {param_to_string(config_val)}')
        log.info('')

    # ----------------------------------------------------------------------------------------------------------------------
    return 0
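
These query snippets rely on imports and a param_to_string helper that are not shown here. A minimal sketch of both, assuming metric and config values may be scalars, lists, tuples, or dicts:

import logging as log
import sys

from openvino.inference_engine import IECore


def param_to_string(parameters) -> str:
    # Render a metric/config value returned by IECore as a readable string.
    if isinstance(parameters, (list, tuple)):
        return ', '.join(str(x) for x in parameters)
    if isinstance(parameters, dict):
        return ', '.join('{}: {}'.format(k, v) for k, v in parameters.items())
    return str(parameters)
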
Example #2
def main():
    ie = IECore()
    print("Available devices:")
    for device in ie.available_devices:
        print("\tDevice: {}".format(device))
        print("\tMetrics:")
        for metric in ie.get_metric(device, "SUPPORTED_METRICS"):
            metric_val = ie.get_metric(device, metric)
            print("\t\t{}: {}".format(metric, param_to_string(metric_val)))

        print("\n\tDefault values for device configuration keys:")
        for cfg in ie.get_metric(device, "SUPPORTED_CONFIG_KEYS"):
            cfg_val = ie.get_config(device, cfg)
            print("\t\t{}: {}".format(cfg, param_to_string(cfg_val)))
Example #3
def main():
    ie = IECore()
    print("Available devices:")
    for device in ie.available_devices:
        print(f"\tDevice: {device}")
        print("\tMetrics:")
        for metric in ie.get_metric(device, "SUPPORTED_METRICS"):
            try:
                metric_val = ie.get_metric(device, metric)
                print(f"\t\t{metric}: {param_to_string(metric_val)}")
            except TypeError:
                print(f"\t\t{metric}: UNSUPPORTED TYPE")

        print("\n\tDefault values for device configuration keys:")
        for cfg in ie.get_metric(device, "SUPPORTED_CONFIG_KEYS"):
            try:
                cfg_val = ie.get_config(device, cfg)
                print(f"\t\t{cfg}: {param_to_string(cfg_val)}")
            except TypeError:
                print(f"\t\t{cfg}: UNSUPPORTED TYPE")
Example #4
def main(args=None):
    try:
        # ------------------------------ 1. Parsing and validating input arguments -------------------------------------
        next_step()

        if not args:
            args = parse_args()

        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
        next_step()

        device_name = args.target_device.upper()

        ie = IECore()

        if CPU_DEVICE_NAME in device_name:
            if args.path_to_extension:
                ie.add_extension(extension_path=args.path_to_extension,
                                 device_name=CPU_DEVICE_NAME)
        if GPU_DEVICE_NAME in device_name:
            if args.path_to_cldnn_config:
                ie.set_config({'CONFIG_FILE': args.path_to_cldnn_config},
                              GPU_DEVICE_NAME)
                logger.info("GPU extensions is loaded {}".format(
                    args.path_to_cldnn_config))

        logger.info("InferenceEngine:\n{: <9}{}".format("", get_version()))
        version_string = "Device is {}\n".format(device_name)
        for device, version in ie.get_versions(device_name).items():
            version_string += "{: <9}{}\n".format("", device)
            version_string += "{: <9}{:.<24}{} {}.{}\n".format(
                "", version.description, " version", version.major,
                version.minor)
            version_string += "{: <9}{:.<24} {}\n".format(
                "", "Build", version.build_number)
        logger.info(version_string)

        # --------------------- 3. Read the Intermediate Representation of the network ---------------------------------
        next_step()

        xml_filename = os.path.abspath(args.path_to_model)
        head, tail = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No inputs info is provided')

        # --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
        next_step()

        batch_size = ie_network.batch_size
        precision = ie_network.precision

        if args.batch_size and args.batch_size != ie_network.batch_size:
            new_shapes = {}
            for key in input_info.keys():
                shape = input_info[key].shape
                layout = input_info[key].layout

                batchIndex = -1
                if ((layout == 'NCHW') or (layout == 'NCDHW')
                        or (layout == 'NHWC') or (layout == 'NDHWC')
                        or (layout == 'NC')):
                    batchIndex = 0
                elif (layout == 'CN'):
                    batchIndex = 1

                if ((batchIndex != -1)
                        and (shape[batchIndex] != args.batch_size)):
                    shape[batchIndex] = args.batch_size
                    new_shapes[key] = shape

            if (len(new_shapes) > 0):
                logger.info("Resizing network to batch = {}".format(
                    args.batch_size))
                ie_network.reshape(new_shapes)

            batch_size = args.batch_size

        logger.info("Network batch size: {}, precision {}".format(
            batch_size, precision))

        # --------------------- 5. Configuring input of the model ------------------------------------------------------
        next_step()

        for key in input_info.keys():
            if (isImage(input_info[key])):
                # Set the precision of input data provided by the user
                # Should be called before load of the network to the plugin
                input_info[key].precision = 'U8'

        # --------------------- 6. Setting device configuration --------------------------------------------------------
        next_step()

        devices = parseDevices(device_name)
        device_nstreams = parseValuePerDevice(devices, args.number_streams)
        for device in devices:
            if device == CPU_DEVICE_NAME:  ## CPU supports a few special performance-oriented keys
                ## limit threading for CPU portion of inference
                if args.number_threads:
                    ie.set_config(
                        {'CPU_THREADS_NUM': str(args.number_threads)}, device)

                if MULTI_DEVICE_NAME in device_name and GPU_DEVICE_NAME in device_name:
                    ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME)
                else:
                    # pin threads for CPU portion of inference
                    ie.set_config(
                        {'CPU_BIND_THREAD': args.infer_threads_pinning},
                        device)

                ## for pure CPU execution, more throughput-oriented execution via streams
                if args.api_type == 'async':
                    ie.set_config(
                        {
                            'CPU_THROUGHPUT_STREAMS':
                            str(device_nstreams.get(device))
                            if device in device_nstreams.keys() else
                            'CPU_THROUGHPUT_AUTO'
                        }, device)
                device_nstreams[device] = int(
                    ie.get_config(device, 'CPU_THROUGHPUT_STREAMS'))

            elif device == GPU_DEVICE_NAME:
                if args.api_type == 'async':
                    ie.set_config(
                        {
                            'GPU_THROUGHPUT_STREAMS':
                            str(device_nstreams.get(device))
                            if device in device_nstreams.keys() else
                            'GPU_THROUGHPUT_AUTO'
                        }, device)
                device_nstreams[device] = int(
                    ie.get_config(device, 'GPU_THROUGHPUT_STREAMS'))

                if MULTI_DEVICE_NAME in device_name and CPU_DEVICE_NAME in device_name:
                    ## multi-device execution with the CPU+GPU performs best with the GPU throttling hint,
                    ## which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
                    ie.set_config({'CLDNN_PLUGIN_THROTTLE': str(1)}, device)

            elif device == MYRIAD_DEVICE_NAME:
                ie.set_config(
                    {
                        'LOG_LEVEL': 'LOG_INFO',
                        'VPU_LOG_LEVEL': 'LOG_WARNING'
                    }, MYRIAD_DEVICE_NAME)

        # --------------------- 7. Loading the model to the device -----------------------------------------------------
        next_step()

        config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')}

        exe_network = ie.load_network(ie_network,
                                      device_name,
                                      config=config,
                                      num_requests=args.number_infer_requests
                                      if args.number_infer_requests else 0)

        # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
        next_step()

        ## Number of requests
        infer_requests = exe_network.requests
        nireq = len(infer_requests)

        ## Iteration limit
        niter = args.number_iterations
        if niter and args.api_type == 'async':
            niter = ((niter + nireq - 1) // nireq) * nireq
            if (args.number_iterations != niter):
                logger.warning(
                    "Number of iterations was aligned by request number "
                    "from {} to {} using number of requests {}".format(
                        args.number_iterations, niter, nireq))

        ## Time limit
        duration_seconds = 0
        if args.time:
            ## time limit
            duration_seconds = args.time
        elif not args.number_iterations:
            ## default time limit
            duration_seconds = get_duration_in_secs(device)

        # ------------------------------------ 9. Creating infer requests and filling input blobs ----------------------
        next_step()

        request_queue = InferRequestsQueue(infer_requests)

        path_to_input = os.path.abspath(
            args.path_to_input) if args.path_to_input else None
        requests_input_data = getInputs(path_to_input, batch_size,
                                        ie_network.inputs, infer_requests)

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

        progress_count = 0
        progress_bar_total_count = 10000

        output_string = "Start inference {}ronously".format(args.api_type)
        if (args.api_type == "async"):
            if output_string != "":
                output_string += ", "

            output_string += str(nireq) + " inference requests"
            device_ss = ''
            for device, nstreams in device_nstreams.items():
                if device_ss != '':
                    device_ss += ', '
                device_ss += "{} streams for {}".format(str(nstreams), device)
            if device_ss != '':
                output_string += " using " + device_ss

        output_string += ", limits: "
        if niter:
            if not duration_seconds:
                progress_bar_total_count = niter
            output_string += str(niter) + " iterations"

        if duration_seconds:
            if niter:
                output_string += ", "
            output_string += str(
                getDurationInMilliseconds(duration_seconds)) + " ms duration"

        next_step(output_string)

        ## warming up - out of scope
        infer_request = request_queue.getIdleRequest()
        if not infer_request:
            raise Exception("No idle Infer Requests!")

        if (args.api_type == 'sync'):
            infer_request.infer(requests_input_data[infer_request.id])
        else:
            infer_request.startAsync(requests_input_data[infer_request.id])

        request_queue.waitAll()
        request_queue.resetTimes()

        start_time = datetime.now()
        exec_time = (datetime.now() - start_time).total_seconds()
        iteration = 0

        progress_bar = ProgressBar(progress_bar_total_count,
                                   args.stream_output, args.progress)

        ## Start inference & calculate performance
        ## to align the number of iterations to guarantee that the last infer requests are executed in the same conditions
        while ((niter and iteration < niter)
               or (duration_seconds and exec_time < duration_seconds)
               or (args.api_type == "async" and iteration % nireq != 0)):
            infer_request = request_queue.getIdleRequest()
            if not infer_request:
                raise Exception("No idle Infer Requests!")

            if (args.api_type == 'sync'):
                infer_request.infer(requests_input_data[infer_request.id])
            else:
                infer_request.startAsync(requests_input_data[infer_request.id])
            iteration += 1

            exec_time = (datetime.now() - start_time).total_seconds()

            if niter:
                progress_bar.add_progress(1)
            else:
                ## calculate how many progress intervals are covered by current iteration.
                ## depends on the current iteration time and time of each progress interval.
                ## Previously covered progress intervals must be skipped.
                progress_interval_time = duration_seconds / progress_bar_total_count
                new_progress = int(exec_time / progress_interval_time -
                                   progress_count)
                progress_bar.add_progress(new_progress)
                progress_count += new_progress

        ## wait for the latest inference executions
        request_queue.waitAll()

        total_duration_sec = request_queue.getDurationInSeconds()
        times = request_queue.times
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if args.api_type == 'sync' else batch_size * iteration / total_duration_sec

        progress_bar.finish()

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.exec_graph_path:
            try:
                exec_graph_info = exe_network.get_exec_graph_info()
                exec_graph_info.serialize(args.exec_graph_path)
                logger.info("Executable graph is stored to {}".format(
                    args.exec_graph_path))
                del exec_graph_info
            except Exception as e:
                logging.exception(e)

        if args.perf_counts:
            for ni in range(int(nireq)):
                perf_counts = exe_network.requests[ni].get_perf_counts()
                logger.info(
                    "Pefrormance counts for {}-th infer request".format(ni))
                for layer, stats in perf_counts.items():
                    max_layer_name = 30
                    print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(
                        layer[:max_layer_name - 4] + '...' if
                        (len(layer) >= max_layer_name) else layer,
                        stats['status'],
                        'layerType: ' + str(stats['layer_type']),
                        'realTime: ' + str(stats['real_time']),
                        'cpu: ' + str(stats['cpu_time']),
                        'execType: ' + str(stats['exec_type'])))

        print("Count:      {} iterations".format(iteration))
        print("Duration:   {:.2f} ms".format(
            getDurationInMilliseconds(total_duration_sec)))
        if MULTI_DEVICE_NAME not in device_name:
            print("Latency:    {:.4f} ms".format(latency_ms))
        print("Throughput: {:.2f} FPS".format(fps))

        del exe_network
        del ie
        next_step.step_id = 0
    except Exception as e:
        logging.exception(e)
Example #5
class Benchmark:
    def __init__(self, device: str, number_infer_requests, number_iterations,
                 duration_seconds, api_type):
        self.device = device.upper()
        self.ie = IECore()
        self.nireq = number_infer_requests
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds,
                                                     self.niter, self.device)
        self.api_type = api_type
        self.device_number_streams = {}

    def __del__(self):
        del self.ie

    def add_extension(self,
                      path_to_extension: str = None,
                      path_to_cldnn_config: str = None):
        if GPU_DEVICE_NAME in self.device:
            if path_to_cldnn_config:
                self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config},
                                   GPU_DEVICE_NAME)
                logger.info(
                    'GPU extension is loaded: {}'.format(path_to_cldnn_config))
        if CPU_DEVICE_NAME in self.device or MYRIAD_DEVICE_NAME in self.device:
            if path_to_extension:
                self.ie.add_extension(extension_path=path_to_extension,
                                      device_name=CPU_DEVICE_NAME)
                logger.info(
                    'CPU extension is loaded: {}'.format(path_to_extension))

    def get_version_info(self) -> str:
        logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format(
            '', 'API version', get_version()))
        version_string = 'Device info\n'
        for device, version in self.ie.get_versions(self.device).items():
            version_string += '{: <9}{}\n'.format('', device)
            version_string += '{: <9}{:.<24}{} {}.{}\n'.format(
                '', version.description, ' version', version.major,
                version.minor)
            version_string += '{: <9}{:.<24} {}\n'.format(
                '', 'Build', version.build_number)
        return version_string

    @staticmethod
    def reshape(ie_network: IENetwork, batch_size: int):
        new_shapes = {}
        for input_layer_name, input_layer in ie_network.inputs.items():
            shape = input_layer.shape
            layout = input_layer.layout

            try:
                batch_index = layout.index('N')
            except ValueError:
                batch_index = 1 if layout == 'C' else -1

            if batch_index != -1 and shape[batch_index] != batch_size:
                shape[batch_index] = batch_size
                new_shapes[input_layer_name] = shape

        if new_shapes:
            logger.info('Resizing network to batch = {}'.format(batch_size))
            ie_network.reshape(new_shapes)

    def set_config(self,
                   number_streams: int,
                   api_type: str = 'async',
                   number_threads: int = None,
                   infer_threads_pinning: int = None):
        devices = parse_devices(self.device)
        self.device_number_streams = parse_value_per_device(
            devices, number_streams)
        for device in devices:
            if device == CPU_DEVICE_NAME:  # CPU supports a few special performance-oriented keys
                # limit threading for CPU portion of inference
                if number_threads:
                    self.ie.set_config(
                        {'CPU_THREADS_NUM': str(number_threads)}, device)

                if MULTI_DEVICE_NAME in self.device and GPU_DEVICE_NAME in self.device:
                    self.ie.set_config({'CPU_BIND_THREAD': 'NO'},
                                       CPU_DEVICE_NAME)
                else:
                    # pin threads for CPU portion of inference
                    self.ie.set_config(
                        {'CPU_BIND_THREAD': infer_threads_pinning}, device)

                # for pure CPU execution, more throughput-oriented execution via streams
                if api_type == 'async':
                    cpu_throughput = {
                        'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'
                    }
                    if device in self.device_number_streams.keys():
                        cpu_throughput['CPU_THROUGHPUT_STREAMS'] = str(
                            self.device_number_streams.get(device))
                    self.ie.set_config(cpu_throughput, device)
                    self.device_number_streams[device] = self.ie.get_config(
                        device, 'CPU_THROUGHPUT_STREAMS')

            elif device == GPU_DEVICE_NAME:
                if api_type == 'async':
                    gpu_throughput = {
                        'GPU_THROUGHPUT_STREAMS': 'GPU_THROUGHPUT_AUTO'
                    }
                    if device in self.device_number_streams.keys():
                        gpu_throughput['GPU_THROUGHPUT_STREAMS'] = str(
                            self.device_number_streams.get(device))
                    self.ie.set_config(gpu_throughput, device)
                    self.device_number_streams[device] = self.ie.get_config(
                        device, 'GPU_THROUGHPUT_STREAMS')

                if MULTI_DEVICE_NAME in self.device and CPU_DEVICE_NAME in self.device:
                    # multi-device execution with the CPU+GPU performs best with the GPU throttling hint,
                    # which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
                    self.ie.set_config({'CLDNN_PLUGIN_THROTTLE': '1'}, device)

            elif device == MYRIAD_DEVICE_NAME:
                self.ie.set_config({'LOG_LEVEL': 'LOG_INFO'},
                                   MYRIAD_DEVICE_NAME)

    def load_network(self,
                     ie_network: IENetwork,
                     perf_counts: bool,
                     number_infer_requests: int = None):
        config = {'PERF_COUNT': ('YES' if perf_counts else 'NO')}

        exe_network = self.ie.load_network(ie_network,
                                           self.device,
                                           config=config,
                                           num_requests=number_infer_requests
                                           or 0)

        return exe_network

    def infer(self, request_queue, requests_input_data, batch_size,
              progress_bar):
        progress_count = 0
        # warming up - out of scope
        infer_request = request_queue.get_idle_request()
        if not infer_request:
            raise Exception('No idle Infer Requests!')

        if self.api_type == 'sync':
            infer_request.infer(requests_input_data[infer_request.req_id])
        else:
            infer_request.start_async(
                requests_input_data[infer_request.req_id])

        request_queue.wait_all()
        request_queue.reset_times()

        start_time = datetime.now()
        exec_time = (datetime.now() - start_time).total_seconds()
        iteration = 0

        # Start inference & calculate performance
        # to align the number of iterations to guarantee that the last infer requests are executed in the same conditions
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds) or \
              (self.api_type == 'async' and iteration % self.nireq):
            infer_request = request_queue.get_idle_request()
            if not infer_request:
                raise Exception('No idle Infer Requests!')

            if self.api_type == 'sync':
                infer_request.infer(requests_input_data[infer_request.req_id])
            else:
                infer_request.start_async(
                    requests_input_data[infer_request.req_id])
            iteration += 1

            exec_time = (datetime.now() - start_time).total_seconds()

            if self.duration_seconds:
                # calculate how many progress intervals are covered by current iteration.
                # depends on the current iteration time and time of each progress interval.
                # Previously covered progress intervals must be skipped.
                progress_interval_time = self.duration_seconds / progress_bar.total_num
                new_progress = int(exec_time / progress_interval_time -
                                   progress_count)
                progress_bar.add_progress(new_progress)
                progress_count += new_progress
            elif self.niter:
                progress_bar.add_progress(1)

        # wait for the latest inference executions
        request_queue.wait_all()

        total_duration_sec = request_queue.get_duration_in_seconds()
        times = request_queue.times
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms
        if self.api_type == 'async':
            fps = batch_size * iteration / total_duration_sec
        progress_bar.finish()
        return fps, latency_ms, total_duration_sec, iteration
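
A hedged sketch of how the Benchmark class above might be driven. The model file names are placeholders, only methods defined above are called, and the same helper functions (parse_devices, parse_value_per_device, get_duration_seconds) are assumed to be importable; preparing the request queue and input data is omitted because those helpers are not shown here:

benchmark = Benchmark(device='CPU', number_infer_requests=4,
                      number_iterations=None, duration_seconds=15,
                      api_type='async')
print(benchmark.get_version_info())
benchmark.set_config(number_streams=None, infer_threads_pinning='YES')

ie_network = IENetwork('model.xml', 'model.bin')   # placeholder IR files
Benchmark.reshape(ie_network, batch_size=1)
exe_network = benchmark.load_network(ie_network, perf_counts=False,
                                     number_infer_requests=benchmark.nireq)
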
Example #6
def on_select(item):
    ax1 = fig.add_subplot(gs[2, :])
    ax2 = fig.add_subplot(gs[1, 3])
    image = plt.imread("openvino-logo.png")
    ax2.axis('off')
    ax2.imshow(image)
    if 'clear' in (item.labelstr):
        ax1.cla()

    else:
        log.basicConfig(format="[ %(levelname)s ] %(message)s",
                        level=log.INFO,
                        stream=sys.stdout)
        args = build_argparser().parse_args()
        #read input data
        if 'Async' in (item.labelstr):
            ecg_data = load.load_ecg("A00001.mat")
        else:
            ecg_data = load.load_ecg(item.labelstr)
        preproc = util.load(".")
        input_ecg = preproc.process_x([ecg_data])
        ecg_n, ecg_h, ecg_w = input_ecg.shape
        log.info("Input ecg file shape: {}".format(input_ecg.shape))

        input_ecg_plot = np.squeeze(input_ecg)

        # raw signal plot
        Fs = 1000
        N = len(input_ecg_plot)
        T = (N - 1) / Fs
        ts = np.linspace(0, T, N, endpoint=False)
        ax1.plot(ts, input_ecg_plot, label=item.labelstr, lw=2)
        ax1.set_ylabel('Amplitude')
        ax1.set_title(
            "ECG Raw signal: length - {}, Freq - 1000 Hz".format(ecg_h))
        ax1.legend(loc='upper right')

        #choose proper IRs
        if (input_ecg.shape[1] == 8960):
            model_xml = "tf_model_8960_fp16.xml"
            model_bin = os.path.splitext(model_xml)[0] + ".bin"
        elif (input_ecg.shape[1] == 17920):
            model_xml = "tf_model_17920_fp16.xml"
            model_bin = os.path.splitext(model_xml)[0] + ".bin"
        # Plugin initialization for specified device and load extensions library if specified
        log.info("OpenVINO Initializing plugin for {} device...".format(
            args.device))
        ie = IECore()

        # Read IR
        log.info("OpenVINO Reading IR...")

        net = IENetwork(model=model_xml, weights=model_bin)
        assert len(net.inputs.keys()
                   ) == 1, "Demo supports only single input topologies"

        if args.cpu_extension and 'CPU' in args.device:
            ie.add_extension(args.cpu_extension, "CPU")
        config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')}
        device_nstreams = parseValuePerDevice(args.device, None)
        if ('Async' in (item.labelstr)) and ('CPU' in (args.device)):
            ie.set_config(
                {
                    'CPU_THROUGHPUT_STREAMS':
                    str(device_nstreams.get(args.device)) if args.device
                    in device_nstreams.keys() else 'CPU_THROUGHPUT_AUTO'
                }, args.device)
            device_nstreams[args.device] = int(
                ie.get_config(args.device, 'CPU_THROUGHPUT_STREAMS'))

        #prepare input blob
        input_blob = next(iter(net.inputs))
        #load IR to plugin
        log.info("Loading network with plugin...")

        n, h, w = net.inputs[input_blob].shape
        log.info("Network input shape: {}".format(
            net.inputs[input_blob].shape))
        if 'Async' in (item.labelstr):
            exec_net = ie.load_network(net,
                                       args.device,
                                       config=config,
                                       num_requests=12)
            infer_requests = exec_net.requests
            request_queue = InferRequestsQueue(infer_requests)
        else:
            exec_net = ie.load_network(net, args.device)
        output_blob = next(iter(net.outputs))
        del net

        #Do infer
        inf_start = time.time()

        if 'Async' in (item.labelstr):
            for i in range(12):
                infer_request = request_queue.getIdleRequest()
                if not infer_request:
                    raise Exception("No idle Infer Requests!")
                infer_request.startAsync({input_blob: input_ecg})
            request_queue.waitAll()
        else:

            res = exec_net.infer({input_blob: input_ecg})

        inf_end = time.time()

        if 'Async' in (item.labelstr):
            det_time = (inf_end - inf_start) / 12
            res = exec_net.requests[0].outputs[output_blob]
        else:
            det_time = inf_end - inf_start
            res = res[output_blob]

        del exec_net
        print("[Performance] each inference time:{} ms".format(det_time *
                                                               1000))
        prediction = sst.mode(np.argmax(res, axis=2).squeeze())[0][0]
        result = preproc.int_to_class[prediction]

        ax1.set_xlabel(
            'File: {}, Intel OpenVINO Infer_perf for each input: {}ms, classification_result: {}'
            .format(item.labelstr, det_time * 1000, result),
            fontsize=15,
            color="c",
            fontweight='bold')
        ax1.grid()
Example #7
def main():
    total_timer = Timer(name='total')
    total_timer.tic()

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    # --------------------------- 1. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)
    # -----------------------------------------------------------------------------------------------------

    # ------------- 2. Load Plugin for inference engine and extensions library if specified --------------
    log.info("Loading Inference Engine")
    ie = IECore()
    log.info("Device info:")
    versions = ie.get_versions(args.device)
    print("{}{}".format(" " * 8, args.device))
    print("{}MKLDNNPlugin version ......... {}.{}".format(
        " " * 8, versions[args.device].major, versions[args.device].minor))
    print("{}Build ........... {}".format(" " * 8,
                                          versions[args.device].build_number))

    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")
        log.info("CPU extension loaded: {}".format(args.cpu_extension))

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [
            l for l in net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error(
                "Following layers are not supported by the plugin for specified device {}:\n {}"
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)
    # -----------------------------------------------------------------------------------------------------

    # --------------------------- 3. Configure input & output ---------------------------------------------
    # --------------------------- Prepare input blobs -----------------------------------------------------
    log.info("Preparing input blobs")
    assert (len(net.inputs.keys()) == 1
            ), "Sample supports topologies only with 1 input"

    input_name = next(iter(net.inputs.keys()))
    input_info = net.inputs[input_name]
    input_info.precision = 'FP32'
    log.info('input shape: {}'.format(input_info.shape))

    # --------------------------- Prepare output blobs ----------------------------------------------------
    log.info('Preparing output blobs')
    assert (len(net.outputs.keys()) == 2
            ), "Sample supports topologies only with 2 output"

    loc_out_name = "797"
    class_out_name = "741"
    assert (loc_out_name in net.outputs.keys()) and (class_out_name
                                                     in net.outputs.keys())

    loc_out_info = net.outputs[loc_out_name]
    class_out_info = net.outputs[class_out_name]

    loc_out_info.precision = "FP32"
    class_out_info.precision = "FP32"
    # -----------------------------------------------------------------------------------------------------

    # -----------------------------------------------------------------------------------------------------
    log.info("Loading model to the device")
    # cpu_throughput = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
    ie.set_config({'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'},
                  args.device)
    ie.set_config({'CPU_BIND_THREAD': 'YES'}, args.device)
    exec_net = ie.load_network(network=net,
                               device_name=args.device,
                               num_requests=0)

    infer_requests = exec_net.requests
    request_queue = InferRequestsQueue(infer_requests)
    log.info('nreqs: {}, nstream:{}'.format(
        len(infer_requests),
        ie.get_config(args.device, 'CPU_THROUGHPUT_STREAMS')))

    # --------------------------- 4. Read and preprocess input --------------------------------------------
    # -----------------------------------------------------------------------------------------------------
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)

    if args.voc_res_file and os.path.exists(args.voc_res_file):
        os.remove(args.voc_res_file)

    load_data_timer = Timer(name='load_data')
    post_process_timer = Timer(name='post_process')

    adapter = RetinaNetAdapter(input_shape=args.patch_size)

    # --------------------------- Performing inference ----------------------------------------------------
    result_all_images = defaultdict(list)
    data_loader = DataLoader(args.image_dir, args.strides, args.patch_size)
    while True:
        load_data_timer.tic()
        input_data = data_loader.next()
        load_data_timer.toc()

        if input_data is None:
            break

        infer_request = request_queue.get_idle_request()
        if not infer_request:
            raise Exception('No idle Infer Requests!')

        if infer_request.cur_meta is None:
            infer_request.start_async(input_name, input_data)
            continue

        # get result
        post_process_timer.tic()
        image_name = infer_request.cur_meta['image_name']
        x = infer_request.cur_meta['x']
        y = infer_request.cur_meta['y']

        loc_out = infer_request.request.outputs[loc_out_name][0]
        class_out = infer_request.request.outputs[class_out_name][0]

        ## start infer
        infer_request.start_async(input_name, input_data)

        ## post-process
        result = adapter.process(loc_out, class_out)
        result, _ = nms(result, thresh=0.5, keep_top_k=100)

        result[:, 0] += x
        result[:, 1] += y
        result[:, 2] += x
        result[:, 3] += y
        result_all_images[image_name].append(result)
        post_process_timer.toc()

    # wait for the latest inference executions
    request_queue.wait_all()
    post_process_timer.tic()
    for infer_request in request_queue.requests:
        # get result
        image_name = infer_request.cur_meta['image_name']
        x = infer_request.cur_meta['x']
        y = infer_request.cur_meta['y']

        loc_out = infer_request.request.outputs[loc_out_name][0]
        class_out = infer_request.request.outputs[class_out_name][0]

        ## post-process
        result = adapter.process(loc_out, class_out)
        result, _ = nms(result, thresh=0.5, keep_top_k=100)

        result[:, 0] += x
        result[:, 1] += y
        result[:, 2] += x
        result[:, 3] += y
        result_all_images[image_name].append(result)
    post_process_timer.toc()

    post_process_timer.tic()
    ## process the results for each whole image
    for image_name, result_per_image in result_all_images.items():
        result_per_image = np.concatenate(result_per_image, axis=0)
        nms_result, _ = nms(result_per_image, thresh=0.5)

        voc_format = '{} {:.4f} {} {} {} {}'
        pos_all = []
        voc_all = []
        for i in range(nms_result.shape[0]):
            x = int(nms_result[i, 0])
            y = int(nms_result[i, 1])
            w = max(int(nms_result[i, 2] - nms_result[i, 0]), 1)
            h = max(int(nms_result[i, 3] - nms_result[i, 1]), 1)
            p = float(nms_result[i, 4])
            pos = {'x': x, 'y': y, 'w': w, 'h': h, 'p': p}
            pos_all.append(pos)

            if args.voc_res_file:
                xmin = x
                ymin = y
                xmax = int(nms_result[i, 2])
                ymax = int(nms_result[i, 3])
                voc_str = voc_format.format(
                    os.path.splitext(image_name)[0], p, xmin, ymin, xmax, ymax)
                voc_all.append(voc_str)

        file_name = os.path.splitext(image_name)[0] + '.json'
        with open(os.path.join(args.result_dir, file_name), 'w') as f:
            json.dump(pos_all, f)

        if args.voc_res_file:
            with open(args.voc_res_file, 'a') as f:
                for voc_str in voc_all:
                    f.write(voc_str + '\n')

    post_process_timer.toc()
    total_timer.toc()
    # -----------------------------------------------------------------------------------------------------
    all_timers = []
    # all_timers.extend([create_anchor_timer,
    #                    read_img_timer,
    #                    preprocess_timer,
    #                    infer_timer,
    #                    adapter_timer,
    #                    patch_img_nms_timer,
    #                    whole_img_nms_timer,
    #                    add_offset_timer,
    #                    write_result_timer,
    #                    total_timer])
    all_timers.extend([load_data_timer, post_process_timer, total_timer])
    for timer in all_timers:
        log.info('{}: avg: {:.2f} ms, total: {:.2f}s'.format(
            timer.name, timer.avg * 1000, timer.total))

    log.info('infer: {:.2f}s'.format(request_queue.get_duration_in_seconds()))
    log.info("Execution successful\n")
Example #8
class Classification:
    def __init__(self, device: str, number_infer_requests, number_iterations, duration_seconds, api_type):
        self.device = device
        self.ie = IECore()
        self.nireq = number_infer_requests
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
        self.api_type = api_type
        self.device_number_streams = {}

    def __del__(self):
        del self.ie

    def add_extension(self, path_to_extension: str=None, path_to_cldnn_config: str=None):
        if GPU_DEVICE_NAME in self.device:
            if path_to_cldnn_config:
                self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME)
                logger.info('GPU extension is loaded: {}'.format(path_to_cldnn_config))
        if CPU_DEVICE_NAME in self.device or MYRIAD_DEVICE_NAME in self.device:
            if path_to_extension:
                self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME)
                logger.info('CPU extension is loaded: {}'.format(path_to_extension))

    def get_version_info(self) -> str:
        logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version()))
        version_string = 'Device info\n'
        for device, version in self.ie.get_versions(self.device).items():
            version_string += '{: <9}{}\n'.format('', device)
            version_string += '{: <9}{:.<24}{} {}.{}\n'.format('', version.description, ' version', version.major,
                                                               version.minor)
            version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number)
        return version_string

    @staticmethod
    def reshape(ie_network: IENetwork, batch_size: int):
        new_shapes = {}
        for input_layer_name, input_layer in ie_network.inputs.items():
            new_shapes[input_layer_name] = get_blob_shape(input_layer, batch_size)

        if new_shapes:
            logger.info('Resizing network to batch = {}'.format(batch_size))
            ie_network.reshape(new_shapes)

    def set_config(self, number_streams: int, api_type: str = 'async',
                   number_threads: int = None, infer_threads_pinning: int = None):
        devices = parse_devices(self.device)
        self.device_number_streams = parse_nstreams_value_per_device(devices, number_streams)
        for device_name in self.device_number_streams.keys():
            key = device_name + "_THROUGHPUT_STREAMS"
            supported_config_keys = self.ie.get_metric(device_name, 'SUPPORTED_CONFIG_KEYS')
            if key not in supported_config_keys:
                raise Exception("Device " + device_name + " doesn't support config key '" + key + "'! " +
                                "Please specify -nstreams for correct devices in format  <dev1>:<nstreams1>,<dev2>:<nstreams2>");

        for device in devices:
            if device == CPU_DEVICE_NAME:  # CPU supports a few special performance-oriented keys
                # limit threading for CPU portion of inference
                if number_threads:
                    self.ie.set_config({'CPU_THREADS_NUM': str(number_threads)}, device)

                if MULTI_DEVICE_NAME in self.device and GPU_DEVICE_NAME in self.device:
                    self.ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME)
                else:
                    # pin threads for CPU portion of inference
                    self.ie.set_config({'CPU_BIND_THREAD': infer_threads_pinning}, device)

                # for pure CPU execution, more throughput-oriented execution via streams
                if api_type == 'async':
                    cpu_throughput = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
                    if device in self.device_number_streams.keys():
                        cpu_throughput['CPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
                    self.ie.set_config(cpu_throughput, device)
                    self.device_number_streams[device] = self.ie.get_config(device, 'CPU_THROUGHPUT_STREAMS')

            elif device == GPU_DEVICE_NAME:
                if api_type == 'async':
                    gpu_throughput = {'GPU_THROUGHPUT_STREAMS': 'GPU_THROUGHPUT_AUTO'}
                    if device in self.device_number_streams.keys():
                        gpu_throughput['GPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
                    self.ie.set_config(gpu_throughput, device)
                    self.device_number_streams[device] = self.ie.get_config(device, 'GPU_THROUGHPUT_STREAMS')

                if MULTI_DEVICE_NAME in self.device and CPU_DEVICE_NAME in self.device:
                    # multi-device execution with the CPU+GPU performs best with the GPU throttling hint,
                    # which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
                    self.ie.set_config({'CLDNN_PLUGIN_THROTTLE': '1'}, device)

            elif device == MYRIAD_DEVICE_NAME:
                self.ie.set_config({'LOG_LEVEL': 'LOG_INFO'}, MYRIAD_DEVICE_NAME)

    def read_network(self, path_to_model: str):
        xml_filename = os.path.abspath(path_to_model)
        head, tail = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = self.ie.read_network(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if not input_info:
            raise AttributeError('No inputs info is provided')

        return ie_network

    def load_network(self, ie_network: IENetwork, perf_counts: bool):
        config = {'PERF_COUNT': ('YES' if perf_counts else 'NO')}

        exe_network = self.ie.load_network(ie_network,
                                           self.device,
                                           config=config,
                                           num_requests=1 if self.api_type == 'sync' else self.nireq or 0)
        # Number of requests
        self.nireq = len(exe_network.requests)
        return exe_network

    def infer(self, exe_network, batch_size, progress_bar=None):
        progress_count = 0
        infer_requests = exe_network.requests
        # warming up - out of scope
        if self.api_type == 'sync':
            infer_requests[0].infer()
        else:
            infer_requests[0].async_infer()
            status = exe_network.wait()
            if status != StatusCode.OK:
                raise Exception("Wait for all requests is failed with status code {}!".format(status))

        out_blob = next(iter(exe_network.outputs))
        start_time = datetime.utcnow()
        exec_time = 0
        iteration = 0

        times = []
        in_fly = set()
        # Start inference & calculate performance
        # to align the number of iterations to guarantee that the last infer requests are executed in the same conditions
        #(self.duration_seconds and exec_time < self.duration_seconds) or \
        while (self.niter and iteration < self.niter) or \
              (len(in_fly) == iteration*self.nireq) or \
              (self.api_type == 'async' and iteration % self.nireq):
            if self.api_type == 'sync':
                infer_requests[0].infer()
                times.append(infer_requests[0].latency)
            else:
                infer_request_id = exe_network.get_idle_request_id()
                if infer_request_id < 0:
                    status = exe_network.wait(num_requests=1)
                    if status != StatusCode.OK:
                        raise Exception("Wait for idle request failed!")
                    infer_request_id = exe_network.get_idle_request_id()
                    if infer_request_id < 0:
                        raise Exception("Invalid request id!")
                if infer_request_id in in_fly:
                    times.append(infer_requests[infer_request_id].latency)
                else:
                    in_fly.add(infer_request_id)
                infer_requests[infer_request_id].async_infer()
                #times.append(infer_requests[infer_request_id].latency)
            iteration += 1
            exec_time = (datetime.utcnow() - start_time).total_seconds()

            if progress_bar:
                if self.duration_seconds:
                    # calculate how many progress intervals are covered by current iteration.
                    # depends on the current iteration time and time of each progress interval.
                    # Previously covered progress intervals must be skipped.
                    progress_interval_time = self.duration_seconds / progress_bar.total_num
                    new_progress = int(exec_time / progress_interval_time - progress_count)
                    progress_bar.add_progress(new_progress)
                    progress_count += new_progress
                elif self.niter:
                    progress_bar.add_progress(1)

        # wait for the latest inference executions
        inference_output = []
        status = exe_network.wait()
        for infer_request in infer_requests:
            output = infer_request.outputs[out_blob]
            inference_output.append(output.tolist())
        if status != StatusCode.OK:
            raise Exception("Wait for all requests is failed with status code {}!".format(status))
        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        for infer_request_id in in_fly:
            times.append(infer_requests[infer_request_id].latency)
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if self.api_type == 'sync' else batch_size * iteration / total_duration_sec
        if progress_bar:
            progress_bar.finish()
        return inference_output, fps, latency_ms, total_duration_sec, iteration
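
For reference, a worked illustration of the throughput formulas used by the infer() methods above (all numbers are hypothetical):

batch_size, latency_ms = 1, 4.0
sync_fps = batch_size * 1000 / latency_ms                   # 250.0 FPS in sync mode
iterations, total_duration_sec = 1000, 3.2
async_fps = batch_size * iterations / total_duration_sec    # 312.5 FPS in async mode
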
Example #9
def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    # Plugin initialization for specified device and load extensions library if specified
    log.info("Initializing plugin for {} device...".format(args.device))
    device_nstreams = parseValuePerDevice(args.device, args.number_streams)
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    # Read IR
    log.info("Reading IR...")
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in args.device:
        ie.set_config(
            {
                'CPU_THROUGHPUT_STREAMS':
                str(device_nstreams.get(args.device)) if args.device
                in device_nstreams.keys() else 'CPU_THROUGHPUT_AUTO'
            }, args.device)
        device_nstreams[args.device] = int(
            ie.get_config(args.device, 'CPU_THROUGHPUT_STREAMS'))
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [
            l for l in net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error(
                "Following layers are not supported by the plugin for specified device {}:\n {}"
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)
    elif "MYRIAD" in args.device:
        ie.set_config({
            'LOG_LEVEL': 'LOG_INFO',
            'VPU_LOG_LEVEL': 'LOG_WARNING'
        }, MYRIAD_DEVICE_NAME)

    input_blob = next(iter(net.inputs))
    netoutput = iter(net.outputs)
    out_blob1 = next(netoutput)
    print("output1:", out_blob1)
    print("shape:", net.outputs[out_blob1].shape)
    out_blob2 = next(netoutput)
    print("output2:", out_blob2)
    print("shape:", net.outputs[out_blob2].shape)
    out_blob3 = next(netoutput)
    print("output3:", out_blob3)
    print("shape:", net.outputs[out_blob3].shape)

    log.info("Loading IR to the plugin...")
    config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')}

    exec_net = ie.load_network(network=net, device_name=args.device)
    # Read and pre-process input image
    n, c, h, w = net.inputs[input_blob].shape

    image = Image.open(args.input[0])
    ori_width, ori_height = image.size
    print("image ori shape:{},{}".format(ori_width, ori_height))
    boxed_image = letterbox_image(image, tuple(reversed((416, 416))))
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)
    image_data = image_data.transpose((0, 3, 1, 2))

    print("image shape:{}".format(image_data.shape))

    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    inf_start = time.time()

    output = exec_net.infer({input_blob: image_data})

    ## the synchronous inference call above has already completed at this point
    inf_end = time.time()
    det_time = inf_end - inf_start
    print("[Performance] inference time:{} ms".format(det_time * 1000))
    #post-processing part
    objects = list()
    for layer_name, out_blob in output.items():
        out_blob = out_blob.reshape(
            net.layers[net.layers[layer_name].parents[0]].shape)
        layer_params = YoloParams(net.layers[layer_name].params,
                                  out_blob.shape[2])
        log.info("Layer {} parameters: ".format(layer_name))
        layer_params.log_params()
        objects += parse_yolo_region(out_blob, image_data.shape[2:],
                                     (416, 416), layer_params,
                                     args.prob_threshold)

    objects = sorted(objects, key=lambda obj: obj['confidence'], reverse=True)
    for i in range(len(objects)):
        if objects[i]['confidence'] == 0:
            continue
        for j in range(i + 1, len(objects)):
            if intersection_over_union(objects[i],
                                       objects[j]) > args.iou_threshold:
                objects[j]['confidence'] = 0

    # Drawing objects with respect to the --prob_threshold CLI parameter
    objects = [
        obj for obj in objects if obj['confidence'] >= args.prob_threshold
    ]
    log.info("\nDetected boxes for batch {}:".format(1))
    log.info(" Class ID | Confidence | XMIN | YMIN | XMAX | YMAX | COLOR ")

    origin_im_size = (416, 416)
    for obj in objects:
        draw = ImageDraw.Draw(boxed_image)
        # Validation bbox of detected object
        if obj['xmax'] > origin_im_size[1] or obj['ymax'] > origin_im_size[
                0] or obj['xmin'] < 0 or obj['ymin'] < 0:
            continue
        color = (int(min(obj['class_id'] * 12.5,
                         255)), min(obj['class_id'] * 7,
                                    255), min(obj['class_id'] * 5, 255))
        det_label = labels_map[obj['class_id']] if labels_map and len(labels_map) >= obj['class_id'] else \
            str(obj['class_id'])
        draw.rectangle([obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']],
                       outline=color)

        del draw
        log.info("{:^9} | {:10f} | {:4} | {:4} | {:4} | {:4} | {} ".format(
            det_label, obj['confidence'], obj['xmin'], obj['ymin'],
            obj['xmax'], obj['ymax'], color))

    boxed_image.show()

    if args.perf_counts:
        for ni in range(int(args.number_infer_requests)):
            perf_counts = exec_net.requests[ni].get_perf_counts()
            log.info(
                "Performance counts for {}-th infer request".format(ni))
            for layer, stats in perf_counts.items():
                max_layer_name = 30
                print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(
                    layer[:max_layer_name - 4] + '...' if
                    (len(layer) >= max_layer_name) else layer, stats['status'],
                    'layerType: ' + str(stats['layer_type']),
                    'realTime: ' + str(stats['real_time']),
                    'cpu: ' + str(stats['cpu_time']),
                    'execType: ' + str(stats['exec_type'])))