Python DpuOverlay示例，pynq_dpu.DpuOverlay Python示例

示例#1

0

显示文件

文件： utils.py 项目： shengwenLeong/lpcvc2020_water

    def run(self):
        """Classify every JPEG in ``image_folder`` on the DPU and write the results.

        Loads the bitstream ``./bitstream/dpu.bit`` and the model
        ``./model/dpu_tf_efficientnet.elf``, splits the sorted image list into
        contiguous chunks (one per worker thread), runs ``self.run_dpu_task``
        on every chunk in its own thread and finally writes each collected
        entry of ``result`` to ``RESULT_FILE``, one entry per line.

        The DPU kernel and the DPU device are released even if a step in
        between raises.
        """
        overlay = DpuOverlay("./bitstream/dpu.bit")
        overlay.load_model("./model/dpu_tf_efficientnet.elf")
        cv2.setUseOptimized(True)
        cv2.setNumThreads(4)
        threadnum = 4

        list_image = sorted(
            name for name in os.listdir(image_folder) if name.endswith("JPEG"))
        picture_num = len(list_image)

        # Contiguous chunks of at most ceil(n / threadnum) images per thread.
        # Slicing keeps the sorted order and never hands one thread more than
        # `chunk` images; trailing threads simply get an empty list.
        chunk = math.ceil(picture_num / threadnum)
        listimage = [
            list_image[t * chunk:(t + 1) * chunk] for t in range(threadnum)
        ]
        # One independent result list per thread.
        # presumably filled by run_dpu_task at index i -- verify against it
        result = [[] for _ in range(threadnum)]

        n2cube.dpuOpen()
        try:
            kernel = n2cube.dpuLoadKernel(KERNEL_CONV)
            try:
                threadAll = [
                    threading.Thread(
                        target=self.run_dpu_task,
                        args=(kernel, i, len(listimage[i]), listimage, result))
                    for i in range(threadnum)
                ]
                for x in threadAll:
                    x.start()
                for x in threadAll:
                    x.join()

                # Run the whole data set and write the outputs to the result file.
                # Please see README and "classification_result.sample" to know
                # the result file format.
                with open(RESULT_FILE, 'w') as result_file:
                    for item in result:
                        for i in item:
                            result_file.write("%s\n" % i)
            finally:
                n2cube.dpuDestroyKernel(kernel)
        finally:
            n2cube.dpuClose()

示例#2

0

显示文件

    def __init__(self, elf_file, env):
        """Load the DPU overlay and model and allocate the I/O buffers.

        Args:
            elf_file: Path of the compiled model handed to ``load_model``.
            env: Environment object; its ``scale`` attribute is read here and
                the object is stored as ``self.env``.

        Raises:
            ValueError: If the runner reports a tensor format other than
                NCHW or NHWC.
        """
        self.overlay = DpuOverlay("dpu.bit")
        self.overlay.set_runtime("vart")
        self.overlay.load_model(elf_file)
        self.dpu = self.overlay.runner
        self.env = env
        self.scale = self.env.scale

        self.inputTensors = self.dpu.get_input_tensors()
        output_tensors = self.dpu.get_output_tensors()
        layout = self.dpu.get_tensor_format()

        # Pick the axis positions of height / width / channel for the layout.
        if layout == self.dpu.TensorFormat.NCHW:
            h_axis, w_axis, c_axis = 2, 3, 1
        elif layout == self.dpu.TensorFormat.NHWC:
            h_axis, w_axis, c_axis = 1, 2, 3
        else:
            raise ValueError("Input format error.")

        out_dims = output_tensors[0].dims
        height = out_dims[h_axis]
        width = out_dims[w_axis]
        channels = out_dims[c_axis]

        self.outputSize = height * width * channels
        self.tanh = np.empty(self.outputSize)

        # Input buffer: first dimension forced to 1, remaining dimensions
        # copied from the first input tensor.
        first_input = self.inputTensors[0]
        shape_in = (1, ) + tuple(
            first_input.dims[i] for i in range(1, first_input.ndims))
        shape_out = (1, height, width, channels)

        self.input_data = [np.empty(shape_in, dtype=np.float32, order='C')]
        self.output_data = [np.empty(shape_out, dtype=np.float32, order='C')]
        self.input = self.input_data[0]

        # Route Ctrl+C to the handler method of this object.
        signal.signal(signal.SIGINT, self.interrupt_handle)

示例#3

0

显示文件

文件： dpu_face_detection.py 项目： cetic/Face-Pynq-DPU-U96V2

    self.inputHeight = []
    self.inputWidth = []
    self.inputShape = []
    self.output0Channels = []
    self.output0Height = []
    self.output0Width = []
    self.output0Size = []
    self.output1Channels = []
    self.output1Height = []
    self.output1Width = []
    self.output1Size = []



# Script entry point: load the DPU overlay and the densebox model, start the
# face detector, then open camera 0 for capture.
if __name__ == "__main__":
    overlay = DpuOverlay("dpu.bit")
    print("[INFO] dpu overlay loaded")
    # Select the VART runtime before loading the compiled model.
    overlay.set_runtime("vart")
    overlay.load_model("dpu_densebox.elf")
    dpu = overlay.runner
    # NOTE(review): the meaning of 0.55 and 0.35 is defined by FaceDetect,
    # which is not visible here -- presumably detection/NMS thresholds; confirm.
    dpu_face_detector = FaceDetect(dpu,0.55,0.35)
    dpu_face_detector.start()
    print("[INFO] model densebox_640_360 loaded ")
    print("[INFO] starting camera input ...")
    # Open video device 0 and request a 640x480 capture size.
    cam = cv2.VideoCapture(0)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH,640)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT,480)
    if not (cam.isOpened()):
        # NOTE(review): `inputId` is not defined in the visible code; unless it
        # exists at module level this line raises NameError -- confirm.
        print("[ERROR] Failed to open camera ", inputId )
        exit()
    window_name = 'main'

示例#4

0

显示文件

def main():
  """Run the boot sequence and then the endless capture/classify/display loop.

  Boot: shows progress on the UI, loads the DPU overlay and model, creates an
  N2Cube kernel and task, loads the camera shared library through ``ctypes``,
  declares the signatures of its functions, pushes the settings from ``fh``
  into it and retries ``libcamera.initialize()`` until it reports success.

  Loop (never left under regular circumstances): starts an acquisition in a
  background thread, polls until the library reports the end of the throw,
  converts/resizes/normalizes each frame, runs it through the DPU, combines
  the per-frame softmax outputs with a weighting window and hands the top
  predictions plus one PNG-encoded frame to the UI.
  """

  # UI: DPU
  ui = UI()
  ui.update_boot_window('Initializing DPU...')

  from dnndk import n2cube
  from pynq_dpu import DpuOverlay

  # Set up the DPU IP
  overlay = DpuOverlay(str(fh.dir_dpu / fh.dpu_bit_file))
  overlay.load_model(str(fh.dir_dpu / fh.dpu_assembly_file))

  # Set up the Neural Network Runtime (N2Cube)
  kernel_name = fh.kernel_name

  kernel_conv_input = fh.kernel_conv_input
  kernel_fc_output = fh.kernel_fc_output

  n2cube.dpuOpen()
  kernel = n2cube.dpuLoadKernel(kernel_name)
  task = n2cube.dpuCreateTask(kernel, 0)

  input_tensor_size = n2cube.dpuGetInputTensorSize(task, kernel_conv_input)

  output_tensor_size = n2cube.dpuGetOutputTensorSize(task, kernel_fc_output)
  output_tensor_channel = n2cube.dpuGetOutputTensorChannel(task, kernel_fc_output)
  output_tensor_address = n2cube.dpuGetOutputTensorAddress(task, kernel_fc_output)
  output_tensor_scale = n2cube.dpuGetOutputTensorScale(task, kernel_fc_output)

  # UI: Camera
  ui.update_boot_window('Initializing Camera...')

  # libcamera
  libcamera = ctypes.CDLL(fh.dir_cam / fh.libcamera_file)

  # Getter
  libcamera.get_frame_ptr.restype = ctypes.POINTER(ctypes.c_ubyte)
  libcamera.get_frame_ptr.argtypes = [ctypes.c_uint]
  libcamera.get_throw_bgn_idx.restype = ctypes.c_uint
  libcamera.get_throw_bgn_idx.argtypes = None
  libcamera.get_throw_end_idx.restype = ctypes.c_uint
  libcamera.get_throw_end_idx.argtypes = None
  libcamera.get_throw_bgn.restype = ctypes.c_bool
  libcamera.get_throw_bgn.argtypes = None
  libcamera.get_throw_end.restype = ctypes.c_bool
  libcamera.get_throw_end.argtypes = None

  # Setter
  libcamera.set_frame_rate.restype = None
  libcamera.set_frame_rate.argtypes = [ctypes.c_double]
  libcamera.set_buff_size.restype = None
  libcamera.set_buff_size.argtypes = [ctypes.c_uint]
  libcamera.set_exposure_time.restype = None
  libcamera.set_exposure_time.argtypes = [ctypes.c_double]
  libcamera.set_camera_gain.restype = None
  libcamera.set_camera_gain.argtypes = [ctypes.c_double]
  libcamera.set_avg_diffs.restype = None
  libcamera.set_avg_diffs.argtypes = [ctypes.c_uint]
  libcamera.set_threshold_mult.restype = None
  libcamera.set_threshold_mult.argtypes = [ctypes.c_double]
  libcamera.set_frames_to_acquire.restype = None
  libcamera.set_frames_to_acquire.argtypes = [ctypes.c_uint]

  # Camera
  libcamera.initialize.restype = ctypes.c_int
  libcamera.initialize.argtypes = None
  libcamera.reset_global_variables.restype = None
  libcamera.reset_global_variables.argtypes = None
  libcamera.start_acquisition.restype = ctypes.c_int
  libcamera.start_acquisition.argtypes = None
  libcamera.terminate.restype = ctypes.c_int
  libcamera.terminate.argtypes = None

  # Set the global variables according to the module `fhnwtoys.settings`
  libcamera.set_frame_rate(fh.frame_rate)
  libcamera.set_buff_size(fh.buff_size)
  libcamera.set_exposure_time(fh.exposure_time)
  libcamera.set_camera_gain(fh.camera_gain)
  libcamera.set_avg_diffs(fh.avg_diffs)
  libcamera.set_threshold_mult(fh.threshold_mult)
  libcamera.set_frames_to_acquire(fh.frames_to_acquire)

  # Initialize Camera
  initialize = fh.ReturnCodes.NOT_INITIALIZED
  initialization_tries = 0

  # Retry until the library reports success; from the second attempt on the
  # previous return code is shown on the boot window.
  while initialize != fh.ReturnCodes.SUCCESS:
    if initialization_tries > 0:
      try:
        return_code = fh.ReturnCodes(initialize).name
      except ValueError:
        # Unknown code: show the raw value instead of an enum name
        return_code = initialize
      ui.update_boot_window(f'Camera Error ({return_code}), try to replug the camera.')
    initialize = libcamera.initialize()
    initialization_tries += 1

  # UI: Ready
  ui.update_boot_window('READY')

  # Set up the `frames` array
  frames = np.empty((fh.frames_to_consider,) + fh.bgr_shape, dtype=np.uint8)

  while True:
    # Reset the predictions
    predictions = np.zeros((fh.frames_to_consider, fh.num_objects), dtype=np.float32)

    # Start acquisition (threaded)
    # todo: error handling ('Unexpected Error, system reboot required.')
    # start_acquisition = libcamera.start_acquisition() # non threaded approach
    t = Thread(target=libcamera.start_acquisition) # threaded approach (process due to ctypes)
    t.start()

    # Wait until the throw has ended (the Ultra96-V2 is not powerful enough to process the data during the acquisition)
    while not libcamera.get_throw_end():
      pass

    throw_bgn_idx = libcamera.get_throw_bgn_idx()
    throw_end_idx = libcamera.get_throw_end_idx()

    num_frames = throw_end_idx - throw_bgn_idx - 1 # Ignore the last two captured frames

    # Image processing (including inference)
    for idx, frame_id in enumerate(range(throw_bgn_idx, throw_end_idx - 1)):
      frame_ptr = libcamera.get_frame_ptr(frame_id)
      raw_frame = np.ctypeslib.as_array(frame_ptr, shape=fh.raw_shape) # Raw Baumer BayerRG8 frame
      # Transform Baumer BayerRG8 to BGR8 (Baumer BayerRG = OpenCV BayerBG)
      frames[idx] = cv2.cvtColor(raw_frame, cv2.COLOR_BayerBG2BGR) # Color space conversion
      # Image scaling using nearest-neighbor interpolation
      frame_resized = cv2.resize(frames[idx], fh.inf_dsize, interpolation=fh.Interpolation.NEAREST)
      frame_inference = frame_resized.astype(np.float32) / 255.0 # Normalization (float32 precision)

      # Inference
      n2cube.dpuSetInputTensorInHWCFP32(task, kernel_conv_input, frame_inference, input_tensor_size)
      n2cube.dpuRunTask(task)

      # Softmax function (normalized exponential function)
      # Confident predictions lead to all zeros and a NaN, when run through `n2cube.dpuRunSoftmax(.)`
      # This section replaces the first occurrence of NaN in the `prediction` array with 1.0 and sets everything else to 0.0
      prediction = n2cube.dpuRunSoftmax(output_tensor_address, output_tensor_channel, output_tensor_size//output_tensor_channel, output_tensor_scale)
      nan = np.isnan(prediction)
      if nan.any():
        nan_idx = nan.argmax() # returns the index of the first occurrence of NaN
        prediction = np.zeros((fh.num_objects,), dtype=np.float32)
        prediction[nan_idx] = 1.0
      predictions[idx] = prediction

      # Only consider `fh.frames_to_consider` frames
      if idx == fh.frames_to_consider - 1: # (-1: idx starts with 0)
        break

    num_frames_considered = min(fh.frames_to_consider, num_frames)

    window = sine_squared_window(num_frames, num_frames_considered) # weighting function
    weighted_prediction = np.matmul(window, predictions) / np.sum(window) # computation of the weighted prediction

    # UI: Prepare data for the UI
    weighted_prediction_percent = weighted_prediction * 100
    weighted_prediction_sorted = np.sort(weighted_prediction_percent)[::-1]
    weighted_prediction_argsorted = np.argsort(weighted_prediction_percent)[::-1]

    # this is the index of the best guess (computed by weighting the `fh.frames_to_consider` frames)
    guess_idx = weighted_prediction_argsorted[0]

    relevant_pct_ui = np.asarray(weighted_prediction_percent >= 1.0).nonzero()[0] # value of prediction must be at least 1.0%
    relevant_pct_ui_len = len(relevant_pct_ui)
    predictions_ui_len = min(4, relevant_pct_ui_len) # show at most Top 4

    predictions_ui = [] # the object names
    percentages_ui = np.empty((predictions_ui_len + 1,), dtype=np.float32) # the percentages (+1: 'Others')
    for i, w in enumerate(weighted_prediction_argsorted[0:predictions_ui_len]):
      predictions_ui.append(fh.objects_ui[w])
      percentages_ui[i] = weighted_prediction_percent[w]

    # the object names
    predictions_ui.append('Others')

    # the percentages
    percentages_ui[-1] = np.sum(weighted_prediction_sorted[predictions_ui_len:])
    percentages_ui = lrm_round(percentages_ui)

    # the frame: pick the frame with the largest weighted score for the best guess
    weighted_guesses = np.multiply(window, predictions[:, guess_idx])
    frame_ui_idx = weighted_guesses.argmax()

    frame_ui_resized = cv2.resize(frames[frame_ui_idx], fh.ui_dsize, interpolation=fh.Interpolation.NEAREST)
    _, frame_ui_png = cv2.imencode('.png', frame_ui_resized)
    frame_ui = frame_ui_png.tobytes() # the frame

    # UI: Show results (drop the 'Others' entry when it rounds to zero)
    if percentages_ui[-1] == 0.0:
      predictions_ui = predictions_ui[:-1]
      percentages_ui = percentages_ui[:-1]

    # UI: Inference
    ui.update_inference_window(predictions_ui, percentages_ui, frame_ui)

    # Wait until the camera thread (process due to ctypes) is terminated
    t.join()

    # Reset the global variables (has to be done manually to avoid race conditions)
    libcamera.reset_global_variables()

  # Under regular circumstances, this section should never be reached

  # Terminate Camera
  libcamera.terminate()

  # Clean up the DPU IP: the task first, then the kernel it was created from,
  # then the device opened with `dpuOpen()`
  n2cube.dpuDestroyTask(task)
  n2cube.dpuDestroyKernel(kernel)
  n2cube.dpuClose()

示例#5

0

显示文件

import os
from pynq_dpu import DpuOverlay
# Load the DPU bitstream, then run the `dexplorer -w` command through the shell.
# NOTE(review): `dexplorer -w` presumably prints the DPU signature -- confirm.
overlay = DpuOverlay("dpu.bit")
os.system("dexplorer -w")

示例#6

0

显示文件

文件： parking.py 项目： BhavyanshM/ParkingApp

from pynq_dpu import DpuOverlay
# Load the DPU bitstream and the compiled model file "dpu_tf_yolov3.elf".
overlay = DpuOverlay("dpu.bit")
overlay.load_model("dpu_tf_yolov3.elf")

import numpy as np
import random
import cv2
import colorsys
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
#%matplotlib inline
from pynq_dpu.edge.dnndk.tf_yolov3_voc_py.tf_yolov3_voc import *

# Flat list of anchor values; reshaped below into rows of two values each.
# NOTE(review): presumably (width, height) anchor boxes for the model -- confirm.
anchor_list = [
    10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373,
    326
]
anchor_float = list(map(float, anchor_list))
anchors = np.array(anchor_float).reshape(-1, 2)

# Class names are read by `get_class` from the file below.
classes_path = "files/voc_classes.txt"
class_names = get_class(classes_path)

# One colour per class: hues spread evenly over [0, 1) at full saturation and
# value, converted to RGB and scaled to integer triples in 0..255.
num_classes = len(class_names)
hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
colors = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
colors = [
    (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255))
    for rgb in colors
]

# Shuffle with a fixed seed so the colour order is reproducible, then
# re-seed the generator from the system source.
random.seed(0)
random.shuffle(colors)
random.seed(None)

示例#7

0

显示文件

文件： inference_throughput.py 项目： MuellerDominik/AIonFPGA

def main():
    """Measure the average per-frame duration of each processing stage.

    Sets up the DPU and the camera library, records one throw, runs every
    considered frame through the pipeline (raw fetch, colour conversion,
    resize, normalization, inference, softmax) while timestamping each stage
    with ``datetime.now()``, times the final weighting once, and then prints
    the per-stage averages in milliseconds and writes them to
    ``throughput.log`` in ``fh.dir_verification``.

    Exits through ``sys.exit()`` when the camera initialization does not
    return ``fh.ReturnCodes.SUCCESS``.
    """

    # Set up the DPU IP
    overlay = DpuOverlay(str(fh.dir_dpu / fh.dpu_bit_file))
    overlay.load_model(str(fh.dir_dpu / fh.dpu_assembly_file))

    # Set up the Neural Network Runtime (N2Cube)
    kernel_name = fh.kernel_name

    kernel_conv_input = fh.kernel_conv_input
    kernel_fc_output = fh.kernel_fc_output

    n2cube.dpuOpen()
    kernel = n2cube.dpuLoadKernel(kernel_name)
    task = n2cube.dpuCreateTask(kernel, 0)

    input_tensor_size = n2cube.dpuGetInputTensorSize(task, kernel_conv_input)

    output_tensor_size = n2cube.dpuGetOutputTensorSize(task, kernel_fc_output)
    output_tensor_channel = n2cube.dpuGetOutputTensorChannel(
        task, kernel_fc_output)
    output_tensor_address = n2cube.dpuGetOutputTensorAddress(
        task, kernel_fc_output)
    output_tensor_scale = n2cube.dpuGetOutputTensorScale(
        task, kernel_fc_output)

    # libcamera
    libcamera = ctypes.CDLL(fh.dir_cam / fh.libcamera_file)

    # Declare the return types of the library functions used below
    libcamera.get_frame_ptr.restype = ctypes.POINTER(ctypes.c_ubyte)
    libcamera.get_throw_bgn_idx.restype = ctypes.c_uint
    libcamera.get_throw_end_idx.restype = ctypes.c_uint
    libcamera.get_throw_bgn.restype = ctypes.c_bool
    libcamera.get_throw_end.restype = ctypes.c_bool

    libcamera.set_frame_rate.restype = None
    libcamera.set_buff_size.restype = None
    libcamera.set_exposure_time.restype = None
    libcamera.set_camera_gain.restype = None
    libcamera.set_avg_diffs.restype = None
    libcamera.set_threshold_mult.restype = None
    libcamera.set_frames_to_acquire.restype = None

    libcamera.initialize.restype = ctypes.c_int
    libcamera.start_acquisition.restype = ctypes.c_int
    libcamera.terminate.restype = ctypes.c_int

    # Set up of variables
    frames = np.empty((fh.frames_to_consider, ) + fh.bgr_shape, dtype=np.uint8)

    # Initialize Camera
    initialize = libcamera.initialize()

    if initialize != fh.ReturnCodes.SUCCESS:
        try:
            return_code = fh.ReturnCodes(initialize).name
        except ValueError:
            # Unknown code: report the raw value instead of an enum name
            return_code = initialize
        print(f'Initialization failed: {return_code}')
        sys.exit()
    else:
        print(
            '================================= READY ================================='
        )

    # Reset predictions
    predictions = np.zeros((fh.frames_to_consider, fh.num_objects),
                           dtype=np.float32)

    # Start acquisition (Threaded)
    t = Thread(target=libcamera.start_acquisition)
    t.start()

    # Wait until the throw has ended
    while not libcamera.get_throw_end():
        pass

    stages = [
        'Get raw bayer', 'Transform color', 'Resize', 'Normalize',
        'Run inference', 'Softmax', 'Weighting'
    ]
    meas_time = {s: get_dict() for s in stages}

    throw_bgn_idx = libcamera.get_throw_bgn_idx()
    throw_end_idx = libcamera.get_throw_end_idx()

    num_frames = throw_end_idx - throw_bgn_idx - 1  # Ignore the last two captured frames

    for idx, frame_id in enumerate(range(throw_bgn_idx, throw_end_idx - 1)):

        meas_time['Get raw bayer']['start'].append(datetime.now())
        frame_ptr = libcamera.get_frame_ptr(frame_id)
        raw_frame = np.ctypeslib.as_array(frame_ptr, shape=fh.raw_shape)
        meas_time['Get raw bayer']['end'].append(datetime.now())

        # Transform Baumer BayerRG8 to BGR8 (Baumer BayerRG ≙ OpenCV BayerBG)
        meas_time['Transform color']['start'].append(datetime.now())
        frames[idx] = cv2.cvtColor(raw_frame, cv2.COLOR_BayerBG2BGR)
        meas_time['Transform color']['end'].append(datetime.now())

        meas_time['Resize']['start'].append(datetime.now())
        frame_resized = cv2.resize(frames[idx],
                                   fh.inf_dsize,
                                   interpolation=fh.Interpolation.NEAREST)
        meas_time['Resize']['end'].append(datetime.now())

        meas_time['Normalize']['start'].append(datetime.now())
        frame_inference = frame_resized.astype(np.float32) / 255.0
        meas_time['Normalize']['end'].append(datetime.now())

        meas_time['Run inference']['start'].append(datetime.now())
        n2cube.dpuSetInputTensorInHWCFP32(task, kernel_conv_input,
                                          frame_inference, input_tensor_size)
        n2cube.dpuRunTask(task)
        meas_time['Run inference']['end'].append(datetime.now())

        # n2cube.dpuRunSoftmax(.) sometimes returns all zeros except one NaN
        # This section replaces the first occurrence of NaN in the prediction array with 1.0 and sets everything else to 0.0
        meas_time['Softmax']['start'].append(datetime.now())
        prediction = n2cube.dpuRunSoftmax(
            output_tensor_address, output_tensor_channel,
            output_tensor_size // output_tensor_channel, output_tensor_scale)
        nan = np.isnan(prediction)
        if nan.any():
            nan_idx = nan.argmax(
            )  # return the index of the first occurrence of NaN
            prediction = np.zeros((fh.num_objects, ), dtype=np.float32)
            prediction[nan_idx] = 1.0
        predictions[idx] = prediction
        meas_time['Softmax']['end'].append(datetime.now())

        # Only consider `fh.frames_to_consider` frames
        if idx == fh.frames_to_consider - 1:
            break

    meas_time['Weighting']['start'].append(datetime.now())
    num_frames_considered = min(fh.frames_to_consider, num_frames)
    window = sine_window(num_frames, num_frames_considered)  # weighting
    weighted_prediction = np.matmul(window, predictions) / np.sum(window)
    meas_time['Weighting']['end'].append(datetime.now())

    # Replace each stage's start/end lists with its mean duration in ms.
    # A stage without any measurement (no frame was processed) averages to
    # 0.0 instead of raising ZeroDivisionError.
    for k in meas_time:
        durations = [
            (e - s).total_seconds() * 1000
            for s, e in zip(meas_time[k]['start'], meas_time[k]['end'])
        ]
        meas_time[k] = sum(durations) / len(durations) if durations else 0.0

    total_ms = sum(meas_time.values())
    # Guard the frame-rate division against a zero total
    frame_rate = 1000 / total_ms if total_ms else float('inf')

    # create output file (labels padded to the longest stage name)
    mmax = max(len(s) for s in stages)
    output = f'Number of captured frames: {num_frames_considered}\n\n'
    for s in stages:
        output += f'{s}:{" "*(mmax - len(s))} {meas_time[s]:.3f} ms\n'

    output += f'\nSum:{" "*(mmax - len("Sum"))} {total_ms:.3f} ms\n'

    output += f'Frame rate:{" "*(mmax - len("Frame rate"))} {frame_rate:.3f} fps\n'

    print(output)

    with open(fh.dir_verification / 'throughput.log', 'w') as f:
        f.write(output)

    # Wait until the camera thread (process due to ctypes) is terminated
    t.join()

    # Terminate Camera
    libcamera.terminate()

    # Clean up the DPU IP: the task first, then the kernel it was created
    # from, then the device opened with `dpuOpen()`
    n2cube.dpuDestroyTask(task)
    n2cube.dpuDestroyKernel(kernel)
    n2cube.dpuClose()

示例#8

0

显示文件

文件： app.py 项目： NitinBhaskar/BAM

import os
import time
import numpy as np
import math
import argparse
import threading
import sys
import time

from queue import Queue
from serial import Serial
from mindlink import read_raw_eeg

from pynq_dpu import DpuOverlay
# Load the DPU bitstream, select the VART runtime and load the compiled model.
overlay = DpuOverlay("dpu.bit")
overlay.set_runtime("vart")
overlay.load_model("dpu_bam.elf")  # Compiled model


# Read out data from mindlink
def producer(out_q, ser, common_q):
    """Forward the run count, then push that many EEG sample blocks to out_q.

    Args:
        out_q: Queue that receives first the run count and then one block of
            samples per run.
        ser: Handle passed through unchanged to ``read_raw_eeg``.
        common_q: Queue from which the run count is taken (blocking ``get``).
    """
    remaining = common_q.get()
    # The consumer learns how many blocks to expect before any data arrives.
    out_q.put(remaining)
    while remaining > 0:
        # Each call requests 512 raw samples (one second of readings).
        out_q.put(read_raw_eeg(ser, 512))
        remaining -= 1

示例#9

0

显示文件

# Local paths of the bitstream, the compiled model and the label file,
# all built by string concatenation onto DPU_DIR.
bit_path = DPU_DIR + "dpu.bit"
elf_path =  DPU_DIR + dpu_elf
label_path= DPU_DIR + LABEL_FILE

# S3 client using signature version 4 and explicitly supplied credentials.
session = boto3.session.Session(region_name=region) 
s3_client = session.client('s3', 
     config=boto3.session.Config(signature_version='s3v4'),
     aws_access_key_id=AWS_ACCESS_KEY_ID,
     aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

# Download the ELF model file and the label file from the S3 bucket
s3_client.download_file(BUCKETDPU, dpu_elf,      DPU_DIR + dpu_elf)
s3_client.download_file(BUCKETDPU, LABEL_FILE,   DPU_DIR + LABEL_FILE)

# Load the bitstream and the model that was just downloaded.
from pynq_dpu import DpuOverlay
overlay = DpuOverlay(bit_path)
overlay.load_model(elf_path)

from dnndk import n2cube
from pynq_dpu import dputils 
 

# Open the DPU device and load the kernel named by KERNEL_CONV.
n2cube.dpuOpen()
kernel = n2cube.dpuLoadKernel(KERNEL_CONV) 

# Read the label file into a list with one entry per line; `readlines()`
# keeps the trailing newline of every entry.
with open(label_path, "r") as f:
    lines = f.readlines()
slabels = lines

def predict_label(imfile):
    task = n2cube.dpuCreateTask(kernel, 0)

示例#10

0

显示文件

class RL_agent:
    """Agent that evaluates a model on the DPU through the overlay's runner.

    The constructor loads the overlay and model and allocates one input and
    one output buffer; ``act`` runs one inference and returns the scaled
    ``tanh`` of the flattened output.
    """

    def __init__(self, elf_file, env):
        """Load the DPU overlay and model and allocate the I/O buffers.

        Args:
            elf_file: Path of the compiled model handed to ``load_model``.
            env: Environment object; its ``scale`` attribute is read here and
                its ``close`` method is called by the interrupt handler.

        Raises:
            ValueError: If the runner reports a tensor format other than
                NCHW or NHWC.
        """
        self.overlay = DpuOverlay("dpu.bit")
        self.overlay.set_runtime("vart")
        self.overlay.load_model(elf_file)
        self.dpu = self.overlay.runner
        self.env = env
        self.scale = self.env.scale

        self.inputTensors = self.dpu.get_input_tensors()
        output_tensors = self.dpu.get_output_tensors()
        layout = self.dpu.get_tensor_format()

        # Pick the axis positions of height / width / channel for the layout.
        if layout == self.dpu.TensorFormat.NCHW:
            h_axis, w_axis, c_axis = 2, 3, 1
        elif layout == self.dpu.TensorFormat.NHWC:
            h_axis, w_axis, c_axis = 1, 2, 3
        else:
            raise ValueError("Input format error.")

        out_dims = output_tensors[0].dims
        height = out_dims[h_axis]
        width = out_dims[w_axis]
        channels = out_dims[c_axis]

        self.outputSize = height * width * channels
        self.tanh = np.empty(self.outputSize)

        # Input buffer: first dimension forced to 1, remaining dimensions
        # copied from the first input tensor.
        first_input = self.inputTensors[0]
        shape_in = (1, ) + tuple(
            first_input.dims[i] for i in range(1, first_input.ndims))
        shape_out = (1, height, width, channels)

        self.input_data = [np.empty(shape_in, dtype=np.float32, order='C')]
        self.output_data = [np.empty(shape_out, dtype=np.float32, order='C')]
        self.input = self.input_data[0]

        # Route Ctrl+C to the handler method of this object.
        signal.signal(signal.SIGINT, self.interrupt_handle)

    def interrupt_handle(self, signal, frame):
        """SIGINT handler: announce the stop, close the environment and exit."""
        print('[Ultra96] Stopping')
        self.env.close()
        exit(0)

    def act(self, state):
        """Run one inference on ``state`` and return the scaled action.

        ``state`` is reshaped to dimensions 1..3 of the first input tensor,
        written into the input buffer and executed on the runner. The result
        is also kept (unscaled) in ``self.tanh``.
        """
        in_dims = self.inputTensors[0].dims
        self.input[0, ...] = state.reshape(in_dims[1], in_dims[2], in_dims[3])

        # Submit the job and block until the runner has finished it.
        job_id = self.dpu.execute_async(self.input_data, self.output_data)
        self.dpu.wait(job_id)

        flattened = [
            buf.reshape(1, self.outputSize) for buf in self.output_data
        ]
        self.tanh = self.calculate_tanh(flattened[0][0])
        return self.tanh * self.scale

    def post_process(self, outputs):
        """Sample throttle, roll and pitch and clip them to [-1, 1].

        Entry ``k`` of ``outputs`` (k = 0, 1, 2) is used as the mean and the
        square of entry ``k + 3`` as the scale of a normal distribution.
        """
        sampled = [
            np.random.normal(outputs[k], np.square(outputs[k + 3]))
            for k in range(3)
        ]
        return np.clip(np.array(sampled), -1, 1)

    def calculate_tanh(self, data):
        """Return the element-wise hyperbolic tangent of ``data``."""
        return np.tanh(data)