Example #1
    def run(self):
        overlay = DpuOverlay("./bitstream/dpu.bit")
        overlay.load_model("./model/dpu_tf_efficientnet.elf")
        cv2.setUseOptimized(True)
        cv2.setNumThreads(4)
        threadnum = 4
        num_iterations = 0
        listimage = [[] for _ in range(threadnum)]
        result = [[] for _ in range(threadnum)]
        img_processed = [[] for _ in range(threadnum)]
        
        cnt = 0
        thread = 0
        list_image = sorted([i for i in os.listdir(image_folder) if i.endswith("JPEG")])
        picture_num = len(list_image)
        for i in list_image:
            listimage[thread].append(i)
            if cnt % math.ceil(picture_num/threadnum) == 0 and cnt != 0:
                thread = thread + 1
            cnt = cnt + 1
        
        n2cube.dpuOpen()
        kernel = n2cube.dpuLoadKernel(KERNEL_CONV)
        threadAll = []
        for i in range(threadnum):
            t1 = threading.Thread(target=self.run_dpu_task, args=(kernel, i, len(listimage[i]), listimage, result))
            threadAll.append(t1)
        for x in threadAll:
            x.start()
        for x in threadAll:
            x.join()               

        with open(RESULT_FILE, 'w') as result_file:
            for item in result:
                for i in item:
                    result_file.write("%s\n" % i)
        
        rtn = n2cube.dpuDestroyKernel(kernel)
        n2cube.dpuClose()
        # Run the whole data set and write the outputs to the result file.
        # See README and "classification_result.sample" for the result file format.
        #time.sleep(10)

        return
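
The threads above target self.run_dpu_task, which is not shown in this snippet. A minimal sketch of such a worker with the DNNDK N2Cube API follows; the node names KERNEL_CONV_INPUT/KERNEL_FC_OUTPUT, the dputils helper, and the top-1 result formatting are assumptions, not taken from the original source.

    # Hypothetical worker (node names and preprocessing are assumptions)
    def run_dpu_task(self, kernel, index, count, listimage, result):
        task = n2cube.dpuCreateTask(kernel, 0)
        for i in range(count):
            img = cv2.imread(os.path.join(image_folder, listimage[index][i]))
            dputils.dpuSetInputImage2(task, KERNEL_CONV_INPUT, img)  # resize + quantize into the input tensor
            n2cube.dpuRunTask(task)
            size = n2cube.dpuGetOutputTensorSize(task, KERNEL_FC_OUTPUT)
            softmax = n2cube.dpuGetOutputTensorInHWCFP32(task, KERNEL_FC_OUTPUT, size)
            result[index].append("%s %d" % (listimage[index][i], np.argmax(softmax)))
        n2cube.dpuDestroyTask(task)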
Example #2
    def __init__(self, elf_file, env):
        self.overlay = DpuOverlay("dpu.bit")
        self.overlay.set_runtime("vart")
        self.overlay.load_model(elf_file)
        self.dpu = self.overlay.runner
        self.env = env
        self.scale = self.env.scale

        self.inputTensors = self.dpu.get_input_tensors()
        outputTensors = self.dpu.get_output_tensors()
        tensorformat = self.dpu.get_tensor_format()
        if tensorformat == self.dpu.TensorFormat.NCHW:
            outputHeight = outputTensors[0].dims[2]
            outputWidth = outputTensors[0].dims[3]
            outputChannel = outputTensors[0].dims[1]
        elif tensorformat == self.dpu.TensorFormat.NHWC:
            outputHeight = outputTensors[0].dims[1]
            outputWidth = outputTensors[0].dims[2]
            outputChannel = outputTensors[0].dims[3]
        else:
            raise ValueError("Input format error.")

        self.outputSize = outputHeight * outputWidth * outputChannel
        self.tanh = np.empty(self.outputSize)

        shape_in = (1, ) + tuple([
            self.inputTensors[0].dims[i]
            for i in range(self.inputTensors[0].ndims)
        ][1:])
        shape_out = (1, outputHeight, outputWidth, outputChannel)
        self.input_data = []
        self.output_data = []
        self.input_data.append(
            np.empty((shape_in), dtype=np.float32, order='C'))
        self.output_data.append(
            np.empty((shape_out), dtype=np.float32, order='C'))
        self.input = self.input_data[0]
        signal.signal(signal.SIGINT, self.interrupt_handle)
        self.inputHeight = []
        self.inputWidth = []
        self.inputShape = []
        self.output0Channels = []
        self.output0Height = []
        self.output0Width = []
        self.output0Size = []
        self.output1Channels = []
        self.output1Height = []
        self.output1Width = []
        self.output1Size = []



Example #3
if __name__ == "__main__":
    overlay = DpuOverlay("dpu.bit")
    print("[INFO] dpu overlay loaded")
    overlay.set_runtime("vart")
    overlay.load_model("dpu_densebox.elf")
    dpu = overlay.runner
    dpu_face_detector = FaceDetect(dpu, 0.55, 0.35)
    dpu_face_detector.start()
    print("[INFO] model densebox_640_360 loaded ")
    print("[INFO] starting camera input ...")
    cam = cv2.VideoCapture(0)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH,640)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT,480)
    if not cam.isOpened():
        print("[ERROR] Failed to open camera 0")
        exit()
    window_name = 'main'
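
The snippet ends before the capture loop. A minimal sketch of how it might continue, assuming FaceDetect.process(frame) returns (x1, y1, x2, y2) boxes as in the Xilinx DenseBox demos (an assumption, not confirmed by this snippet):

# Hypothetical continuation (process() and its return format are assumptions)
while cam.isOpened():
    ret, frame = cam.read()
    if not ret:
        break
    faces = dpu_face_detector.process(frame)
    for x1, y1, x2, y2 in faces:
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    cv2.imshow(window_name, frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
dpu_face_detector.stop()
cam.release()
cv2.destroyAllWindows()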
Example #4
def main():

  # UI: DPU
  ui = UI()
  ui.update_boot_window('Initializing DPU...')

  from dnndk import n2cube
  from pynq_dpu import DpuOverlay

  # Set up the DPU IP
  overlay = DpuOverlay(str(fh.dir_dpu / fh.dpu_bit_file))
  overlay.load_model(str(fh.dir_dpu / fh.dpu_assembly_file))

  # Set up the Neural Network Runtime (N2Cube)
  kernel_name = fh.kernel_name

  kernel_conv_input = fh.kernel_conv_input
  kernel_fc_output = fh.kernel_fc_output

  n2cube.dpuOpen()
  kernel = n2cube.dpuLoadKernel(kernel_name)
  task = n2cube.dpuCreateTask(kernel, 0)

  input_tensor_size = n2cube.dpuGetInputTensorSize(task, kernel_conv_input)

  output_tensor_size = n2cube.dpuGetOutputTensorSize(task, kernel_fc_output)
  output_tensor_channel = n2cube.dpuGetOutputTensorChannel(task, kernel_fc_output)
  output_tensor_address = n2cube.dpuGetOutputTensorAddress(task, kernel_fc_output)
  output_tensor_scale = n2cube.dpuGetOutputTensorScale(task, kernel_fc_output)

  # UI: Camera
  ui.update_boot_window('Initializing Camera...')

  # libcamera
  libcamera = ctypes.CDLL(fh.dir_cam / fh.libcamera_file)

  # Getter
  libcamera.get_frame_ptr.restype = ctypes.POINTER(ctypes.c_ubyte)
  libcamera.get_frame_ptr.argtypes = [ctypes.c_uint]
  libcamera.get_throw_bgn_idx.restype = ctypes.c_uint
  libcamera.get_throw_bgn_idx.argtypes = None
  libcamera.get_throw_end_idx.restype = ctypes.c_uint
  libcamera.get_throw_end_idx.argtypes = None
  libcamera.get_throw_bgn.restype = ctypes.c_bool
  libcamera.get_throw_bgn.argtypes = None
  libcamera.get_throw_end.restype = ctypes.c_bool
  libcamera.get_throw_end.argtypes = None

  # Setter
  libcamera.set_frame_rate.restype = None
  libcamera.set_frame_rate.argtypes = [ctypes.c_double]
  libcamera.set_buff_size.restype = None
  libcamera.set_buff_size.argtypes = [ctypes.c_uint]
  libcamera.set_exposure_time.restype = None
  libcamera.set_exposure_time.argtypes = [ctypes.c_double]
  libcamera.set_camera_gain.restype = None
  libcamera.set_camera_gain.argtypes = [ctypes.c_double]
  libcamera.set_avg_diffs.restype = None
  libcamera.set_avg_diffs.argtypes = [ctypes.c_uint]
  libcamera.set_threshold_mult.restype = None
  libcamera.set_threshold_mult.argtypes = [ctypes.c_double]
  libcamera.set_frames_to_acquire.restype = None
  libcamera.set_frames_to_acquire.argtypes = [ctypes.c_uint]

  # Camera
  libcamera.initialize.restype = ctypes.c_int
  libcamera.initialize.argtypes = None
  libcamera.reset_global_variables.restype = None
  libcamera.reset_global_variables.argtypes = None
  libcamera.start_acquisition.restype = ctypes.c_int
  libcamera.start_acquisition.argtypes = None
  libcamera.terminate.restype = ctypes.c_int
  libcamera.terminate.argtypes = None

  # Set the global variables according to the module `fhnwtoys.settings`
  libcamera.set_frame_rate(fh.frame_rate)
  libcamera.set_buff_size(fh.buff_size)
  libcamera.set_exposure_time(fh.exposure_time)
  libcamera.set_camera_gain(fh.camera_gain)
  libcamera.set_avg_diffs(fh.avg_diffs)
  libcamera.set_threshold_mult(fh.threshold_mult)
  libcamera.set_frames_to_acquire(fh.frames_to_acquire)

  # Initialize Camera
  initialize = fh.ReturnCodes.NOT_INITIALIZED
  initialization_tries = 0

  while initialize != fh.ReturnCodes.SUCCESS:
    if initialization_tries > 0:
      try:
        return_code = fh.ReturnCodes(initialize).name
      except ValueError:
        return_code = initialize
      ui.update_boot_window(f'Camera Error ({return_code}), try to replug the camera.')
    initialize = libcamera.initialize()
    initialization_tries += 1

  # UI: Ready
  ui.update_boot_window('READY')

  # Set up the `frames` array
  frames = np.empty((fh.frames_to_consider,) + fh.bgr_shape, dtype=np.uint8)

  while True:
    # Reset the predictions
    predictions = np.zeros((fh.frames_to_consider, fh.num_objects), dtype=np.float32)

    # Start acquisition (threaded)
    # todo: error handling ('Unexpected Error, system reboot required.')
    # start_acquisition = libcamera.start_acquisition() # non threaded approach
    t = Thread(target=libcamera.start_acquisition)  # threaded approach (process due to ctypes)
    t.start()

    # Wait until the throw has ended (the Ultra96-V2 is not powerful enough to process the data during the acquisition)
    while not libcamera.get_throw_end():
      pass

    throw_bgn_idx = libcamera.get_throw_bgn_idx()
    throw_end_idx = libcamera.get_throw_end_idx()

    num_frames = throw_end_idx - throw_bgn_idx - 1 # Ignore the last two captured frames

    # Image processing (including inference)
    for idx, frame_id in enumerate(range(throw_bgn_idx, throw_end_idx - 1)):
      frame_ptr = libcamera.get_frame_ptr(frame_id)
      raw_frame = np.ctypeslib.as_array(frame_ptr, shape=fh.raw_shape) # Raw Baumer BayerRG8 frame
      # Transform Baumer BayerRG8 to BGR8 (Baumer BayerRG = OpenCV BayerBG)
      frames[idx] = cv2.cvtColor(raw_frame, cv2.COLOR_BayerBG2BGR) # Color space conversion
      # Image scaling using nearest-neighbor interpolation
      frame_resized = cv2.resize(frames[idx], fh.inf_dsize, interpolation=fh.Interpolation.NEAREST)
      frame_inference = frame_resized.astype(np.float32) / 255.0  # Normalization (float32 precision)

      # Inference
      n2cube.dpuSetInputTensorInHWCFP32(task, kernel_conv_input, frame_inference, input_tensor_size)
      n2cube.dpuRunTask(task)

      # Softmax function (normalized exponential function)
      # Confident predictions lead to all zeros and a NaN, when run through `n2cube.dpuRunSoftmax(.)`
      # This section replaces the first occurrence of NaN in the `prediction` array with 1.0 and sets everything else to 0.0
      prediction = n2cube.dpuRunSoftmax(output_tensor_address, output_tensor_channel, output_tensor_size//output_tensor_channel, output_tensor_scale)
      nan = np.isnan(prediction)
      if nan.any():
        nan_idx = nan.argmax() # returns the index of the first occurrence of NaN
        prediction = np.zeros((fh.num_objects,), dtype=np.float32)
        prediction[nan_idx] = 1.0
      predictions[idx] = prediction

      # Only consider `fh.frames_to_consider` frames
      if idx == fh.frames_to_consider - 1: # (-1: idx starts with 0)
        break

    num_frames_considered = min(fh.frames_to_consider, num_frames)

    window = sine_squared_window(num_frames, num_frames_considered) # weighting function
    weighted_prediction = np.matmul(window, predictions) / np.sum(window)  # computation of the weighted prediction

    # UI: Prepare data for the UI
    weighted_prediction_percent = weighted_prediction * 100
    weighted_prediction_sorted = np.sort(weighted_prediction_percent)[::-1]
    weighted_prediction_argsorted = np.argsort(weighted_prediction_percent)[::-1]

    # this is the index of the best guess (computed by weighting the `fh.frames_to_consider` frames)
    guess_idx = weighted_prediction_argsorted[0]

    relevant_pct_ui = np.asarray(weighted_prediction_percent >= 1.0).nonzero()[0] # value of prediction must be at least 1.0%
    relevant_pct_ui_len = len(relevant_pct_ui)
    predictions_ui_len = min(4, relevant_pct_ui_len) # show at most Top 4

    predictions_ui = [] # the object names
    percentages_ui = np.empty((predictions_ui_len + 1,), dtype=np.float32) # the percentages (+1: 'Others')
    for i, w in enumerate(weighted_prediction_argsorted[0:predictions_ui_len]):
      predictions_ui.append(fh.objects_ui[w])
      percentages_ui[i] = weighted_prediction_percent[w]

    # the object names
    predictions_ui.append('Others')

    # the percentages
    percentages_ui[-1] = np.sum(weighted_prediction_sorted[predictions_ui_len:])
    percentages_ui = lrm_round(percentages_ui)

    # the frame
    weighted_guesses = np.multiply(window, predictions[:, guess_idx])
    frame_ui_idx = weighted_guesses.argmax()

    frame_ui_resized = cv2.resize(frames[frame_ui_idx], fh.ui_dsize, interpolation=fh.Interpolation.NEAREST)
    _, frame_ui_png = cv2.imencode('.png', frame_ui_resized)
    frame_ui = frame_ui_png.tobytes()  # the frame

    # UI: Show results
    if percentages_ui[-1] == 0.0:
      predictions_ui = predictions_ui[:-1]
      percentages_ui = percentages_ui[:-1]

    # UI: Inference
    ui.update_inference_window(predictions_ui, percentages_ui, frame_ui)

    # Wait until the camera thread (process due to ctypes) is terminated
    t.join()

    # Reset the global variables (has to be done manually to avoid race conditions)
    libcamera.reset_global_variables()

  # Under regular circumstances, this section should never be reached

  # Terminate Camera
  terminate = libcamera.terminate()

  # Clean up the DPU IP
  n2cube.dpuDestroyTask(task)
  n2cube.dpuDestroyKernel(kernel)
  n2cube.dpuClose()
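
sine_squared_window(.) and lrm_round(.) come from the project's helper module and are not shown. Minimal sketches of plausible implementations, assuming the window is a sin^2 bump over the considered frames and lrm_round is largest-remainder rounding (both assumptions):

# Hypothetical helpers; shapes and semantics are assumptions, not the project's code
def sine_squared_window(num_frames, num_frames_considered):
    # sin^2 weighting over the frames actually considered, zero elsewhere
    window = np.zeros(fh.frames_to_consider, dtype=np.float32)
    n = np.arange(num_frames_considered)
    window[:num_frames_considered] = np.sin(np.pi * (n + 0.5) / num_frames_considered) ** 2
    return window

def lrm_round(values, decimals=1):
    # Largest-remainder rounding: the rounded values keep the original sum
    scale = 10 ** decimals
    scaled = values * scale
    floored = np.floor(scaled)
    shortfall = int(round(scaled.sum() - floored.sum()))
    order = np.argsort(scaled - floored)[::-1]  # largest remainders get the leftover units
    floored[order[:shortfall]] += 1
    return floored / scale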
Example #5
import os
from pynq_dpu import DpuOverlay
overlay = DpuOverlay("dpu.bit")
os.system("dexplorer -w")  # DNNDK DPU-explorer utility; here used to check that the DPU is up
Example #6
from pynq_dpu import DpuOverlay
overlay = DpuOverlay("dpu.bit")
overlay.load_model("dpu_tf_yolov3.elf")

import numpy as np
import random
import cv2
import colorsys
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
#%matplotlib inline
from pynq_dpu.edge.dnndk.tf_yolov3_voc_py.tf_yolov3_voc import *

anchor_list = [
    10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373,
    326
]
anchor_float = [float(x) for x in anchor_list]
anchors = np.array(anchor_float).reshape(-1, 2)

classes_path = "files/voc_classes.txt"
class_names = get_class(classes_path)

num_classes = len(class_names)
hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
colors = list(
    map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
random.seed(0)
random.shuffle(colors)
random.seed(None)
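
The YOLOv3 setup above stops before any inference. A minimal sketch of a single-image pass with the N2Cube API; the kernel/node names and the pre_process helper follow the PYNQ-DPU YOLOv3 notebook and should be treated as assumptions:

# Hypothetical inference pass (kernel/node names are assumptions)
from dnndk import n2cube

KERNEL_CONV = "tf_yolov3"
CONV_INPUT = "conv2d_1_convolution"

n2cube.dpuOpen()
kernel = n2cube.dpuLoadKernel(KERNEL_CONV)
task = n2cube.dpuCreateTask(kernel, 0)

image = cv2.imread("files/example.jpg")  # placeholder path
image_data = np.array(pre_process(image, (416, 416)), dtype=np.float32)
input_len = n2cube.dpuGetInputTensorSize(task, CONV_INPUT)
n2cube.dpuSetInputTensorInHWCFP32(task, CONV_INPUT, image_data, input_len)
n2cube.dpuRunTask(task)

# One of the three YOLOv3 output tensors (node name assumed)
size = n2cube.dpuGetOutputTensorSize(task, "conv2d_59_convolution")
out0 = n2cube.dpuGetOutputTensorInHWCFP32(task, "conv2d_59_convolution", size)

n2cube.dpuDestroyTask(task)
n2cube.dpuDestroyKernel(kernel)
n2cube.dpuClose()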
Example #7
def main():

    # Set up the DPU IP
    overlay = DpuOverlay(str(fh.dir_dpu / fh.dpu_bit_file))
    overlay.load_model(str(fh.dir_dpu / fh.dpu_assembly_file))

    # Set up the Neural Network Runtime (N2Cube)
    kernel_name = fh.kernel_name

    kernel_conv_input = fh.kernel_conv_input
    kernel_fc_output = fh.kernel_fc_output

    n2cube.dpuOpen()
    kernel = n2cube.dpuLoadKernel(kernel_name)
    task = n2cube.dpuCreateTask(kernel, 0)

    input_tensor_size = n2cube.dpuGetInputTensorSize(task, kernel_conv_input)

    output_tensor_size = n2cube.dpuGetOutputTensorSize(task, kernel_fc_output)
    output_tensor_channel = n2cube.dpuGetOutputTensorChannel(
        task, kernel_fc_output)
    output_tensor_address = n2cube.dpuGetOutputTensorAddress(
        task, kernel_fc_output)
    output_tensor_scale = n2cube.dpuGetOutputTensorScale(
        task, kernel_fc_output)

    # libcamera
    libcamera = ctypes.CDLL(fh.dir_cam / fh.libcamera_file)

    libcamera.get_frame_ptr.restype = ctypes.POINTER(ctypes.c_ubyte)
    libcamera.get_throw_bgn_idx.restype = ctypes.c_uint
    libcamera.get_throw_end_idx.restype = ctypes.c_uint
    libcamera.get_throw_bgn.restype = ctypes.c_bool
    libcamera.get_throw_end.restype = ctypes.c_bool

    libcamera.set_frame_rate.restype = None
    libcamera.set_buff_size.restype = None
    libcamera.set_exposure_time.restype = None
    libcamera.set_camera_gain.restype = None
    libcamera.set_avg_diffs.restype = None
    libcamera.set_threshold_mult.restype = None
    libcamera.set_frames_to_acquire.restype = None

    libcamera.initialize.restype = ctypes.c_int
    libcamera.start_acquisition.restype = ctypes.c_int
    libcamera.terminate.restype = ctypes.c_int

    # Set up of variables
    frames = np.empty((fh.frames_to_consider, ) + fh.bgr_shape, dtype=np.uint8)

    # Initialize Camera
    initialize = libcamera.initialize()

    if initialize != fh.ReturnCodes.SUCCESS:
        try:
            return_code = fh.ReturnCodes(initialize).name
        except ValueError:
            return_code = initialize
        print(f'Initialization failed: {return_code}')
        sys.exit()
    else:
        print(
            '================================= READY ================================='
        )

    # Reset predictions
    predictions = np.zeros((fh.frames_to_consider, fh.num_objects),
                           dtype=np.float32)

    # Start acquisition (Threaded)
    t = Thread(target=libcamera.start_acquisition)
    t.start()

    # Wait until the throw has ended
    while not libcamera.get_throw_end():
        pass

    stages = [
        'Get raw bayer', 'Transform color', 'Resize', 'Normalize',
        'Run inference', 'Softmax', 'Weighting'
    ]
    meas_time = {s: get_dict() for s in stages}

    throw_bgn_idx = libcamera.get_throw_bgn_idx()
    throw_end_idx = libcamera.get_throw_end_idx()

    num_frames = throw_end_idx - throw_bgn_idx - 1  # Ignore the last two captured frames

    for idx, frame_id in enumerate(range(throw_bgn_idx, throw_end_idx - 1)):

        meas_time['Get raw bayer']['start'].append(datetime.now())
        frame_ptr = libcamera.get_frame_ptr(frame_id)
        raw_frame = np.ctypeslib.as_array(frame_ptr, shape=fh.raw_shape)
        meas_time['Get raw bayer']['end'].append(datetime.now())

        # Transform Baumer BayerRG8 to BGR8 (Baumer BayerRG ≙ OpenCV BayerBG)
        meas_time['Transform color']['start'].append(datetime.now())
        frames[idx] = cv2.cvtColor(raw_frame, cv2.COLOR_BayerBG2BGR)
        meas_time['Transform color']['end'].append(datetime.now())

        meas_time['Resize']['start'].append(datetime.now())
        frame_resized = cv2.resize(frames[idx],
                                   fh.inf_dsize,
                                   interpolation=fh.Interpolation.NEAREST)
        meas_time['Resize']['end'].append(datetime.now())

        meas_time['Normalize']['start'].append(datetime.now())
        frame_inference = frame_resized.astype(np.float32) / 255.0
        meas_time['Normalize']['end'].append(datetime.now())

        meas_time['Run inference']['start'].append(datetime.now())
        n2cube.dpuSetInputTensorInHWCFP32(task, kernel_conv_input,
                                          frame_inference, input_tensor_size)
        n2cube.dpuRunTask(task)
        meas_time['Run inference']['end'].append(datetime.now())

        # n2cube.dpuRunSoftmax(.) sometimes returns all zeros except one NaN
        # This section replaces the first occurrence of NaN in the prediction array with 1.0 and sets everything else to 0.0
        meas_time['Softmax']['start'].append(datetime.now())
        prediction = n2cube.dpuRunSoftmax(
            output_tensor_address, output_tensor_channel,
            output_tensor_size // output_tensor_channel, output_tensor_scale)
        nan = np.isnan(prediction)
        if nan.any():
            nan_idx = nan.argmax()  # index of the first occurrence of NaN
            prediction = np.zeros((fh.num_objects, ), dtype=np.float32)
            prediction[nan_idx] = 1.0
        predictions[idx] = prediction
        meas_time['Softmax']['end'].append(datetime.now())

        if idx == fh.frames_to_consider - 1:
            break

    meas_time['Weighting']['start'].append(datetime.now())
    num_frames_considered = min(fh.frames_to_consider, num_frames)
    window = sine_window(num_frames, num_frames_considered)  # weighting
    weighted_prediction = np.matmul(window, predictions) / np.sum(window)
    meas_time['Weighting']['end'].append(datetime.now())

    for k in meas_time:
        meas_time[k] = [
            (e - s).total_seconds() * 1000
            for s, e in zip(meas_time[k]['start'], meas_time[k]['end'])
        ]
        meas_time[k] = sum(meas_time[k]) / len(meas_time[k])

    # create output file
    mmax = max(len(s) for s in stages)
    output = f'Number of captured frames: {num_frames_considered}\n\n'
    for idx, s in enumerate(stages):
        output += f'{s}:{" "*(mmax - len(stages[idx]))} {meas_time[s]:.3f} ms\n'

    output += f'\nSum:{" "*(mmax - len("Sum"))} {sum(meas_time.values()):.3f} ms\n'

    output += f'Frame rate:{" "*(mmax - len("Frame rate"))} {1000 / sum(meas_time.values()):.3f} fps\n'

    print(output)

    with open(fh.dir_verification / 'throughput.log', 'w') as f:
        f.write(output)

    # Wait until the camera thread (process due to ctypes) is terminated
    t.join()

    # Terminate Camera
    terminate = libcamera.terminate()

    # Clean up the DPU IP
    n2cube.dpuDestroyTask(task)
    n2cube.dpuDestroyKernel(kernel)
    n2cube.dpuClose()
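
get_dict() is a small project helper; its use above makes the shape clear, so a sketch is straightforward:

def get_dict():
    # One pair of timestamp lists per measured stage
    return {'start': [], 'end': []}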
Example #8
import os
import time
import numpy as np
import math
import argparse
import threading
import sys

from queue import Queue
from serial import Serial
from mindlink import read_raw_eeg

from pynq_dpu import DpuOverlay
overlay = DpuOverlay("dpu.bit")
overlay.set_runtime("vart")
overlay.load_model("dpu_bam.elf")  # Compiled model


# Read out data from mindlink
def producer(out_q, ser, common_q):
    total_run = common_q.get()
    out_q.put(total_run)
    while total_run > 0:
        samples = read_raw_eeg(ser, 512)  # Fetch 1 second of reading
        # Put the samples in the queue for consumer to fetch
        out_q.put(samples)
        total_run -= 1
        #print('Producer')
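
Only the producer is shown. A minimal sketch of a matching consumer that feeds the EEG samples to the VART runner; the preprocessing and tensor shapes are assumptions:

# Hypothetical consumer (preprocessing and tensor shapes are assumptions)
def consumer(in_q, common_q):
    dpu = overlay.runner
    shape_in = tuple(dpu.get_input_tensors()[0].dims)
    shape_out = tuple(dpu.get_output_tensors()[0].dims)
    input_data = [np.empty(shape_in, dtype=np.float32, order='C')]
    output_data = [np.empty(shape_out, dtype=np.float32, order='C')]
    total_run = in_q.get()
    while total_run > 0:
        samples = in_q.get()  # 1 second of readings from the producer
        input_data[0][...] = np.asarray(samples, dtype=np.float32).reshape(shape_in)
        job_id = dpu.execute_async(input_data, output_data)
        dpu.wait(job_id)
        print(np.argmax(output_data[0]))  # placeholder post-processing
        total_run -= 1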

Example #9
bit_path = DPU_DIR + "dpu.bit"
elf_path = DPU_DIR + dpu_elf
label_path = DPU_DIR + LABEL_FILE

session = boto3.session.Session(region_name=region) 
s3_client = session.client('s3', 
     config=boto3.session.Config(signature_version='s3v4'),
     aws_access_key_id=AWS_ACCESS_KEY_ID,
     aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

# Access the ELF model and label file from S3
s3_client.download_file(BUCKETDPU, dpu_elf,      DPU_DIR + dpu_elf)
s3_client.download_file(BUCKETDPU, LABEL_FILE,   DPU_DIR + LABEL_FILE)

from pynq_dpu import DpuOverlay
overlay = DpuOverlay(bit_path)
overlay.load_model(elf_path)

from dnndk import n2cube
from pynq_dpu import dputils 
 

n2cube.dpuOpen()
kernel = n2cube.dpuLoadKernel(KERNEL_CONV) 

with open(label_path, "r") as f:
    lines = f.readlines()
slabels = lines

def predict_label(imfile):
    task = n2cube.dpuCreateTask(kernel, 0)
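
predict_label breaks off after creating the task. A minimal sketch of how it might continue, assuming DNNDK node names KERNEL_CONV_INPUT/KERNEL_FC_OUTPUT and the dputils.dpuSetInputImage2 helper (all assumptions):

    # Hypothetical continuation (node names are assumptions)
    img = cv2.imread(imfile)
    dputils.dpuSetInputImage2(task, KERNEL_CONV_INPUT, img)
    n2cube.dpuRunTask(task)
    size = n2cube.dpuGetOutputTensorSize(task, KERNEL_FC_OUTPUT)
    softmax = n2cube.dpuGetOutputTensorInHWCFP32(task, KERNEL_FC_OUTPUT, size)
    n2cube.dpuDestroyTask(task)
    return slabels[np.argmax(softmax)].strip()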
Example #10
class RL_agent:
    def __init__(self, elf_file, env):
        self.overlay = DpuOverlay("dpu.bit")
        self.overlay.set_runtime("vart")
        self.overlay.load_model(elf_file)
        self.dpu = self.overlay.runner
        self.env = env
        self.scale = self.env.scale

        self.inputTensors = self.dpu.get_input_tensors()
        outputTensors = self.dpu.get_output_tensors()
        tensorformat = self.dpu.get_tensor_format()
        if tensorformat == self.dpu.TensorFormat.NCHW:
            outputHeight = outputTensors[0].dims[2]
            outputWidth = outputTensors[0].dims[3]
            outputChannel = outputTensors[0].dims[1]
        elif tensorformat == self.dpu.TensorFormat.NHWC:
            outputHeight = outputTensors[0].dims[1]
            outputWidth = outputTensors[0].dims[2]
            outputChannel = outputTensors[0].dims[3]
        else:
            raise ValueError("Input format error.")

        self.outputSize = outputHeight * outputWidth * outputChannel
        self.tanh = np.empty(self.outputSize)

        shape_in = (1, ) + tuple([
            self.inputTensors[0].dims[i]
            for i in range(self.inputTensors[0].ndims)
        ][1:])
        shape_out = (1, outputHeight, outputWidth, outputChannel)
        self.input_data = []
        self.output_data = []
        self.input_data.append(
            np.empty((shape_in), dtype=np.float32, order='C'))
        self.output_data.append(
            np.empty((shape_out), dtype=np.float32, order='C'))
        self.input = self.input_data[0]
        signal.signal(signal.SIGINT, self.interrupt_handle)

    def interrupt_handle(self, signal, frame):
        print('[Ultra96] Stopping')
        self.env.close()
        exit(0)

    def act(self, state):
        self.input[0, ...] = state.reshape(self.inputTensors[0].dims[1],
                                           self.inputTensors[0].dims[2],
                                           self.inputTensors[0].dims[3])
        job_id = self.dpu.execute_async(self.input_data, self.output_data)
        self.dpu.wait(job_id)
        temp = [j.reshape(1, self.outputSize) for j in self.output_data]
        self.tanh = self.calculate_tanh(temp[0][0])
        action = self.tanh * self.scale
        return action

    def post_process(self, outputs):
        throttle = np.random.normal(outputs[0], np.square(outputs[3]))
        roll = np.random.normal(outputs[1], np.square(outputs[4]))
        pitch = np.random.normal(outputs[2], np.square(outputs[5]))
        return np.clip(np.array([throttle, roll, pitch]), -1, 1)

    def calculate_tanh(self, data):
        result = np.tanh(data)
        return result
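
A short usage sketch for the class above; the environment object and the model filename are placeholders:

# Hypothetical usage (env and ELF filename are placeholders)
if __name__ == "__main__":
    env = make_env()  # placeholder: must expose .scale, .reset() and .close()
    agent = RL_agent("dpu_model.elf", env)
    state = env.reset()
    action = agent.act(state)  # one forward pass on the DPU
    print(action)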