Exemplo n.º 1
0
def RunDPU(kernel, img, count):
    """Run repeated inference rounds on the DPU for a single image.

    A DPU task is created from ``kernel``.  Each round loads ``img`` into the
    input node, runs the task and applies softmax to the FC output node.
    After every round ``count`` is advanced by the module-level ``threadnum``
    while holding the module-level lock ``l``; the loop stops once ``count``
    reaches 1000.

    Args:
        kernel: DPU kernel passed to ``n2cube.dpuCreateTask``.
        img: image passed to ``dputils.dpuSetInputImage2``.
        count: starting round index; rounds run while it is below 1000.

    Returns:
        None.  The softmax result of each round is discarded.
    """
    # Create the DPU task from the DPU kernel.
    task = n2cube.dpuCreateTask(kernel, 0)
    try:
        while count < 1000:
            # Load the image into the DPU input node.
            dputils.dpuSetInputImage2(task, KERNEL_CONV_INPUT, img)
            # Run the model on the DPU.
            n2cube.dpuRunTask(task)
            # Output tensor size and channel count of the FC output node.
            size = n2cube.dpuGetOutputTensorSize(task, KERNEL_FC_OUTPUT)
            channel = n2cube.dpuGetOutputTensorChannel(task, KERNEL_FC_OUTPUT)
            # Address and scale of the FC result.
            conf = n2cube.dpuGetOutputTensorAddress(task, KERNEL_FC_OUTPUT)
            outputScale = n2cube.dpuGetOutputTensorScale(task,
                                                         KERNEL_FC_OUTPUT)
            # Run softmax; the result is not used by this function.
            n2cube.dpuRunSoftmax(conf, channel, size // channel, outputScale)

            # The context manager releases the lock even if the update raises.
            with l:
                count = count + threadnum
    finally:
        # Destroy the DPU task and free its resources, also on error.
        n2cube.dpuDestroyTask(task)
Exemplo n.º 2
0
def predict_label(imfile):
    """Download an image from S3, classify it on the DPU and return its label.

    The object ``imfile`` is fetched from ``BUCKET`` into ``image_folder``,
    read with OpenCV, resized to ``IMG_DIMS`` x ``IMG_DIMS``, converted to
    float32 and divided by 255.  The result is fed to the DPU input node,
    softmax is applied to the FC output, and the entry of ``slabels`` at the
    argmax index is returned.

    Args:
        imfile: S3 object key, also used as the local file name.

    Returns:
        The selected ``slabels`` entry with newline characters stripped from
        both ends.
    """
    # Use one local path for both the download and the read, so they agree
    # even when image_folder has no trailing path separator.
    img_path = os.path.join(image_folder, imfile)
    s3_client.download_file(BUCKET, imfile, img_path)

    img = cv2.imread(img_path)
    img = cv2.resize(img, (IMG_DIMS, IMG_DIMS))
    img = img.astype(np.float32)
    img = img / 255.0

    # Create the task only once the image is ready, and always destroy it.
    task = n2cube.dpuCreateTask(kernel, 0)
    try:
        # Set the input tensor.
        input_len = n2cube.dpuGetInputTensorSize(task, KERNEL_CONV_INPUT)
        n2cube.dpuSetInputTensorInHWCFP32(task, KERNEL_CONV_INPUT, img,
                                          input_len)

        # Run the model on the DPU.
        n2cube.dpuRunTask(task)

        # Output tensor size and channel count of the FC output node.
        size = n2cube.dpuGetOutputTensorSize(task, KERNEL_FC_OUTPUT)
        channel = n2cube.dpuGetOutputTensorChannel(task, KERNEL_FC_OUTPUT)

        # Address and scale of the FC result.
        conf = n2cube.dpuGetOutputTensorAddress(task, KERNEL_FC_OUTPUT)
        outputScale = n2cube.dpuGetOutputTensorScale(task, KERNEL_FC_OUTPUT)

        # Run softmax over the FC output.
        softmax = n2cube.dpuRunSoftmax(conf, channel, size // channel,
                                       outputScale)
    finally:
        n2cube.dpuDestroyTask(task)

    return slabels[np.argmax(softmax)].strip('\n')
Exemplo n.º 3
0
def main():
    """Run the test images through the DPU and evaluate the segmentation.

    Test images and segmentation labels are loaded with ``dpu_get_data``.
    When ``USE_DPU`` is set, each image is flattened, fed to the DPU input
    node, and softmax is applied to the output node; the result is reshaped
    to (height, width, channels) and collected.  The argmax over the last
    axis of predictions and labels is then passed to ``dpu_IoU``.

    DPU resources (task, kernel, driver handle) are released in a ``finally``
    block so they are freed even if processing fails.
    """
    print("STARTING UNETv2 on DPU...")

    kernel = None
    task = None
    if USE_DPU:
        # Attach to DPU driver
        n2cube.dpuOpen()

    try:
        if USE_DPU:
            # Load DPU Kernel and create a task
            kernel = n2cube.dpuLoadKernel(KERNEL_CONV)
            task = n2cube.dpuCreateTask(kernel, 0)

        # load and preprocess images and load segmentation labels
        assert os.path.isdir(IMG_TEST_DIR)
        x_test, y_test, img_file, seg_file = dpu_get_data(IMG_TEST_DIR,
                                                          SEG_TEST_DIR,
                                                          cfg.NUM_CLASSES,
                                                          cfg.WIDTH,
                                                          cfg.HEIGHT)

        y_pred = []
        # process all images
        for i, image in enumerate(x_test):
            # Flatten the image into a 1-D buffer for the DPU input node.
            imageRun = image.reshape(
                (image.shape[0] * image.shape[1] * image.shape[2]))
            input_len = len(imageRun)

            if not USE_DPU:
                continue

            # load pre-processed image as DPU input
            n2cube.dpuSetInputTensorInHWCFP32(task, CONV_INPUT_NODE, imageRun,
                                              input_len)
            if i == 0:
                # Input tensor details are only reported for the first image,
                # so only query them then.
                dpu_in = n2cube.dpuGetInputTensor(task, CONV_INPUT_NODE)
                ti_scale = n2cube.dpuGetTensorScale(dpu_in)
                ti_h = n2cube.dpuGetTensorHeight(dpu_in)
                ti_w = n2cube.dpuGetTensorWidth(dpu_in)
                ti_sz = n2cube.dpuGetTensorSize(dpu_in)
                ti_ch = n2cube.dpuGetTensorChannel(dpu_in)
                print(
                    "Input  tensor=%3d ch=%3d H=%3d W=%3d Size=%6d scale=%4d" %
                    (i, ti_ch, ti_h, ti_w, ti_sz, ti_scale))

            # run DPU task
            n2cube.dpuRunTask(task)

            # get output tensor address
            dpu_out = n2cube.dpuGetOutputTensorAddress(task, CONV_OUTPUT_NODE)
            # get number of channels in output tensor
            to_ch = n2cube.dpuGetOutputTensorChannel(task, CONV_OUTPUT_NODE)
            # get size of output tensor
            to_sz = n2cube.dpuGetOutputTensorSize(task, CONV_OUTPUT_NODE)
            # get width of output tensor
            to_w = n2cube.dpuGetOutputTensorWidth(task, CONV_OUTPUT_NODE)
            # get height of output tensor
            to_h = n2cube.dpuGetOutputTensorHeight(task, CONV_OUTPUT_NODE)
            # get output tensor scale
            to_scale = n2cube.dpuGetOutputTensorScale(task, CONV_OUTPUT_NODE)

            if i == 0:
                print("Output tensor=%3d ch=%3d H=%3d W=%3d Size=%6d" %
                      (i, to_ch, to_h, to_w, to_sz))
                print("Output tensor scaling factor", to_scale)

            softmax = n2cube.dpuRunSoftmax(dpu_out, to_ch, to_sz // to_ch,
                                           to_scale)

            prediction = softmax.reshape((to_h, to_w, to_ch))

            y_pred.append(prediction)
            if i == 0:
                print("prediction shape: ", prediction.shape)

        # Calculate intersection over union for each segmentation class
        # NOTE(review): when USE_DPU is false y_pred stays empty, so the
        # argmax over axis 3 below cannot succeed - confirm whether a
        # non-DPU path is intended.
        y_pred = np.asarray(y_pred)
        y_test = np.asarray(y_test)
        print("y_pred shape: ", y_pred.shape)
        print("y_test shape: ", y_test.shape)

        y_predi = np.argmax(y_pred, axis=3)
        y_testi = np.argmax(y_test, axis=3)
        print("shape of y_testi and y_predi ", y_testi.shape, y_predi.shape)

        dpu_IoU(y_testi, y_predi)

        # print results
        print("Processed", len(x_test), "images")
        print("FINISHED")
    finally:
        if USE_DPU:
            # Destroy the task before its kernel, then detach from the driver.
            if task is not None:
                n2cube.dpuDestroyTask(task)
            if kernel is not None:
                n2cube.dpuDestroyKernel(kernel)
            n2cube.dpuClose()