Example #1
def main():

    """ Attach to DPU driver and prepare for running """
    n2cube.dpuOpen()

    """ Create DPU Kernels for CONV NODE in imniResNet """
    kernel = n2cube.dpuLoadKernel(KERNEL_CONV)

    """ Create DPU Tasks for CONV NODE in miniResNet """
    task = n2cube.dpuCreateTask(kernel, 0)

    listimage = os.listdir(calib_image_dir)

    for i in range(len(listimage)):
        path = os.path.join(calib_image_dir, listimage[i])
        if os.path.splitext(path)[1] != ".png":
            continue
        print("Loading %s" %listimage[i])

        """ Load image and Set image into CONV Task """
        imageRun = graph_input_fn.calib_input(path)
        imageRun = imageRun.reshape(imageRun.shape[0] * imageRun.shape[1] * imageRun.shape[2])
        input_len = len(imageRun)
        n2cube.dpuSetInputTensorInHWCFP32(task, CONV_INPUT_NODE, imageRun, input_len)

        """  Launch miniRetNet task """
        n2cube.dpuRunTask(task)

        """ Get output tensor address of CONV """
        conf = n2cube.dpuGetOutputTensorAddress(task, CONV_OUTPUT_NODE)
        
        """ Get output channel of CONV  """
        channel = n2cube.dpuGetOutputTensorChannel(task, CONV_OUTPUT_NODE)
        
        """ Get output size of CONV  """
        size = n2cube.dpuGetOutputTensorSize(task, CONV_OUTPUT_NODE)
        
        softmax = [0 for i in range(size)]
       
        """ Get output scale of CONV  """
        scale = n2cube.dpuGetOutputTensorScale(task, CONV_OUTPUT_NODE)
        
        batchSize = size // channel
        """ Calculate softmax and show TOP5 classification result """
        n2cube.dpuRunSoftmax(conf, softmax, channel, batchSize, scale)
        TopK(softmax, calib_image_list)

    """ Destroy DPU Tasks & free resources """
    n2cube.dpuDestroyTask(task)
    """ Destroy DPU Kernels & free resources """
    rtn = n2cube.dpuDestroyKernel(kernel)
    """ Dettach from DPU driver & free resources """
    n2cube.dpuClose()
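
The TopK helper called above is not part of this snippet. A minimal sketch of what it might look like, assuming calib_image_list points at a plain-text file with one class label per line (the file format and the k=5 default are assumptions, not taken from the original code):

import numpy as np

def TopK(softmax, label_file, k=5):
    """Print the top-k entries of a flat softmax vector (sketch).
    Assumes label_file holds one human-readable class name per line."""
    with open(label_file, "r") as f:
        labels = [line.strip() for line in f]
    probs = np.asarray(softmax, dtype=np.float32)
    for rank, idx in enumerate(probs.argsort()[::-1][:k], start=1):
        name = labels[idx] if idx < len(labels) else str(idx)
        print("Top[%d] prob = %.6f  name = %s" % (rank, probs[idx], name))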
Example #2
    def run(self):
        overlay = DpuOverlay("./bitstream/dpu.bit")
        overlay.load_model("./model/dpu_tf_efficientnet.elf")
        cv2.setUseOptimized(True)
        cv2.setNumThreads(4)
        threadnum = 4
        num_iterations = 0
        listimage = [[] for _ in range(threadnum)]
        result = [[] for _ in range(threadnum)]
        img_processed = [[] for _ in range(threadnum)]
        
        cnt = 0
        thread = 0
        list_image = sorted([i for i in os.listdir(image_folder) if i.endswith("JPEG")])
        picture_num = len(list_image)
        for i in list_image:
            listimage[thread].append(i)
            if cnt % math.ceil(picture_num/threadnum) == 0 and cnt != 0:
                thread = thread + 1
            cnt = cnt + 1
        
        n2cube.dpuOpen()
        kernel = n2cube.dpuLoadKernel(KERNEL_CONV)
        threadAll = []
        for i in range(threadnum):
            t1 = threading.Thread(target=self.run_dpu_task, args=(kernel, i, len(listimage[i]), listimage, result))
            threadAll.append(t1)
        for x in threadAll:
            x.start()
        for x in threadAll:
            x.join()               

        with open(RESULT_FILE, 'w') as result_file:
            for item in result:
                for i in item:
                    result_file.write("%s\n" % i)
        
        rtn = n2cube.dpuDestroyKernel(kernel)
        n2cube.dpuClose()
        # Run the whole data set and write the outputs to the result file.
        # Please see README and "classification_result.sample" to know the result file format.
        #time.sleep(10)

        return
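
The per-thread worker self.run_dpu_task is referenced but not shown. Below is a sketch of how such a worker could look, reusing only n2cube calls that appear elsewhere on this page; CONV_INPUT_NODE, CONV_OUTPUT_NODE, image_folder and the preprocess() helper are assumed names, and the imports (os, cv2, numpy as np, n2cube) are assumed to be available at module level:

    def run_dpu_task(self, kernel, thread_id, count, listimage, result):
        """Sketch of a per-thread worker: each thread owns one DPU task."""
        task = n2cube.dpuCreateTask(kernel, 0)
        size = n2cube.dpuGetOutputTensorSize(task, CONV_OUTPUT_NODE)
        channel = n2cube.dpuGetOutputTensorChannel(task, CONV_OUTPUT_NODE)
        scale = n2cube.dpuGetOutputTensorScale(task, CONV_OUTPUT_NODE)
        for j in range(count):
            img = cv2.imread(os.path.join(image_folder, listimage[thread_id][j]))
            data = preprocess(img)  # assumed resize/normalize helper returning a flat float32 array
            n2cube.dpuSetInputTensorInHWCFP32(task, CONV_INPUT_NODE, data, len(data))
            n2cube.dpuRunTask(task)
            conf = n2cube.dpuGetOutputTensorAddress(task, CONV_OUTPUT_NODE)
            softmax = n2cube.dpuRunSoftmax(conf, channel, size // channel, scale)
            result[thread_id].append("%s %d" % (listimage[thread_id][j], int(np.argmax(softmax))))
        n2cube.dpuDestroyTask(task)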
Example #3
def main(argv):

    """Attach to DPU driver and prepare for runing"""
    n2cube.dpuOpen()

    """Create DPU Kernels for GoogLeNet"""
    kernel = n2cube.dpuLoadKernel(KERNEL_CONV)

    image_path = "./../common/image_224_224/"
    
    listimage = os.listdir(image_path)
    
    path = os.path.join(image_path, listimage[0])
    
    print("Loading  %s" %listimage[0])
    
    img = cv2.imread(path)
    
    threadAll = []
    global threadnum
    threadnum = int(argv[1])
    print("Input thread number is: %d" %threadnum)
    
    time1 = time.time()
    
    for i in range(threadnum):
        t1 = threading.Thread(target=RunDPU, args=(kernel, img, i))
        threadAll.append(t1)
    for x in threadAll:
        x.start()
    for x in threadAll:
        x.join()
    
    time2 = time.time()
    
    timetotal = time2 - time1
    fps = float(1000 / timetotal)
    print("%.2f FPS" %fps)

    """Destroy DPU Tasks & free resources"""
    rtn = n2cube.dpuDestroyKernel(kernel)

    """Dettach from DPU driver & release resources"""
    n2cube.dpuClose()
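
RunDPU is not defined in this listing, and the hard-coded 1000 in the FPS line suggests a fixed total frame count handled inside the workers. A sketch under that assumption; CONV_INPUT_NODE and the per-thread split of 1000 frames are assumptions, not part of the original:

def RunDPU(kernel, img, threadId):
    """Sketch of a per-thread worker: own task, repeated inference on the preloaded image."""
    task = n2cube.dpuCreateTask(kernel, 0)
    data = img.reshape(-1).astype("float32")
    count = 1000 // threadnum  # assumed split of the 1000 frames implied by the FPS calculation
    for _ in range(count):
        n2cube.dpuSetInputTensorInHWCFP32(task, CONV_INPUT_NODE, data, len(data))
        n2cube.dpuRunTask(task)
    n2cube.dpuDestroyTask(task)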
Example #4
def main():

    print("STARTING UNETv2 on DPU...")

    if USE_DPU:
        # Attach to DPU driver
        n2cube.dpuOpen()

        # Load DPU Kernel and create a task
        kernel = n2cube.dpuLoadKernel(KERNEL_CONV)
        task = n2cube.dpuCreateTask(kernel, 0)

    # load and preprocess images and load segmentation labels
    assert os.path.isdir(IMG_TEST_DIR)
    #print(IMG_TEST_DIR)
    x_test, y_test, img_file, seg_file = dpu_get_data(IMG_TEST_DIR,
                                                      SEG_TEST_DIR,
                                                      cfg.NUM_CLASSES,
                                                      cfg.WIDTH, cfg.HEIGHT)

    y_pred = []
    # process all images
    for i in range(len(x_test)):

        # opened image as BGR, convert it to RGB
        #B,G,R  = cv2.split(x_test[i])
        #imageRun = cv2.merge((R,G,B))
        imageRun = x_test[i]
        imageRun = imageRun.reshape(
            (imageRun.shape[0] * imageRun.shape[1] * imageRun.shape[2]))
        input_len = len(imageRun)

        if USE_DPU:
            # load pre-processed image as DPU input
            n2cube.dpuSetInputTensorInHWCFP32(task, CONV_INPUT_NODE, imageRun,
                                              input_len)
            dpu_in = n2cube.dpuGetInputTensor(task, CONV_INPUT_NODE)
            ti_scale = n2cube.dpuGetTensorScale(dpu_in)
            ti_h = n2cube.dpuGetTensorHeight(dpu_in)
            ti_w = n2cube.dpuGetTensorWidth(dpu_in)
            ti_sz = n2cube.dpuGetTensorSize(dpu_in)
            ti_ch = n2cube.dpuGetTensorChannel(dpu_in)
            if (i == 0):
                print(
                    "Input  tensor=%3d ch=%3d H=%3d W=%3d Size=%6d scale=%.4f" %
                    (i, ti_ch, ti_h, ti_w, ti_sz, ti_scale))
            # run DPU task
            n2cube.dpuRunTask(task)

            # get output tensor address
            dpu_out = n2cube.dpuGetOutputTensorAddress(task, CONV_OUTPUT_NODE)

            # get number of channels in output tensor
            to_ch = n2cube.dpuGetOutputTensorChannel(task, CONV_OUTPUT_NODE)
            # get size in bytes of output tensor
            to_sz = n2cube.dpuGetOutputTensorSize(task, CONV_OUTPUT_NODE)
            # get width output tensor
            to_w = n2cube.dpuGetOutputTensorWidth(task, CONV_OUTPUT_NODE)
            # get height output tensor
            to_h = n2cube.dpuGetOutputTensorHeight(task, CONV_OUTPUT_NODE)
            # get output tensor scale
            to_scale = n2cube.dpuGetOutputTensorScale(task, CONV_OUTPUT_NODE)

            softmax = np.zeros(to_sz, dtype=np.float32)

            if (i == 0):
                print("Output tensor=%3d ch=%3d H=%3d W=%3d Size=%6d" %
                      (i, to_ch, to_h, to_w, to_sz))
                print("Output tensor scaling factor", to_scale)

            softmax = n2cube.dpuRunSoftmax(dpu_out, to_ch, to_sz // to_ch,
                                           to_scale)

            prediction = softmax.reshape((to_h, to_w, to_ch))

            y_pred.append(prediction)
            if (i == 0):
                print("prediction shape: ", prediction.shape)

    # Calculate intersection over union for each segmentation class
    y_pred = np.asarray(y_pred)
    y_test = np.asarray(y_test)
    print("y_pred shape: ", y_pred.shape)
    print("y_test shape: ", y_test.shape)

    y_predi = np.argmax(y_pred, axis=3)
    y_testi = np.argmax(y_test, axis=3)
    print("shape of y_testi and y_predi ", y_testi.shape, y_predi.shape)

    dpu_IoU(y_testi, y_predi)

    # print results
    print("Processed", len(x_test), "images")
    print("FINISHED")

    if USE_DPU:
        # Destroy DPU Kernel & detach
        n2cube.dpuDestroyKernel(kernel)
        n2cube.dpuClose()
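
dpu_get_data and dpu_IoU are helpers from the surrounding project and are not shown. For orientation, a minimal per-class IoU sketch over the integer label maps passed at the call site (only the name and arguments match the code above; the implementation is an assumption):

import numpy as np

def dpu_IoU(y_true, y_pred):
    """Per-class intersection over union on integer-coded segmentation maps (sketch)."""
    classes = np.unique(np.concatenate((y_true.ravel(), y_pred.ravel())))
    ious = []
    for c in classes:
        inter = np.sum((y_true == c) & (y_pred == c))
        union = np.sum((y_true == c) | (y_pred == c))
        iou = inter / union if union > 0 else float("nan")
        ious.append(iou)
        print("class %2d IoU = %.4f" % (c, iou))
    print("mean IoU = %.4f" % np.nanmean(ious))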
Example #5
def run(image_folder, shortsize, KERNEL_CONV, KERNEL_CONV_INPUT,
        KERNEL_FC_OUTPUT, inputscale):

    start = time.time()
    #    listimage = [i for i in os.listdir(image_folder) if i.endswith("JPEG")]
    listimage = [i for i in os.listdir(image_folder) if i.endswith("jpg")]
    listimage.sort()
    #    wordstxt = os.path.join(image_folder, "words.txt")
    #    with open(wordstxt, "r") as f:
    #        lines = f.readlines()
    fo = open(resultname, "w")
    n2cube.dpuOpen()
    kernel = n2cube.dpuLoadKernel(KERNEL_CONV)
    task = n2cube.dpuCreateTask(kernel, 0)
    height, width, inputchannel, mean = parameter(task, KERNEL_CONV_INPUT)
    #    print("mean = %f"%mean[0])
    outsize = n2cube.dpuGetOutputTensorSize(task, KERNEL_FC_OUTPUT)
    #    print("size = %d"%size)
    outputchannel = n2cube.dpuGetOutputTensorChannel(task, KERNEL_FC_OUTPUT)
    #    print("outputchannel = %d"%outputchannel)
    conf = n2cube.dpuGetOutputTensorAddress(task, KERNEL_FC_OUTPUT)
    #    print("conf = {}".format(conf))
    #    print("inputscale = %f"%inputscale)
    inputscale = n2cube.dpuGetInputTensorScale(task, KERNEL_CONV_INPUT)
    #    print("inputscalenow = %f"%inputscale)
    outputscale = n2cube.dpuGetOutputTensorScale(task, KERNEL_FC_OUTPUT)
    #    print("outputscale = %f"%outputscale)
    imagenumber = len(listimage)
    print("\nimagenumber = %d\n" % imagenumber)
    softlist = []
    #    imagenumber = 1000
    correct = 0
    wrong = 0
    for i in range(imagenumber):
        print(f"i = {i+1}")
        print(listimage[i])
        #        path = os.path.join(image_folder, listimage[i])
        #        if i % 50 == 0:
        #        print("\r", listimage[i], end = "")
        path = os.path.join(image_folder, listimage[i])
        img = cv2.imread(path)
        imageRun = predict_label(img, task, inputscale, mean, height, width,
                                 inputchannel, shortsize, KERNEL_CONV_INPUT)
        input_len = len(imageRun)
        #        print(f"input_len = {input_len}")
        #        soft = threadPool.submit(run_dpu_task, outsize, task, outputchannel, conf, outputscale, listimage[i], imageRun, KERNEL_CONV_INPUT, KERNEL_FC_OUTPUT)
        #        softlist.append(soft)
        #    for future in as_completed(softlist):
        #        softmax, listimage = future.result()
        softmax, listimage[i] = run_dpu_task(outsize, task, outputchannel,
                                             conf, outputscale, listimage[i],
                                             imageRun, KERNEL_CONV_INPUT,
                                             KERNEL_FC_OUTPUT)
        correct, wrong = TopK(softmax, listimage[i], fo, correct, wrong)
        print("")

    fo.close()
    accuracy = correct / imagenumber
    print('Correct:', correct, ' Wrong:', wrong, ' Accuracy:', accuracy)
    n2cube.dpuDestroyTask(task)
    n2cube.dpuDestroyKernel(kernel)
    n2cube.dpuClose()
    print("")

    end = time.time()
    total_time = end - start
    print('\nAll processing time: {} seconds.'.format(total_time))
    print('\n{} ms per frame\n'.format(1000 * total_time / imagenumber))
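
The parameter() helper that supplies height, width, inputchannel and mean is not included above. A sketch using only input-tensor getters that appear in Example #4; the hard-coded mean values are a placeholder assumption and should be replaced with the model's real preprocessing constants:

def parameter(task, input_node):
    """Query input-tensor geometry from a DPU task (sketch)."""
    tensor = n2cube.dpuGetInputTensor(task, input_node)
    height = n2cube.dpuGetTensorHeight(tensor)
    width = n2cube.dpuGetTensorWidth(tensor)
    channel = n2cube.dpuGetTensorChannel(tensor)
    mean = [104.0, 117.0, 123.0]  # assumed BGR channel means, not read from the kernel
    return height, width, channel, mean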