Exemplo n.º 1
0
def _fscale(data, split=False, load=False, verbose=False):
    """Min-max scale ``data`` column-wise and return the scaled array.

    The per-column minimum and range are kept in ``scaled.npy`` under the
    configured ``root_data_path`` as a two-row array (row 0: minimum,
    row 1: range).

    Args:
        data: array-like of samples; converted to a float64 numpy array.
        split: when true, fit the scaler only on the first
            ``config['train_d']`` rows, then scale every row with those
            statistics.
        load: when true and the statistics file exists, scale with the
            stored statistics instead of fitting. When true, nothing is
            written back to disk.
        verbose: print progress messages.

    Returns:
        The scaled data as a numpy array.
    """
    # initialize scaler
    scaler = pp.MinMaxScaler()

    # initialize variables
    config = _configinfo()
    sdpath = config['root_data_path'] + 'scaled.npy'

    # scale data
    if verbose: print('scaling features............... ', end = '')
    data = np.array(data, dtype='float64')
    if load and os.path.isfile(sdpath):
        # read the statistics file once; row 0 is the minimum, row 1 the range
        stats = np.load(sdpath)
        m = stats[0]
        r = stats[1]
        # constant columns have zero range; divide by 1 to avoid 0/0
        r[r == 0] = 1
        data = (data - m) / r
    else:
        # NOTE: this branch is also taken when load=True but the statistics
        # file is missing; the scaler is then fit on the given data and the
        # result is not saved.
        fit_rows = data[:config['train_d']] if split else data
        scaler.fit(fit_rows)
        m = scaler.data_min_
        r = scaler.data_range_
        # Scaling is row-independent, so transforming the whole array equals
        # transforming the train/validation slices separately and stacking
        # them, and it does not fail when the validation slice is empty.
        data = scaler.transform(data)
    if verbose: print('done')

    # save scaled config
    if not load: np.save(sdpath, np.vstack((m, r)))

    # return scaled data
    return data
Exemplo n.º 2
0
def main():
    """Detect objects in an image, classify each one and show the result.

    Steps: load the configuration and the stored classifier, read the image
    given by ``args.filepath``, convert it to grayscale, blur and threshold
    it (inverse binary), find the external contours, and for every bounding
    rectangle taller than 8 pixels: pad the region to a square, resize it to
    ``(shape_x, shape_y)``, dilate, run the configured preprocessing and
    predict a label, which is drawn onto the image. Intermediate images are
    written next to ``root_data_path`` when ``args.save`` is set, and the
    annotated image is displayed until a key is pressed.

    Exits via ``sys.exit()`` when the classifier or the image is missing,
    the image extension is unsupported, or the image cannot be decoded.
    """
    # parse arguments
    args = _args()

    # begin time
    t_beg = time.time()

    # load config
    config     = _configinfo()
    preprocess = config['preprocess']
    shape_x    = config['shape_x']
    shape_y    = config['shape_y']
    clfrpath   = config['clfr_data_path']
    rootpath   = config['root_data_path']
    filepath   = args.filepath
    # output prefix: data root + input file name up to its first dot
    savename   = rootpath + os.path.split(filepath)[-1].split('.')[0]

    if args.verbose: print('\ninitializing configurations.... done')

    # load classifier
    if args.verbose: print('loading classifier............. ', end = '')
    if os.path.isfile(clfrpath):
        classifier = skex.joblib.load(clfrpath)
        if args.verbose: print('done')
    else:
        if args.verbose: print('not found\n')
        sys.exit()

    # load image
    if args.verbose: print('loading image.................. ', end = '')
    if os.path.isfile(filepath):
        (_, extn) = os.path.splitext(filepath)
        # compare case-insensitively so e.g. '.JPG' is accepted too
        if extn.lower() in ['.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff']:
            image = cv.imread(filepath)
            # imread signals a decode failure by returning None, not raising
            if image is None:
                if args.verbose: print('unreadable image\n')
                sys.exit()
            if args.verbose: print('done')
        else:
            if args.verbose: print('unsupported format\n')
            sys.exit()
    else:
        if args.verbose: print('not found\n')
        sys.exit()

    # keep an untouched copy; 'image' itself is drawn on below
    image_input = image.copy()

    # convert image to grayscale
    if args.verbose: print('converting colorspace.......... ', end = '')
    image_gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    if args.verbose: print('done')

    # apply gaussian blurring to remove noise
    if args.verbose: print('removing noise from image...... ', end = '')
    image_blur = cv.GaussianBlur(image_gray, (3, 3), 0, 0)
    if args.verbose: print('done')

    # threshold image
    if args.verbose: print('thresholding image............. ', end = '')
    (_, image_th) = cv.threshold(image_blur, 95, 255, cv.THRESH_BINARY_INV)
    if args.verbose: print('done')

    image_rois = image_th.copy()

    # find contours
    if args.verbose: print('finding contours in  image..... ', end = '')
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; contours is second-to-last in both
    contours = cv.findContours(image_th.copy(), cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE)[-2]
    if args.verbose: print('done')

    # find bounding rectangle around each contour
    bn_rects = [cv.boundingRect(cntr) for cntr in contours]
    if args.verbose: print('number of objects detected..... %d' %(len(bn_rects)))

    # process each bounding rectangle
    for loop, rect in enumerate(bn_rects):
        # progress counts every rectangle visited, including skipped ones
        if args.verbose: print('\rprocessing objects............. %d%%'
                               %(loop*100//len(bn_rects)), end = '')

        # attributes of bounding rectangle
        (x, y, w, h) = rect

        # ignore tiny objects assuming them as noise
        if h <= 8:
            continue

        # draw bounding rectangle on images
        cv.rectangle(image_rois, (x, y), (x+w, y+h), (255, 255, 255), 1)
        cv.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # extract region of interest from thresholded image using attributes of bounding rectangle
        image_roi = image_th[y:y+h, x:x+w]

        # pad region of interest into a centered square 1.8x its longer side
        length = int(1.8 * max(w, h))
        image_pad = np.zeros((length, length), dtype='uint8')
        i = (length - h) // 2
        j = (length - w) // 2
        image_pad[i:i+h, j:j+w] = image_roi

        # resize padded image
        image_roi = cv.resize(image_pad, (shape_x, shape_y), interpolation=cv.INTER_AREA)

        # perform dilation as morphological transformation
        # NOTE(review): the kernel argument is the tuple (3, 3), not a 3x3
        # array - confirm it matches how the classifier's training data was
        # prepared before changing it.
        image_roi = cv.dilate(image_roi, (3, 3))

        # preprocess roi
        image_roi = image_roi.reshape(1, -1)

        if   preprocess == 'haar': X = _haar(image_roi, load=False)
        elif preprocess == 'hogs': X = _hogs(image_roi, load=False)
        elif preprocess == 'sift': X = _sift(image_roi, load=False)
        elif preprocess == 'surf': X = _surf(image_roi, load=False)
        else:X = _fscale(image_roi, load=True)

        X = X.reshape(1, -1)

        # predict label
        pred = classifier.predict(X)
        pred = pred[0]

        # put predicted label on image
        cv.putText(image, str(pred), (x, y-8), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    if args.verbose: print('\rprocessing objects............. done')

    # save images
    if args.save:
        if args.verbose: print('saving images.................. ', end = '')
        cv.imwrite(savename+'_1_input.png' , image_input)
        cv.imwrite(savename+'_2_gray.png'  , image_gray)
        cv.imwrite(savename+'_3_blur.png'  , image_blur)
        cv.imwrite(savename+'_4_th.png'    , image_th)
        cv.imwrite(savename+'_5_rois.png'  , image_rois)
        cv.imwrite(savename+'_6_output.png', image)
        if args.verbose: print('done')

    # end time
    t_end = time.time()

    if args.verbose: print('processing time................ %.2f sec\n'
                           %(t_end - t_beg))

    # display image with predictions
    cv.imshow('DIGIT RECOGNIZER', image)

    cv.waitKey(0)
    cv.destroyAllWindows()