Example #1
# The snippet begins mid-class; the imports, class header, and __init__
# below are reconstructed from the usage in __main__ (subclassing the
# torch Dataset is an assumption based on the class's protocol).
import os

import cv2
import numpy as np
from torch.utils.data import Dataset


class captchaDataset(Dataset):
    def __init__(self, image_abs_path):
        self._init_dataset(image_abs_path)

    def __len__(self):
        return self.dataset.shape[0]

    def __getitem__(self, index):
        return self.dataset[index], self.label[index]

    def _init_dataset(self, image_abs_path):
        for (_, _, filelist) in os.walk(image_abs_path):
            self.dataset = np.array(
                [[cv2.imread(os.path.join(image_abs_path, filename), 1)]
                 for filename in filelist])
            self.label = np.array(
                [to_argmax(filename[:5]) for filename in filelist])


if __name__ == "__main__":
    IMAGE_PATH = "./split/test"
    dataset = captchaDataset(IMAGE_PATH)
    print("Length of dataset:",
          len(dataset))  # for verifying whether all data has benn loaded
    (images, labels) = dataset[0:2]
    print(images.shape)

    for image, label in zip(images, labels):
        # the dataset holds plain numpy arrays, so no .cpu() calls are needed;
        # the to_argmax/argmax_to_string round trip simply reproduces label
        visualize(image, to_argmax(argmax_to_string(label)))

    # Or show it this way:
    # img = np.array(image[0])
    # cv2.imshow("The first image", img)
    # cv2.waitKey(0)
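
to_argmax and argmax_to_string are project helpers that this snippet does not define. A minimal sketch of the pair, assuming 5-character digit captchas (the charset and both signatures are assumptions, not the original code):

import numpy as np

CHARSET = "0123456789"  # assumed alphabet

def to_argmax(text):
    # "31415" -> array([3, 1, 4, 1, 5]): index of each character in CHARSET
    return np.array([CHARSET.index(ch) for ch in text])

def argmax_to_string(label):
    # array([3, 1, 4, 1, 5]) -> "31415": the inverse of to_argmax
    return "".join(CHARSET[int(i)] for i in label)
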
Example #2
matchID = 0
for slide in range(0, NUM_SLIDES):
    # note: the frame loop also runs over NUM_SLIDES, which assumes the
    # number of sampled frames equals the number of slides
    for frame in range(0, NUM_SLIDES):

        slidekpts, slideDescs = slideImgKeyptsDesc[slideNames[slide]]
        framekpts, frameDescs = vidImgKeyptsDesc[videoFrameNames[frame]]
        matches = flann.knnMatch(np.asarray(slideDescs, np.float32),
                                 np.asarray(frameDescs, np.float32),
                                 k=2)

        matchPairs = []
        distances = []
        good = []
        slideKeypoints = []
        frameKeypoints = []
        for m, n in matches:
            if m.distance < 0.75*n.distance:

                slideKeypoints.append(slidekpts[m.queryIdx])
                frameKeypoints.append(framekpts[m.trainIdx])
                distances.append(m.distance)
        matchID += 1
        distances.sort(reverse=True)

        matchMetric[(slide, frame)] = sum(distances[0:10]) / len(matches)
        matchMetricNotNormalized[(slide, frame)] = sum(distances[0:10])
        matchSlideFrameDict[(slide, frame)] = matchPairs


visualize(matchMetric, NUM_SLIDES, "./new_results/normalized_distanceReversed.jpg")
visualize(matchMetricNotNormalized, NUM_SLIDES, "./new_results/distanceReversed.jpg")

predictARGMIN(matchMetric)
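
visualize and predictARGMIN are also project helpers that are not shown here. A plausible sketch, assuming visualize turns the {(slide, frame): score} dict into a matrix, saves it as a heatmap, and returns the matrix, and that predictARGMIN picks the lowest-scoring slide per frame; note the snippets pass predictARGMIN either the raw dict (this example) or the returned matrix (Example #5), and Example #6 uses a four-argument visualize variant. Everything below is an assumption:

import numpy as np
import matplotlib.pyplot as plt

def visualize(metric, num_slides, save_path):
    # square matrix: one row per slide, one column per frame
    mat = np.zeros((num_slides, num_slides))
    for (slide, frame), score in metric.items():
        mat[slide, frame] = score
    plt.imshow(mat, cmap="hot")
    plt.colorbar()
    plt.savefig(save_path)
    plt.clf()
    return mat

def predictARGMIN(mat):
    # for every frame (column), predict the slide (row) with the lowest score
    print(np.argmin(np.asarray(mat), axis=0))
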
Example #3
        # this fragment begins inside the slide/frame double loop of the
        # previous example
        slidekpts, slideDescs = slideImgKeyptsDesc[slideNames[slide]]
        framekpts, frameDescs = vidImgKeyptsDesc[videoFrameNames[frame]]
        matches = flann.knnMatch(np.asarray(slideDescs, np.float32),
                                 np.asarray(frameDescs, np.float32),
                                 k=2)

        matchPairs = []
        distances = []
        good = []
        slideKeypoints = []
        frameKeypoints = []
        for m, n in matches:
            if m.distance < 0.75 * n.distance:
                print("start")
                print(len(slidekpts))
                print(len(framekpts))
                print(m.trainIdx)
                print(m.queryIdx)

                slideKeypoints.append(slidekpts[m.queryIdx])
                frameKeypoints.append(framekpts[m.trainIdx])
                distances.append(m.distance)
        matchID += 1
        distances.sort(reverse=True)

        print(distances)
        matchMetric[(slide, frame)] = sum(distances[0:10])
        matchSlideFrameDict[(slide, frame)] = matchPairs

visualize(matchMetric, 28, "./distanceReversed.jpg")
Example #4
    def match_and_draw(match, r_threshold):
        m = match(desc1, desc2, r_threshold)
        matched_p1 = np.array([kp1[i].pt for i, j in m])
        matched_p2 = np.array([kp2[j].pt for i, j in m])
        H, status = cv2.findHomography(matched_p1, matched_p2, cv2.RANSAC, 5.0)
        print('%d / %d  inliers/matched' % (np.sum(status), len(status)))
        
        #print matched_p1
        #print "---------"
        #print matched_p2
                
        # Size of the resulting mosaic (twice the input image)
        size = (img1.shape[1] * 2, img1.shape[0] * 2)
        # Initial preparation: shift the images away from the corners
        # of the resulting mosaic
        print('center of images')
        c_y, c_x = (np.asarray(img1.shape[:2]) / 2.).tolist()
        #c_y, c_x = (np.asarray(img2.shape[:2]) / 2.).tolist()
        
        
        # Translate the matched points so the image centre becomes the origin
        for i in range(matched_p1.shape[0]):
            matched_p1[i] -= np.array([c_x, c_y])
            matched_p2[i] -= np.array([c_x, c_y])
        #print matched_p1
        #print "---------"
        #print matched_p2
        
        # Initial parameters
        theta_1 = np.array([0, 1., 1, 0, 0, 0, 0])
        theta_2 = np.array([0, 1., 1, 0, 0, 0, 0])
         
        src = np.array(matched_p1[3:6], np.float32)
        dst = np.array(matched_p2[3:6], np.float32)
        warp_affine = cv2.getAffineTransform(src, dst)
        
        print "warp_affine ", warp_affine
        
        theta_1[0] = -np.cos(warp_affine[0][0])
        theta_1[5] = -warp_affine[0][2]
        theta_1[6] = -warp_affine[1][2] 
         
        # parameters for LMA (Levenberg-Marquardt)
        lam = 10.0
        penalty = 10e2
        threshold = 0.000001
        # Run Levenberg-Marquardt
        params = lma.levenberg_marquardt(matched_p1,
                                         matched_p2,
                                         theta_1,
                                         theta_2,
                                         lam,
                                         penalty, 
                                         threshold,
                                         img1,
                                         img2)
        
        t_1 = params[:params.size // 2]
        t_2 = params[params.size // 2:]

        print("T_1", t_1)
        print("T_2", t_2)
        
        print('points after transform')
        for i in range(matched_p1.shape[0]):
            err_1, err_2 = util.transformPoint(matched_p1[i], t_1), util.transformPoint(matched_p2[i], t_2)
            print(err_1, err_2, "  --error--  ", np.abs(err_1 - err_2))

        print('---------------------------------------------------')
      
        result = util.stitch_for_visualization(img1, img2, t_1, t_2, c_x, c_y, size)
        
        util.visualize(img1, img2, matched_p1, matched_p2, t_1, t_2)
        
        cv2.imwrite('output.jpg', result)
    
        return None
Example #5
                # this fragment begins inside the ratio-test loop over matches
                frameKeypoints.append(framekpts[m.trainIdx].pt)
                good.append(m)
        '''
        if (frame == 5 and slide==5):
            print(slideImgPath + slideNames[slide])
            print(vidFramePath + videoFrameNames[frame])
            slideImg = cv2.imread(slideImgPath + slideNames[slide], 0)
            vidImg = cv2.imread(vidFramePath + videoFrameNames[frame], 0)
                
            drawMatches(slidekpts, framekpts, slideImg, vidImg, good)
            
        if (frame == 6 and slide==5):
            print(slideImgPath + slideNames[slide])
            print(vidFramePath + videoFrameNames[frame])
            slideImg = cv2.imread(slideImgPath + slideNames[slide], 0)
            vidImg = cv2.imread(vidFramePath + videoFrameNames[frame], 0)
                
            drawMatches(slidekpts, framekpts, slideImg, vidImg, good)
        '''
        matchMetric[(slide, frame)] = computeRDistanceDifference(
            slideKeypoints, frameKeypoints, slideDimensions, vidDimensions)
        matchMetricNotNormalized[(slide, frame)] = computeRDistanceDifference(
            slideKeypoints, frameKeypoints, slideDimensions, vidDimensions,
            False)

mat = visualize(matchMetric, NUM_SLIDES,
                "./normalized_differenceCentroidDifferences.jpg")
visualize(matchMetricNotNormalized, NUM_SLIDES,
          "./differenceCentroidDifferences.jpg")

predictARGMIN(mat)
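
computeRDistanceDifference is not defined on this page either. Going only by its name and the "CentroidDifferences" figure filenames, one plausible reading is a sum of how much each matched keypoint's distance to its own image's centroid differs between slide and frame, optionally normalized by the image diagonal (the signature and the metric itself are guesses):

import numpy as np

def computeRDistanceDifference(slide_pts, frame_pts, slide_dim, vid_dim,
                               normalized=True):
    if not slide_pts:
        return float("inf")  # no matches survived the ratio test
    s = np.asarray(slide_pts, dtype=np.float64)
    f = np.asarray(frame_pts, dtype=np.float64)
    # distance of every matched keypoint to its own image's centroid
    ds = np.linalg.norm(s - s.mean(axis=0), axis=1)
    df = np.linalg.norm(f - f.mean(axis=0), axis=1)
    if normalized:
        ds /= np.hypot(*slide_dim)  # scale by the image diagonal
        df /= np.hypot(*vid_dim)
    # matched pairs should sit at similar relative radii in both images
    return float(np.sum(np.abs(ds - df)))
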
Example #6
def compute_match_save(SLIDES_PATH, SLIDE_START, SLIDE_END,
                       FRAME_SAMPLE_SAVE_FOLDER,
                       SLIDE_KEYPTS_DESCS_PKL_SAVE_PATH, VID_START, VID_END,
                       VID_KEYPTS_DESCS_PKL_SAVE_PATH,
                       FINAL_FIGURES_RESULTS_SAVE_PATH):

    NUM_SLIDES = SLIDE_END - SLIDE_START
    NUM_FRAMES = VID_END - VID_START
    ## getting filenames
    videoFrameNames = [
        filename for filename in os.listdir(FRAME_SAMPLE_SAVE_FOLDER)
        if filename.endswith(".png")
    ]
    videoFrameNames = slidesInRange(videoFrameNames, VID_START, VID_END)

    slideNames = [
        filename for filename in os.listdir(SLIDES_PATH)
        if filename.endswith(".png")
    ]
    slideNames = slidesInRange(slideNames, SLIDE_START, SLIDE_END)

    # the original read these via module-level slideImgPath/vidFramePath even
    # though the function takes its own path parameters; the parameters are
    # used instead
    sampleImgSlides = cv2.imread(SLIDES_PATH + slideNames[0], 0)
    sampleVidSlides = cv2.imread(FRAME_SAMPLE_SAVE_FOLDER + videoFrameNames[0], 0)

    ## slide and video dimensions
    slideDimensions = (sampleImgSlides.shape[0], sampleImgSlides.shape[1])
    vidDimensions = (sampleVidSlides.shape[0], sampleVidSlides.shape[1])

    ## load the pickled keypoint/descriptor dictionaries
    with open(SLIDE_KEYPTS_DESCS_PKL_SAVE_PATH, "rb") as f:
        slideImgKeypts = pickle.load(f)
    with open(VID_KEYPTS_DESCS_PKL_SAVE_PATH, "rb") as f:
        vidImgKeypts = pickle.load(f)

    slideImgKeyptsDesc = dict()
    vidImgKeyptsDesc = dict()

    ## load slide img, keypoints
    for sn in slideNames:
        path = SLIDES_PATH + sn
        slideImgKeyptsDesc[sn] = convertPickledToKPDesc(slideImgKeypts[path])

    for vidName in videoFrameNames:
        path = FRAME_SAMPLE_SAVE_FOLDER + vidName
        vidImgKeyptsDesc[vidName] = convertPickledToKPDesc(vidImgKeypts[path])

    FLANN_INDEX_KDTREE = 0
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)

    ## FLANN-based matcher; knnMatch below returns lists of DMatch objects
    flann = cv2.FlannBasedMatcher(index_params, search_params)

    matchSlideFrameDict = dict()

    matchMetric = dict()
    matchMetricNotNormalized = dict()

    ## cross matching
    matchID = 0
    for slide in range(0, NUM_SLIDES):
        for frame in range(0, NUM_FRAMES):
            print("Slide, Frame", (slide, frame), slideNames[slide],
                  videoFrameNames[frame])
            slidekpts, slideDescs = slideImgKeyptsDesc[slideNames[slide]]
            framekpts, frameDescs = vidImgKeyptsDesc[videoFrameNames[frame]]
            matches = flann.knnMatch(np.asarray(slideDescs, np.float32),
                                     np.asarray(frameDescs, np.float32),
                                     k=2)

            matchPairs = []
            distances = []
            good = []
            slideKeypoints = []
            frameKeypoints = []

            for m, n in matches:
                if m.distance < 0.75 * n.distance:
                    slideKeypoints.append(slidekpts[m.queryIdx].pt)
                    frameKeypoints.append(framekpts[m.trainIdx].pt)
                    good.append(m)
            '''
            slideImg = cv2.imread(slideImgPath + slideNames[slide], 0)  
            vidImg = cv2.imread(vidFramePath + videoFrameNames[frame], 0)
            saveMatches(slidekpts, framekpts, slideImg, vidImg, good, savePath + str(slide) + '-' + str(frame) + '-match.jpg')
            '''

            matchMetric[(slide, frame)] = computeSumDiffKeypoints(
                slideKeypoints, frameKeypoints, slideDimensions, vidDimensions)
            matchMetricNotNormalized[(slide, frame)] = computeSumDiffKeypoints(
                slideKeypoints, frameKeypoints, slideDimensions, vidDimensions,
                False)

    ## save visualizations
    mat = visualize(
        matchMetric, NUM_SLIDES, NUM_FRAMES, FINAL_FIGURES_RESULTS_SAVE_PATH +
        "normalized_sumDistanceKeypoints.jpg")
    visualize(matchMetricNotNormalized, NUM_SLIDES, NUM_FRAMES,
              FINAL_FIGURES_RESULTS_SAVE_PATH + "sumDistanceKeypoints.jpg")

    ## save matrix as txt
    np.savetxt(FINAL_FIGURES_RESULTS_SAVE_PATH +
               "slides_by_vidframes_metric.txt",
               mat,
               fmt='%d')

    col_best_prediction = {}
    for col in range(NUM_FRAMES):
        slides = list(mat[:, col])
        best = slides.index(min(slides))
        col_best_prediction[col] = best

    print(json.dumps(col_best_prediction, sort_keys=True))
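
convertPickledToKPDesc is another helper that is not shown. cv2.KeyPoint objects cannot be pickled directly, so a common workaround is to pickle plain tuples and rebuild the keypoints on load; a sketch under that assumption (the tuple layout is guessed):

import cv2
import numpy as np

def convertPickledToKPDesc(pickled):
    # rebuild cv2.KeyPoint objects from the plain tuples that were pickled
    keypoints, descriptors = [], []
    for pt, size, angle, response, octave, class_id, desc in pickled:
        keypoints.append(cv2.KeyPoint(pt[0], pt[1], size, angle,
                                      response, octave, class_id))
        descriptors.append(desc)
    return keypoints, np.asarray(descriptors)
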
Example #7
    def match_and_draw(match, r_threshold):
        m = match(desc1, desc2, r_threshold)
        matched_p1 = np.array([kp1[i].pt for i, j in m])
        matched_p2 = np.array([kp2[j].pt for i, j in m])
        H, status = cv2.findHomography(matched_p1, matched_p2, cv2.RANSAC, 5.0)
        print('%d / %d  inliers/matched' % (np.sum(status), len(status)))
        
        #print matched_p1
        #print "---------"
        #print matched_p2
                
        # Size of the resulting mosaic (twice the input image)
        size = (img1.shape[1] * 2, img1.shape[0] * 2)
        # Initial preparation: shift the images away from the corners
        # of the resulting mosaic
        print('center of images')
        c_y, c_x = (np.asarray(img1.shape[:2]) / 2.).tolist()
        #c_y, c_x = (np.asarray(img2.shape[:2]) / 2.).tolist()
        
        
        # Translate the matched points so the image centre becomes the origin
        for i in range(matched_p1.shape[0]):
            matched_p1[i] -= np.array([c_x, c_y])
            matched_p2[i] -= np.array([c_x, c_y])
        #print matched_p1
        #print "---------"
        #print matched_p2
        
        #Parameters for Gradient Descent
        iterations = 1000
        gamma = 0.000002
        gamma_transl = 0.05
        #gamma = 10e-10
        lambd = 10e4
        # Initial parameters
        theta_1 = np.array([0, 1., 1, 0, 0, 0, 0])
        theta_2 = np.array([0, 1., 1, 0, 0, 0, 0])
        
        src = np.array(matched_p1[0:3], np.float32)
        dst = np.array(matched_p2[0:3], np.float32)
        warp_affine = cv2.getAffineTransform(src, dst)
        
        print "warp_affine ", warp_affine
        
        theta_1[0] = -np.cos(warp_affine[0][0])
        theta_1[5] = -warp_affine[0][2]
        theta_1[6] = -warp_affine[1][2]
        
        print "warp_affine.ravel()", warp_affine.ravel()
        #theta_1 = np.concatenate((warp_affine.ravel(), [0, 0, 1]))
        print "theta_1", theta_1
         
        # Run gradient descent
        t_1, t_2 = gradientDescent(iterations, matched_p1, matched_p2,
                                   theta_1, theta_2, gamma, lambd, gamma_transl,
                                   img1, img2, size, c_x, c_y)
        
        print('points after transform')
        for i in range(matched_p1.shape[0]):
            err_1, err_2 = util.transformPoint(matched_p1[i], t_1), util.transformPoint(matched_p2[i], t_2)
            print(err_1, err_2, "  --error--  ", np.abs(err_1 - err_2))

        print('---------------------------------------------------')
      
        result = util.stitch_for_visualization(img1, img2, t_1, t_2, c_x, c_y, size)
        util.visualize(img1, img2, matched_p1, matched_p2, t_1, t_2)
        
        cv2.imwrite('output.jpg', result)
    
        return None
Example #8
matchID = 0
for slide in range(0, NUM_SLIDES):
    # as in Example #2, the frame loop also runs over NUM_SLIDES
    for frame in range(0, NUM_SLIDES):

        slidekpts, slideDescs = slideImgKeyptsDesc[slideNames[slide]]
        framekpts, frameDescs = vidImgKeyptsDesc[videoFrameNames[frame]]
        matches = flann.knnMatch(np.asarray(slideDescs, np.float32),
                                 np.asarray(frameDescs, np.float32),
                                 k=2)

        matchPairs = []
        distances = []
        good = []
        slideKeypoints = []
        frameKeypoints = []
        for m, n in matches:
            if m.distance < 0.75 * n.distance:

                slideKeypoints.append(slidekpts[m.queryIdx])
                frameKeypoints.append(framekpts[m.trainIdx])
                good.append(m)
                distances.append(m.distance)
        matchID += 1
        distances.sort(reverse=True)

        print(distances)
        matchMetric[(slide, frame)] = len(good)
        matchSlideFrameDict[(slide, frame)] = matchPairs

visualize(matchMetric, NUM_SLIDES, "./numFriends.jpg")
Example #9
    def test_visualize(self):
        plt.clf()
        utilities.visualize(utilities.load_data()[-1])
Example #10
    # this fragment begins inside an outer loop over slides
    for frame in range(0, 28):

        slidekpts, slideDescs = slideImgKeyptsDesc[slideNames[slide]]
        framekpts, frameDescs = vidImgKeyptsDesc[videoFrameNames[frame]]
        matches = flann.knnMatch(np.asarray(slideDescs, np.float32),
                                 np.asarray(frameDescs, np.float32),
                                 k=2)

        matchPairs = []
        distances = []
        good = []
        slideKeypoints = []
        frameKeypoints = []
        for m, n in matches:
            if m.distance < 0.75 * n.distance:
                slideKeypoints.append(slidekpts[m.queryIdx].pt)
                frameKeypoints.append(framekpts[m.trainIdx].pt)
            '''
            if (frame == 23 and slide==18):
                print(slideImgPath + slideNames[slide])
                print(vidFramePath + videoFrameNames[frame])
                slideImg = cv2.imread(slideImgPath + slideNames[slide], 0)
                vidImg = cv2.imread(vidFramePath + videoFrameNames[frame], 0)
                
                drawMatches(slidekpts, framekpts, slideImg, vidImg, good)
            '''
        matchMetric[(slide, frame)] = computeRDistanceDifference(
            slideKeypoints, frameKeypoints, slideDimensions, vidDimensions)

visualize(matchMetric, 28, "./distanceCentroid.jpg")