Пример #1
0
def checkBallotFlipped(I, Iref, verbose=False):
    rszFac = sh.resizeOrNot(I.shape, sh.FLIP_CHECK_HEIGHT)
    Iref1 = sh.fastResize(Iref, rszFac)
    I1 = sh.fastResize(I, rszFac)
    IR = sh.fastFlip(I1)
    (H, Io, err) = imagesAlign(I1, Iref1, trfm_type='translation')
    (HR, IoR, errR) = imagesAlign(IR, Iref1, trfm_type='translation')

    if (verbose):
        print 'flip margin: ', err, errR

    if err > errR:
        return (True, sh.fastFlip(I), errR)
    else:
        return (False, I, err)
Пример #2
0
def imagesAlign(I, Iref, fillval=np.nan, trfm_type='similarity',
                vCells=1, hCells=1, rszFac=1, verbose=False,
                minArea=np.power(2, 11), applyWarp=True):
    """ Aligns I to IREF.
    Input:
        np.array I: Image you want to align. I must be larger than IREF.
            A 3-dimensional array is converted with sh.rgb2gray for the
            estimation; the final warp is still applied to I as passed in.
        np.array Iref: Image you want to align against.
        fillval: Passed to sh.imtransform as the fill value when the found
            transformation is applied to I.
        str trfm_type: What image transformation to solve for. They are (in
            order of complexity): 'translation', 'rigid', 'similarity',
            'affine', and 'projective'. A nice page that describes
            these are at:
                http://homepages.inf.ed.ac.uk/rbf/HIPR2/affine.htm
        int vCells, hCells: Params to allow aligning subcells of the
            image, followed by stitching. Appears to rarely be used. The
            cell path is only taken when BOTH values are greater than 1.
        float rszFac: Amount by which to scale the image - for
            performance, you want to scale down (i.e. 0.75).
        bool verbose: If True, print the alignment time; also forwarded to
            imagesAlign1.
        int minArea: Forwarded unchanged to imagesAlign1.
        applyWarp: Causes imagesAlign to apply the found transformation
            to the input image I and return it. Without applyWarp, the
            function will only return the transformation matrix. This is
            used when cropped images are passed to the function, so the
            warp should not yet be applied to the cropped image but rather
            the original image, which is now the responsibility of the
            caller.
    Output:
        (H, Ireg, err). H is the transformation matrix that was found
        to best align I to Iref. Ireg is the result of aligning I to
        Iref. err is the alignment error.
        If applyWarp is False, only (H, err) is returned.
        If vCells > 1 and hCells > 1, the result is (np.eye(3), Iout, -1)
        where Iout is a copy of Iref with every cell overwritten by the
        locally re-aligned image content; applyWarp is ignored on this path.
    """
    I1 = sh.rgb2gray(I) if len(I.shape) == 3 else I
    Iref1 = sh.rgb2gray(Iref) if len(Iref.shape) == 3 else Iref

    WARN_USER, ORIG_DTYPE = False, None
    if I1.dtype != 'float32':
        WARN_USER, ORIG_DTYPE = True, I1.dtype
        I1 = I1.astype('float32')
    if Iref1.dtype != 'float32':
        WARN_USER, ORIG_DTYPE = True, Iref1.dtype
        Iref1 = Iref1.astype('float32')
    if WARN_USER:
        print((
            "(Info) imagesAlign was called with input image dtype={0}. "
            "imagesAlign expects image dtype='float32' (Also, intensity vals in range "
            "[0.0,1.0]. The image dtype conversion was "
            "automatically done, but this slows down the computation a little. Consider "
            "trying to work in 'float32' in the first place if convenient for a little "
            "speed boost.").format(ORIG_DTYPE))

    # Cell-wise alignment: align globally first, then refine each padded
    # cell on its own and stitch the cell interiors back together.
    if (vCells > 1) and (hCells > 1):
        I2 = imagesAlign(I1, Iref1, trfm_type=trfm_type, minArea=minArea)[1]
        Iout = np.copy(Iref1)
        pFac = .25
        nrows, ncols = I1.shape[0], I1.shape[1]
        # Steps and pads are forced to int: they are used as slice bounds,
        # and fractional pads would make the stitch offset (i1 - i1p)
        # disagree with the row/column the padded crop really starts at.
        vStep = int(math.ceil(nrows / vCells))
        hStep = int(math.ceil(ncols / hCells))
        vPad = int(pFac * vStep)
        hPad = int(pFac * hStep)  # horizontal pad follows the horizontal step
        for i in range(vCells):
            for j in range(hCells):
                # Cell interior.
                i1 = i * vStep
                i2 = min((i + 1) * vStep, nrows - 1)
                j1 = j * hStep
                j2 = min((j + 1) * hStep, ncols - 1)

                # Cell plus padding, clamped to the image.
                i1p = max(i1 - vPad, 0)
                i2p = min(i2 + vPad, nrows - 1)
                j1p = max(j1 - hPad, 0)
                j2p = min(j2 + hPad, ncols - 1)

                # Chop the padded cell out of both images and align it.
                Ic = I2[i1p:i2p, j1p:j2p]
                Irefc = Iref1[i1p:i2p, j1p:j2p]
                (H, err) = imagesAlign1(Ic, Irefc,
                                        trfm_type=trfm_type, verbose=verbose,
                                        minArea=minArea)
                IcT = sh.imtransform(Ic, H)
                # Stitch only the interior of the aligned crop back in.
                Iout[i1:i2, j1:j2] = IcT[i1 - i1p:i2 - i1p,
                                         j1 - j1p:j2 - j1p]

        return (np.eye(3), Iout, -1)

    if rszFac == 1:
        t0 = time.clock()
        (H, err) = imagesAlign1(I1, Iref1,
                                trfm_type=trfm_type, verbose=verbose,
                                minArea=minArea)
        if verbose:
            print('alignment time: %s (s)' % (time.clock() - t0))
    else:
        I1 = sh.fastResize(I1, rszFac)
        Iref1 = sh.fastResize(Iref1, rszFac)
        # S maps resized coordinates back to full-size coordinates. Use a
        # float numerator so an integer rszFac cannot truncate the scale
        # to 0 and make S singular.
        S = np.eye(3, dtype=np.float32)
        S[0, 0] = 1.0 / rszFac
        S[1, 1] = 1.0 / rszFac
        S_inv = np.linalg.inv(S)
        H0 = np.eye(3, dtype=np.float32)
        H0 = np.dot(np.dot(S_inv, H0), S)
        t0 = time.clock()
        (H, err) = imagesAlign1(I1, Iref1, H0=H0,
                                trfm_type=trfm_type, verbose=verbose,
                                minArea=minArea)
        if verbose:
            print('alignment time: %s (s)' % (time.clock() - t0))

        # Express the transformation found on the resized images in the
        # coordinates of the full-size image.
        H = np.dot(S, np.dot(H, S_inv))

    if applyWarp:
        return (H, sh.imtransform(I, H, fillval=fillval), err)
    else:
        return (H, err)
def dist2patches(patchTuples, scale, debug=False):
    """
    Score every image patch against its exemplar patches at one scale.

    Input:
        list patchTuples: EITHER (!) of the form:
              ((imgpatch_i, attrpatch_i, str attval_i, isflip_i), ...)
            or
              ((imgpatch_i, [attrpatch_i, ...], str attrval_i, int page_i, isflip_i), ...)
            I'm not entirely sure when it's a 4-tuple or a 5-tuple...but beware.
            Only elements 0 (image patch) and 1 (exemplar patches, which
            is iterated over) are read here.
        float scale: Current scale factor. Both the image patch and each
            exemplar are resized by it with sh.fastResize.
        bool debug: Currently unused; evalPatchSimilarity2 is always called
            with debug=False.
    Output:
        (scores, locs, exemplar_idxs), each with one entry per tuple:
            scores: np.array holding the best (largest) score, where a
                score is res[0] of evalPatchSimilarity2 divided by the
                area of the resized exemplar.
            locs: (res[1][0] / scale, res[1][1] / scale) of the best
                exemplar, or None if the tuple had no exemplars.
            exemplar_idxs: index of the best exemplar, or None.
    Any exception raised by evalPatchSimilarity2 is re-raised after
    diagnostics have been printed.
    """
    scores = np.zeros(len(patchTuples))
    locs = []
    # Keeps track of which exemplar patch was the best for a given voted ballot
    exemplar_idxs = []

    for idx, pt in enumerate(patchTuples):
        imgpatch = pt[0]
        attrpatches = pt[1]
        # Quantising to multiples of 1/255 is a fix for a very bizarre
        # openCv bug [check pixel_reg/opencv_bug_repo.py].
        I = np.round(sh.fastResize(imgpatch, scale) * 255.) / 255.
        bestscore = None
        bestloc = None
        best_idx_ex = None  # Index of the best exemplar
        # Get the best score, over all possible exemplars (this is to
        # account for background variation).
        for idx_ex, attrpatch in enumerate(attrpatches):
            patch = np.round(sh.fastResize(attrpatch, scale) * 255.) / 255.
            try:
                res = evalPatchSimilarity2(I, patch, debug=False)
            except Exception:
                traceback.print_exc()
                print("CRASHED AT IDX: {0}".format(idx))
                print("    Scale was: {0}".format(scale))
                print("    I.shape: {0} patch.shape: {1}".format(
                    I.shape, patch.shape))
                print("    imgpatch: {0} attrpatch: {1}".format(
                    imgpatch.shape, attrpatch.shape))
                # Bare raise keeps the traceback of the original failure.
                raise
            # Normalise by exemplar area; larger scores are treated as better.
            score = res[0] / (patch.shape[0] * patch.shape[1])
            if bestscore is None or score > bestscore:
                bestscore = score
                best_idx_ex = idx_ex
                bestloc = (res[1][0] / scale, res[1][1] / scale)
        scores[idx] = bestscore
        locs.append(bestloc)
        exemplar_idxs.append(best_idx_ex)
    return (scores, locs, exemplar_idxs)
Пример #4
0
def imagesAlign1(I, Iref, H0=np.eye(3, dtype=np.float32),
                 trfm_type='similarity', verbose=False,
                 minArea=np.power(2, 11)):
    """
    Estimate the transformation aligning I to IREF, coarse to fine.

    Input:
        nparray I: Assumes that I is larger than IREF
        nparray Iref:
        nparray H0: Trans. mat. used as the starting estimate.
        str trfm_type: Transformation trfm_type. One of 'translation',
            'rigid', 'similarity', 'affine', 'projective'.
        bool verbose: If True, print the error after every iteration.
        int minArea: The minimum area that IREF is allowed to be - if
            IREF.width*IREF.height is greater than this, then imagesAlign1
            will shrink both I and IREF by 50% until the area < MINAREA.
            Smaller values of MINAREA allow higher tolerance for wider
            translations, yet can lead to less-predictable results.
            Suggestion: For coarse global alignment, try smaller values
            of MINAREA. For finer local alignment, use larger MINAREA.
    Output:
        (H, err): H is the estimated 3x3 transformation matrix, err the
        mean absolute intensity difference over the valid pixels as
        measured in the last iteration. (np.eye(3), np.inf) is returned
        when the estimate degenerates (fewer than 2 valid pixels, or fewer
        than a third of the originally valid pixels still valid).
    Raises:
        ValueError: if trfm_type is not one of the supported names.
    """
    # Entries of the 8-element parameter vector that are free to change
    # for each transformation type.
    keep_by_type = {
        'translation': [0, 1],
        'rigid': [0, 1, 5],
        'similarity': [0, 1, 2, 5],
        'affine': [0, 1, 2, 3, 4, 5],
        'projective': [0, 1, 2, 3, 4, 5, 6, 7],
    }
    if trfm_type not in keep_by_type:
        raise ValueError(
            "Unknown trfm_type {0!r}; expected one of {1}".format(
                trfm_type, sorted(keep_by_type)))
    keep = keep_by_type[trfm_type]

    lbda = 1e-6
    wh = Iref.shape
    eps = 1e-3
    sig = 2

    # Recursive check: start from H0 once IREF is small enough, otherwise
    # solve on half-size images first and rescale that result.
    if np.prod(wh) < minArea:
        H = H0
    else:
        I1 = sh.fastResize(I, .5)
        Iref1 = sh.fastResize(Iref, .5)
        S = np.eye(3)
        S[0, 0] = 2
        S[1, 1] = 2
        S_inv = np.linalg.inv(S)
        H0 = np.dot(np.dot(S_inv, H0), S)
        H = imagesAlign1(I1, Iref1, H0=H0, trfm_type=trfm_type,
                         verbose=verbose, minArea=minArea)[0]
        H = np.dot(S, np.dot(H, S_inv))

    # smooth images
    Iref = gaussian_filter(Iref, sig)
    I = gaussian_filter(I, sig)

    # Pad both images with a 2-pixel border (edge rows/columns repeated),
    # cropping I to IREF's extent; the border is set to NaN further down.
    ws = np.concatenate(([0], [0], list(range(wh[0])),
                         [wh[0] - 1], [wh[0] - 1]))
    hs = np.concatenate(([0], [0], list(range(wh[1])),
                         [wh[1] - 1], [wh[1] - 1]))
    try:
        Iref = Iref[np.ix_(ws, hs)]
        I = I[np.ix_(ws, hs)]
    except Exception:
        traceback.print_exc()
        print('...Iref.shape: %s' % (Iref.shape,))
        print('...I.shape: %s' % (I.shape,))
        # Dump both images for inspection, tagged with a timestamp.
        import time
        stamp = str(time.time())
        misc.imsave("_Iref_{0}.png".format(stamp), Iref)
        misc.imsave("_I_{0}.png".format(stamp), I)
        # Bare raise keeps the traceback of the original failure.
        raise
    hs = np.array([0, 1, wh[1] + 2, wh[1] + 3])
    ws = np.array([0, 1, wh[0] + 2, wh[0] + 3])

    Iref[ws, :] = np.nan
    I[ws, :] = np.nan
    Iref[:, hs] = np.nan
    I[:, hs] = np.nan

    # Per-parameter weights, rescaled by the image size relative to 128.
    wts = np.array([1, 1, 1.0204, .03125, 1.0313, .0204, .000555, .000555])
    s = math.sqrt(Iref.size) / 128.0
    wts[2] = math.pow(wts[2], 1 / s)
    wts[3] = wts[3] / s
    wts[4] = math.pow(wts[4], 1 / s)
    wts[5] = wts[5] / s
    wts[6] = wts[6] / (s * s)
    wts[7] = wts[7] / (s * s)

    # compute transformations
    HH = ds2H(-1 * np.ones(8), wts)
    Hs = HH[1][keep, :]

    # Apply each of them to IREF; Ms keeps only the pixels that stay
    # non-NaN under every one of them.
    Ts = np.zeros([Hs.shape[0], Iref.shape[0], Iref.shape[1]])
    Ms = np.ones([Iref.shape[0], Iref.shape[1]], dtype=np.float32)
    for k in range(Hs.shape[0]):
        Ts[k, :, :] = sh.imtransform(Iref, Hs[k, :, :])
        Ms = Ms * (np.float32(~np.isnan(Ts[k, :, :])))

    # compute differences
    Ds = Ts - np.tile(Iref, [Hs.shape[0], 1, 1])
    D = Ds.reshape(Ds.shape[0], np.prod(Iref.shape))
    Lbda = lbda * np.prod(Iref.shape) * np.eye(Ds.shape[0])
    err = np.inf
    # Kept 1-D for the whole loop so that ds[keep] always has the same
    # shape as the flattened update assigned to it.
    ds = np.zeros(8)

    # I does not change inside the loop, so count its valid pixels once.
    origValidPixels = np.sum(~np.isnan(I))

    for i in range(100):
        # warp image with current estimate
        Ip = sh.imtransform(I, H)
        M = Ms * np.float32(~np.isnan(Ip) & ~np.isnan(Iref))
        Mf = M.reshape(I.size, 1)
        dI = Ip - Iref
        dIf = dI.reshape(np.prod(I.shape), 1)

        # guard against bad things
        if np.sum(Mf) < 2:
            H = np.eye(3)
            err = np.inf
            break

        # Give up if fewer than a third of the originally valid pixels are
        # still valid after warping.
        newValidPixels = np.sum(~np.isnan(Ip + I))
        if newValidPixels < (origValidPixels / 3.):
            return (np.eye(3), np.inf)

        # Regularised least-squares update restricted to the valid pixels.
        valid = np.nonzero(np.squeeze(Mf))[0]
        D0 = D[:, valid]
        dI1 = dIf[valid]
        _A = np.dot(D0, D0.T)
        _B = np.linalg.inv(_A + Lbda)
        _C = np.dot(D0, dI1)
        ds1 = np.dot(_B, _C)

        ds[keep] = np.ravel(ds1)
        HH = ds2H(ds, wts)
        H = np.dot(H, HH[0])
        H = H / H[2, 2]
        err0 = err
        err = np.mean(np.abs(dI1))
        delta = err0 - err
        if verbose:
            print("%s  i= %s  err= %s  del= %s" % (I.shape, i, err, delta))
        # Stop once the error no longer improves by at least eps.
        if delta < eps:
            break
    return (H, err)
Пример #5
0
def imagesAlign1(I,
                 Iref,
                 H0=np.eye(3, dtype=np.float32),
                 trfm_type='similarity',
                 verbose=False,
                 minArea=np.power(2, 11)):
    """
    Estimate the transformation aligning I to IREF, coarse to fine.

    Input:
        nparray I: Assumes that I is larger than IREF
        nparray Iref:
        nparray H0: Trans. mat. used as the starting estimate.
        str trfm_type: Transformation trfm_type. One of 'translation',
            'rigid', 'similarity', 'affine', 'projective'.
        bool verbose: If True, print the error after every iteration.
        int minArea: The minimum area that IREF is allowed to be - if
            IREF.width*IREF.height is greater than this, then imagesAlign1
            will shrink both I and IREF by 50% until the area < MINAREA.
            Smaller values of MINAREA allow higher tolerance for wider
            translations, yet can lead to less-predictable results.
            Suggestion: For coarse global alignment, try smaller values
            of MINAREA. For finer local alignment, use larger MINAREA.
    Output:
        (H, err): H is the estimated 3x3 transformation matrix, err the
        mean of the masked absolute intensity difference as measured in
        the last iteration. (np.eye(3), np.inf) is returned when the
        estimate degenerates (fewer than 2 valid pixels, or fewer than a
        third of the originally valid pixels still valid).
    Raises:
        ValueError: if trfm_type is not one of the supported names.
    """
    # Entries of the 8-element parameter vector that are free to change
    # for each transformation type.
    free_params = {
        'translation': [0, 1],
        'rigid': [0, 1, 5],
        'similarity': [0, 1, 2, 5],
        'affine': [0, 1, 2, 3, 4, 5],
        'projective': [0, 1, 2, 3, 4, 5, 6, 7],
    }
    if trfm_type not in free_params:
        raise ValueError(
            "Unknown trfm_type {0!r}; expected one of {1}".format(
                trfm_type, sorted(free_params)))
    keep = free_params[trfm_type]

    lbda = 1e-6
    wh = Iref.shape
    eps = 1e-3
    sig = 2

    # Recursive check: start from H0 once IREF is small enough, otherwise
    # solve on half-size images first and rescale that result.
    if np.prod(wh) < minArea:
        H = H0
    else:
        I1 = sh.fastResize(I, .5)
        Iref1 = sh.fastResize(Iref, .5)
        S = np.eye(3)
        S[0, 0] = 2
        S[1, 1] = 2
        S_inv = np.linalg.inv(S)
        H0 = np.dot(np.dot(S_inv, H0), S)
        H = imagesAlign1(I1,
                         Iref1,
                         H0=H0,
                         trfm_type=trfm_type,
                         verbose=verbose,
                         minArea=minArea)[0]
        H = np.dot(S, np.dot(H, S_inv))

    # smooth images
    Iref = gaussian_filter(Iref, sig)
    I = gaussian_filter(I, sig)

    # Pad both images with a 2-pixel border (edge rows/columns repeated),
    # cropping I to IREF's extent; the border is set to NaN further down.
    ws = np.concatenate(([0], [0], list(range(wh[0])),
                         [wh[0] - 1], [wh[0] - 1]))
    hs = np.concatenate(([0], [0], list(range(wh[1])),
                         [wh[1] - 1], [wh[1] - 1]))
    try:
        Iref = Iref[np.ix_(ws, hs)]
        I = I[np.ix_(ws, hs)]
    except Exception:
        traceback.print_exc()
        print('...Iref.shape: %s' % (Iref.shape,))
        print('...I.shape: %s' % (I.shape,))
        # Dump both images for inspection, tagged with a timestamp.
        import time
        stamp = str(time.time())
        misc.imsave("_Iref_{0}.png".format(stamp), Iref)
        misc.imsave("_I_{0}.png".format(stamp), I)
        # Bare raise keeps the traceback of the original failure.
        raise
    hs = np.array([0, 1, wh[1] + 2, wh[1] + 3])
    ws = np.array([0, 1, wh[0] + 2, wh[0] + 3])

    Iref[ws, :] = np.nan
    I[ws, :] = np.nan
    Iref[:, hs] = np.nan
    I[:, hs] = np.nan

    # Per-parameter weights, rescaled by the image size relative to 128.
    wts = np.array([1, 1, 1.0204, .03125, 1.0313, .0204, .000555, .000555])
    s = math.sqrt(Iref.size) / 128.0
    wts[2] = math.pow(wts[2], 1 / s)
    wts[3] = wts[3] / s
    wts[4] = math.pow(wts[4], 1 / s)
    wts[5] = wts[5] / s
    wts[6] = wts[6] / (s * s)
    wts[7] = wts[7] / (s * s)

    # compute transformations
    HH = ds2H(-1 * np.ones(8), wts)
    Hs = HH[1][keep, :]

    # Apply each of them to IREF; Ms keeps only the pixels that stay
    # non-NaN under every one of them.
    Ts = np.zeros([Hs.shape[0], Iref.shape[0], Iref.shape[1]])
    Ms = np.ones([Iref.shape[0], Iref.shape[1]], dtype=np.float32)
    for k in range(Hs.shape[0]):
        Ts[k, :, :] = sh.imtransform(Iref, Hs[k, :, :])
        Ms = Ms * (np.float32(~np.isnan(Ts[k, :, :])))

    # compute differences; NaNs are zeroed because the mask is applied by
    # multiplication below and NaN * 0 would stay NaN.
    Ds = Ts - np.tile(Iref, [Hs.shape[0], 1, 1])
    D_zerod = np.nan_to_num(Ds.reshape(Ds.shape[0], np.prod(Iref.shape)))
    Lbda = lbda * np.prod(Iref.shape) * np.eye(Ds.shape[0])
    err = np.inf
    # Kept 1-D for the whole loop so that ds[keep] always has the same
    # shape as the flattened update assigned to it.
    ds = np.zeros(8)

    # I does not change inside the loop, so count its valid pixels once.
    origValidPixels = np.sum(~np.isnan(I))

    for i in range(100):
        # warp image with current estimate
        Ip = sh.imtransform(I, H)
        M = Ms * np.float32(~np.isnan(Ip) & ~np.isnan(Iref))
        Mf = M.reshape(I.size, 1)
        dI = Ip - Iref
        dIf = dI.reshape(np.prod(I.shape), 1)

        # guard against bad things
        if np.sum(Mf) < 2:
            H = np.eye(3)
            err = np.inf
            break

        # Give up if fewer than a third of the originally valid pixels are
        # still valid after warping.
        newValidPixels = np.sum(~np.isnan(Ip + I))
        if newValidPixels < (origValidPixels / 3.):
            return (np.eye(3), np.inf)

        # Regularised least-squares update. The mask is applied via
        # multiply rather than index; Mf.T broadcasts across the rows.
        D0_masked = D_zerod * Mf.T
        dI1 = np.nan_to_num(np.multiply(dIf, Mf))
        _A_masked = np.dot(D0_masked, D0_masked.T)
        _B_masked = np.linalg.inv(_A_masked + Lbda)
        _C_masked = np.dot(D0_masked, dI1)
        ds1 = np.dot(_B_masked, _C_masked)

        ds[keep] = np.ravel(ds1)
        HH = ds2H(ds, wts)
        H = np.dot(H, HH[0])
        H = H / H[2, 2]
        err0 = err
        # NOTE(review): dI1 keeps masked-out pixels as zeros, so this mean
        # runs over every pixel of the padded image, not only the valid
        # ones -- confirm that this is intended.
        err = np.mean(np.abs(dI1))
        delta = err0 - err
        if verbose:
            print("%s  i= %s  err= %s  del= %s" % (I.shape, i, err, delta))
        # Stop once the error no longer improves by at least eps.
        if delta < eps:
            break
    return (H, err)
Пример #6
0
def dist2patches(patchTuples, scale, debug=False):
    """
    Score every image patch against its exemplar patches at one scale.

    Input:
        list patchTuples: EITHER (!) of the form:
              ((imgpatch_i, attrpatch_i, str attval_i, isflip_i), ...)
            or
              ((imgpatch_i, [attrpatch_i, ...], str attrval_i, int page_i, isflip_i), ...)
            I'm not entirely sure when it's a 4-tuple or a 5-tuple...but beware.
            Only elements 0 (image patch) and 1 (exemplar patches, which
            is iterated over) are read here.
        float scale: Current scale factor. Both the image patch and each
            exemplar are resized by it with sh.fastResize.
        bool debug: Currently unused; evalPatchSimilarity2 is always called
            with debug=False.
    Output:
        (scores, locs, exemplar_idxs), each with one entry per tuple:
            scores: np.array holding the best (largest) score, where a
                score is res[0] of evalPatchSimilarity2 divided by the
                area of the resized exemplar.
            locs: (res[1][0] / scale, res[1][1] / scale) of the best
                exemplar, or None if the tuple had no exemplars.
            exemplar_idxs: index of the best exemplar, or None.
    Any exception raised by evalPatchSimilarity2 is re-raised after
    diagnostics have been printed.
    """
    scores = np.zeros(len(patchTuples))
    locs = []
    # Keeps track of which exemplar patch was the best for a given voted ballot
    exemplar_idxs = []

    for idx, pt in enumerate(patchTuples):
        imgpatch = pt[0]
        attrpatches = pt[1]
        # Quantising to multiples of 1/255 is a fix for a very bizarre
        # openCv bug [check pixel_reg/opencv_bug_repo.py].
        I = np.round(sh.fastResize(imgpatch, scale) * 255.) / 255.
        bestscore = None
        bestloc = None
        best_idx_ex = None  # Index of the best exemplar
        # Get the best score, over all possible exemplars (this is to
        # account for background variation).
        for idx_ex, attrpatch in enumerate(attrpatches):
            patch = np.round(sh.fastResize(attrpatch, scale) * 255.) / 255.
            try:
                res = evalPatchSimilarity2(I, patch, debug=False)
            except Exception:
                traceback.print_exc()
                print("CRASHED AT IDX: {0}".format(idx))
                print("    Scale was: {0}".format(scale))
                print("    I.shape: {0} patch.shape: {1}".format(
                    I.shape, patch.shape))
                print("    imgpatch: {0} attrpatch: {1}".format(
                    imgpatch.shape, attrpatch.shape))
                # Bare raise keeps the traceback of the original failure.
                raise
            # Normalise by exemplar area; larger scores are treated as better.
            score = res[0] / (patch.shape[0] * patch.shape[1])
            if bestscore is None or score > bestscore:
                bestscore = score
                best_idx_ex = idx_ex
                bestloc = (res[1][0] / scale, res[1][1] / scale)
        scores[idx] = bestscore
        locs.append(bestloc)
        exemplar_idxs.append(best_idx_ex)
    return (scores, locs, exemplar_idxs)