Exemplo n.º 1
0
def imagesAlign(I, Iref, fillval=np.nan, trfm_type='similarity',
                vCells=1, hCells=1, rszFac=1, verbose=False,
                minArea=np.power(2, 11), applyWarp=True):
    """ Aligns I to IREF.
    Input:
        np.array I: Image you want to align. I must be larger than IREF.
        np.array Iref: Image you want to align against.
        fillval: Fill value handed to sh.imtransform when the found
            transformation is applied to I (only used if applyWarp).
        str trfm_type: What image transformation to solve for. They are (in
            order of complexity): 'translation', 'rigid', 'similarity',
            'affine', and 'projective'. A nice page that describes
            these are at:
                http://homepages.inf.ed.ac.uk/rbf/HIPR2/affine.htm
        int vCells, hCells: Params to allow aligning subcells of the
            image, followed by stitching. Appears to rarely be used.
            The cell mode is only entered when BOTH values are > 1.
        float rszFac: Amount by which to scale the image - for
            performance, you want to scale down (i.e. 0.75).
        bool verbose: Print the alignment time; also forwarded to
            imagesAlign1.
        int minArea: Forwarded unchanged to imagesAlign1.
        applyWarp: Causes imagesAlign to apply the found transformation
            to the input image I and return it. Without applyWarp, the
            function will only return the transformation matrix. This is
            used when cropped images are passed to the function, so the
            warp should not yet be applied to the cropped image but rather
            the original image, which is now the responsibility of the
            caller.
    Output:
        (H, Ireg, err) if applyWarp, else (H, err). H is the transformation
        matrix that was found to best align I to Iref. Ireg is the result
        of aligning I to Iref. err is the alignment error.
        In cell mode (vCells > 1 and hCells > 1) the return value is always
        (np.eye(3), Iout, -1), where Iout is the stitched image.
    """
    # Work on single-channel copies; the untouched I is what gets warped
    # at the very end.
    I1 = sh.rgb2gray(I) if len(I.shape) == 3 else I
    Iref1 = sh.rgb2gray(Iref) if len(Iref.shape) == 3 else Iref

    WARN_USER, ORIG_DTYPE = False, None
    if I1.dtype != 'float32':
        WARN_USER, ORIG_DTYPE = True, I1.dtype
        I1 = I1.astype('float32')
    if Iref1.dtype != 'float32':
        WARN_USER, ORIG_DTYPE = True, Iref1.dtype
        Iref1 = Iref1.astype('float32')
    if WARN_USER:
        msg = ("(Info) imagesAlign was called with input image dtype={0}. "
               "imagesAlign expects image dtype='float32' (Also, intensity vals in range "
               "[0.0,1.0]. The image dtype conversion was "
               "automatically done, but this slows down the computation a little. Consider "
               "trying to work in 'float32' in the first place if convenient for a little "
               "speed boost.")
        print(msg.format(ORIG_DTYPE))

    # time.clock no longer exists on Python >= 3.8; fall back to it only
    # where perf_counter is unavailable (Python 2).
    timer = getattr(time, 'perf_counter', None) or time.clock

    # check if more than one vertical and horizontal cell
    if (vCells > 1) and (hCells > 1):
        # 1. coarse global alignment of the whole image
        I2 = imagesAlign(I1, Iref1, trfm_type=trfm_type, minArea=minArea)[1]
        Iout = np.copy(Iref1)
        pFac = .25
        nRows, nCols = I1.shape[0], I1.shape[1]
        # True division before ceil (integer division made the ceil a
        # no-op), and integer results so they are usable as slice bounds.
        vStep = int(math.ceil(nRows / float(vCells)))
        hStep = int(math.ceil(nCols / float(hCells)))
        vPad = int(pFac * vStep)
        # Horizontal padding derives from the horizontal step (the
        # original used vStep here).
        hPad = int(pFac * hStep)
        for i in range(vCells):
            for j in range(hCells):
                # 2. chop + pad each cell then align
                # 3. stitch back together
                i1 = i * vStep
                i2 = min((i + 1) * vStep, nRows - 1)
                j1 = j * hStep
                j2 = min((j + 1) * hStep, nCols - 1)
                if i2 <= i1 or j2 <= j1:
                    # Rounding the step up can leave trailing cells empty
                    # on very small images; there is nothing to align.
                    continue

                i1p = max(i1 - vPad, 0)
                i2p = min(i2 + vPad, nRows - 1)
                j1p = max(j1 - hPad, 0)
                j2p = min(j2 + hPad, nCols - 1)

                Ic = I2[i1p:i2p, j1p:j2p]
                Irefc = Iref1[i1p:i2p, j1p:j2p]
                (H, err) = imagesAlign1(Ic, Irefc,
                                        trfm_type=trfm_type, verbose=verbose, minArea=minArea)
                IcT = sh.imtransform(Ic, H)
                # Copy back only the un-padded interior of the aligned cell.
                top = i1 - i1p
                left = j1 - j1p
                Iout[i1:i2, j1:j2] = IcT[top:top + (i2 - i1),
                                         left:left + (j2 - j1)]

        return (np.eye(3), Iout, -1)

    if rszFac == 1:
        t0 = timer()
        (H, err) = imagesAlign1(I1, Iref1,
                                trfm_type=trfm_type, verbose=verbose, minArea=minArea)
        if verbose:
            print('alignment time: {0} (s)'.format(timer() - t0))
    else:
        I1 = sh.fastResize(I1, rszFac)
        Iref1 = sh.fastResize(Iref1, rszFac)
        # S maps resized coordinates back to full-size coordinates.
        # 1.0 / rszFac: an integer rszFac must not truncate to 0, which
        # would make S singular.
        S = np.eye(3, dtype=np.float32)
        S[0, 0] = 1.0 / rszFac
        S[1, 1] = 1.0 / rszFac
        H0 = np.eye(3, dtype=np.float32)
        H0 = np.dot(np.dot(np.linalg.inv(S), H0), S)
        t0 = timer()
        (H, err) = imagesAlign1(I1, Iref1, H0=H0,
                                trfm_type=trfm_type, verbose=verbose, minArea=minArea)
        if verbose:
            print('alignment time: {0} (s)'.format(timer() - t0))

        # Express the transformation in full-size image coordinates.
        H = np.dot(S, np.dot(H, np.linalg.inv(S)))

    if applyWarp:
        return (H, sh.imtransform(I, H, fillval=fillval), err)
    return (H, err)
def pm1(digit_hash,
        I,
        nDigits,
        hspace,
        hackConstant=250,
        rejected_hash=None,
        accepted_hash=None):
    """
    Applies digit-OCR to an image.
    Input:
        dict digit_hash: maps {str digit: img digit_exemplar}
        obj I: image to search over (i.e. voted ballot)
        int nDigits: number of digits to find (must be >= 1)
        hspace: horizontal offset used to shift the accumulated cost map
            between consecutive digits
        hackConstant: scale factor applied to the per-pixel matching cost
        dict rejected_hash: maps {str digit: [((y1,y2,x1,x2), str side_i, bool isflip_i), ...]}
        dict accepted_hash: maps {str digit: [((y1,y2,x1,x2), str side_i, bool isflip_i), ...]}
    Output:
        (ocr_str, patches, bbs, scores):
            str ocr_str: the digits found, in order.
            list patches: one None per digit (patches are not extracted).
            list bbs: one (y1, y2, x1, x2) tuple of ints per digit.
            list scores: the best match response at each digit position.
    Raises:
        ValueError: if digit_hash is empty or nDigits < 1.
    """
    if nDigits < 1:
        raise ValueError("nDigits must be >= 1, got {0}".format(nDigits))
    # Materialise the keys so they can be indexed by position later on.
    keys = list(digit_hash.keys())
    if not keys:
        raise ValueError("digit_hash must contain at least one exemplar")

    reject_penalty = .2
    accept_bonus = .2
    # time.clock no longer exists on Python >= 3.8.
    timer = getattr(time, 'perf_counter', None) or time.clock

    matchMat = None
    t0 = timer()
    for count, key in enumerate(keys):
        Iout = sh.NCC(I, digit_hash[key])
        # Lower the response around previously rejected locations and
        # raise it around previously accepted ones.
        for (hint_hash, delta) in ((rejected_hash, -reject_penalty),
                                   (accepted_hash, accept_bonus)):
            if not hint_hash or key not in hint_hash:
                continue
            # 'side' and 'isflip' are carried by the hash entries but are
            # not used here.
            for (bbMask, side, isflip) in hint_hash[key]:
                h = bbMask[1] - bbMask[0]
                w = bbMask[3] - bbMask[2]
                # Adjust a window of +/- a quarter of the box size around
                # the box's top-left corner.
                i1 = int(max(0, bbMask[0] - (h // 4)))
                i2 = int(min(Iout.shape[0], bbMask[0] + (h // 4)))
                j1 = int(max(0, bbMask[2] - (w // 4)))
                j2 = int(min(Iout.shape[1], bbMask[2] + (w // 4)))
                Iout[i1:i2, j1:j2] = Iout[i1:i2, j1:j2] + delta

        if matchMat is None:
            matchMat = np.zeros((Iout.shape[0], Iout.shape[1], len(keys)))
        matchMat[:, :, count] = Iout

    print('match time: {0} (s)'.format(timer() - t0))
    # Best response per pixel and the index (into keys) of the exemplar
    # that produced it.
    maxResp = np.amax(matchMat, axis=2)
    maxObj = np.argmax(matchMat, axis=2)

    tDP = timer()
    # re-scale resp
    unary = hackConstant * np.power(2 - (maxResp + 1), 2)

    def shifted(cost):
        # Translate a cost map by hspace; cells without a source pixel get
        # the map's maximum.
        shiftH = np.eye(3)
        shiftH[0, 2] = hspace
        return sh.imtransform(cost, shiftH, fillval=cost.max())

    # cache bottom up
    M = [None] * nDigits
    Mx = [None] * nDigits
    My = [None] * nDigits
    if nDigits == 1:
        # A single digit has no predecessor: the root cost is the unary
        # cost itself (the original indexed M[-1] here).
        rootM = unary
    else:
        res = distance_transform.dt2(unary)
        M[0], Mx[0], My[0] = res[0], res[1], res[2]
        for i in range(1, nDigits - 1):
            res = distance_transform.dt2(shifted(M[i - 1]) + unary)
            M[i], Mx[i], My[i] = res[0], res[1], res[2]
        rootM = shifted(M[nDigits - 2]) + unary
    M[nDigits - 1] = rootM

    # get best root position
    (miny, minx) = np.unravel_index(rootM.argmin(), rootM.shape)

    # store top down
    optYX = [None] * nDigits
    optYX[nDigits - 1] = (int(miny), int(minx))
    for i in reversed(range(nDigits - 1)):
        (prevMiny, prevMinx) = optYX[i + 1]
        # NOTE(review): a root closer than hspace to the left edge yields a
        # negative column here, which wraps around - confirm this cannot
        # happen for real inputs.
        col = int(prevMinx - hspace)
        # round() returns a float on Python 2; positions are used as array
        # indices below, so convert explicitly.
        optYX[i] = (int(round(My[i][prevMiny, col])),
                    int(round(Mx[i][prevMiny, col])))

    patches = []
    bbs = []
    scores = []
    digits = []
    for (i1, j1) in optYX:
        key = keys[maxObj[i1, j1]]
        digits.append(key)
        i2 = i1 + digit_hash[key].shape[0]
        j2 = j1 + digit_hash[key].shape[1]
        bbs.append((i1, i2, j1, j2))
        patches.append(None)
        scores.append(maxResp[i1, j1])
    ocr_str = ''.join(digits)

    print('DP time: {0} (s)'.format(timer() - tDP))
    return (ocr_str, patches, bbs, scores)
Exemplo n.º 3
0
def imagesAlign1(I,
                 Iref,
                 H0=np.eye(3, dtype=np.float32),
                 trfm_type='similarity',
                 verbose=False,
                 minArea=np.power(2, 11)):
    """
    Iteratively estimates the transformation that aligns I to Iref,
    recursing on half-size images first (coarse to fine).
    Input:
        nparray I: Assumes that I is larger than IREF
        nparray Iref:
        nparray H0: Trans. mat. used as the starting estimate at the
            coarsest level.
        str trfm_type: Transformation trfm_type. One of 'translation',
            'rigid', 'similarity', 'affine', 'projective'.
        bool verbose: Print the error after every iteration.
        int minArea: The minimum area that IREF is allowed to be - if
            IREF.width*IREF.height is greater than this, then imagesAlign1
            will shrink both I and IREF by 50% until the area < MINAREA.
            Smaller values of MINAREA allow higher tolerance for wider
            translations, yet can lead to less-predictable results.
            Suggestion: For coarse global alignment, try smaller values
            of MINAREA. For finer local alignment, use larger MINAREA.
    Output:
        (H, err): the estimated 3x3 transformation and the mean absolute
        intensity difference from the last iteration. (np.eye(3), np.inf)
        is returned when too few valid pixels remain after warping.
    Raises:
        ValueError: if trfm_type is not one of the supported names.
    """
    # Which of the 8 entries of the parameter vector handed to ds2H each
    # transformation family is allowed to update.
    keep_by_type = {
        'translation': [0, 1],
        'rigid': [0, 1, 5],
        'similarity': [0, 1, 2, 5],
        'affine': [0, 1, 2, 3, 4, 5],
        'projective': [0, 1, 2, 3, 4, 5, 6, 7],
    }
    if trfm_type not in keep_by_type:
        # Previously an unknown name only surfaced later as an unbound
        # local variable.
        raise ValueError("unknown trfm_type: {0!r}".format(trfm_type))
    keep = keep_by_type[trfm_type]

    lbda = 1e-6
    wh = Iref.shape
    eps = 1e-3
    sig = 2

    # recursive check
    if np.prod(wh) < minArea:
        H = H0
    else:
        I1 = sh.fastResize(I, .5)
        Iref1 = sh.fastResize(Iref, .5)
        # S converts half-size coordinates to full-size coordinates.
        S = np.eye(3)
        S[0, 0] = 2
        S[1, 1] = 2
        H0 = np.dot(np.dot(np.linalg.inv(S), H0), S)
        (H, _) = imagesAlign1(I1,
                              Iref1,
                              H0=H0,
                              trfm_type=trfm_type,
                              verbose=verbose,
                              minArea=minArea)
        H = np.dot(S, np.dot(H, np.linalg.inv(S)))

    # smooth images
    Iref = gaussian_filter(Iref, sig)
    I = gaussian_filter(I, sig)

    # pad image with NaNs: replicate the border rows/columns twice, then
    # overwrite the replicated frame with NaN. I is indexed with Iref's
    # extent, so both images have the same shape afterwards.
    ws = np.concatenate(([0, 0], np.arange(wh[0]), [wh[0] - 1, wh[0] - 1]))
    hs = np.concatenate(([0, 0], np.arange(wh[1]), [wh[1] - 1, wh[1] - 1]))
    try:
        Iref = Iref[np.ix_(ws, hs)]
        I = I[np.ix_(ws, hs)]
    except Exception:
        import time
        traceback.print_exc()
        print('...Iref.shape: {0}'.format(Iref.shape))
        print('...I.shape: {0}'.format(I.shape))
        # The original formatted an undefined name `t` here, which raised
        # NameError and hid the real failure; use a timestamp as the tag.
        tag = str(int(time.time()))
        misc.imsave("_Iref_{0}.png".format(tag), Iref)
        misc.imsave("_I_{0}.png".format(tag), I)
        # Bare raise keeps the original traceback.
        raise
    hs = np.array([0, 1, wh[1] + 2, wh[1] + 3])
    ws = np.array([0, 1, wh[0] + 2, wh[0] + 3])

    Iref[ws, :] = np.nan
    I[ws, :] = np.nan
    Iref[:, hs] = np.nan
    I[:, hs] = np.nan

    # Per-parameter weights, rescaled relative to a 128x128 image.
    wts = np.array([1, 1, 1.0204, .03125, 1.0313, .0204, .000555, .000555])
    s = math.sqrt(Iref.size) / 128.0
    wts[2] = math.pow(wts[2], 1 / s)
    wts[3] = wts[3] / s
    wts[4] = math.pow(wts[4], 1 / s)
    wts[5] = wts[5] / s
    wts[6] = wts[6] / (s * s)
    wts[7] = wts[7] / (s * s)

    # compute transformations
    HH = ds2H(-1 * np.ones(8), wts)
    Hs = HH[1][keep, :]
    nParams = Hs.shape[0]

    # apply transformations: warp Iref once per free parameter and keep a
    # mask of the pixels that stay valid under every one of them.
    Ts = np.zeros([nParams, Iref.shape[0], Iref.shape[1]])
    Ms = np.ones([Iref.shape[0], Iref.shape[1]], dtype=np.float32)
    for i in range(nParams):
        Ts[i, :, :] = sh.imtransform(Iref, Hs[i, :, :])
        Ms = Ms * (np.float32(~np.isnan(Ts[i, :, :])))

    # compute differences (one flattened row per free parameter)
    Ds = Ts - Iref[np.newaxis, :, :]
    D_zerod = np.nan_to_num(Ds.reshape(nParams, Iref.size))
    Lbda = lbda * Iref.size * np.eye(nParams)
    err = np.inf
    # Kept 1-D throughout so the indexed assignment in the loop has the
    # same shape on every iteration.
    ds = np.zeros(8)

    # I is not modified inside the loop, so count its valid pixels once.
    origValidPixels = np.count_nonzero(~np.isnan(I))

    for i in range(100):
        # warp image with current estimate
        Ip = sh.imtransform(I, H)
        M = Ms * np.float32(~np.isnan(Ip) & ~np.isnan(Iref))
        Mf = M.reshape(I.size, 1)
        dI = Ip - Iref
        dIf = dI.reshape(I.size, 1)

        # guard against bad things
        if np.sum(Mf) < 2:
            H = np.eye(3)
            err = np.inf
            break

        # give up if fewer than a third of the originally valid pixels
        # are still valid after warping
        newValidPixels = np.count_nonzero(~np.isnan(Ip + I))
        if newValidPixels < (origValidPixels / 3.):
            return (np.eye(3), np.inf)

        # apply mask via multiply rather than index; NaN * 0 is NaN, hence
        # the nan_to_num on the masked residual
        D0_masked = D_zerod * Mf.T
        dI1 = np.nan_to_num(dIf * Mf)
        _A_masked = np.dot(D0_masked, D0_masked.T)
        _B_masked = np.linalg.inv(_A_masked + Lbda)
        _C_masked = np.dot(D0_masked, dI1)
        ds1 = np.dot(_B_masked, _C_masked)

        # ds1 is a column vector; flatten it to match ds[keep].
        ds[keep] = np.ravel(ds1)
        HH = ds2H(ds, wts)
        H = np.dot(H, HH[0])
        H = H / H[2, 2]
        err0 = err
        err = np.mean(np.abs(dI1))
        delta = err0 - err
        if verbose:
            print("{0}  i= {1}  err= {2}  del= {3}".format(I.shape, i, err, delta))
        if delta < eps:
            break
    return (H, err)
Exemplo n.º 4
0
def imagesAlign1(I,Iref,H0=np.eye(3,dtype=np.float32),
                 trfm_type='similarity',verbose=False, minArea = np.power(2, 11)):
    """
    Iteratively estimates the transformation that aligns I to Iref,
    recursing on half-size images first (coarse to fine).
    Input:
        nparray I: Assumes that I is larger than IREF
        nparray Iref:
        nparray H0: Trans. mat. used as the starting estimate at the
            coarsest level.
        str trfm_type: Transformation trfm_type. One of 'translation',
            'rigid', 'similarity', 'affine', 'projective'.
        bool verbose: Print the error after every iteration.
        int minArea: The minimum area that IREF is allowed to be - if
            IREF.width*IREF.height is greater than this, then imagesAlign1
            will shrink both I and IREF by 50% until the area < MINAREA.
            Smaller values of MINAREA allow higher tolerance for wider
            translations, yet can lead to less-predictable results.
            Suggestion: For coarse global alignment, try smaller values
            of MINAREA. For finer local alignment, use larger MINAREA.
    Output:
        (H, err): the estimated 3x3 transformation and the mean absolute
        intensity difference over the valid pixels of the last iteration.
        (np.eye(3), np.inf) is returned when too few valid pixels remain
        after warping.
    Raises:
        ValueError: if trfm_type is not one of the supported names.
    """
    # Which of the 8 entries of the parameter vector handed to ds2H each
    # transformation family is allowed to update.
    keep_by_type = {
        'translation': [0, 1],
        'rigid': [0, 1, 5],
        'similarity': [0, 1, 2, 5],
        'affine': [0, 1, 2, 3, 4, 5],
        'projective': [0, 1, 2, 3, 4, 5, 6, 7],
    }
    if trfm_type not in keep_by_type:
        # Previously an unknown name only surfaced later as an unbound
        # local variable.
        raise ValueError("unknown trfm_type: {0!r}".format(trfm_type))
    keep = keep_by_type[trfm_type]

    lbda = 1e-6
    wh = Iref.shape
    eps = 1e-3
    sig = 2

    # recursive check
    if np.prod(wh) < minArea:
        H = H0
    else:
        I1 = sh.fastResize(I, .5)
        Iref1 = sh.fastResize(Iref, .5)
        # S converts half-size coordinates to full-size coordinates.
        S = np.eye(3)
        S[0, 0] = 2
        S[1, 1] = 2
        H0 = np.dot(np.dot(np.linalg.inv(S), H0), S)
        (H, _) = imagesAlign1(I1, Iref1, H0=H0, trfm_type=trfm_type,
                              verbose=verbose, minArea=minArea)
        H = np.dot(S, np.dot(H, np.linalg.inv(S)))

    # smooth images
    Iref = gaussian_filter(Iref, sig)
    I = gaussian_filter(I, sig)

    # pad image with NaNs: replicate the border rows/columns twice, then
    # overwrite the replicated frame with NaN. I is indexed with Iref's
    # extent, so both images have the same shape afterwards.
    ws = np.concatenate(([0, 0], np.arange(wh[0]), [wh[0] - 1, wh[0] - 1]))
    hs = np.concatenate(([0, 0], np.arange(wh[1]), [wh[1] - 1, wh[1] - 1]))
    try:
        Iref = Iref[np.ix_(ws, hs)]
        I = I[np.ix_(ws, hs)]
    except Exception:
        import time
        traceback.print_exc()
        print('...Iref.shape: {0}'.format(Iref.shape))
        print('...I.shape: {0}'.format(I.shape))
        # The original formatted an undefined name `t` here, which raised
        # NameError and hid the real failure; use a timestamp as the tag.
        tag = str(int(time.time()))
        misc.imsave("_Iref_{0}.png".format(tag), Iref)
        misc.imsave("_I_{0}.png".format(tag), I)
        # Bare raise keeps the original traceback.
        raise
    hs = np.array([0, 1, wh[1] + 2, wh[1] + 3])
    ws = np.array([0, 1, wh[0] + 2, wh[0] + 3])

    Iref[ws, :] = np.nan
    I[ws, :] = np.nan
    Iref[:, hs] = np.nan
    I[:, hs] = np.nan

    # Per-parameter weights, rescaled relative to a 128x128 image.
    wts = np.array([1, 1, 1.0204, .03125, 1.0313, .0204, .000555, .000555])
    s = math.sqrt(Iref.size) / 128.0
    wts[2] = math.pow(wts[2], 1 / s)
    wts[3] = wts[3] / s
    wts[4] = math.pow(wts[4], 1 / s)
    wts[5] = wts[5] / s
    wts[6] = wts[6] / (s * s)
    wts[7] = wts[7] / (s * s)

    # compute transformations
    HH = ds2H(-1 * np.ones(8), wts)
    Hs = HH[1][keep, :]
    nParams = Hs.shape[0]

    # apply transformations: warp Iref once per free parameter and keep a
    # mask of the pixels that stay valid under every one of them.
    Ts = np.zeros([nParams, Iref.shape[0], Iref.shape[1]])
    Ms = np.ones([Iref.shape[0], Iref.shape[1]], dtype=np.float32)
    for i in range(nParams):
        Ts[i, :, :] = sh.imtransform(Iref, Hs[i, :, :])
        Ms = Ms * (np.float32(~np.isnan(Ts[i, :, :])))

    # compute differences (one flattened row per free parameter)
    Ds = Ts - Iref[np.newaxis, :, :]
    D = Ds.reshape(nParams, Iref.size)
    Lbda = lbda * Iref.size * np.eye(nParams)
    err = np.inf
    # Kept 1-D throughout so the indexed assignment in the loop has the
    # same shape on every iteration.
    ds = np.zeros(8)

    # I is not modified inside the loop, so count its valid pixels once.
    origValidPixels = np.count_nonzero(~np.isnan(I))

    for i in range(100):
        # warp image with current estimate
        Ip = sh.imtransform(I, H)
        M = Ms * np.float32(~np.isnan(Ip) & ~np.isnan(Iref))
        Mf = M.reshape(I.size, 1)
        dI = Ip - Iref
        dIf = dI.reshape(I.size, 1)

        # guard against bad things
        if np.sum(Mf) < 2:
            H = np.eye(3)
            err = np.inf
            break

        # give up if fewer than a third of the originally valid pixels
        # are still valid after warping
        newValidPixels = np.count_nonzero(~np.isnan(Ip + I))
        if newValidPixels < (origValidPixels / 3.):
            return (np.eye(3), np.inf)

        # Restrict the solve to the valid pixels. The guard above ensures
        # at least two of them.
        valid = np.flatnonzero(Mf)
        D0 = D[:, valid]
        dI1 = dIf[valid]
        _A = np.dot(D0, D0.T)
        _B = np.linalg.inv(_A + Lbda)
        _C = np.dot(D0, dI1)
        ds1 = np.dot(_B, _C)

        # ds1 is a column vector; flatten it to match ds[keep].
        ds[keep] = np.ravel(ds1)
        HH = ds2H(ds, wts)
        H = np.dot(H, HH[0])
        H = H / H[2, 2]
        err0 = err
        err = np.mean(np.abs(dI1))
        delta = err0 - err
        if verbose:
            print("{0}  i= {1}  err= {2}  del= {3}".format(I.shape, i, err, delta))
        if delta < eps:
            break
    return (H, err)
Exemplo n.º 5
0
def pm1(digit_hash,I,nDigits,hspace,hackConstant=250,rejected_hash=None,accepted_hash=None):
    """
    Applies digit-OCR to an image.
    Input:
        dict digit_hash: maps {str digit: img digit_exemplar}
        obj I: image to search over (i.e. voted ballot)
        int nDigits: number of digits to find (must be >= 1)
        hspace: horizontal offset used to shift the accumulated cost map
            between consecutive digits
        hackConstant: scale factor applied to the per-pixel matching cost
        dict rejected_hash: maps {str digit: [((y1,y2,x1,x2), str side_i, bool isflip_i), ...]}
        dict accepted_hash: maps {str digit: [((y1,y2,x1,x2), str side_i, bool isflip_i), ...]}
    Output:
        (ocr_str, patches, bbs, scores):
            str ocr_str: the digits found, in order.
            list patches: one None per digit (patches are not extracted).
            list bbs: one (y1, y2, x1, x2) tuple of ints per digit.
            list scores: the best match response at each digit position.
    Raises:
        ValueError: if digit_hash is empty or nDigits < 1.
    """
    if nDigits < 1:
        raise ValueError("nDigits must be >= 1, got {0}".format(nDigits))
    # Materialise the keys so they can be indexed by position later on.
    keys = list(digit_hash.keys())
    if not keys:
        raise ValueError("digit_hash must contain at least one exemplar")

    reject_penalty = .2
    accept_bonus = .2
    # time.clock no longer exists on Python >= 3.8.
    timer = getattr(time, 'perf_counter', None) or time.clock

    matchMat = None
    t0 = timer()
    for count, key in enumerate(keys):
        Iout = sh.NCC(I, digit_hash[key])
        # Lower the response around previously rejected locations and
        # raise it around previously accepted ones.
        for (hint_hash, delta) in ((rejected_hash, -reject_penalty),
                                   (accepted_hash, accept_bonus)):
            if not hint_hash or key not in hint_hash:
                continue
            # 'side' and 'isflip' are carried by the hash entries but are
            # not used here.
            for (bbMask, side, isflip) in hint_hash[key]:
                h = bbMask[1] - bbMask[0]
                w = bbMask[3] - bbMask[2]
                # Adjust a window of +/- a quarter of the box size around
                # the box's top-left corner.
                i1 = int(max(0, bbMask[0] - (h // 4)))
                i2 = int(min(Iout.shape[0], bbMask[0] + (h // 4)))
                j1 = int(max(0, bbMask[2] - (w // 4)))
                j2 = int(min(Iout.shape[1], bbMask[2] + (w // 4)))
                Iout[i1:i2, j1:j2] = Iout[i1:i2, j1:j2] + delta

        if matchMat is None:
            matchMat = np.zeros((Iout.shape[0], Iout.shape[1], len(keys)))
        matchMat[:, :, count] = Iout

    print('match time: {0} (s)'.format(timer() - t0))
    # Best response per pixel and the index (into keys) of the exemplar
    # that produced it.
    maxResp = np.amax(matchMat, axis=2)
    maxObj = np.argmax(matchMat, axis=2)

    tDP = timer()
    # re-scale resp
    unary = hackConstant * np.power(2 - (maxResp + 1), 2)

    def shifted(cost):
        # Translate a cost map by hspace; cells without a source pixel get
        # the map's maximum.
        shiftH = np.eye(3)
        shiftH[0, 2] = hspace
        return sh.imtransform(cost, shiftH, fillval=cost.max())

    # cache bottom up
    M = [None] * nDigits
    Mx = [None] * nDigits
    My = [None] * nDigits
    if nDigits == 1:
        # A single digit has no predecessor: the root cost is the unary
        # cost itself (the original indexed M[-1] here).
        rootM = unary
    else:
        res = distance_transform.dt2(unary)
        M[0], Mx[0], My[0] = res[0], res[1], res[2]
        for i in range(1, nDigits - 1):
            res = distance_transform.dt2(shifted(M[i - 1]) + unary)
            M[i], Mx[i], My[i] = res[0], res[1], res[2]
        rootM = shifted(M[nDigits - 2]) + unary
    M[nDigits - 1] = rootM

    # get best root position
    (miny, minx) = np.unravel_index(rootM.argmin(), rootM.shape)

    # store top down
    optYX = [None] * nDigits
    optYX[nDigits - 1] = (int(miny), int(minx))
    for i in reversed(range(nDigits - 1)):
        (prevMiny, prevMinx) = optYX[i + 1]
        # NOTE(review): a root closer than hspace to the left edge yields a
        # negative column here, which wraps around - confirm this cannot
        # happen for real inputs.
        col = int(prevMinx - hspace)
        # round() returns a float on Python 2; positions are used as array
        # indices below, so convert explicitly.
        optYX[i] = (int(round(My[i][prevMiny, col])),
                    int(round(Mx[i][prevMiny, col])))

    patches = []
    bbs = []
    scores = []
    digits = []
    for (i1, j1) in optYX:
        key = keys[maxObj[i1, j1]]
        digits.append(key)
        i2 = i1 + digit_hash[key].shape[0]
        j2 = j1 + digit_hash[key].shape[1]
        bbs.append((i1, i2, j1, j2))
        patches.append(None)
        scores.append(maxResp[i1, j1])
    ocr_str = ''.join(digits)

    print('DP time: {0} (s)'.format(timer() - tDP))
    return (ocr_str, patches, bbs, scores)