def baseline_reverse_hamming(term, ri, theta, esquare, xloc, xlen, posx, lenfilter, overlapx):
    """Accumulate per-candidate overlap for ``term``, scanning postings backwards.

    NOTE(review): a second function with this exact name is defined right
    after this one in the file; being later, that definition is the one that
    stays bound. Confirm whether this copy can be removed.

    Parameters:
        term: key looked up in ``ri``.
        ri: mapping (needs ``.get``) from term to a reversible sequence of
            ``(y, posy)`` pairs. Of each ``y`` this function reads ``y[1]``
            (used as the candidate key), ``y[2]`` (passed to ``distsquare``)
            and ``y[3]`` (a sized token sequence).
        theta: similarity threshold used to derive the required overlap.
        esquare: upper limit for ``distsquare(xloc, y[2])``.
        xloc: value compared against ``y[2]`` via ``distsquare``.
        xlen: length of the probing record.
        posx: position of ``term`` inside the probing record.
        lenfilter: candidates shorter than this terminate the scan.
        overlapx: dict mutated in place; maps candidate key to
            ``(ytext, overlap_count)`` or to ``None`` once the candidate has
            been pruned.

    Returns:
        None. All results are written into ``overlapx``.
    """
    postings = ri.get(term)
    if postings is None:
        return
    for y, posy in reversed(postings):
        ytext = y[3]
        ylen = len(ytext)
        if ylen < lenfilter:
            # Ends the whole scan, not just this posting -- presumably the
            # postings are ordered by length so everything further on is
            # shorter still; verify against the index builder.
            return
        if distsquare(xloc, y[2]) > esquare:
            continue
        yid = y[1]
        entry = overlapx.get(yid, (ytext, 0))
        if entry is None:
            # Candidate was pruned earlier; never resurrect it.
            continue
        shared = entry[1]
        # Minimum overlap required for the pair (matches the usual bound for
        # a Jaccard threshold -- presumably the measure in use).
        alpha = int(ceil(theta * float(xlen + ylen) / (1 + theta)))
        # Best case: every remaining token on the shorter tail matches.
        ubound = min(xlen - posx, ylen - posy)
        if shared + ubound < alpha:
            overlapx[yid] = None
            continue
        # Largest Hamming distance still compatible with overlap >= alpha.
        hmax = xlen + ylen - 2 * alpha
        if (posx - shared) + (posy - shared) + abs((xlen - posx) - (ylen - posy)) > hmax:
            overlapx[yid] = None
            continue
        overlapx[yid] = (ytext, shared + 1)
def baseline_reverse_hamming(term, ri, theta, esquare, xloc, xlen, posx, lenfilter, overlapx):
    """Reverse-scan the postings of ``term`` and update ``overlapx`` in place.

    Each posting is a ``(y, posy)`` pair. From ``y`` the function uses
    ``y[1]`` as the candidate key, ``y[2]`` as the argument compared with
    ``xloc`` through ``distsquare``, and ``y[3]`` as the candidate's token
    sequence. A candidate is either advanced to ``(ytext, count + 1)`` or
    marked ``None`` (pruned) when the overlap bound or the Hamming-distance
    bound shows it cannot reach the required overlap.

    Parameters:
        term: key looked up in ``ri``.
        ri: mapping with ``.get`` returning a reversible sequence of postings,
            or ``None`` when the term is absent.
        theta: similarity threshold.
        esquare: maximum allowed ``distsquare`` value.
        xloc: probing record's value handed to ``distsquare``.
        xlen: probing record's length.
        posx: position of ``term`` in the probing record.
        lenfilter: the scan stops at the first candidate shorter than this.
        overlapx: dict of candidate key -> ``(ytext, count)`` or ``None``.

    Returns:
        None.
    """
    postings = ri.get(term)
    if postings is None:
        return

    for y, posy in reversed(postings):
        ytext = y[3]
        ylen = len(ytext)
        if ylen < lenfilter:
            # Aborts the scan entirely; presumably relies on postings being
            # length-ordered -- TODO confirm.
            return
        if distsquare(xloc, y[2]) > esquare:
            continue

        yid = y[1]
        previous = overlapx.get(yid, (ytext, 0))
        if previous is None:
            # Already pruned on an earlier term.
            continue
        count = previous[1]

        # Required overlap and the largest Hamming distance that still
        # allows it.
        alpha = int(ceil(theta * float(xlen + ylen) / (1 + theta)))
        hmax = xlen + ylen - 2 * alpha

        # Optimistic overlap from the remaining tails, and a lower bound on
        # the Hamming distance given the current positions.
        ubound = min(xlen - posx, ylen - posy)
        hamming = (posx - count) + (posy - count) + abs((xlen - posx) - (ylen - posy))

        if count + ubound < alpha or hamming > hmax:
            overlapx[yid] = None
        else:
            overlapx[yid] = (ytext, count + 1)
def grouping(theta, esquare, xlen, xprefix, xobjs, overlapx, counter):
    """Verify surviving candidate groups and yield the matching id pairs.

    This is a generator. For every non-pruned entry in ``overlapx`` it
    computes an optimistic overlap bound and, if that bound can still reach
    the required overlap, checks every ``(xo, yo)`` object pair.

    Parameters:
        theta: similarity threshold used to derive the required overlap.
        esquare: pairs with ``distsquare(xo[1], yo[1]) > esquare`` are skipped.
        xlen: length of the probing record.
        xprefix: non-empty sequence; its length and last element are used.
        xobjs: iterable of probing objects; ``xo[0]`` is yielded, ``xo[1]`` is
            passed to ``distsquare`` and ``xo[2]`` to ``overlap``.
        overlapx: object with ``itervalues()`` whose values are ``None``
            (pruned) or sequences where ``[0]`` is the candidate group and
            ``[1]`` the overlap counted so far. Of the group this function
            reads ``[0][0]`` (length), ``[0][2]`` (prefix) and ``[2]``
            (iterable of objects shaped like the ones in ``xobjs``).
        counter: object with ``add``; ``add(1)`` is called once per pair that
            passes the distance check.

    Yields:
        ``(xo[0], yo[0])`` for every pair whose total overlap is at least
        the required overlap.
    """
    xprefixlen = len(xprefix)
    wx = xprefix[-1]
    for entry in overlapx.itervalues():
        if entry is None:
            continue
        group = entry[0]
        shared = entry[1]
        ylen = group[0][0]
        alpha = int(ceil(theta * float(xlen + ylen) / (1 + theta)))
        yprefix = group[0][2]
        py = min(len(yprefix), ylen - int(ceil(2 * theta * float(ylen) / (theta + 1))) + 1)
        wy = yprefix[py - 1]
        yobjs = group[2]

        # Start every candidate from the real prefix length. The previous
        # code assigned this once before the loop, so the `px = shared`
        # below leaked into all later candidates and skewed both the bound
        # and the offset handed to overlap().
        px = xprefixlen
        if wx < wy:
            # x's prefix ends first: the rest of x is still unexplored,
            # while y has only been consumed up to the shared tokens.
            ubound = shared + xlen - px
            py = shared
        elif wx > wy:
            # Symmetric case: y's prefix ends first.
            ubound = shared + ylen - py
            px = shared
        else:
            ubound = shared + min(xlen - px, ylen - py)
        if ubound < alpha:
            continue

        for xo in xobjs:
            for yo in yobjs:
                if distsquare(xo[1], yo[1]) > esquare:
                    continue
                counter.add(1)
                if shared + overlap(xo[2], px, xlen, yo[2], py, ylen) >= alpha:
                    yield (xo[0], yo[0])
def grouping_self(theta, esquare, xlen, xprefixlen, xobjs, counter):
    """Yield matching id pairs among objects that belong to the same group.

    This is a generator over every unordered pair ``(xobjs[i], xobjs[j])``
    with ``i < j``. Both sides share the same length ``xlen`` and the same
    prefix length ``xprefixlen``; the prefix length is added to the value
    returned by ``overlap`` before comparing with the required overlap.

    Parameters:
        theta: similarity threshold used to derive the required overlap.
        esquare: pairs with ``distsquare(a[1], b[1]) > esquare`` are skipped.
        xlen: common record length.
        xprefixlen: common prefix length, also the start offset given to
            ``overlap`` for both sides.
        xobjs: indexable, sized collection; item ``[0]`` is yielded, ``[1]``
            goes to ``distsquare`` and ``[2]`` to ``overlap``.
        counter: object with ``add``; ``add(1)`` is called for every pair
            that passes the distance check.

    Yields:
        ``(a[0], b[0])`` for each pair reaching the required overlap.
    """
    total = len(xobjs)
    if total < 2:
        # Fewer than two objects: no pair to compare.
        return

    alpha = int(ceil(theta * float(2 * xlen) / (1 + theta)))

    for left_idx in xrange(0, total - 1):
        left = xobjs[left_idx]
        for right_idx in xrange(left_idx + 1, total):
            right = xobjs[right_idx]
            if distsquare(left[1], right[1]) > esquare:
                continue
            counter.add(1)
            common = xprefixlen + overlap(left[2], xprefixlen, xlen, right[2], xprefixlen, xlen)
            if common >= alpha:
                yield (left[0], right[0])
def baseline(term, ri, theta, esquare, xloc, xlen, posx, lenfilter, overlapx):
    """Update candidate overlap counts for one term of the probing record.

    Walks the postings of ``term`` in stored order. Postings that are too
    short or fail the ``distsquare`` check are skipped without touching
    ``overlapx``. Every other candidate is either advanced by one shared
    token or marked as pruned.

    Parameters:
        term: key looked up in ``ri``.
        ri: mapping with ``.get``; values are iterables of ``(y, posy)``
            pairs. Of each ``y`` this function reads ``y[1]`` (candidate
            key), ``y[2]`` (passed to ``distsquare``) and ``y[3]`` (sized
            token sequence).
        theta: similarity threshold used to derive the required overlap.
        esquare: upper limit for ``distsquare(xloc, y[2])``.
        xloc: probing record's value handed to ``distsquare``.
        xlen: length of the probing record.
        posx: position of ``term`` in the probing record.
        lenfilter: minimum candidate length.
        overlapx: dict mutated in place; candidate key ->
            ``(ytext, overlap_count)`` or ``None`` when pruned.

    Returns:
        None.
    """
    for y, posy in ri.get(term, []):
        ytext = y[3]
        ylen = len(ytext)
        if ylen < lenfilter or distsquare(xloc, y[2]) > esquare:
            continue
        yid = y[1]
        entry = overlapx.get(yid, (ytext, 0))
        if entry is None:
            # Pruned earlier; stays pruned.
            continue
        shared = entry[1]
        # Minimum overlap required for this pair of lengths.
        alpha = int(ceil(theta * float(xlen + ylen) / (1 + theta)))
        # Best case: the whole shorter remaining tail matches.
        ubound = min(xlen - posx, ylen - posy)
        if shared + ubound >= alpha:
            overlapx[yid] = (ytext, shared + 1)
        else:
            overlapx[yid] = None
def extra_pos(term, ri, theta, esquare, xloc, xlen, posx, lenfilter, overlapx):
    """Update candidate overlap counts and remember the last match positions.

    Same filtering as the plain overlap update (length filter,
    ``distsquare`` filter, optimistic overlap bound), but the stored entry
    is a 4-tuple ``(ytext, overlap_count, posx, posy)`` carrying the
    positions of the most recent shared term on both sides.

    Parameters:
        term: key looked up in ``ri``.
        ri: mapping with ``.get``; values are iterables of ``(y, posy)``
            pairs. Of each ``y`` this function reads ``y[1]`` (candidate
            key), ``y[2]`` (passed to ``distsquare``) and ``y[3]`` (sized
            token sequence).
        theta: similarity threshold used to derive the required overlap.
        esquare: upper limit for ``distsquare(xloc, y[2])``.
        xloc: probing record's value handed to ``distsquare``.
        xlen: length of the probing record.
        posx: position of ``term`` in the probing record.
        lenfilter: minimum candidate length.
        overlapx: dict mutated in place; candidate key ->
            ``(ytext, overlap_count, posx, posy)`` or ``None`` when pruned.

    Returns:
        None.
    """
    for y, posy in ri.get(term, []):
        ytext = y[3]
        ylen = len(ytext)
        if ylen < lenfilter or distsquare(xloc, y[2]) > esquare:
            continue
        yid = y[1]
        entry = overlapx.get(yid, (ytext, 0, posx, posy))
        if entry is None:
            # Pruned earlier; stays pruned.
            continue
        shared = entry[1]
        # Minimum overlap required for this pair of lengths.
        alpha = int(ceil(theta * float(xlen + ylen) / (1 + theta)))
        # Best case: the whole shorter remaining tail matches.
        ubound = min(xlen - posx, ylen - posy)
        if shared + ubound >= alpha:
            overlapx[yid] = (ytext, shared + 1, posx, posy)
        else:
            overlapx[yid] = None
def baseline_reverse(term, ri, theta, esquare, xloc, xlen, posx, lenfilter, overlapx):
    """Update candidate overlap counts for ``term``, scanning postings backwards.

    Unlike a forward scan, the first candidate shorter than ``lenfilter``
    ends the whole scan instead of being skipped.

    Parameters:
        term: key looked up in ``ri``.
        ri: mapping with ``.get`` returning a reversible sequence of
            ``(y, posy)`` pairs, or ``None`` when the term is absent. Of each
            ``y`` this function reads ``y[1]`` (candidate key), ``y[2]``
            (passed to ``distsquare``) and ``y[3]`` (sized token sequence).
        theta: similarity threshold used to derive the required overlap.
        esquare: upper limit for ``distsquare(xloc, y[2])``.
        xloc: probing record's value handed to ``distsquare``.
        xlen: length of the probing record.
        posx: position of ``term`` in the probing record.
        lenfilter: the scan stops at the first candidate shorter than this.
        overlapx: dict mutated in place; candidate key ->
            ``(ytext, overlap_count)`` or ``None`` when pruned.

    Returns:
        None.
    """
    postings = ri.get(term)
    if postings is None:
        return
    for y, posy in reversed(postings):
        ytext = y[3]
        ylen = len(ytext)
        if ylen < lenfilter:
            # Stops the scan outright; presumably postings are ordered by
            # length so the remaining ones are shorter -- TODO confirm.
            return
        if distsquare(xloc, y[2]) > esquare:
            continue
        yid = y[1]
        entry = overlapx.get(yid, (ytext, 0))
        if entry is None:
            # Pruned earlier; stays pruned.
            continue
        shared = entry[1]
        # Minimum overlap required for this pair of lengths.
        alpha = int(ceil(theta * float(xlen + ylen) / (1 + theta)))
        # Best case: the whole shorter remaining tail matches.
        ubound = min(xlen - posx, ylen - posy)
        if shared + ubound >= alpha:
            overlapx[yid] = (ytext, shared + 1)
        else:
            overlapx[yid] = None