示例#1
0
def baseline_reverse_hamming(term, ri, theta, esquare, xloc, xlen, posx, lenfilter, overlapx):
  """Update ``overlapx`` from the postings of ``term``, scanned last-to-first.

  Each posting is a pair ``(y, posy)``; ``y[1]`` is used as the candidate key,
  ``y[2]`` is handed to ``distsquare`` together with ``xloc``, and ``y[3]`` is
  the candidate's text (only its length is used here).

  For every posting that passes the length and distance checks, the entry
  ``overlapx[y[1]]`` is either advanced to ``(text, count + 1)`` or set to
  ``None``.  An entry that is already ``None`` is never revived.

  Returns ``None``; the result is the mutation of ``overlapx``.
  """
  entries = ri.get(term)
  if entries is None:
    return
  for cand, cand_pos in reversed(entries):
    cand_text = cand[3]
    cand_len = len(cand_text)
    if cand_len < lenfilter:
      # Abandons the whole scan, not just this posting.
      # NOTE(review): presumably postings are ordered by length so everything
      # still to come is shorter as well -- confirm against the index builder.
      return
    if distsquare(xloc, cand[2]) > esquare:
      continue
    cand_id = cand[1]
    state = overlapx.get(cand_id, (cand_text, 0))
    if state is None:
      # Candidate was pruned by an earlier call; leave it pruned.
      continue
    shared = state[1]
    required = int(ceil(theta * float(xlen + cand_len) / (1 + theta)))
    x_rest = xlen - posx
    y_rest = cand_len - cand_pos
    # Largest count still reachable from the current positions.
    reachable = shared + min(x_rest, y_rest)
    # Mismatch budget versus a lower bound on mismatches (the "hamming" test).
    budget = xlen + cand_len - 2 * required
    mismatches = (posx - shared) + (cand_pos - shared) + abs(x_rest - y_rest)
    if reachable >= required and mismatches <= budget:
      overlapx[cand_id] = (cand_text, shared + 1)
    else:
      overlapx[cand_id] = None
示例#2
0
def baseline_reverse_hamming(term, ri, theta, esquare, xloc, xlen, posx,
                             lenfilter, overlapx):
    """Walk the postings of ``term`` backwards and refresh ``overlapx``.

    ``ri.get(term)`` supplies ``(record, posy)`` pairs.  From each record this
    function reads ``record[1]`` (key into ``overlapx``), ``record[2]``
    (passed to ``distsquare`` with ``xloc``) and ``record[3]`` (text whose
    length is compared with ``lenfilter``).

    A surviving candidate has its stored count incremented; a candidate that
    fails either bound is marked with ``None`` and is skipped from then on.
    Nothing is returned.
    """
    postings = ri.get(term)
    if postings is None:
        return

    for record, posy in reversed(postings):
        text = record[3]
        ylen = len(text)
        if ylen < lenfilter:
            # Ends the scan outright rather than skipping one posting.
            # NOTE(review): looks like it relies on length-ordered postings
            # -- confirm.
            return
        if distsquare(xloc, record[2]) > esquare:
            continue

        key = record[1]
        entry = overlapx.get(key, (text, 0))
        if entry is None:
            continue
        count = entry[1]

        alpha = int(ceil(theta * float(xlen + ylen) / (1 + theta)))
        x_tail = xlen - posx
        y_tail = ylen - posy

        # First bound: even matching every remaining position is too little.
        pruned = count + min(x_tail, y_tail) < alpha
        if not pruned:
            # Second bound: minimum mismatches already exceed the budget.
            mismatches = (posx - count) + (posy - count) + abs(x_tail - y_tail)
            pruned = mismatches > xlen + ylen - 2 * alpha

        overlapx[key] = None if pruned else (text, count + 1)
示例#3
0
def grouping(theta, esquare, xlen, xprefix, xobjs, overlapx, counter):
  """Yield id pairs for the candidates in ``overlapx`` that pass verification.

  Every non-``None`` value in ``overlapx`` is read as ``(ygroup, o)`` where
  ``ygroup[0][0]`` is the candidate length, ``ygroup[0][2]`` its prefix and
  ``ygroup[2]`` an iterable of candidate objects; ``o`` is the overlap counted
  so far.  Objects on both sides are indexed as ``obj[0]`` (value that is
  yielded), ``obj[1]`` (passed to ``distsquare``) and ``obj[2]`` (passed to
  ``overlap``).

  For each candidate an upper bound on the final overlap is derived from the
  last prefix tokens ``wx`` / ``wy``; candidates whose bound is below
  ``alpha`` are skipped.  Otherwise every ``(xo, yo)`` combination within
  ``esquare`` is counted via ``counter.add(1)`` and yielded as
  ``(xo[0], yo[0])`` when ``o + overlap(...)`` reaches ``alpha``.

  The start offsets handed to ``overlap`` are computed afresh for every
  candidate.  Previously the x offset was a single variable shared by all
  candidates, so a candidate taking the ``wx > wy`` branch changed the offset
  used by every candidate processed after it.
  """
  xprefixlen = len(xprefix)
  wx = xprefix[-1]
  # values() rather than itervalues(): identical iteration on Python 2 and
  # also available on Python 3 dicts.
  for entry in overlapx.values():
    if entry is None:
      continue
    ygroup = entry[0]
    o = entry[1]
    ylen = ygroup[0][0]
    alpha = int(ceil(theta*float(xlen+ylen)/(1+theta)))
    yprefix = ygroup[0][2]
    ystart = min(len(yprefix), ylen - int(ceil(2*theta*float(ylen)/(theta+1))) + 1)
    wy = yprefix[ystart-1]
    yobjs = ygroup[2]
    # Reset for every candidate so one candidate's branch cannot leak into
    # the next one.
    xstart = xprefixlen
    if wx < wy:
      ubound = o + xlen - xstart
      ystart = o
    elif wx > wy:
      ubound = o + ylen - ystart
      xstart = o
    else:
      ubound = o + min(xlen-xstart, ylen-ystart)
    if ubound < alpha:
      continue
    for xo in xobjs:
      for yo in yobjs:
        if distsquare(xo[1], yo[1]) > esquare:
          continue
        counter.add(1)
        if o + overlap(xo[2], xstart, xlen, yo[2], ystart, ylen) >= alpha:
          yield (xo[0], yo[0])
示例#4
0
def grouping_self(theta, esquare, xlen, xprefixlen, xobjs, counter):
    """Yield id pairs of objects within ``xobjs`` that match each other.

    Every unordered pair ``(xo, yo)`` with ``xo`` appearing before ``yo`` in
    ``xobjs`` is examined once.  Pairs whose ``distsquare(xo[1], yo[1])``
    exceeds ``esquare`` are skipped; each remaining pair is registered with
    ``counter.add(1)`` and yielded as ``(xo[0], yo[0])`` when
    ``xprefixlen + overlap(...)`` reaches ``alpha``.

    Both objects are treated as having the same length ``xlen`` and the same
    prefix length ``xprefixlen`` (these values are passed for both sides of
    the ``overlap`` call).  Nothing is yielded for fewer than two objects.
    """
    # Local import keeps the block self-contained; combinations() produces the
    # pairs in the same order as nested index loops with i < j, and avoids the
    # Python 2-only xrange.
    from itertools import combinations

    if len(xobjs) <= 1:
        return
    alpha = int(ceil(theta * float(2 * xlen) / (1 + theta)))
    for xo, yo in combinations(xobjs, 2):
        if distsquare(xo[1], yo[1]) > esquare:
            continue
        counter.add(1)
        if xprefixlen + overlap(xo[2], xprefixlen, xlen, yo[2], xprefixlen, xlen) >= alpha:
            yield (xo[0], yo[0])
示例#5
0
def baseline(term, ri, theta, esquare, xloc, xlen, posx, lenfilter, overlapx):
    """Update ``overlapx`` for every posting of ``term`` in ``ri``.

    Postings are ``(record, posy)`` pairs; a missing term is treated as an
    empty posting list.  A posting is ignored when ``len(record[3])`` is below
    ``lenfilter`` or when ``distsquare(xloc, record[2])`` exceeds ``esquare``.

    For the remaining postings the entry ``overlapx[record[1]]`` becomes
    ``(text, count + 1)`` if the count plus the number of positions left on
    the shorter side can still reach ``alpha``; otherwise it becomes ``None``.
    Entries that are already ``None`` stay untouched.  Returns ``None``.
    """
    for record, posy in ri.get(term, []):
        text = record[3]
        ylen = len(text)
        # Length test first: the distance helper is only called for
        # candidates that are long enough.
        if ylen < lenfilter:
            continue
        if distsquare(xloc, record[2]) > esquare:
            continue

        key = record[1]
        entry = overlapx.get(key, (text, 0))
        if entry is None:
            continue
        seen = entry[1]

        needed = int(ceil(theta * float(xlen + ylen) / (1 + theta)))
        best_case = seen + min(xlen - posx, ylen - posy)
        if best_case >= needed:
            overlapx[key] = (text, seen + 1)
        else:
            overlapx[key] = None
示例#6
0
def extra_pos(term, ri, theta, esquare, xloc, xlen, posx, lenfilter, overlapx):
  """Update ``overlapx`` from the postings of ``term``, recording positions.

  Works on ``(y, posy)`` postings from ``ri.get(term, [])``.  Postings whose
  text ``y[3]`` is shorter than ``lenfilter``, or whose ``y[2]`` is farther
  than ``esquare`` from ``xloc`` according to ``distsquare``, are ignored.

  A surviving candidate is stored under ``y[1]`` as the 4-tuple
  ``(text, count + 1, posx, posy)``, i.e. the positions of the most recent
  match replace any stored earlier.  A candidate that can no longer reach
  ``alpha`` is stored as ``None`` and skipped afterwards.  Returns ``None``.
  """
  for posting in ri.get(term, []):
    y, posy = posting
    ytext = y[3]
    ylen = len(ytext)
    too_short = ylen < lenfilter
    # Short-circuit keeps distsquare from being evaluated for short texts.
    if too_short or distsquare(xloc, y[2]) > esquare:
      continue
    yid = y[1]
    previous = overlapx.get(yid, (ytext, 0, posx, posy))
    if previous is not None:
      matched = previous[1]
      alpha = int(ceil(theta * float(xlen + ylen) / (1 + theta)))
      if matched + min(xlen - posx, ylen - posy) < alpha:
        overlapx[yid] = None
      else:
        overlapx[yid] = (ytext, matched + 1, posx, posy)
示例#7
0
def baseline_reverse(term, ri, theta, esquare, xloc, xlen, posx, lenfilter, overlapx):
  """Update ``overlapx`` from the postings of ``term``, scanned last-to-first.

  Does nothing when ``ri`` has no entry for ``term``.  Otherwise each
  ``(y, posy)`` posting is visited in reverse order: ``y[3]`` is the text
  whose length is tested, ``y[2]`` goes to ``distsquare`` with ``xloc`` and
  ``y[1]`` keys the entry in ``overlapx``.

  An entry becomes ``(text, count + 1)`` while the count plus the remaining
  positions on the shorter side can still reach ``alpha``, and ``None``
  otherwise.  ``None`` entries are left alone.  Returns ``None``.
  """
  found = ri.get(term)
  if found is None:
    return
  for item, item_pos in reversed(found):
    item_text = item[3]
    item_len = len(item_text)
    if item_len < lenfilter:
      # Stops the scan completely instead of moving to the next posting.
      # NOTE(review): presumably valid because postings are length-ordered
      # -- confirm.
      return
    if distsquare(xloc, item[2]) > esquare:
      continue
    item_id = item[1]
    current = overlapx.get(item_id, (item_text, 0))
    if current is None:
      continue
    hits = current[1]
    threshold = int(ceil(theta * float(xlen + item_len) / (1 + theta)))
    remaining = min(xlen - posx, item_len - item_pos)
    if hits + remaining < threshold:
      overlapx[item_id] = None
    else:
      overlapx[item_id] = (item_text, hits + 1)