def lookoutsideoftheline(linenumber: int, numberofextrawords: int, workid: str,
                         searchobject: SearchObject, cursor) -> str:
    """
	grab a line and add the N words at the tail and head of the previous and next lines
	this will let you search for phrases that fall along a line break "και δη | και"

	if you wanted to look for 'ἀείδων Ϲπάρτηϲ'
	you need this individual line:
		2.1.374  δεξιτερὴν γὰρ ἀνέϲχε μετάρϲιον, ὡϲ πρὶν ἀείδων
	to turn extend out to:
		ὑφαίνων δεξιτερὴν γὰρ ἀνέϲχε μετάρϲιον ὡϲ πρὶν ἀείδων ϲπάρτηϲ

	:param linenumber: db index of the line at the center of the window
	:param numberofextrawords: how many words to borrow from each neighboring line
	:param workid: a work id; only the first six characters (the author table) are used
	:param searchobject: supplies usewordlist (which flavor of word list to read)
	:param cursor: a db cursor
	:return: the padded phrase wrapped in single spaces
	"""
    whitespace = ' '
    # the db table is per-author: the first six characters of the work id
    workdbname = workid[0:6]

    query = 'SELECT {wltmp} FROM {db} WHERE index BETWEEN %s AND %s ORDER BY index ASC'.format(
        wltmp=worklinetemplate, db=workdbname)
    data = (linenumber - 1, linenumber + 1)
    cursor.execute(query, data)
    results = cursor.fetchall()

    lines = [dblineintolineobject(r) for r in results]
    # pad with blank lines so there is always a previous/center/next triple:
    # otherwise you will get key errors if there is no linenumber +/- 1
    if len(lines) == 2:
        if lines[0].index == linenumber:
            lines = [makeablankline(workdbname, linenumber - 1)] + lines
        else:
            lines.append(makeablankline(workdbname, linenumber + 1))
    if len(lines) == 1:
        lines = [makeablankline(workdbname, linenumber - 1)] + lines
        lines.append(makeablankline(workdbname, linenumber + 1))

    text = list()
    for line in lines:
        wordsinline = line.wordlist(searchobject.usewordlist)
        if line.index == linenumber - 1:
            text = wordsinline[(numberofextrawords * -1):]
        elif line.index == linenumber:
            text += wordsinline
        elif line.index == linenumber + 1:
            text += wordsinline[0:numberofextrawords]

    aggregate = whitespace.join(text)
    # collapse any run of whitespace into one space: the old r'\s\s' pattern
    # left residue behind when three or more whitespace chars were adjacent
    # (non-overlapping matching only removes one char per pair)
    aggregate = re.sub(r'\s{2,}', whitespace, aggregate)
    aggregate = ' {a} '.format(a=aggregate)

    return aggregate
# (extraction artifact: "示例#2" / "0" example-separator lines — not part of the source; neutralized as a comment)
def brackethtmlifysearchfinds(listoflineobjects: list, searchobject: SearchObject, linehtmltemplate: str) -> list:
	"""

	format each hit line as HTML while tracking open/closed editorial brackets

	can't do comprehensions: require a thisline/previousline structure so you can call setcontinuationvalue()

	:param listoflineobjects:
	:param searchobject:
	:param linehtmltemplate:
	:return:
	"""

	brackettypes = findactivebrackethighlighting(searchobject.session)
	continuationdict = {b: False for b in brackettypes}

	# peel off the first line; fall back to a blank line if there are no hits
	if listoflineobjects:
		lastline = listoflineobjects[0]
		remainder = listoflineobjects[1:]
	else:
		lastline = makeablankline('gr0000w000', -1)
		remainder = list()

	passage = [linehtmltemplate.format(id=lastline.getlineurl(), lc=lastline.locus(), ft=lastline.markeditorialinsersions(continuationdict))]

	for thisline in remainder:
		passage.append(linehtmltemplate.format(id=thisline.getlineurl(), lc=thisline.locus(), ft=thisline.markeditorialinsersions(continuationdict)))
		# only after formatting do we fold this line's bracket state forward
		continuationdict = {b: setcontinuationvalue(thisline, lastline, continuationdict[b], b) for b in brackettypes}
		lastline = thisline

	return passage
# (extraction artifact: "示例#3" / "0" example-separator lines — not part of the source; neutralized as a comment)
def linesintoindex(lineobjects: List[dbWorkLine], activepoll) -> dict:
    """
	generate the concordance dictionary:
		{ wordA: [(workid1, index1, locus1), (workid2, index2, locus2),..., wordB: ...]}
		{'illic': [('lt0472w001', 2048, '68A.35')], 'carpitur': [('lt0472w001', 2048, '68A.35')], ...}

	NB: lineobjects is consumed: the list will be empty when this returns

	:param lineobjects: the lines to index
	:param activepoll: a progress poll (or a falsy placeholder) to update
	:return: the concordance dict
	"""

    # kill off titles and salutations: dangerous as there l1='t' has not been 100% ruled out as a valid body citation
    # lineobjects = [ln for ln in lineobjects if ln.l1 not in ['t', 'sa']]

    completeindex = dict()
    try:
        defaultwork = lineobjects[0].wkuinversalid
    except IndexError:
        # nothing to index
        return completeindex

    # clickable entries will break after too many words. Toggle between indexing methods by guessing N words per line and
    # then pick 'locus' when you have too many lineobjects: a nasty hack
    # a RangeError arises from jquery trying to push too many items onto its stack?
    # in which case if you had 32k indexlocationa and then indexlocationb and then ... you could avoid this?
    # pretty hacky, but it might work; then again, jquery might die after N of any kind not just N of a specific kind

    if len(lineobjects) < hipparchia.config[
            'CLICKABLEINDEXEDPASSAGECAP'] or hipparchia.config[
                'CLICKABLEINDEXEDPASSAGECAP'] < 0:
        # [a] '<indexedlocation id="linenumbergr0032w008/31011">2.17.6</indexedlocation>' vs [b] just '2.17.6'
        indexingmethod = 'anchoredlocus'
    elif session['indexskipsknownwords']:
        indexingmethod = 'anchoredlocus'
    else:
        indexingmethod = 'locus'

    while lineobjects:
        try:
            line = lineobjects.pop()
            if activepoll:
                activepoll.remain(len(lineobjects))
        except IndexError:
            line = makeablankline(defaultwork, None)

        if line.index:
            # the citation is identical for every word on the line: resolve the
            # formatter and build the string once instead of once per word
            referencestyle = getattr(line, indexingmethod)
            citation = referencestyle()
            for w in line.indexablewordlist():
                # setdefault() replaces the old try/append/except KeyError dance
                completeindex.setdefault(w, list()).append(
                    (line.wkuinversalid, line.index, citation))

    return completeindex
def grableadingandlagging(hitline: dbWorkLine,
                          searchobject: SearchObject,
                          cursor,
                          override=None) -> dict:
    """

	take a dbline and grab the N words in front of it and after it

	it would be a good idea to have an autocommit connection here?

	override was added so that the rewritten so of precomposedphraseandproximitysearch() can set 'seeking' as it
	wishes

	:param hitline: the line at the center of the context window
	:param searchobject: supplies the search term, distance, and word-list flavor
	:param cursor: a db cursor
	:param override: optional regex to use as 'seeking' instead of the searchobject's own term
	:return: {'lag': <words before the match>, 'lead': <words after the match>}
	"""

    so = searchobject
    # look out for off-by-one errors
    distance = so.distance + 1

    if override:
        seeking = override
    elif so.lemma:
        seeking = wordlistintoregex(so.lemma.formlist)
        so.usewordlist = 'polytonic'
    else:
        seeking = so.termone

    # expanded searchzone because "seeking" might be a multi-line phrase
    # (renamed from 'prev'/'next': 'next' shadowed the builtin)
    # NOTE(review): if hitline sits at the very start/end of the table these
    # grabs presumably still return a row — confirm grabonelinefromwork()
    previousline = grabonelinefromwork(hitline.authorid, hitline.index - 1, cursor)
    followingline = grabonelinefromwork(hitline.authorid, hitline.index + 1, cursor)
    previousline = dbWorkLine(*previousline)
    followingline = dbWorkLine(*followingline)

    searchzone = ' '.join([
        getattr(previousline, so.usewordlist),
        getattr(hitline, so.usewordlist),
        getattr(followingline, so.usewordlist)
    ])

    match = re.search(r'{s}'.format(s=seeking), searchzone)
    # but what if you just found 'paucitate' inside of 'paucitatem'?
    # you will have 'm' left over and this will throw off your distance-in-words count
    past = None
    upto = None
    lagging = list()
    leading = list()
    ucount = 0
    pcount = 0

    try:
        past = searchzone[match.end():].strip()
    except AttributeError:
        # AttributeError: 'NoneType' object has no attribute 'end' (no match)
        pass

    try:
        upto = searchzone[:match.start()].strip()
    except AttributeError:
        pass

    if upto:
        # split once: the word list and its count are the same data
        lagging = [x for x in upto.split(' ') if x]
        ucount = len(lagging)

    if past:
        leading = [x for x in past.split(' ') if x]
        pcount = len(leading)

    atline = hitline.index

    # keep prepending earlier lines until we have enough leading-context words
    while ucount < distance + 1:
        atline -= 1
        try:
            previous = dblineintolineobject(
                grabonelinefromwork(hitline.authorid, atline, cursor))
        except TypeError:
            # 'NoneType' object is not subscriptable: ran off the start of the work
            previous = makeablankline(hitline.authorid, -1)
            ucount = 999
        lagging = previous.wordlist(so.usewordlist) + lagging
        ucount += previous.wordcount()
    lagging = lagging[-1 * (distance - 1):]
    lagging = ' '.join(lagging)

    atline = hitline.index
    # and keep appending later lines for the trailing context
    while pcount < distance + 1:
        atline += 1
        try:
            nextline = dblineintolineobject(
                grabonelinefromwork(hitline.authorid, atline, cursor))
        except TypeError:
            # 'NoneType' object is not subscriptable: ran off the end of the work
            nextline = makeablankline(hitline.authorid, -1)
            pcount = 999
        leading += nextline.wordlist(so.usewordlist)
        pcount += nextline.wordcount()
    leading = leading[:distance - 1]
    leading = ' '.join(leading)

    returndict = {'lag': lagging, 'lead': leading}

    return returndict
def textmaker(author: str,
              work=None,
              passage=None,
              endpoint=None,
              citationdelimiter='|') -> JSON_STR:
    """
	build a text suitable for display

		"GET /textof/lt0474/024/20/30"

	:param author: author id, e.g. 'lt0474'
	:param work: work number within the author, e.g. '024'
	:param passage: citation of the first line to display
	:param endpoint: optional citation of the last line of a span
	:param citationdelimiter: separator between citation levels
	:return: JSON string with authorname, title, structure, worksegment, texthtml
	"""

    probeforsessionvariables()

    # read-only request: an autocommit connection avoids transaction bookkeeping
    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    linesevery = hipparchia.config['SHOWLINENUMBERSEVERY']

    po = TextmakerInputParsingObject(author, work, passage, endpoint,
                                     citationdelimiter)

    ao = po.authorobject
    wo = po.workobject

    segmenttext = str()

    # consolewarning('po.passageaslist: {p}'.format(p=po.passageaslist))

    if ao and wo:
        # we have both an author and a work, maybe we also have a subset of the work
        if endpoint:
            # a span: resolve first and last citations into db line numbers
            firstlinenumber = finddblinefromincompletelocus(
                wo, po.passageaslist, dbcursor)
            lastlinenumber = finddblinefromincompletelocus(wo,
                                                           po.endpointlist,
                                                           dbcursor,
                                                           findlastline=True)
            if firstlinenumber['code'] == 'success' and lastlinenumber[
                    'code'] == 'success':
                startline = firstlinenumber['line']
                endline = lastlinenumber['line']
                startlnobj = dblineintolineobject(
                    grabonelinefromwork(ao.universalid, startline, dbcursor))
                stoplnobj = dblineintolineobject(
                    grabonelinefromwork(ao.universalid, endline, dbcursor))
            else:
                # the span could not be resolved: warn and fall back to blank lines
                msg = '"buildtexttospan/" could not find first and last: {a}w{b} - {c} TO {d}'
                consolewarning(
                    msg.format(a=author, b=work, c=passage, d=endpoint))
                startlnobj = makeablankline(work, 0)
                stoplnobj = makeablankline(work, 1)
                startline = 0
                endline = 1
            segmenttext = 'from {a} to {b}'.format(a=startlnobj.shortlocus(),
                                                   b=stoplnobj.shortlocus())
        elif not po.passageaslist:
            # whole work
            startline = wo.starts
            endline = wo.ends
        else:
            # a partial citation: find the start and stop of that segment
            startandstop = textsegmentfindstartandstop(ao, wo,
                                                       po.passageaslist,
                                                       dbcursor)
            startline = startandstop['startline']
            endline = startandstop['endline']
        texthtml = buildtext(wo.universalid, startline, endline, linesevery,
                             dbcursor)
    else:
        texthtml = str()

    if hipparchia.config['INSISTUPONSTANDARDANGLEBRACKETS']:
        texthtml = gtltsubstitutes(texthtml)

    if not segmenttext:
        segmenttext = '.'.join(po.passageaslist)

    if not ao or not wo:
        # placeholder objects so the results dict below can still be built
        ao = makeanemptyauthor('gr0000')
        wo = makeanemptywork('gr0000w000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['texthtml'] = texthtml

    results = json.dumps(results)

    dbconnection.connectioncleanup()

    return results
def buildindexto(searchid: str,
                 author: str,
                 work=None,
                 passage=None,
                 endpoint=None,
                 citationdelimiter='|',
                 justvocab=False) -> JSON_STR:
    """
	build a complete index to an author, work, or segment of a work

	:param searchid: id used to register the progress poll
	:param author: author id, e.g. 'gr0032'
	:param work: work number within the author (optional)
	:param passage: citation of the start of a segment (optional)
	:param endpoint: citation of the end of a segment (optional)
	:param citationdelimiter: separator between citation levels
	:param justvocab: if True, return only the content dict (see note at return below)
	:return: JSON string with the index HTML and metadata
	"""

    probeforsessionvariables()

    pollid = validatepollid(searchid)

    starttime = time.time()

    # register the poll globally so the front end can query build progress
    progresspolldict[pollid] = ProgressPoll(pollid)
    progresspolldict[pollid].activate()

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    po = IndexmakerInputParsingObject(author, work, passage, endpoint,
                                      citationdelimiter)

    ao = po.authorobject
    wo = po.workobject
    psg = po.passageaslist
    stop = po.endpointlist

    if not work:
        wo = makeanemptywork('gr0000w000')

    # bool
    useheadwords = session['headwordindexing']

    allworks = list()
    output = list()
    # cdict maps work id -> (first line index, last line index) to be indexed
    cdict = dict()
    segmenttext = str()
    valid = True

    # four selection granularities, from narrowest to widest:
    # span within a work / segment of a work / whole work / whole author
    if ao and work and psg and stop:
        start = psg
        firstlinenumber = finddblinefromincompletelocus(wo, start, dbcursor)
        lastlinenumber = finddblinefromincompletelocus(wo,
                                                       stop,
                                                       dbcursor,
                                                       findlastline=True)
        if firstlinenumber['code'] == 'success' and lastlinenumber[
                'code'] == 'success':
            cdict = {
                wo.universalid:
                (firstlinenumber['line'], lastlinenumber['line'])
            }
            startln = dblineintolineobject(
                grabonelinefromwork(ao.universalid, firstlinenumber['line'],
                                    dbcursor))
            stopln = dblineintolineobject(
                grabonelinefromwork(ao.universalid, lastlinenumber['line'],
                                    dbcursor))
        else:
            msg = '"indexspan/" could not find first and last: {a}w{b} - {c} TO {d}'
            consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
            startln = makeablankline(work, 0)
            stopln = makeablankline(work, 1)
            valid = False
        segmenttext = 'from {a} to {b}'.format(a=startln.shortlocus(),
                                               b=stopln.shortlocus())
    elif ao and work and psg:
        # subsection of a work of an author
        progresspolldict[pollid].statusis(
            'Preparing a partial index to {t}'.format(t=wo.title))
        startandstop = textsegmentfindstartandstop(ao, wo, psg, dbcursor)
        startline = startandstop['startline']
        endline = startandstop['endline']
        cdict = {wo.universalid: (startline, endline)}
    elif ao and work:
        # one work
        progresspolldict[pollid].statusis(
            'Preparing an index to {t}'.format(t=wo.title))
        startline = wo.starts
        endline = wo.ends
        cdict = {wo.universalid: (startline, endline)}
    elif ao:
        # whole author
        allworks = [
            '{w}  ⇒ {t}'.format(w=w.universalid[6:10], t=w.title)
            for w in ao.listofworks
        ]
        allworks.sort()
        progresspolldict[pollid].statusis(
            'Preparing an index to the works of {a}'.format(a=ao.shortname))
        for wkid in ao.listworkids():
            cdict[wkid] = (workdict[wkid].starts, workdict[wkid].ends)
    else:
        # we do not have a valid selection
        valid = False
        output = ['invalid input']

    if not stop:
        segmenttext = '.'.join(psg)

    if valid and justvocab:
        # NOTE(review): this path returns a dict, not the JSON_STR the signature
        # promises; justvocab callers evidently rely on getting the raw cdict
        dbconnection.connectioncleanup()
        del progresspolldict[pollid]
        return cdict

    if valid:
        output = buildindextowork(cdict, progresspolldict[pollid],
                                  useheadwords, dbcursor)

    # get ready to send stuff to the page
    count = len(output)

    try:
        # pretty-print the count with thousands separators if the locale allows
        locale.setlocale(locale.LC_ALL, 'en_US')
        count = locale.format_string('%d', count, grouping=True)
    except locale.Error:
        count = str(count)

    progresspolldict[pollid].statusis('Preparing the index HTML')
    indexhtml = wordindextohtmltable(output, useheadwords)

    buildtime = time.time() - starttime
    buildtime = round(buildtime, 2)
    progresspolldict[pollid].deactivate()

    if not ao:
        ao = makeanemptyauthor('gr0000')

    results = dict()
    results['authorname'] = avoidsmallvariants(ao.shortname)
    results['title'] = avoidsmallvariants(wo.title)
    results['structure'] = avoidsmallvariants(wo.citation())
    results['worksegment'] = segmenttext
    results['elapsed'] = buildtime
    results['wordsfound'] = count
    results['indexhtml'] = indexhtml
    results['keytoworks'] = allworks
    results['newjs'] = supplementalindexjs()
    results = json.dumps(results)

    dbconnection.connectioncleanup()
    del progresspolldict[pollid]

    return results
def subqueryphrasesearch(workerid, foundlineobjects: ListProxy,
                         searchphrase: str, listofplacestosearch: ListProxy,
                         searchobject: SearchObject,
                         dbconnection) -> ListProxy:
    """

    foundlineobjects, searchingfor, searchlist, commitcount, whereclauseinfo, activepoll

    use subquery syntax to grab multi-line windows of text for phrase searching

    line ends and line beginning issues can be overcome this way, but then you have plenty of
    bookkeeping to do to to get the proper results focussed on the right line

    tablestosearch:
        ['lt0400', 'lt0022', ...]

    a search inside of Ar., Eth. Eud.:

        SELECT secondpass.index, secondpass.accented_line
                FROM (SELECT firstpass.index, firstpass.linebundle, firstpass.accented_line FROM
                    (SELECT index, accented_line,
                        concat(accented_line, ' ', lead(accented_line) OVER (ORDER BY index ASC)) as linebundle
                        FROM gr0086 WHERE ( (index BETWEEN 15982 AND 18745) ) ) firstpass
                    ) secondpass
                WHERE secondpass.linebundle ~ %s  LIMIT 200

    a search in x., hell and x., mem less book 3 of hell and book 2 of mem:
        SELECT secondpass.index, secondpass.accented_line
                FROM (SELECT firstpass.index, firstpass.linebundle, firstpass.accented_line FROM
                    (SELECT index, accented_line,
                        concat(accented_line, ' ', lead(accented_line) OVER (ORDER BY index ASC)) as linebundle
                        FROM gr0032 WHERE ( (index BETWEEN 1 AND 7918) OR (index BETWEEN 7919 AND 11999) ) AND ( (index NOT BETWEEN 1846 AND 2856) AND (index NOT BETWEEN 8845 AND 9864) ) ) firstpass
                    ) secondpass
                WHERE secondpass.linebundle ~ %s  LIMIT 200

    :return:
    """
    # print('subqueryphrasesearch()')
    so = searchobject
    activepoll = so.poll

    # build incomplete sfo that will handle everything other than iteratethroughsearchlist()
    sfo = returnsearchfncobject(workerid, foundlineobjects,
                                listofplacestosearch, so, dbconnection, None)

    querytemplate = """
		SELECT secondpass.index, secondpass.{co} FROM 
			(SELECT firstpass.index, firstpass.linebundle, firstpass.{co} FROM
					(SELECT index, {co}, concat({co}, ' ', lead({co}) OVER (ORDER BY index ASC)) AS linebundle
						FROM {db} {whr} ) firstpass
			) secondpass
		WHERE secondpass.linebundle ~ %s {lim}"""

    # renamed from 'wheretempate' (typo); the emitted SQL is unchanged
    wheretemplate = """
	WHERE EXISTS
		(SELECT 1 FROM {tbl}_includelist_{a} incl WHERE incl.includeindex = {tbl}.index)
	"""

    # substringsearch() needs ability to CREATE TEMPORARY TABLE
    sfo.dbconnection.setreadonly(False)
    dbcursor = sfo.dbconnection.cursor()

    qcomb = QueryCombinator(searchphrase)
    # the last item is the full phrase:  ('one two three four five', '')
    combinations = qcomb.combinations()
    combinations.pop()
    # lines start/end
    sp = re.sub(r'^\s', r'(^|\\s)', searchphrase)
    sp = re.sub(r'\s$', r'(\\s|$)', sp)
    # on the reasoning behind the following substitution see 'DEBUGGING notes: SQL oddities' above
    # sp = re.sub(r' ', r'\\s', sp)

    if not so.onehit:
        lim = ' LIMIT ' + str(so.cap)
    else:
        # the windowing problem means that '1' might be something that gets discarded
        lim = ' LIMIT 5'

    if so.redissearchlist:
        listofplacestosearch = True

    while listofplacestosearch and activepoll.gethits() <= so.cap:
        # sfo.getnextfnc() also takes care of the commitcount
        authortable = sfo.getnextfnc()
        sfo.updatepollremaining()

        if authortable:
            # build the WHERE clause appropriate to the restriction on this table
            whr = str()
            r = so.indexrestrictions[authortable]
            if r['type'] == 'between':
                indexwedwhere = buildbetweenwhereextension(authortable, so)
                if indexwedwhere != '':
                    # indexwedwhere will come back with an extraneous ' AND'
                    indexwedwhere = indexwedwhere[:-4]
                    whr = 'WHERE {iw}'.format(iw=indexwedwhere)
            elif r['type'] == 'temptable':
                avoidcollisions = assignuniquename()
                q = r['where']['tempquery']
                q = re.sub('_includelist',
                           '_includelist_{a}'.format(a=avoidcollisions), q)
                dbcursor.execute(q)
                whr = wheretemplate.format(tbl=authortable, a=avoidcollisions)

            query = querytemplate.format(db=authortable,
                                         co=so.usecolumn,
                                         whr=whr,
                                         lim=lim)
            data = (sp, )
            # print('subqueryphrasesearch() find indices() q,d:\n\t',query, data)
            dbcursor.execute(query, data)
            indices = [i[0] for i in dbcursor.fetchall()]
            # this will yield a bunch of windows: you need to find the centers; see 'while...' below

            locallineobjects = list()
            if indices:
                for i in indices:
                    query = 'SELECT {wtmpl} FROM {tb} WHERE index=%s'.format(
                        wtmpl=worklinetemplate, tb=authortable)
                    data = (i, )
                    # print('subqueryphrasesearch() iterate through indices() q,d:\n\t', query, data)
                    dbcursor.execute(query, data)
                    locallineobjects.append(
                        dblineintolineobject(dbcursor.fetchone()))

            locallineobjects.reverse()
            # debugging
            # for l in locallineobjects:
            #	print(l.universalid, l.locus(), getattr(l,so.usewordlist))

            gotmyonehit = False
            while locallineobjects and activepoll.gethits() <= so.cap and not gotmyonehit:
                # windows of indices come back: e.g., three lines that look like they match when only one matches [3131, 3132, 3133]
                # figure out which line is really the line with the goods
                # it is not nearly so simple as picking the 2nd element in any run of 3: no always runs of 3 + matches in
                # subsequent lines means that you really should check your work carefully; this is not an especially costly
                # operation relative to the whole search and esp. relative to the speed gains of using a subquery search
                lineobject = locallineobjects.pop()
                if re.search(sp, getattr(lineobject, so.usewordlist)):
                    sfo.addnewfindstolistoffinds([lineobject])
                    activepoll.addhits(1)
                    if so.onehit:
                        gotmyonehit = True
                else:
                    try:
                        nextline = locallineobjects[0]
                    except IndexError:
                        nextline = makeablankline('gr0000w000', -1)

                    if lineobject.wkuinversalid != nextline.wkuinversalid or lineobject.index != (
                            nextline.index - 1):
                        # you grabbed the next line on the pile (e.g., index = 9999), not the actual next line (e.g., index = 101)
                        # usually you won't get a hit by grabbing the next db line, but sometimes you do...
                        query = 'SELECT {wtmpl} FROM {tb} WHERE index=%s'.format(
                            wtmpl=worklinetemplate, tb=authortable)
                        data = (lineobject.index + 1, )
                        # print('subqueryphrasesearch() "while locallineobjects..." loop q,d:\n\t', query, data)
                        dbcursor.execute(query, data)
                        try:
                            nextline = dblineintolineobject(
                                dbcursor.fetchone())
                        except TypeError:
                            # was a bare 'except:': fetchone() returns None past the end
                            # of the table and dblineintolineobject(None) raises TypeError
                            nextline = makeablankline('gr0000w000', -1)

                    # does the phrase straddle the boundary between this line and the next?
                    for c in combinations:
                        tail = c[0] + '$'
                        head = '^' + c[1]
                        # debugging
                        # print('re',getattr(lo,so.usewordlist),tail, head, getattr(next,so.usewordlist))

                        t = False
                        h = False
                        try:
                            t = re.search(tail,
                                          getattr(lineobject, so.usewordlist))
                        except re.error:
                            pass
                        try:
                            h = re.search(head,
                                          getattr(nextline, so.usewordlist))
                        except re.error:
                            pass

                        if t and h:
                            sfo.addnewfindstolistoffinds([lineobject])
                            activepoll.addhits(1)
                            if so.onehit:
                                gotmyonehit = True
        else:
            # redis will return None for authortable if the set is now empty
            listofplacestosearch = None

    sfo.listcleanup()

    if sfo.needconnectioncleanup:
        sfo.dbconnection.connectioncleanup()

    return foundlineobjects
# (extraction artifact: "示例#8" / "0" example-separator lines — not part of the source; neutralized as a comment)
def precomposedsqlsubqueryphrasesearch(so: SearchObject) -> List[dbWorkLine]:
    """

    use subquery syntax to grab multi-line windows of text for phrase searching

    line ends and line beginning issues can be overcome this way, but then you have plenty of
    bookkeeping to do to to get the proper results focussed on the right line

    these searches take linear time: same basic time for any given scope regardless of the query

    :param so: the SearchObject; so.phrase is the phrase being sought
    :return: the list of dbWorkLine objects that actually contain the phrase
    """

    # rebuild the searchsqldict but this time pass through rewritequerystringforsubqueryphrasesearching()
    so.searchsqldict = searchlistintosqldict(so,
                                             so.phrase,
                                             subqueryphrasesearch=True)

    # debugmessage('precomposedsqlsubqueryphrasesearch() so.searchsqldict: {d}'.format(d=so.searchsqldict))

    # the windowed collection of lines; you will need to work to find the centers
    # windowing will increase the number of hits: 2+ lines per actual find
    initialhitlines = generatepreliminaryhitlist(so, recap=so.cap * 3)

    m = 'Generating final list of hits by searching among the {h} preliminary hits'
    so.poll.statusis(m.format(h=so.poll.gethits()))
    so.poll.sethits(0)

    # let the phrase match across line starts/ends
    sp = re.sub(r'^\s', r'(^|\\s)', so.phrase)
    sp = re.sub(r'\s$', r'(\\s|$)', sp)

    combinations = QueryCombinator(so.phrase)
    # the last item is the full phrase and it will have already been searched:  ('one two three four five', '')
    combinations = combinations.combinations()
    combinations.pop()

    listoffinds = list()

    dbconnection = ConnectionObject()
    dbcursor = dbconnection.cursor()

    setofhits = set()

    while initialhitlines:
        # windows of indices come back: e.g., three lines that look like they match when only one matches [3131, 3132, 3133]
        # figure out which line is really the line with the goods
        # it is not nearly so simple as picking the 2nd element in any run of 3: no always runs of 3 + matches in
        # subsequent lines means that you really should check your work carefully; this is not an especially costly
        # operation relative to the whole search and esp. relative to the speed gains of using a subquery search
        lineobject = initialhitlines.pop()
        if not so.onehit or lineobject.authorid not in setofhits:
            if re.search(sp, getattr(lineobject, so.usewordlist)):
                listoffinds.append(lineobject)
                so.poll.addhits(1)
                setofhits.add(lineobject.authorid)
            else:
                try:
                    nextline = initialhitlines[0]
                except IndexError:
                    nextline = makeablankline('gr0000w000', -1)

                if lineobject.wkuinversalid != nextline.wkuinversalid or lineobject.index != (
                        nextline.index - 1):
                    # you grabbed the next line on the pile (e.g., index = 9999), not the actual next line (e.g., index = 101)
                    # usually you won't get a hit by grabbing the next db line, but sometimes you do...
                    query = 'SELECT {wtmpl} FROM {tb} WHERE index=%s'.format(
                        wtmpl=worklinetemplate, tb=lineobject.authorid)
                    data = (lineobject.index + 1, )
                    dbcursor.execute(query, data)
                    try:
                        nextline = dblineintolineobject(dbcursor.fetchone())
                    except TypeError:
                        # was a bare 'except:': fetchone() returns None past the end
                        # of the table and dblineintolineobject(None) raises TypeError
                        nextline = makeablankline('gr0000w000', -1)

                # does the phrase straddle the boundary between this line and the next?
                for c in combinations:
                    tail = c[0] + '$'
                    head = '^' + c[1]

                    t = False
                    h = False
                    try:
                        t = re.search(tail, getattr(lineobject,
                                                    so.usewordlist))
                    except re.error:
                        pass
                    try:
                        h = re.search(head, getattr(nextline, so.usewordlist))
                    except re.error:
                        pass

                    if t and h:
                        listoffinds.append(lineobject)
                        so.poll.addhits(1)
                        setofhits.add(lineobject.authorid)

    dbconnection.connectioncleanup()
    return listoffinds