def sampleworkcitation(authorid: str, workid: str) -> JSON_STR:
	"""
	called by loadsamplecitation() in autocomplete.js

	we are using the manual input style on the web page
	so we need some hint on how to do things: check the end line for a sample citation

	"In Timarchum (w001)" yields...

	127.0.0.1 - - [04/Apr/2021 13:48:53] "GET /get/json/samplecitation/gr0026/001 HTTP/1.1" 200 -
	/get/json/samplecitation
	{"firstline": "1.1", "lastline": "196.7"}

	:param authorid:
	:param workid:
	:return: JSON with 'firstline' and 'lastline' citations (or an error message in 'firstline')
	"""
	dbconnection = ConnectionObject()
	dbcursor = dbconnection.cursor()

	returnvals = dict()
	returnvals['firstline'] = str()
	returnvals['lastline'] = str()

	# sanitize the URL components before using them as dict keys / query inputs
	authorid = depunct(authorid)
	workid = depunct(workid)

	try:
		ao = authordict[authorid]
		wo = workdict[authorid + 'w' + workid]
	except KeyError:
		returnvals['firstline'] = 'no such author/work combination'
		# BUGFIX: the early return used to skip connectioncleanup() and leak the connection
		dbconnection.connectioncleanup()
		return json.dumps(returnvals)

	# skip title ('t') lines at the top level when hunting for the first real line
	toplevel = wo.availablelevels - 1
	firstlineindex = returnfirstorlastlinenumber(wo.universalid, dbcursor, disallowt=True, disallowlevel=toplevel)
	flo = dblineintolineobject(grabonelinefromwork(authorid, firstlineindex, dbcursor))

	lastlineidx = returnfirstorlastlinenumber(wo.universalid, dbcursor, findlastline=True)
	llo = dblineintolineobject(grabonelinefromwork(authorid, lastlineidx, dbcursor))

	returnvals['firstline'] = flo.prolixlocus()
	returnvals['lastline'] = llo.prolixlocus()

	results = json.dumps(returnvals)

	dbconnection.connectioncleanup()

	return results
def lookoutsideoftheline(linenumber: int, numberofextrawords: int, workid: str, searchobject: SearchObject, cursor) -> str:
	"""
	grab a line and add the N words at the tail and head of the previous and next lines
	this will let you search for phrases that fall along a line break "και δη | και"

	if you wanted to look for 'ἀείδων Ϲπάρτηϲ'
	you need this individual line:
		2.1.374  δεξιτερὴν γὰρ ἀνέϲχε μετάρϲιον, ὡϲ πρὶν ἀείδων
	to turn extend out to:
		ὑφαίνων δεξιτερὴν γὰρ ἀνέϲχε μετάρϲιον ὡϲ πρὶν ἀείδων ϲπάρτηϲ

	:param linenumber:
	:param numberofextrawords:
	:param workid:
	:param searchobject:
	:param cursor:
	:return: the focus line padded with words from its neighbors, wrapped in single spaces
	"""
	whitespace = ' '
	workdbname = workid[0:6]

	query = 'SELECT {wltmp} FROM {db} WHERE index BETWEEN %s AND %s ORDER BY index ASC'.format(
		wltmp=worklinetemplate, db=workdbname)
	data = (linenumber - 1, linenumber + 1)
	cursor.execute(query, data)
	results = cursor.fetchall()

	lines = [dblineintolineobject(r) for r in results]
	# will get key errors if there is no linenumber+/-1: pad with blank lines so all
	# three indices (linenumber-1, linenumber, linenumber+1) are always present
	if len(lines) == 2:
		if lines[0].index == linenumber:
			lines = [makeablankline(workdbname, linenumber - 1)] + lines
		else:
			lines.append(makeablankline(workdbname, linenumber + 1))
	if len(lines) == 1:
		lines = [makeablankline(workdbname, linenumber - 1)] + lines
		lines.append(makeablankline(workdbname, linenumber + 1))

	text = list()
	for line in lines:
		wordsinline = line.wordlist(searchobject.usewordlist)
		if line.index == linenumber - 1:
			# BUGFIX: the old 'wordsinline[(numberofextrawords * -1):]' is '[-0:]' when
			# numberofextrawords == 0 and so grabbed the ENTIRE previous line
			text = wordsinline[-numberofextrawords:] if numberofextrawords else list()
		elif line.index == linenumber:
			text += wordsinline
		elif line.index == linenumber + 1:
			text += wordsinline[0:numberofextrawords]

	aggregate = whitespace.join(text)
	# BUGFIX: collapse any run of whitespace to a single space; the old r'\s\s' only
	# shrank a run of N spaces to N/2 per pass
	aggregate = re.sub(r'\s{2,}', whitespace, aggregate)
	aggregate = ' {a} '.format(a=aggregate)

	return aggregate
def iteratethroughsearchlist(self):
	"""
	this is the simple core of the whole thing; the rest is about feeding it properly

	if you do not pickle the lineobjects here and now you will need to generate line objects at the other end
		foundlineobjects = [dblineintolineobject(item) for item in founddblineobjects]

	you will also need to use lo.decompose() in phrasesearching.py to feed the findslist

	:return: nothing; results accumulate in self.foundlineobjects (a ListProxy shared across workers)
	"""
	# position in the parameter list where the per-iteration search target gets swapped in
	insertposition = self.searchfunctionparameters.index('parametertoswap')
	# keep pulling work items until the list is exhausted or the hit cap is reached
	while self.emptytest and self.activepoll.gethits() <= self.so.cap:
		srchfunct = self.searchfunction
		nextitem = self.getnextfnc()
		if self.so.session['onehit']:
			# simplelemma chunk might have already searched and found in an author
			if self.so.lemma or self.so.proximatelemma:
				# nextitem looks like '(chunk, item)'
				if nextitem[1] in self.authorsamongthefinds():
					# this author already produced a hit: skip searching it again
					srchfunct = None
		if nextitem and srchfunct:
			params = self.parameterswapper(nextitem, insertposition)
			foundlines = srchfunct(*tuple(params))
			lineobjects = [dblineintolineobject(f) for f in foundlines]
			self.addnewfindstolistoffinds(lineobjects)
			self.updatepollfinds(lineobjects)
			self.updatepollremaining()
		elif not srchfunct:
			# 'onehit' skip: consume the item without searching
			pass
		else:
			# listofplacestosearch has been exhausted
			break
	self.listcleanup()
	if self.needconnectioncleanup:
		self.dbconnection.connectioncleanup()
	# empty return because foundlineobjects is a ListProxy:
	# ask for self.foundlineobjects as the search result instead
	# print('{i} finished'.format(i=self.workerid))
	return
def textsegmentfindstartandstop(authorobject, workobject, passageaslist, cursor) -> dict:
	"""
	find the first and last lines of a work segment

	the first line comes from resolving the (possibly incomplete) locus; the last line
	is the highest index that still shares the same upper-level citation values

	:return: dict with 'startline' and 'endline' index values
	"""
	citation = tuple(passageaslist)
	lookforline = finddblinefromincompletelocus(workobject, citation, cursor)
	# assuming that lookforline['code'] == 'success'
	# lookforline['code'] is (allegedly) only relevant to the Perseus lookup problem where a bad locus can be sent
	firstline = dblineintolineobject(grabonelinefromwork(authorobject.universalid, lookforline['line'], cursor))

	# let's say you looked for 'book 2' of something that has 'book, chapter, line'
	# that means that you want everything that has the same level2 value as the lineobject
	# build a where clause from an '_AT_' style selection
	# NB: reverse() mutates in place and callers will see the reversed list (original behavior)
	passageaslist.reverse()
	selection = '{uid}_AT_{line}'.format(uid=workobject.universalid, line='|'.join(passageaslist))
	whereclauses = atsignwhereclauses(selection, '=', {authorobject.universalid: authorobject})

	data = [workobject.universalid]
	fragments = list()
	for clause, value in whereclauses:
		fragments.append('AND (' + clause + ') ')
		data.append(value)
	qw = ''.join(fragments)

	query = 'SELECT index FROM {au} WHERE wkuniversalid=%s {whr} ORDER BY index DESC LIMIT 1'.format(
		au=authorobject.universalid, whr=qw)
	cursor.execute(query, tuple(data))
	lastline = cursor.fetchone()

	return {'startline': firstline.index, 'endline': lastline[0]}
def grableadingandlagging(hitline: dbWorkLine, searchobject: SearchObject, cursor, override=None) -> dict:
	"""
	take a dbline and grab the N words in front of it and after it

	it would be a good idea to have an autocommit connection here?

	override was added so that the rewritten so of precomposedphraseandproximitysearch() can set 'seeking' as it wishes

	:param hitline:
	:param searchobject:
	:param cursor:
	:param override: optional regex string to use as 'seeking' instead of deriving it from the searchobject
	:return: {'lag': <words before the match>, 'lead': <words after the match>}
	"""
	so = searchobject
	# look out for off-by-one errors
	distance = so.distance + 1

	if override:
		seeking = override
	elif so.lemma:
		seeking = wordlistintoregex(so.lemma.formlist)
		so.usewordlist = 'polytonic'
	else:
		seeking = so.termone

	# expanded searchzone because "seeking" might be a multi-line phrase
	# BUGFIX: at the very first/last line of an author table there is no neighbor row and
	# grabonelinefromwork() has nothing to return; substitute a blank line instead of
	# crashing on dbWorkLine(*None). (also: renamed 'next' so it no longer shadows the builtin)
	prevrow = grabonelinefromwork(hitline.authorid, hitline.index - 1, cursor)
	nextrow = grabonelinefromwork(hitline.authorid, hitline.index + 1, cursor)
	prevline = dbWorkLine(*prevrow) if prevrow else makeablankline(hitline.authorid, -1)
	nextline = dbWorkLine(*nextrow) if nextrow else makeablankline(hitline.authorid, -1)

	searchzone = ' '.join([
		getattr(prevline, so.usewordlist),
		getattr(hitline, so.usewordlist),
		getattr(nextline, so.usewordlist)
	])

	match = re.search(r'{s}'.format(s=seeking), searchzone)
	# but what if you just found 'paucitate' inside of 'paucitatem'?
	# you will have 'm' left over and this will throw off your distance-in-words count

	past = None
	upto = None
	lagging = list()
	leading = list()
	ucount = 0
	pcount = 0

	try:
		past = searchzone[match.end():].strip()
	except AttributeError:
		# AttributeError: 'NoneType' object has no attribute 'end'
		pass
	try:
		upto = searchzone[:match.start()].strip()
	except AttributeError:
		pass

	if upto:
		ucount = len([x for x in upto.split(' ') if x])
		lagging = [x for x in upto.split(' ') if x]
	if past:
		pcount = len([x for x in past.split(' ') if x])
		leading = [x for x in past.split(' ') if x]

	# walk backwards through the work until enough lagging words have accumulated
	atline = hitline.index
	while ucount < distance + 1:
		atline -= 1
		try:
			previous = dblineintolineobject(grabonelinefromwork(hitline.authorid, atline, cursor))
		except TypeError:
			# 'NoneType' object is not subscriptable: ran off the start of the table
			previous = makeablankline(hitline.authorid, -1)
			ucount = 999
		lagging = previous.wordlist(so.usewordlist) + lagging
		ucount += previous.wordcount()
	lagging = lagging[-1 * (distance - 1):]
	lagging = ' '.join(lagging)

	# walk forwards until enough leading words have accumulated
	atline = hitline.index
	while pcount < distance + 1:
		atline += 1
		try:
			upcoming = dblineintolineobject(grabonelinefromwork(hitline.authorid, atline, cursor))
		except TypeError:
			# 'NoneType' object is not subscriptable: ran off the end of the table
			upcoming = makeablankline(hitline.authorid, -1)
			pcount = 999
		leading += upcoming.wordlist(so.usewordlist)
		pcount += upcoming.wordcount()
	leading = leading[:distance - 1]
	leading = ' '.join(leading)

	returndict = {'lag': lagging, 'lead': leading}

	return returndict
def textmaker(author: str, work=None, passage=None, endpoint=None, citationdelimiter='|') -> JSON_STR:
	"""
	build a text suitable for display

	"GET /textof/lt0474/024/20/30"

	:param author: author universalid, e.g. 'lt0474'
	:param work: work number within the author, e.g. '024'
	:param passage: citation of the starting point (delimited by citationdelimiter)
	:param endpoint: optional citation of the stopping point (spans a range when given)
	:param citationdelimiter: separator used inside 'passage' and 'endpoint'
	:return: JSON with authorname, title, structure, worksegment, texthtml
	"""
	probeforsessionvariables()

	dbconnection = ConnectionObject('autocommit')
	dbcursor = dbconnection.cursor()

	linesevery = hipparchia.config['SHOWLINENUMBERSEVERY']

	po = TextmakerInputParsingObject(author, work, passage, endpoint, citationdelimiter)

	ao = po.authorobject
	wo = po.workobject

	segmenttext = str()

	# consolewarning('po.passageaslist: {p}'.format(p=po.passageaslist))

	if ao and wo:
		# we have both an author and a work, maybe we also have a subset of the work
		if endpoint:
			# explicit span: resolve both ends of the requested range
			firstlinenumber = finddblinefromincompletelocus(wo, po.passageaslist, dbcursor)
			lastlinenumber = finddblinefromincompletelocus(wo, po.endpointlist, dbcursor, findlastline=True)
			if firstlinenumber['code'] == 'success' and lastlinenumber['code'] == 'success':
				startline = firstlinenumber['line']
				endline = lastlinenumber['line']
				startlnobj = dblineintolineobject(grabonelinefromwork(ao.universalid, startline, dbcursor))
				stoplnobj = dblineintolineobject(grabonelinefromwork(ao.universalid, endline, dbcursor))
			else:
				# bad locus: fall back to a minimal dummy range so the page still renders
				msg = '"buildtexttospan/" could not find first and last: {a}w{b} - {c} TO {d}'
				consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
				startlnobj = makeablankline(work, 0)
				stoplnobj = makeablankline(work, 1)
				startline = 0
				endline = 1
			segmenttext = 'from {a} to {b}'.format(a=startlnobj.shortlocus(), b=stoplnobj.shortlocus())
		elif not po.passageaslist:
			# whole work
			startline = wo.starts
			endline = wo.ends
		else:
			# partial citation without an endpoint: take the whole segment it names
			startandstop = textsegmentfindstartandstop(ao, wo, po.passageaslist, dbcursor)
			startline = startandstop['startline']
			endline = startandstop['endline']
		texthtml = buildtext(wo.universalid, startline, endline, linesevery, dbcursor)
	else:
		texthtml = str()

	if hipparchia.config['INSISTUPONSTANDARDANGLEBRACKETS']:
		texthtml = gtltsubstitutes(texthtml)

	if not segmenttext:
		segmenttext = '.'.join(po.passageaslist)

	if not ao or not wo:
		# placeholders keep the response shape stable when the selection was invalid
		ao = makeanemptyauthor('gr0000')
		wo = makeanemptywork('gr0000w000')

	results = dict()
	results['authorname'] = avoidsmallvariants(ao.shortname)
	results['title'] = avoidsmallvariants(wo.title)
	results['structure'] = avoidsmallvariants(wo.citation())
	results['worksegment'] = segmenttext
	results['texthtml'] = texthtml

	results = json.dumps(results)

	dbconnection.connectioncleanup()

	return results
def buildindexto(searchid: str, author: str, work=None, passage=None, endpoint=None, citationdelimiter='|', justvocab=False) -> JSON_STR:
	"""
	build a complete index to a an author, work, or segment of a work

	:param searchid: id used to register a ProgressPoll for this build
	:param author: author universalid
	:param work: optional work number
	:param passage: optional starting citation
	:param endpoint: optional stopping citation
	:param citationdelimiter: separator used inside 'passage' and 'endpoint'
	:param justvocab: when True, return the cdict (work -> line range) instead of JSON
	:return: JSON string (or, with justvocab=True, a dict — see above)
	"""
	probeforsessionvariables()

	pollid = validatepollid(searchid)

	starttime = time.time()

	progresspolldict[pollid] = ProgressPoll(pollid)
	progresspolldict[pollid].activate()

	dbconnection = ConnectionObject('autocommit')
	dbcursor = dbconnection.cursor()

	po = IndexmakerInputParsingObject(author, work, passage, endpoint, citationdelimiter)

	ao = po.authorobject
	wo = po.workobject
	psg = po.passageaslist
	stop = po.endpointlist

	if not work:
		wo = makeanemptywork('gr0000w000')

	# bool
	useheadwords = session['headwordindexing']

	allworks = list()
	output = list()
	cdict = dict()
	segmenttext = str()
	valid = True

	if ao and work and psg and stop:
		# partial work + explicit endpoint: index the span between the two loci
		start = psg
		firstlinenumber = finddblinefromincompletelocus(wo, start, dbcursor)
		lastlinenumber = finddblinefromincompletelocus(wo, stop, dbcursor, findlastline=True)
		if firstlinenumber['code'] == 'success' and lastlinenumber['code'] == 'success':
			cdict = {wo.universalid: (firstlinenumber['line'], lastlinenumber['line'])}
			startln = dblineintolineobject(grabonelinefromwork(ao.universalid, firstlinenumber['line'], dbcursor))
			stopln = dblineintolineobject(grabonelinefromwork(ao.universalid, lastlinenumber['line'], dbcursor))
		else:
			msg = '"indexspan/" could not find first and last: {a}w{b} - {c} TO {d}'
			consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
			startln = makeablankline(work, 0)
			stopln = makeablankline(work, 1)
			valid = False
		segmenttext = 'from {a} to {b}'.format(a=startln.shortlocus(), b=stopln.shortlocus())
	elif ao and work and psg:
		# subsection of a work of an author
		progresspolldict[pollid].statusis('Preparing a partial index to {t}'.format(t=wo.title))
		startandstop = textsegmentfindstartandstop(ao, wo, psg, dbcursor)
		startline = startandstop['startline']
		endline = startandstop['endline']
		cdict = {wo.universalid: (startline, endline)}
	elif ao and work:
		# one work
		progresspolldict[pollid].statusis('Preparing an index to {t}'.format(t=wo.title))
		startline = wo.starts
		endline = wo.ends
		cdict = {wo.universalid: (startline, endline)}
	elif ao:
		# whole author
		allworks = ['{w} ⇒ {t}'.format(w=w.universalid[6:10], t=w.title) for w in ao.listofworks]
		allworks.sort()
		progresspolldict[pollid].statusis('Preparing an index to the works of {a}'.format(a=ao.shortname))
		for wkid in ao.listworkids():
			cdict[wkid] = (workdict[wkid].starts, workdict[wkid].ends)
	else:
		# we do not have a valid selection
		valid = False
		output = ['invalid input']

	if not stop:
		segmenttext = '.'.join(psg)

	if valid and justvocab:
		# early exit for vocabulary builders: they only need the line-range map
		dbconnection.connectioncleanup()
		del progresspolldict[pollid]
		return cdict

	if valid:
		output = buildindextowork(cdict, progresspolldict[pollid], useheadwords, dbcursor)

	# get ready to send stuff to the page
	count = len(output)

	try:
		# thousands separators for the count; fall back to a plain str if the locale is absent
		locale.setlocale(locale.LC_ALL, 'en_US')
		count = locale.format_string('%d', count, grouping=True)
	except locale.Error:
		count = str(count)

	progresspolldict[pollid].statusis('Preparing the index HTML')
	indexhtml = wordindextohtmltable(output, useheadwords)

	buildtime = time.time() - starttime
	buildtime = round(buildtime, 2)
	progresspolldict[pollid].deactivate()

	if not ao:
		ao = makeanemptyauthor('gr0000')

	results = dict()
	results['authorname'] = avoidsmallvariants(ao.shortname)
	results['title'] = avoidsmallvariants(wo.title)
	results['structure'] = avoidsmallvariants(wo.citation())
	results['worksegment'] = segmenttext
	results['elapsed'] = buildtime
	results['wordsfound'] = count
	results['indexhtml'] = indexhtml
	results['keytoworks'] = allworks
	results['newjs'] = supplementalindexjs()
	results = json.dumps(results)

	dbconnection.connectioncleanup()
	del progresspolldict[pollid]

	return results
def sessionselectionsinfo(authordict: dict, workdict: dict) -> dict:
	"""
	build the selections html either for a or b:
		#selectionstable + #selectioninfocell
		#selectionstable + #exclusioninfocell
	there are seven headings to populate
		[a] author classes
		[b] work genres
		[c] author location
		[d] work provenance
		[e] author selections
		[f] work selections
		[g] passage selections

	id numbers need to be attached to the selections so that they can be double-clicked so as to delete them

	:param authordict:
	:param workdict:
	:return: dict with 'selections', 'exclusions', 'numberofselections', 'jstuples'
	"""
	returndict = dict()
	thejs = list()

	tit = 'title="Double-click to remove this item"'

	try:
		# it is possible to hit this function before the session has been set, so...
		session['auselections']
	except KeyError:
		probeforsessionvariables()

	sessionsearchlist = session['auselections'] + session['agnselections'] + session['wkgnselections'] + \
		session['psgselections'] + session['wkselections'] + session['alocselections'] + \
		session['wlocselections']

	for selectionorexclusion in ['selections', 'exclusions']:
		thehtml = list()
		# if there are no explicit selections, then
		if not sessionsearchlist and selectionorexclusion == 'selections':
			thehtml.append('<span class="picklabel">Authors</span><br />')
			thehtml.append('[All in active corpora less exclusions]<br />')

		if selectionorexclusion == 'exclusions' and not sessionsearchlist and session['spuria'] == 'Y' and \
				not session['wkgnexclusions'] and not session['agnexclusions'] and not session['auexclusions']:
			thehtml.append('<span class="picklabel">Authors</span><br />')
			thehtml.append('[No exclusions]<br />')

		# [a] author classes
		v = 'agn'
		var = v + selectionorexclusion
		if session[var]:
			thehtml.append('<span class="picklabel">Author categories</span><br />')
			htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion, session)
			thehtml += htmlandjs['html']
			thejs += htmlandjs['js']

		# [b] work genres
		v = 'wkgn'
		var = v + selectionorexclusion
		if session[var]:
			thehtml.append('<span class="picklabel">Work genres</span><br />')
			htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion, session)
			thehtml += htmlandjs['html']
			thejs += htmlandjs['js']

		# [c] author location
		v = 'aloc'
		var = v + selectionorexclusion
		if session[var]:
			thehtml.append('<span class="picklabel">Author location</span><br />')
			htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion, session)
			thehtml += htmlandjs['html']
			thejs += htmlandjs['js']

		# [d] work provenance
		v = 'wloc'
		var = v + selectionorexclusion
		if session[var]:
			thehtml.append('<span class="picklabel">Work provenance</span><br />')
			htmlandjs = selectionlinehtmlandjs(v, selectionorexclusion, session)
			thehtml += htmlandjs['html']
			thejs += htmlandjs['js']

		# [e] authors
		v = 'au'
		var = v + selectionorexclusion
		if session[var]:
			thehtml.append('<span class="picklabel">Authors</span><br />')
			localval = -1
			for s in session[var]:
				localval += 1
				ao = authordict[s]
				thehtml.append('<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{s}</span>'
					'<br />'.format(v=v, soe=selectionorexclusion, var=var, lv=localval, s=ao.akaname, tit=tit))
				thejs.append((var, localval))

		# [f] works
		v = 'wk'
		var = v + selectionorexclusion
		if session[var] and selectionorexclusion == 'exclusions' and session['spuria'] == 'N':
			thehtml.append('<span class="picklabel">Works</span><br />')
			thehtml.append('[All non-selected spurious works]<br />')

		if session[var]:
			thehtml.append('<span class="picklabel">Works</span><br />')
			if selectionorexclusion == 'exclusions' and session['spuria'] == 'N':
				thehtml.append('[Non-selected spurious works]<br />')
			localval = -1
			for s in session[var]:
				localval += 1
				uid = s[:6]
				ao = authordict[uid]
				wk = workdict[s]
				thehtml.append('<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{au}, '
					'<span class="pickedwork">{wk}</span></span>'
					'<br />'.format(v=v, var=var, soe=selectionorexclusion, lv=localval, au=ao.akaname, tit=tit, wk=wk.title))
				thejs.append((var, localval))

		# [g] passages
		v = 'psg'
		var = v + selectionorexclusion
		if session[var]:
			psgtemplate = '<span class="{v}{soe} selection" id="{var}_0{lv}" {tit}>{au}, <span class="pickedwork">{wk}</span> <span class="pickedsubsection">{loc}</span></span><br />'
			spantemplate = 'from {a} to {b}'
			thehtml.append('<span class="picklabel">Passages</span><br />')
			localval = -1
			for s in session[var]:
				localval += 1
				uid = s[:6]
				ao = authordict[uid]
				loc = str()
				# watch out for heterogenous passage selection formats; only _AT_ and _FROM_ exist ATM
				# session[psgselections] = ['lt0474w005_FROM_4501_TO_11915', 'lt2806w002_AT_3|4|5']
				if '_AT_' in s:
					locus = s.split('_AT_')[1].split('|')
					locus.reverse()
					citationtuple = tuple(locus)
					# NOTE(review): if no work in ao.listofworks matches s[0:10], 'wk' here
					# is unbound (or stale from a prior iteration) — TODO confirm inputs always match
					for w in ao.listofworks:
						if w.universalid == s[0:10]:
							wk = w
					loc = prolixlocus(wk, citationtuple)
				elif '_FROM_' in s:
					dbconnection = ConnectionObject()
					dbcursor = dbconnection.cursor()
					wk = workdict[s[0:10]]
					locus = s.split('_FROM_')[1]
					start = locus.split('_TO_')[0]
					stop = locus.split('_TO_')[1]
					startln = dblineintolineobject(grabonelinefromwork(uid, start, dbcursor))
					stopln = dblineintolineobject(grabonelinefromwork(uid, stop, dbcursor))
					dbconnection.connectioncleanup()
					# print('_FROM_', start, stop, startln.uncleanlocustuple(), stopln.uncleanlocustuple())
					loc = spantemplate.format(a=startln.prolixlocus(), b=stopln.prolixlocus())
				thehtml.append(psgtemplate.format(v=v, var=var, soe=selectionorexclusion, lv=localval, au=ao.akaname, wk=wk.title, loc=loc, tit=tit))
				thejs.append((var, localval))

		returndict[selectionorexclusion] = '\n'.join(thehtml)

	scount = len(session['auselections'] + session['wkselections'] + session['agnselections'] +
		session['wkgnselections'] + session['psgselections'] + session['alocselections'] +
		session['wlocselections'])
	scount += len(session['auexclusions'] + session['wkexclusions'] + session['agnexclusions'] +
		session['wkgnexclusions'] + session['psgexclusions'] + session['alocexclusions'] +
		session['wlocexclusions'])

	returndict['numberofselections'] = -1
	if scount > 0:
		returndict['numberofselections'] = scount

	returndict['jstuples'] = thejs

	return returndict
def findvalidlevelvalues(workobject: dbOpus, partialcitationtuple: tuple, cursor) -> LowandHighInfo:
	"""
	tell me some of a citation and i can tell you what is a valid choice at the next step

	i expect the lowest level to be stored at position 0 in the tuple

	note that you should not send me a full citation because i will look at lowestlevel-1

	sample input:
		workid = lt0474w015
		workstructure = {0: 'line', 1: 'section'}
		partialcitationtuple = ('13',)
	out: (Cicero, Pro Sulla 13)

	note that this is mildly costly as a function call when you convert all of the results to lineobjects
	OOP is a lot easier; but you pay a price
	this also means that lowering the init costs of dbworklines is a good idea

	:param workobject:
	:param partialcitationtuple:
	:param cursor:
	:return: LowandHighInfo with the low/high/range of valid values at the probed level
	"""
	workid = workobject.universalid
	workstructure = workobject.structure
	partialcitation = list(partialcitationtuple)
	availablelevels = len(workstructure)
	atlevel = availablelevels - len(partialcitation)
	# cheat in the case where you want to find the top by sending a 'noncitation': 'top'
	# e.g.: /getstructure/gr0003w001/firstline
	if partialcitationtuple[0] == 'firstline':
		atlevel = availablelevels
	if atlevel < 1:
		# i am confused; threatening to probe for level "-1"
		# a selection at level00 will do this to me
		#   /getstructure/gr0003w001/3|36|5|3
		# this needs to be made uncontroversial:
		#   /getstructure/gr0003w001/3|36|5
		# and so: massage the data
		atlevel = 1
		try:
			partialcitation.pop()
		except IndexError:
			atlevel = availablelevels

	audb = workid[0:6]
	lvl = 'l' + str(atlevel - 1)

	# select level_00_value from gr0565w001 where level_03_value='3' AND level_02_value='2' AND level_01_value='1' AND level_00_value NOT IN ('t') ORDER BY index ASC;
	# select level_01_value from gr0565w001 where level_03_value='2' AND level_02_value='1' AND level_01_value NOT IN ('t') ORDER BY index ASC;
	# (dropped an extraneous 'lvl=' kwarg that the template never referenced)
	query = 'SELECT {wltmp} FROM {db} WHERE ( wkuniversalid=%s ) AND '.format(wltmp=worklinetemplate, db=audb)
	datalist = [workid]
	for level in range(availablelevels - 1, atlevel - 1, -1):
		query += ' level_0{lvl}_value=%s AND '.format(lvl=level)
		datalist.append(partialcitationtuple[availablelevels - level - 1])
	query += 'level_0{lvl}_value NOT IN (%s) ORDER BY index'.format(lvl=atlevel - 1)
	datalist.append('t')
	data = tuple(datalist)
	cursor.execute(query, data)
	results = cursor.fetchall()

	if results:
		lines = [dblineintolineobject(r) for r in results]
	else:
		lines = None

	if not lines:
		lowandhighobject = LowandHighInfo(availablelevels, atlevel - 1, workstructure[atlevel - 1], '-9999', '', [''])
		return lowandhighobject

	low = getattr(lines[0], lvl)
	high = getattr(lines[-1], lvl)
	rng = [getattr(l, lvl) for l in lines]
	# need to drop dupes and keep the index order
	# BUGFIX: the old set-based dedup ('list(deduper)') scrambled the index order the
	# comment above promised to keep; dict.fromkeys() dedupes while preserving first-seen
	# order (the subsequent sort makes the final output identical either way)
	rng = list(dict.fromkeys(rng))
	try:
		# sort numerically when every value is an int-like string...
		rng = [int(r) for r in rng]
		rng = sorted(rng)
		rng = [str(r) for r in rng]
	except ValueError:
		# ...otherwise fall back to lexical sorting ('13a', 'pr', etc.)
		rng = sorted(rng)

	lowandhighobject = LowandHighInfo(availablelevels, atlevel - 1, workstructure[atlevel - 1], low, high, rng)

	return lowandhighobject
def buildtext(work: str, firstline: int, lastline: int, linesevery: int, cursor) -> str:
	"""
	make a readable/printable version of a work

	:param work: work universalid, e.g. 'lt0474w024'
	:param firstline: index of the first db line to render
	:param lastline: index of the last db line to render
	:param linesevery: print a line number every N lines
	:param cursor: db cursor
	:return: an HTML <table> string of the requested lines
	"""
	workobject = workdict[work]

	auid = work[0:6]

	qtemplate = """
	SELECT {wltmp} FROM {a} WHERE (index >= %s and index <= %s) ORDER BY index ASC
	"""
	query = qtemplate.format(wltmp=worklinetemplate, a=auid)
	data = (firstline, lastline)
	cursor.execute(query, data)
	results = cursor.fetchall()

	output = ['<table>\n']

	# consecutive lines can get numbered twice
	# 660 ἤν τιϲ ὀφείλων ἐξαρνῆται. Πρ. πόθεν οὖν ἐδάνειϲ’ ὁ
	# 660 δανείϲαϲ,
	avoiddoubletap = False

	linetemplate = determinelinetemplate()

	# pull these outside the "line in results" loop lest you compile the regex 12000x over 1000 lines
	bracketfinder = {
		'square': {
			'ocreg': re.compile(r'\[(.*?)(\]|$)'),
			'coreg': re.compile(r'(^|\[)(.*?)\]'),
			'class': 'editorialmarker_squarebrackets',
			'o': '[',
			'c': ']'
		},
		'round': {
			'ocreg': re.compile(r'\((.*?)(\)|$)'),
			'coreg': re.compile(r'(^|\()(.*?)\)'),
			'class': 'editorialmarker_roundbrackets',
			'o': '(',
			'c': ')'
		},
		'angled': {
			'ocreg': re.compile(r'⟨(.*?)(⟩|$)'),
			'coreg': re.compile(r'(^|⟨)(.*?)⟩'),
			'class': 'editorialmarker_angledbrackets',
			'o': '⟨',
			'c': '⟩'
		},
		'curly': {
			'ocreg': re.compile(r'\{(.*?)(\}|$)'),
			'coreg': re.compile(r'(^|\{)(.*?)\}'),
			'class': 'editorialmarker_curlybrackets',
			'o': '{',
			'c': '}'
		}
	}

	# a bracket opened but not closed on a line means the markup continues on the next line;
	# 'exceptions' are strophe/antistrophe/epode labels that look like open brackets but are not
	openfinder = {
		'square': {
			'regex': re.compile(r'\[[^\]]{0,}$'),
			'exceptions': [re.compile(r'\[(ϲτρ|ἀντ)\. .\.'), re.compile(r'\[ἐπῳδόϲ')]
		},
		'round': {
			'regex': re.compile(r'\([^\)]{0,}$')
		},
		'angled': {
			'regex': re.compile(r'⟨[^⟩]{0,}$')
		},
		'curly': {
			'regex': re.compile(r'\{[^\}]{0,}$')
		},
	}

	closefinder = {
		'square': {
			'c': re.compile(r'\]')
		},
		'round': {
			'c': re.compile(r'\)')
		},
		'angled': {
			'c': re.compile(r'⟩')
		},
		'curly': {
			'c': re.compile(r'\}')
		},
	}

	if results:
		previousline = dblineintolineobject(results[0])
		brackettypes = findactivebrackethighlighting()
		editorialcontinuation = {'square': False, 'round': False, 'curly': False, 'angled': False}

		lines = [dblineintolineobject(line) for line in results]
		lines = paragraphformatting(lines)  # polish up the HTML of the lines

		for thisline in lines:
			if workobject.isnotliterary() and thisline.index == workobject.starts:
				# line.index == workobject.starts added as a check because
				# otherwise you will re-see date info in the middle of some documents
				# it gets reasserted with a CD block reinitialization
				metadata = checkfordocumentmetadata(thisline, workobject)
				if metadata:
					output.append(metadata)

			if brackettypes:
				# wrap editorial-bracket spans and carry open brackets over to the next line
				columnb = thisline.markeditorialinsersions(editorialcontinuation, bracketfinder=bracketfinder)
				editorialcontinuation = {t: setcontinuationvalue(thisline, previousline, editorialcontinuation[t], t,
					openfinder=openfinder, closefinder=closefinder) for t in brackettypes}
			else:
				columnb = thisline.markedup

			if thisline.samelevelas(previousline) is not True:
				columna = thisline.shortlocus()
			else:
				columna = str()
			try:
				linenumber = int(thisline.l0)
			except ValueError:
				# 973b is not your friend
				linenumber = 0
			if linenumber % linesevery == 0 and not avoiddoubletap:
				columna = thisline.locus()
				avoiddoubletap = True
			else:
				avoiddoubletap = False

			notes = '; '.join(thisline.insetannotations())

			if columna and session['simpletextoutput']:
				columna = '({a})'.format(a=columna)

			linehtml = linetemplate.format(ca=columna, cb=columnb, cc=notes)

			output.append(linehtml)

			previousline = thisline

	output.append('</table>\n')

	html = '\n'.join(output)

	return html
def buildbrowseroutputobject(authorobject: dbAuthor, workobject: dbOpus, locusindexvalue: int, dbcursor) -> BrowserOutputObject:
	"""
	this function does a lot of work via a number of subfunctions

	lots of refactoring required if you change anything...

	:param authorobject:
	:param workobject:
	:param locusindexvalue: db index of the line to center the browser on
	:param dbcursor:
	:return: a BrowserOutputObject whose .browserhtml is ready for the page
	"""
	thiswork = workobject.universalid

	linesofcontext = int(session['browsercontext'])
	numbersevery = hipparchia.config['SHOWLINENUMBERSEVERY']

	# [a] acquire the lines we need to display
	surroundinglines = simplecontextgrabber(workobject.authorid, locusindexvalue, linesofcontext, dbcursor)
	lines = [dblineintolineobject(l) for l in surroundinglines]
	# NB: 'wkuinversalid' is the attribute's actual (misspelled) name on the line objects —
	# presumably intentional/legacy; do not "correct" it here without checking dbWorkLine
	lines = [l for l in lines if l.wkuinversalid == thiswork]

	# default to the first line in case the exact index is absent from the window
	focusline = lines[0]
	for line in lines:
		if line.index == locusindexvalue:
			focusline = line

	passage = BrowserPassageObject(authorobject, workobject, locusindexvalue)
	passage.focusline = focusline
	passage.biblio = formatpublicationinfo(workobject.publication_info)
	passage.citation = locusintocitation(workobject, focusline)

	previousline = lines[0]
	brackettypes = findactivebrackethighlighting()
	continuationdict = {'square': False, 'round': False, 'curly': False, 'angled': False}

	lineprefix = str()
	if session['debugdb']:
		lineprefix = '<smallcode>{id} </smallcode> '

	# [b] format the lines and insert them into the BrowserPassageObject
	# [b1] check to see if this line is part of a larger formatting block: really only servius?
	lines = paragraphformatting(lines)

	# [b2]
	for line in lines:
		if workobject.isnotliterary() and line.index == workobject.starts:
			# line.index == workobject.starts added as a check because
			# otherwise you will re-see date info in the middle of some documents:
			# it gets reasserted with a CD block reinitialization
			metadata = checkfordocumentmetadata(line, workobject)
			if metadata:
				passage.browsedlines.append(metadata)

		if session['debughtml']:
			columnb = line.showlinehtml()
		else:
			columnb = insertparserids(line, continuationdict)

		if brackettypes:
			# carry open editorial brackets over from line to line
			continuationdict = {t: setcontinuationvalue(line, previousline, continuationdict[t], t)
				for t in brackettypes}

		if line.index == focusline.index:
			# highlight the citationtuple line
			columna = line.locus()
			columnb = '<span class="focusline">{c}</span>'.format(c=columnb)
		else:
			try:
				linenumber = int(line.l0)
			except ValueError:
				# 973b is not your friend
				linenumber = 0
			if line.samelevelas(previousline) is not True:
				columna = line.shortlocus()
			elif linenumber % numbersevery == 0:
				columna = line.locus()
			else:
				# do not insert a line number or special formatting
				columna = str()

		prefix = lineprefix.format(id=line.getlineurl())
		columnb = prefix + columnb

		notes = '; '.join(line.insetannotations())

		if columna and session['simpletextoutput']:
			columna = '({a})'.format(a=columna)

		linehtml = passage.linetemplate.format(l=columnb, n=notes, c=columna)

		passage.browsedlines.append(linehtml)
		previousline = line

	# [c] build the output
	outputobject = BrowserOutputObject(authorobject, workobject, locusindexvalue)
	outputobject.browserhtml = passage.generatepassagehtml()

	return outputobject
def subqueryphrasesearch(workerid, foundlineobjects: ListProxy, searchphrase: str, listofplacestosearch: ListProxy,
                         searchobject: SearchObject, dbconnection) -> ListProxy:
    """
    use subquery syntax to grab multi-line windows of text for phrase searching

    line ends and line beginning issues can be overcome this way, but then you have plenty of
    bookkeeping to do to to get the proper results focussed on the right line

    tablestosearch: ['lt0400', 'lt0022', ...]

    a search inside of Ar., Eth. Eud.:

        SELECT secondpass.index, secondpass.accented_line
            FROM (SELECT firstpass.index, firstpass.linebundle, firstpass.accented_line
                FROM (SELECT index, accented_line,
                    concat(accented_line, ' ', lead(accented_line) OVER (ORDER BY index ASC)) as linebundle
                    FROM gr0086 WHERE ( (index BETWEEN 15982 AND 18745) ) ) firstpass
            ) secondpass
        WHERE secondpass.linebundle ~ %s  LIMIT 200

    the WHERE clause restricting a temptable search is built analogously; a multi-work search
    simply ORs/ANDs more BETWEEN clauses together inside the innermost SELECT

    :param workerid: id of this worker process
    :param foundlineobjects: shared list that accumulates the find objects
    :param searchphrase: the phrase sought
    :param listofplacestosearch: shared list of tables still to be searched
    :param searchobject: the governing SearchObject
    :param dbconnection: db connection (may be None; sfo handles setup)
    :return: foundlineobjects
    """
    # print('subqueryphrasesearch()')
    so = searchobject
    activepoll = so.poll

    # build incomplete sfo that will handle everything other than iteratethroughsearchlist()
    sfo = returnsearchfncobject(workerid, foundlineobjects, listofplacestosearch, so, dbconnection, None)

    querytemplate = """
        SELECT secondpass.index, secondpass.{co} FROM
            (SELECT firstpass.index, firstpass.linebundle, firstpass.{co} FROM
                (SELECT index, {co}, concat({co}, ' ', lead({co}) OVER (ORDER BY index ASC)) AS linebundle
                    FROM {db} {whr} ) firstpass
            ) secondpass
        WHERE secondpass.linebundle ~ %s {lim}"""

    wheretemplate = """
        WHERE EXISTS
            (SELECT 1 FROM {tbl}_includelist_{a} incl WHERE incl.includeindex = {tbl}.index)
    """

    # substringsearch() needs ability to CREATE TEMPORARY TABLE
    sfo.dbconnection.setreadonly(False)
    dbcursor = sfo.dbconnection.cursor()

    qcomb = QueryCombinator(searchphrase)
    # the last item is the full phrase: ('one two three four five', '')
    combinations = qcomb.combinations()
    combinations.pop()

    # lines start/end
    sp = re.sub(r'^\s', r'(^|\\s)', searchphrase)
    sp = re.sub(r'\s$', r'(\\s|$)', sp)
    # on the reasoning behind the following substitution see 'DEBUGGING notes: SQL oddities' above
    # sp = re.sub(r' ', r'\\s', sp)

    if not so.onehit:
        lim = ' LIMIT ' + str(so.cap)
    else:
        # the windowing problem means that '1' might be something that gets discarded
        lim = ' LIMIT 5'

    if so.redissearchlist:
        listofplacestosearch = True

    while listofplacestosearch and activepoll.gethits() <= so.cap:
        # sfo.getnextfnc() also takes care of the commitcount
        authortable = sfo.getnextfnc()
        sfo.updatepollremaining()

        if authortable:
            whr = str()
            r = so.indexrestrictions[authortable]
            if r['type'] == 'between':
                indexwedwhere = buildbetweenwhereextension(authortable, so)
                if indexwedwhere != '':
                    # indexwedwhere will come back with an extraneous ' AND'
                    indexwedwhere = indexwedwhere[:-4]
                    whr = 'WHERE {iw}'.format(iw=indexwedwhere)
            elif r['type'] == 'temptable':
                avoidcollisions = assignuniquename()
                q = r['where']['tempquery']
                q = re.sub('_includelist', '_includelist_{a}'.format(a=avoidcollisions), q)
                dbcursor.execute(q)
                whr = wheretemplate.format(tbl=authortable, a=avoidcollisions)

            query = querytemplate.format(db=authortable, co=so.usecolumn, whr=whr, lim=lim)
            data = (sp,)
            # print('subqueryphrasesearch() find indices() q,d:\n\t', query, data)
            dbcursor.execute(query, data)
            indices = [i[0] for i in dbcursor.fetchall()]
            # this will yield a bunch of windows: you need to find the centers; see 'while...' below

            locallineobjects = list()
            if indices:
                for i in indices:
                    query = 'SELECT {wtmpl} FROM {tb} WHERE index=%s'.format(wtmpl=worklinetemplate, tb=authortable)
                    data = (i,)
                    # print('subqueryphrasesearch() iterate through indices() q,d:\n\t', query, data)
                    dbcursor.execute(query, data)
                    locallineobjects.append(dblineintolineobject(dbcursor.fetchone()))

            locallineobjects.reverse()
            # debugging
            # for l in locallineobjects:
            #   print(l.universalid, l.locus(), getattr(l, so.usewordlist))

            gotmyonehit = False
            while locallineobjects and activepoll.gethits() <= so.cap and not gotmyonehit:
                # windows of indices come back: e.g., three lines that look like they match when only one matches [3131, 3132, 3133]
                # figure out which line is really the line with the goods
                # it is not nearly so simple as picking the 2nd element in any run of 3: no always runs of 3 + matches in
                # subsequent lines means that you really should check your work carefully; this is not an especially costly
                # operation relative to the whole search and esp. relative to the speed gains of using a subquery search
                lineobject = locallineobjects.pop()
                if re.search(sp, getattr(lineobject, so.usewordlist)):
                    # the whole phrase lives on this one line
                    sfo.addnewfindstolistoffinds([lineobject])
                    activepoll.addhits(1)
                    if so.onehit:
                        gotmyonehit = True
                else:
                    # the phrase may straddle the line break: pair this line with its true successor
                    try:
                        nextline = locallineobjects[0]
                    except IndexError:
                        nextline = makeablankline('gr0000w000', -1)

                    if lineobject.wkuinversalid != nextline.wkuinversalid or lineobject.index != (nextline.index - 1):
                        # you grabbed the next line on the pile (e.g., index = 9999), not the actual next line (e.g., index = 101)
                        # usually you won't get a hit by grabbing the next db line, but sometimes you do...
                        query = 'SELECT {wtmpl} FROM {tb} WHERE index=%s'.format(wtmpl=worklinetemplate, tb=authortable)
                        data = (lineobject.index + 1,)
                        # print('subqueryphrasesearch() "while locallineobjects..." loop q,d:\n\t', query, data)
                        dbcursor.execute(query, data)
                        try:
                            nextline = dblineintolineobject(dbcursor.fetchone())
                        except Exception:
                            # was a bare 'except:'; fetchone() can return None past the end of a
                            # table and dblineintolineobject() cannot digest that
                            nextline = makeablankline('gr0000w000', -1)

                    for c in combinations:
                        tail = c[0] + '$'
                        head = '^' + c[1]
                        # debugging
                        # print('re', getattr(lo, so.usewordlist), tail, head, getattr(next, so.usewordlist))
                        t = False
                        h = False
                        try:
                            t = re.search(tail, getattr(lineobject, so.usewordlist))
                        except re.error:
                            pass
                        try:
                            h = re.search(head, getattr(nextline, so.usewordlist))
                        except re.error:
                            pass
                        if t and h:
                            sfo.addnewfindstolistoffinds([lineobject])
                            activepoll.addhits(1)
                            if so.onehit:
                                gotmyonehit = True
        else:
            # redis will return None for authortable if the set is now empty
            listofplacestosearch = None

    sfo.listcleanup()

    if sfo.needconnectioncleanup:
        sfo.dbconnection.connectioncleanup()

    return foundlineobjects
def withinxwords(workdbname: str, searchobject: SearchObject, dbconnection) -> List[dbWorkLine]: """ int(session['proximity']), searchingfor, proximate, curs, wkid, whereclauseinfo after finding x, look for y within n words of x getting to y: find the search term x and slice it out of its line then build forwards and backwards within the requisite range then see if you get a match in the range if looking for 'paucitate' near 'imperator' you will find: 'romani paucitate seruorum gloriatos itane tandem ne' this will become: 'romani' + 'seruorum gloriatos itane tandem ne' :param workdbname: :param searchobject: :return: """ so = searchobject dbcursor = dbconnection.cursor() dbconnection.setautocommit() # you will only get session['maxresults'] back from substringsearch() unless you raise the cap # "Roman" near "Aetol" will get 3786 hits in Livy, but only maxresults will come # back for checking: but the Aetolians are likley not among those passages... templimit = 9999 if so.lemma: chunksize = hipparchia.config['LEMMACHUNKSIZE'] terms = so.lemma.formlist chunked = [ terms[i:i + chunksize] for i in range(0, len(terms), chunksize) ] chunked = [wordlistintoregex(c) for c in chunked] hits = list() for c in chunked: hits += list( substringsearch(c, workdbname, so, dbcursor, templimit)) so.usewordlist = 'polytonic' else: hits = list( substringsearch(so.termone, workdbname, so, dbcursor, templimit)) fullmatches = list() for hit in hits: hitline = dblineintolineobject(hit) leadandlag = grableadingandlagging(hitline, so, dbcursor) lagging = leadandlag['lag'] leading = leadandlag['lead'] # print(hitline.universalid, so.termtwo, '\n\t[lag] ', lagging, '\n\t[lead]', leading) if so.near and (re.search(so.termtwo, leading) or re.search(so.termtwo, lagging)): fullmatches.append(hit) elif not so.near and not re.search( so.termtwo, leading) and not re.search(so.termtwo, lagging): fullmatches.append(hit) return fullmatches
def lemmatizedwithinxlines(searchobject: SearchObject, hitlist: List[tuple], dbcursor):
    """
    the alternate way of doing withinxlines; this will ask regex to do the heavy lifting

    nasty edge case 'fire' near 'burn' in Homer:

    simplewithinxlines()
        Sought all 5 known forms of »πῦρ« within 1 lines of all 359 known forms of »καίω«
        Searched 3 texts and found 24 passages (621.25s)

    lemmatizedwithinxlines()
        Sought all 5 known forms of »πῦρ« within 1 lines of all 359 known forms of »καίω«
        Searched 3 texts and found 24 passages (2.82s)

    note that this function is often slightly slower than simplewithinxlines(), but it does seem to be
    able to avoid the catastrophe; lemmatized vs non-lemmatized is probably the key difference when it
    comes to speed

    [was marked "BROKEN ATM: 1.7.4"; the per-author line grab below used to overwrite
    'linecollection' on every pass of its loop, losing every author but the last one;
    it now accumulates across authors]

    :param searchobject: the governing SearchObject
    :param hitlist: raw db rows for the lines that matched term one
    :param dbcursor: an active database cursor
    :return: list of decomposed line objects that also matched term two within range
    """
    so = searchobject

    columconverter = {'marked_up_line': 'markedup', 'accented_line': 'polytonic', 'stripped_line': 'stripped'}
    col = columconverter[so.usecolumn]

    prox = int(so.session['proximity'])

    # note that at the moment we arrive here with a one-work per worker policy
    # that is all of the hits will come from the same table
    # this means extra/useless sifting below, but perhaps it is safer to be wasteful now lest we break later

    fullmatches = set()  # set to avoid duplicate hits

    hitlinelist = [dblineintolineobject(h) for h in hitlist]

    # [1] collect, per author table, every line index we might need
    # prox = 2; l = 100; list(range(l-prox, l+prox+1)) --> [98, 99, 100, 101, 102]
    linesintheauthors = dict()
    for hitline in hitlinelist:
        wkid = hitline.universalid
        environs = set(range(hitline.index - prox, hitline.index + prox + 1))
        environs = {'{w}_ln_{x}'.format(w=wkid, x=e) for e in environs}
        authorid = wkid[0:6]
        try:
            linesintheauthors[authorid].update(environs)
        except KeyError:
            linesintheauthors[authorid] = set(environs)

    # [2] now grab all of the lines you might need
    # BUGFIX: accumulate into one dict across authors; the old code reassigned
    # 'linecollection' inside the loop, so only the final author's lines survived
    linecollection = dict()
    for authorid in linesintheauthors:
        if linesintheauthors[authorid]:
            # example: {'lt0803': {952, 953, 951}}
            grabbed = grablistoflines(authorid, list(linesintheauthors[authorid]), dbcursor)
            linecollection.update({'{w}_ln_{x}'.format(w=g.wkuinversalid, x=g.index): g for g in grabbed})

    # [3] then associate all of the surrounding words with those lines
    wordbundles = dict()
    for hitline in hitlinelist:
        wkid = hitline.universalid
        environs = set(range(hitline.index - prox, hitline.index + prox + 1))
        mylines = list()
        for e in environs:
            try:
                mylines.append(linecollection['{w}_ln_{x}'.format(w=wkid, x=e)])
            except KeyError:
                # you went out of bounds and tried to grab something that is not really there
                # KeyError: 'lt1515w001_ln_1175'
                # line 1175 is actually the first line of lt1515w002...
                pass
        mywords = [getattr(ln, col) for ln in mylines]
        mywords = flattenlistoflists([w.split(' ') for w in mywords])
        wordbundles[hitline] = ' '.join(mywords)

    # [4] then see if we have any hits...
    # (the old 'while True: ... break' wrapper ran exactly once and is gone)
    for provisionalhitline in wordbundles:
        if len(fullmatches) > so.cap:
            break
        if so.near and re.search(so.termtwo, wordbundles[provisionalhitline]):
            fullmatches.add(provisionalhitline)
        elif not so.near and not re.search(so.termtwo, wordbundles[provisionalhitline]):
            fullmatches.add(provisionalhitline)

    return [m.decompose() for m in fullmatches]
def precomposedsqlsubqueryphrasesearch(so: SearchObject) -> List[dbWorkLine]:
    """
    use subquery syntax to grab multi-line windows of text for phrase searching

    line ends and line beginning issues can be overcome this way, but then you have plenty of
    bookkeeping to do to to get the proper results focussed on the right line

    these searches take linear time: same basic time for any given scope regardless of the query

    :param so: the governing SearchObject
    :return: the list of line objects on which the phrase was actually found
    """
    # rebuild the searchsqldict but this time pass through rewritequerystringforsubqueryphrasesearching()
    so.searchsqldict = searchlistintosqldict(so, so.phrase, subqueryphrasesearch=True)
    # debugmessage('precomposedsqlsubqueryphrasesearch() so.searchsqldict: {d}'.format(d=so.searchsqldict))

    # the windowed collection of lines; you will need to work to find the centers
    # windowing will increase the number of hits: 2+ lines per actual find
    initialhitlines = generatepreliminaryhitlist(so, recap=so.cap * 3)

    m = 'Generating final list of hits by searching among the {h} preliminary hits'
    so.poll.statusis(m.format(h=so.poll.gethits()))
    so.poll.sethits(0)

    # anchor the phrase at line starts/ends
    sp = re.sub(r'^\s', r'(^|\\s)', so.phrase)
    sp = re.sub(r'\s$', r'(\\s|$)', sp)

    combinations = QueryCombinator(so.phrase)
    # the last item is the full phrase and it will have already been searched: ('one two three four five', '')
    combinations = combinations.combinations()
    combinations.pop()

    listoffinds = list()

    dbconnection = ConnectionObject()
    dbcursor = dbconnection.cursor()

    setofhits = set()

    while initialhitlines:
        # windows of indices come back: e.g., three lines that look like they match when only one matches [3131, 3132, 3133]
        # figure out which line is really the line with the goods
        # it is not nearly so simple as picking the 2nd element in any run of 3: no always runs of 3 + matches in
        # subsequent lines means that you really should check your work carefully; this is not an especially costly
        # operation relative to the whole search and esp. relative to the speed gains of using a subquery search
        lineobject = initialhitlines.pop()
        if not so.onehit or lineobject.authorid not in setofhits:
            if re.search(sp, getattr(lineobject, so.usewordlist)):
                # the whole phrase lives on this one line
                listoffinds.append(lineobject)
                so.poll.addhits(1)
                setofhits.add(lineobject.authorid)
            else:
                # the phrase may straddle the line break: pair this line with its true successor
                try:
                    nextline = initialhitlines[0]
                except IndexError:
                    nextline = makeablankline('gr0000w000', -1)

                if lineobject.wkuinversalid != nextline.wkuinversalid or lineobject.index != (nextline.index - 1):
                    # you grabbed the next line on the pile (e.g., index = 9999), not the actual next line (e.g., index = 101)
                    # usually you won't get a hit by grabbing the next db line, but sometimes you do...
                    query = 'SELECT {wtmpl} FROM {tb} WHERE index=%s'.format(wtmpl=worklinetemplate, tb=lineobject.authorid)
                    data = (lineobject.index + 1,)
                    dbcursor.execute(query, data)
                    try:
                        nextline = dblineintolineobject(dbcursor.fetchone())
                    except Exception:
                        # was a bare 'except:'; fetchone() can return None past the end of a
                        # table and dblineintolineobject() cannot digest that
                        nextline = makeablankline('gr0000w000', -1)

                for c in combinations:
                    tail = c[0] + '$'
                    head = '^' + c[1]
                    t = False
                    h = False
                    try:
                        t = re.search(tail, getattr(lineobject, so.usewordlist))
                    except re.error:
                        pass
                    try:
                        h = re.search(head, getattr(nextline, so.usewordlist))
                    except re.error:
                        pass
                    if t and h:
                        listoffinds.append(lineobject)
                        so.poll.addhits(1)
                        setofhits.add(lineobject.authorid)

    dbconnection.connectioncleanup()

    return listoffinds
def workonprecomposedsqlsearch(workerid: int, foundlineobjects: ListProxy, listofplacestosearch: ListProxy, searchobject: SearchObject, dbconnection) -> ListProxy: """ iterate through listofplacestosearch execute precomposedsqlsearcher() on each item in the list gather the results... listofplacestosearch elements are dicts and the whole looks like: [{'temptable': '', 'query': 'SELECT ...', 'data': ('ὕβριν',)}, {'temptable': '', 'query': 'SELECT ...', 'data': ('ὕβριν',)} ...] this is supposed to give you one query per hipparchiaDB table unless you are lemmatizing """ if not dbconnection: dbconnection = ConnectionObject() so = searchobject activepoll = so.poll dbconnection.setreadonly(False) dbcursor = dbconnection.cursor() commitcount = 0 getnetxitem = listofplacestosearch.pop emptyerror = IndexError remaindererror = TypeError while listofplacestosearch and activepoll.gethits() <= so.cap: # if workerid == 0: # print('remain:', len(listofplacestosearch)) commitcount += 1 dbconnection.checkneedtocommit(commitcount) try: querydict = getnetxitem(0) # consolewarning("workonprecomposedsqlsearch() querydict:\n\t{q}".format(q=querydict)) except emptyerror: querydict = None listofplacestosearch = None if querydict: foundlines = precomposedsqlsearcher(querydict, dbcursor) lineobjects = [dblineintolineobject(f) for f in foundlines] foundlineobjects.extend(lineobjects) if lineobjects: numberoffinds = len(lineobjects) activepoll.addhits(numberoffinds) else: listofplacestosearch = None try: activepoll.remain(len(listofplacestosearch)) except remaindererror: pass if not icanpickleconnections(): dbconnection.connectioncleanup() return foundlineobjects