def updatesearchlistandsearchobject(so: SearchObject) -> SearchObject:
    """

    a searchlist already exists on the searchobject: refine it and then record what the
    refined list implies (the corpora used, the index restrictions)

    kept as a separate function so that golangvectors() can call it as well

    :param so: the search object; modified in place
    :return: the same search object
    """

    # works with passage exclusions get flagged:
    #   gr0001x001 instead of gr0001w001 if part of w001 is being skipped
    flagged = flagexclusions(so.searchlist, so.session)
    so.searchlist = flagged

    so.poll.statusis('Calculating full authors to search')
    collapsed = calculatewholeauthorsearches(so.searchlist, authordict)
    so.searchlist = collapsed
    so.usedcorpora = so.wholecorporasearched()

    so.poll.statusis('Configuring the search restrictions')
    restrictions = configurewhereclausedata(so.searchlist, workdict, so)
    so.indexrestrictions = restrictions

    return so
def checkneedtoabort(so: SearchObject) -> str:
    """

    can/should we even do this?

    so.searchlist is (re)compiled here as a side effect; the preconditions for the
    search are then checked one by one

    :param so: the search object; so.searchlist is overwritten
    :return: an empty string if nothing stands in the way (always the case when
        so.iamarobot is set); otherwise whatever emptyvectoroutput() builds from
        the list of reasons
    """

    if so.iamarobot:
        return str()

    abortjson = str()

    def abort(reasons):
        # every caller hands over a *list* of reasons
        return emptyvectoroutput(so, reasons)

    activecorpora = so.getactivecorpora()
    so.poll.statusis('Compiling the list of works to search')
    so.searchlist = compilesearchlist(listmapper, so.session)

    # so.seeking should only be set via a fallback when session['baggingmethod'] == 'unlemmatized'
    if (so.lemmaone or so.tovectorize or so.seeking) and activecorpora:
        pass
    elif not activecorpora:
        abortjson = abort(['no active corpora'])
    elif not so.searchlist:
        abortjson = abort(['empty list of places to look'])
    elif so.vectorquerytype == 'topicmodel':
        # we don't have and don't need a lemmaone, etc.
        pass
    elif so.vectorquerytype == 'analogies':
        if not so.lemmaone or not so.lemmatwo or not so.lemmathree:
            # was abort('[did not have three lemmata]'): a bare string, not a list of reasons
            abortjson = abort(['did not have three lemmata'])
    else:
        # note that some vector queries do not need a term; fix this later...
        abortjson = abort(['there was no search term'])

    maxwords = hipparchia.config['MAXVECTORSPACE']
    wordstotal = 0
    for work in so.searchlist:
        # only the first ten characters of the entry are used as the workdict key
        work = work[:10]
        try:
            wordstotal += workdict[work].wordcount
        except TypeError:
            # TypeError: unsupported operand type(s) for +=: 'int' and 'NoneType'
            pass

    # an oversized scope replaces any reason recorded above
    if wordstotal > maxwords:
        m = 'the vector scope max exceeded: {a} > {b} '
        abortjson = abort([
            m.format(a=locale.format_string('%d', wordstotal, grouping=True),
                     b=locale.format_string('%d', maxwords, grouping=True))
        ])

    return abortjson
def perparesoforsecondsqldict(so: SearchObject, initialhitlines: List[dbWorkLine], usebetweensyntax=True) -> SearchObject:
    """

    after finding initialhitlines sqlwithinxlinessearch() will run a second query

    it needs a new sqldict: so.searchlist and so.indexrestrictions are rebuilt so that
    they cover only hl.index +/- so.distance for each initial hit, grouped by hl.authorid

    note that "usebetweensyntax=False" will break precomposedphraseandproximitysearch()

    :param so: the search object; modified in place
    :param initialhitlines: the hits from the first query; .index and .authorid are read
    :param usebetweensyntax: True yields 'between' restrictions built from (low, high) pairs;
        False yields 'temptable' restrictions built by wholeworktemptablecontents()
    :return: the same search object
    """

    so.indexrestrictions = dict()
    authorsandlines = dict()

    if not usebetweensyntax:
        # consolewarning('sqlwithinxlinessearch(): temptable')
        # time trials...
        #   Sought all 13 known forms of »ὕβριϲ« within 4 lines of all 230 known forms of »φεύγω«
        #   Searched 7,873 texts and found 9 passages (11.87s)
        #   Searched between 400 B.C.E. and 350 B.C.E.
        #
        #   Sought all 230 known forms of »φεύγω« within 4 lines of all 16 known forms of »κρίϲιϲ«
        #   Searched 7,873 texts and found 12 passages (14.64s)
        #   Searched between 400 B.C.E. and 350 B.C.E.

        for hl in initialhitlines:
            # every line number in the window, inclusive of both ends
            linestosearch = range(hl.index - so.distance, hl.index + so.distance + 1)
            authorsandlines.setdefault(hl.authorid, list()).extend(linestosearch)

        so.searchlist = list(authorsandlines.keys())

        for a in authorsandlines:
            # set() drops the line numbers that overlapping windows share
            so.indexrestrictions[a] = {
                'type': 'temptable',
                'where': wholeworktemptablecontents(a, set(authorsandlines[a])),
            }
    else:
        # consolewarning('sqlwithinxlinessearch(): between')
        # time trials...
        #   Sought all 13 known forms of »ὕβριϲ« within 4 lines of all 230 known forms of »φεύγω«
        #   Searched 7,873 texts and found 9 passages (9.35s)
        #   Searched between 400 B.C.E. and 350 B.C.E.
        #
        #   Sought all 230 known forms of »φεύγω« within 4 lines of all 16 known forms of »κρίϲιϲ«
        #   Searched 7,873 texts and found 12 passages (11.35s)
        #   Searched between 400 B.C.E. and 350 B.C.E.

        for hl in initialhitlines:
            boundaries = (hl.index - so.distance, hl.index + so.distance)
            authorsandlines.setdefault(hl.authorid, list()).append(boundaries)

        # assigned once and outside the loop: with no hits the searchlist must become
        # empty too, just as it does in the temptable branch
        so.searchlist = list(authorsandlines.keys())

        for a in authorsandlines:
            so.indexrestrictions[a] = {
                'where': {
                    'listofboundaries': authorsandlines[a],
                    'listofomissions': list(),
                },
                'type': 'between',
            }

    return so