def dbloadasingleworkobject(workuniversalid: str) -> dbOpus:
    """

    if you get stranded down inside a series of function calls you have no way
    of regaining access to the master dictionary of work objects

    :param workuniversalid:
    :return:
    """

    dbconnection = ConnectionObject()
    cursor = dbconnection.cursor()

    q = """
    SELECT universalid, title, language, publication_info,
        levellabels_00, levellabels_01, levellabels_02, levellabels_03, levellabels_04, levellabels_05,
        workgenre, transmission, worktype, provenance, recorded_date, converted_date,
        wordcount, firstline, lastline, authentic
    FROM works WHERE universalid=%s
    """
    d = (workuniversalid,)

    cursor.execute(q, d)
    r = cursor.fetchone()

    workobject = dbOpus(*r)

    dbconnection.connectioncleanup()

    return workobject
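
# A minimal usage sketch; 'gr0012w001' (Homer) follows the ids used in comments
# elsewhere in this module, and the attribute names on dbOpus are assumed to
# mirror the column names queried above.
def _example_dbloadasingleworkobject():
    wo = dbloadasingleworkobject('gr0012w001')
    print(wo.universalid, wo.title)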
def grablistoflines(table: str, uidlist: list, dbcursor=None) -> list:
    """

    fetch many lines at once

    select shortname from authors where universalid = ANY('{lt0860,gr1139}');

    :param table:
    :param uidlist:
    :param dbcursor:
    :return:
    """

    dbconnection = None
    needscleanup = False

    if not dbcursor:
        dbconnection = ConnectionObject()
        dbcursor = dbconnection.cursor()
        needscleanup = True

    lines = [int(uid.split('_ln_')[1]) for uid in uidlist]

    qtemplate = 'SELECT {wtmpl} from {tb} WHERE index = ANY(%s)'
    q = qtemplate.format(wtmpl=worklinetemplate, tb=table)
    d = (lines,)

    dbcursor.execute(q, d)
    foundlines = dbcursor.fetchall()

    if needscleanup:
        dbconnection.connectioncleanup()

    lineobjects = [dblineintolineobject(f) for f in foundlines]

    return lineobjects
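
# A minimal usage sketch, assuming 'gr0012' is a valid author table and that
# line uids follow the 'workid_ln_index' pattern implied by the split() above.
def _example_grablistoflines():
    uids = ['gr0012w001_ln_1', 'gr0012w001_ln_2']
    lineobjects = grablistoflines('gr0012', uids)
    print(len(lineobjects), 'lines fetched')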
def returnfirstwork(authorid: str, dbcursor=None) -> str:
    """

    more exception handling: this will produce bad results, but it will not kill the program

    :param authorid:
    :param dbcursor:
    :return:
    """

    needscleanup = False
    if not dbcursor:
        dbconnection = ConnectionObject()
        dbcursor = dbconnection.cursor()
        needscleanup = True

    # print('panic and grab first work of', authorid)

    query = 'SELECT universalid FROM works WHERE universalid LIKE %s ORDER BY universalid'
    data = (authorid + '%',)
    dbcursor.execute(query, data)
    found = dbcursor.fetchone()

    try:
        found = found[0]
    except (TypeError, IndexError):
        # TypeError because fetchone() returns None when there are no rows
        # yikes: an author we don't know about
        # perseus will send you gr1415, but he is not in the db
        # fall back to homer...
        found = returnfirstwork('gr0012w001', dbcursor)

    if needscleanup:
        dbconnection.connectioncleanup()

    return found
def loadallauthorsasobjects() -> dict:
    """

    return a dict of all possible author objects

    :return:
    """

    print('loading all authors...', end='')

    dbconnection = ConnectionObject()
    cursor = dbconnection.cursor()

    q = 'SELECT * FROM authors'

    cursor.execute(q)
    results = resultiterator(cursor)

    authorsdict = {r[0]: dbAuthor(*r) for r in results}

    print('\t', len(authorsdict), 'authors loaded', end='')

    dbconnection.connectioncleanup()

    return authorsdict
def buildoptionchecking() -> dict:
    """

    check what build options were set

    hipparchiaDB=# SELECT corpusname, buildoptions FROM builderversion;
     corpusname |                                            buildoptions
    ------------+---------------------------------------------------------------------------------------------------------
     lt         | hideknownblemishes: y, htmlifydatabase: n, simplifybrackets: y, simplifyquotes: y, smartsinglequotes: y

    :return:
    """

    dbconnection = ConnectionObject()
    dbcursor = dbconnection.cursor()

    q = 'SELECT corpusname, buildoptions FROM builderversion'

    try:
        dbcursor.execute(q)
        results = dbcursor.fetchall()
    except Exception:
        # an old build will raise psycopg2.errors.UndefinedColumn,
        # but Windows will tell you that there is no 'errors' module,
        # so we cannot catch that exception by name
        results = None

    dbconnection.connectioncleanup()

    optiondict = dict()
    if results:
        for r in results:
            optiondict[r[0]] = r[1]
        for o in optiondict:
            # turn 'hideknownblemishes: y, htmlifydatabase: n, simplifybrackets: y, ...'
            # into {'hideknownblemishes': 'y', 'htmlifydatabase': 'n', 'simplifybrackets': 'y', ...}
            optiondict[o] = optiondict[o].split(', ')
            optiondict[o] = {a.split(': ')[0]: a.split(': ')[1] for a in optiondict[o]}

    return optiondict
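
# A minimal sketch of consuming the optiondict; the 'hideknownblemishes' key
# and the 'lt' corpus follow the docstring sample above.
def _example_buildoptionchecking():
    buildoptions = buildoptionchecking()
    for corpus, options in buildoptions.items():
        print(corpus, 'hideknownblemishes =', options.get('hideknownblemishes', 'unknown'))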
def checkforstoredvector(so: SearchObject):
    """

    the stored vector might not reflect the current math rules

    return False if you are 'outdated'

    hipparchiaDB=# select ts,thumbprint,uidlist from storedvectors;
             ts          |  thumbprint  |   uidlist
    ---------------------+--------------+--------------
     2018-02-14 20:49:00 | json         | {lt0474w011}
     2018-02-14 20:50:00 | json         | {lt0474w057}
    (2 rows)

    :param so:
    :return:
    """

    currentvectorvalues = so.vectorvalues.getvectorvaluethumbprint()

    vectortype = so.vectorquerytype
    if vectortype == 'analogies':
        vectortype = 'nearestneighborsquery'

    uidlist = so.searchlistthumbprint

    # debugmessage('checkforstoredvector() checking for {u}'.format(u=uidlist))

    dbconnection = ConnectionObject()
    cursor = dbconnection.cursor()

    q = """
    SELECT calculatedvectorspace
        FROM public.storedvectors
        WHERE thumbprint=%s AND uidlist=%s AND vectortype=%s AND baggingmethod=%s
    """
    d = (currentvectorvalues, uidlist, vectortype, so.session['baggingmethod'])

    try:
        cursor.execute(q, d)
        result = cursor.fetchone()
    except psycopg2.ProgrammingError:
        # psycopg2.ProgrammingError: relation "public.storedvectors" does not exist
        # note that psycopg2.errors.UndefinedTable is a subclass of ProgrammingError,
        # so this also catches the missing-table case
        createvectorstable()
        result = False

    if not result:
        # debugmessage('checkforstoredvector(): returning "False"')
        dbconnection.connectioncleanup()
        return False

    returnval = pickle.loads(result[0])

    dbconnection.connectioncleanup()

    # debugmessage('checkforstoredvector(): returning a model')

    return returnval
def loadallworksasobjects() -> dict:
    """

    return a dict of all possible work objects

    :return:
    """

    print('loading all works... ', end='')

    dbconnection = ConnectionObject()
    cursor = dbconnection.cursor()

    q = """
    SELECT universalid, title, language, publication_info,
        levellabels_00, levellabels_01, levellabels_02, levellabels_03, levellabels_04, levellabels_05,
        workgenre, transmission, worktype, provenance, recorded_date, converted_date,
        wordcount, firstline, lastline, authentic
    FROM works
    """

    cursor.execute(q)
    results = resultiterator(cursor)

    worksdict = {r[0]: dbOpus(*r) for r in results}

    print('\t', len(worksdict), 'works loaded', end='')

    dbconnection.connectioncleanup()

    return worksdict
def probefordatabases() -> dict:
    """

    figure out which non-author tables are actually installed

    :return:
    """

    dbconnection = ConnectionObject()
    cursor = dbconnection.cursor()

    available = dict()

    possible = ['greek_dictionary', 'greek_lemmata', 'greek_morphology',
                'latin_dictionary', 'latin_lemmata', 'latin_morphology',
                'wordcounts_0']

    for p in possible:
        q = 'SELECT * FROM {table} LIMIT 1'.format(table=p)
        try:
            cursor.execute(q)
            results = cursor.fetchall()
        except psycopg2.ProgrammingError:
            # psycopg2.ProgrammingError: relation "greek_morphology" does not exist
            results = False
        if results:
            available[p] = True
        else:
            available[p] = False

    dbconnection.connectioncleanup()

    return available
def getpostgresserverversion() -> str:
    """

    what it says on the label...

    hipparchiaDB=# select version();
    ------------------------------------------------------------------------------------------------------------------
     PostgreSQL 11.1 on x86_64-apple-darwin17.7.0, compiled by Apple LLVM version 10.0.0 (clang-1000.11.45.5), 64-bit
    (1 row)

    hipparchiaDB=# SHOW server_version;
     server_version
    ----------------
     11.1
    (1 row)

    :return:
    """

    dbconnection = ConnectionObject()
    cursor = dbconnection.cursor()

    q = 'SHOW server_version;'

    cursor.execute(q)
    v = cursor.fetchone()
    version = v[0]

    dbconnection.connectioncleanup()

    return version
def bulkfindwordcounts(listofwords: List[str]) -> List[dbWordCountObject]:
    """

    note that the lists of words should all start with the same letter
    since the wordcount tables are letter-keyed

    hipparchiaDB=# CREATE TEMP TABLE bulkcounter_51807f8bbe08 AS
        SELECT values AS entriestocheck FROM unnest(ARRAY['κατακλειούϲηϲ', 'κατακλῇϲαι', 'κατακλεῖϲαι']) values;

    hipparchiaDB=# SELECT * FROM wordcounts_κ WHERE EXISTS
        (SELECT 1 FROM bulkcounter_51807f8bbe08 tocheck WHERE tocheck.entriestocheck = wordcounts_κ.entry_name);

      entry_name   | total_count | gr_count | lt_count | dp_count | in_count | ch_count
    ---------------+-------------+----------+----------+----------+----------+----------
     κατακλεῖϲαι   |          31 |       30 |        0 |        0 |        1 |        0
     κατακλειούϲηϲ |           3 |        3 |        0 |        0 |        0 |        0
     κατακλῇϲαι    |           1 |        1 |        0 |        0 |        0 |        0
    (3 rows)

    :param listofwords:
    :return:
    """

    dbconnection = ConnectionObject(readonlyconnection=False)
    dbcursor = dbconnection.cursor()

    try:
        firstletteroffirstword = stripaccents(listofwords[0][0])
    except IndexError:
        # do not leak the connection on an empty list
        dbconnection.connectioncleanup()
        return list()

    if firstletteroffirstword not in 'abcdefghijklmnopqrstuvwxyzαβψδεφγηιξκλμνοπρϲτυωχθζ':
        firstletteroffirstword = '0'

    tqtemplate = """
    CREATE TEMP TABLE bulkcounter_{rnd} AS
        SELECT values AS entriestocheck FROM unnest(ARRAY[%s]) values
    """

    uniquename = assignuniquename(12)
    tempquery = tqtemplate.format(rnd=uniquename)
    data = (listofwords,)
    dbcursor.execute(tempquery, data)

    qtemplate = """
    SELECT * FROM wordcounts_{x} WHERE EXISTS
        (SELECT 1 FROM bulkcounter_{rnd} tocheck WHERE tocheck.entriestocheck = wordcounts_{x}.entry_name)
    """

    query = qtemplate.format(rnd=uniquename, x=firstletteroffirstword)

    try:
        dbcursor.execute(query)
        results = resultiterator(dbcursor)
    except psycopg2.ProgrammingError:
        # if you do not have the wordcounts installed:
        # 'psycopg2.ProgrammingError: relation "wordcounts_a" does not exist'
        results = list()

    wordcountobjects = [dbWordCountObject(*r) for r in results]

    dbconnection.connectioncleanup()

    return wordcountobjects
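
# A minimal usage sketch; the three Greek forms come from the docstring sample
# above (all letter-keyed to wordcounts_κ).
def _example_bulkfindwordcounts():
    counts = bulkfindwordcounts(['κατακλειούϲηϲ', 'κατακλῇϲαι', 'κατακλεῖϲαι'])
    print(len(counts), 'forms counted')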
def sampleworkcitation(authorid: str, workid: str) -> JSON_STR:
    """

    called by loadsamplecitation() in autocomplete.js

    we are using the manual input style on the web page,
    so we need some hint on how to do things: check the end line for a sample citation

    "In Timarchum (w001)" yields...

    127.0.0.1 - - [04/Apr/2021 13:48:53] "GET /get/json/samplecitation/gr0026/001 HTTP/1.1" 200 -
    /get/json/samplecitation
    {"firstline": "1.1", "lastline": "196.7"}

    :param authorid:
    :param workid:
    :return:
    """

    dbconnection = ConnectionObject()
    dbcursor = dbconnection.cursor()

    returnvals = dict()
    returnvals['firstline'] = str()
    returnvals['lastline'] = str()

    authorid = depunct(authorid)
    workid = depunct(workid)

    try:
        # ao is unused below; the lookup just validates that the author exists
        ao = authordict[authorid]
        wo = workdict[authorid + 'w' + workid]
    except KeyError:
        returnvals['firstline'] = 'no such author/work combination'
        dbconnection.connectioncleanup()
        return json.dumps(returnvals)

    toplevel = wo.availablelevels - 1
    firstlineindex = returnfirstorlastlinenumber(wo.universalid, dbcursor, disallowt=True, disallowlevel=toplevel)
    flo = dblineintolineobject(grabonelinefromwork(authorid, firstlineindex, dbcursor))

    lastlineindex = returnfirstorlastlinenumber(wo.universalid, dbcursor, findlastline=True)
    llo = dblineintolineobject(grabonelinefromwork(authorid, lastlineindex, dbcursor))

    returnvals['firstline'] = flo.prolixlocus()
    returnvals['lastline'] = llo.prolixlocus()

    results = json.dumps(returnvals)

    dbconnection.connectioncleanup()

    return results
def querytotalwordcounts(word: str, dbcursor=None) -> dbHeadwordObject:
    """

    use the dictionary_headword_wordcounts table

    [a] take a dictionary entry: ἄκρατοϲ
    [b] look it up

    return a countobject

    :param word:
    :param dbcursor:
    :return:
    """

    dbconnection = None
    if not dbcursor:
        dbconnection = ConnectionObject()
        dbconnection.setautocommit()
        dbcursor = dbconnection.cursor()

    table = 'dictionary_headword_wordcounts'
    qtemplate = """
    SELECT
        entry_name, total_count, gr_count, lt_count, dp_count, in_count, ch_count,
        frequency_classification, early_occurrences, middle_occurrences, late_occurrences,
        acta, agric, alchem, anthol, apocalyp, apocryph, apol, astrol, astron, biogr, bucol,
        caten, chronogr, comic, comm, concil, coq, dialog, docu, doxogr, eccl, eleg, encom,
        epic, epigr, epist, evangel, exeget, fab, geogr, gnom, gramm, hagiogr, hexametr,
        hist, homilet, hymn, hypoth, iamb, ignotum, invectiv, inscr, jurisprud, lexicogr,
        liturg, lyr, magica, math, mech, med, metrolog, mim, mus, myth, narrfict, nathist,
        onir, orac, orat, paradox, parod, paroem, perieg, phil, physiognom, poem, polyhist,
        prophet, pseudepigr, rhet, satura, satyr, schol, tact, test, theol, trag
    FROM {tbl} WHERE entry_name=%s
    """
    q = qtemplate.format(tbl=table)
    d = (word,)

    try:
        dbcursor.execute(q, d)
        hw = dbcursor.fetchone()
    except psycopg2.ProgrammingError:
        # psycopg2.ProgrammingError: relation "dictionary_headword_wordcounts" does not exist
        # you have not installed the wordcounts (yet)
        hw = None

    try:
        hwcountobject = dbHeadwordObject(*hw)
    except TypeError:
        # hw was None and cannot be unpacked
        # print('failed to initialize dbHeadwordObject for', word)
        hwcountobject = None

    if dbconnection:
        dbconnection.connectioncleanup()

    return hwcountobject
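
# A minimal usage sketch; ἄκρατοϲ is the docstring's own sample entry, and the
# 'total_count' attribute name is an assumption based on the column list above.
def _example_querytotalwordcounts():
    hw = querytotalwordcounts('ἄκρατοϲ')
    if hw:
        print('ἄκρατοϲ total_count:', hw.total_count)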
def trimbypartofspeech(listofwords: List[str], partofspeech: str, baggingmethod: str) -> set:
    """

    return only the verbs, e.g., in a list of words

    :param listofwords:
    :param partofspeech:
    :param baggingmethod:
    :return:
    """

    # needs to match the list in sessionfunctions.py less 'none'
    trimmingmethods = ['conjugated', 'declined']

    if partofspeech not in trimmingmethods:
        return set(listofwords)

    dbconnection = ConnectionObject()
    dbcursor = dbconnection.cursor()

    morphologyobjectdict = {w: lookformorphologymatches(w, dbcursor) for w in listofwords}
    dbconnection.connectioncleanup()

    # {'serius¹': None, 'solacium': <server.hipparchiaobjects.dbtextobjects.dbMorphologyObject object at 0x155362780>, ... }
    possibilitieslistdict = {m: morphologyobjectdict[m].getpossible()
                             for m in morphologyobjectdict if morphologyobjectdict[m]}

    possible = set()
    if partofspeech == 'conjugated':
        possible = {m for m in possibilitieslistdict
                    if any(p.isconjugatedverb(bagging=baggingmethod) for p in possibilitieslistdict[m])}

    if partofspeech == 'declined':
        possible = {m for m in possibilitieslistdict
                    if any(p.isnounoradjective(bagging=baggingmethod) for p in possibilitieslistdict[m])}

    trimmedlist = {w for w in listofwords if w in possible}

    return trimmedlist
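
# A minimal sketch: keep only the conjugated verbs in a short Latin list. The
# 'winnertakesall' baggingmethod value is hypothetical; in the callers it comes
# from so.session['baggingmethod'].
def _example_trimbypartofspeech():
    words = ['arma', 'cano', 'solacium']
    verbs = trimbypartofspeech(words, 'conjugated', 'winnertakesall')
    print(verbs)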
def fetchvectorgraph(imagename) -> bytes:
    """

    grab a graph in the image table so that you can subsequently display it in the browser

    note that images get deleted after use

    also note that we hand the data to the db and then immediately grab it out of the db
    because of constraints imposed by the way flask works

    :param imagename:
    :return:
    """

    deletewhendone = not hipparchia.config['RETAINFIGURES']

    dbconnection = ConnectionObject(ctype='rw')
    dbconnection.setautocommit()
    cursor = dbconnection.cursor()

    q = 'SELECT imagedata FROM public.storedvectorimages WHERE imagename=%s'
    d = (imagename,)
    cursor.execute(q, d)
    imagedata = cursor.fetchone()

    # need to convert to bytes, otherwise:
    # AttributeError: 'memoryview' object has no attribute 'read'
    try:
        imagedata = bytes(imagedata[0])
    except TypeError:
        # TypeError: 'NoneType' object is not subscriptable
        # how did this happen...
        # if you right click and download a graph in Firefox it will try to pull via the URL
        # but that figure is almost certainly gone unless you are a debugger retaining figures...
        imagedata = b''
        consolewarning('fetchvectorgraph() failed to fetch image {i}'.format(i=imagename))

    # print('fetched {n} from vector image table'.format(n=randomid))

    # now we should delete the image because we are done with it
    if deletewhendone:
        q = 'DELETE FROM public.storedvectorimages WHERE imagename=%s'
        d = (imagename,)
        cursor.execute(q, d)

    dbconnection.connectioncleanup()

    return imagedata
def partialworkbetweenclausecontents(workobject: dbOpus, searchobject: SearchObject) -> Tuple[str, Dict[str, list]]:
    """

    example: Xenophon, Hellenica, Book 1 less Chapter 3

    endpoints ('gr0032w001', {'listofboundaries': [(1, 907)], 'listofomissions': [(257, 349)]})

    :param workobject:
    :param searchobject:
    :return:
    """

    hasselections = [p[0:10] for p in searchobject.psgselections if p]

    dbconnection = ConnectionObject('autocommit')
    dbcursor = dbconnection.cursor()

    blist = list()
    olist = list()

    for sel in searchobject.psgselections:
        if workobject.universalid == sel[0:10]:
            boundariestuple = findselectionboundaries(workobject, sel, dbcursor)
            blist.append(boundariestuple)

    for sel in searchobject.psgexclusions:
        if workobject.universalid == sel[0:10]:
            boundariestuple = findselectionboundaries(workobject, sel, dbcursor)
            olist.append(boundariestuple)

    if workobject.universalid not in hasselections:
        # if you exclude a subsection, then you implicitly include the whole
        # unless you have only selected a higher level subsection
        # exclude x., mem. 3 means you want to search x., mem.
        # BUT exclude x., mem. 3.4 has a different force if you included x., mem. 3
        blist.append((workobject.starts, workobject.ends))

    blist = list(set(blist))
    olist = list(set(olist))

    endpoints = (workobject.universalid, {'listofboundaries': blist, 'listofomissions': olist})

    dbconnection.connectioncleanup()

    return endpoints
def reversedictionarylookup(seeking: str, usedict: str, limit=None) -> List:
    """

    find an (approximate) entry in a dictionary

    note the syntax: ~

    return a list of wordobjects

    :param seeking:
    :param usedict:
    :param limit:
    :return:
    """

    cleanpoolifneeded()
    dbconnection = ConnectionObject()
    dbconnection.setautocommit()
    dbcursor = dbconnection.cursor()

    assert usedict in ['greek', 'latin'], 'reversedictionarylookup() needs usedict to be "greek" or "latin"'

    objecttemplate = None

    fields = 'entry_name, metrical_entry, id_number, pos, translations, entry_body, {extra}'

    if usedict == 'greek':
        objecttemplate = dbGreekWord
        fields = fields.format(extra='unaccented_entry')
    elif usedict == 'latin':
        objecttemplate = dbLatinWord
        fields = fields.format(extra='entry_key')

    if limit:
        qstring = 'SELECT {f} FROM {d}_dictionary WHERE translations ~ %s LIMIT {lim}'
    else:
        qstring = 'SELECT {f} FROM {d}_dictionary WHERE translations ~ %s'

    query = qstring.format(f=fields, d=usedict, lim=limit)
    data = (seeking,)
    dbcursor.execute(query, data)
    matches = dbcursor.fetchall()

    wordobjects = [objecttemplate(*m) for m in matches]

    dbconnection.connectioncleanup()

    return wordobjects
def bulklexicalgrab(listofwords: List[str], tabletouse: str, targetcolumn: str, language: str) -> list:
    """

    grab a bunch of lex/morph entries by using a temp table

    e.g.,
        lexicalresults = bulklexicalgrab(listofwords, 'dictionary', 'entry_name', language)
        results = bulklexicalgrab(listofwords, 'morphology', 'observed_form', language)

    :param listofwords:
    :param tabletouse:
    :param targetcolumn:
    :param language:
    :return:
    """

    dbconnection = ConnectionObject(readonlyconnection=False)
    dbcursor = dbconnection.cursor()

    tqtemplate = """
    CREATE TEMP TABLE bulklex_{rnd} AS
        SELECT values AS entriestocheck FROM unnest(ARRAY[%s]) values
    """

    uniquename = assignuniquename(12)
    tempquery = tqtemplate.format(rnd=uniquename)
    data = (listofwords,)
    dbcursor.execute(tempquery, data)

    qtemplate = """
    SELECT * FROM {lg}_{thetable} WHERE EXISTS
        (SELECT 1 FROM bulklex_{rnd} tocheck WHERE tocheck.entriestocheck = {lg}_{thetable}.{target})
    """

    query = qtemplate.format(rnd=uniquename, thetable=tabletouse, target=targetcolumn, lg=language)

    try:
        dbcursor.execute(query)
        # materialize the rows before the connection is cleaned up
        results = list(resultiterator(dbcursor))
    except psycopg2.ProgrammingError:
        # psycopg2.ProgrammingError: relation does not exist
        # i.e., this lexical dataset has not been installed
        results = list()

    dbconnection.connectioncleanup()

    return results
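
# A minimal sketch reusing the docstring's own sample call: pull dictionary
# rows for a couple of Latin headwords.
def _example_bulklexicalgrab():
    rows = bulklexicalgrab(['amor', 'arma'], 'dictionary', 'entry_name', 'latin')
    print(len(rows), 'dictionary rows fetched')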
def cleanpoolifneeded():
    """

    clean out the pool if necessary before starting

    this seems like the safest time for a reset of the pool: otherwise you could have workers working

    but if you have a multi-user environment AND pool problems this code might make things worse

    :return:
    """
    if hipparchia.config['ENABLEPOOLCLEANING'] and hipparchia.config['CONNECTIONTYPE'] == 'pool':
        c = ConnectionObject()
        if c.poolneedscleaning:
            c.resetpool()
        c.connectioncleanup()
    return
def createvectorstable():
    """

    zap and reconstitute the storedvectors table

    :return:
    """

    consolewarning('resetting the stored vectors table', color='green')

    dbconnection = ConnectionObject(ctype='rw')
    dbcursor = dbconnection.cursor()

    query = """
    DROP TABLE IF EXISTS public.storedvectors;

    CREATE TABLE public.storedvectors
    (
        ts timestamp without time zone,
        thumbprint character varying(32) COLLATE pg_catalog."default",
        uidlist character varying(32) COLLATE pg_catalog."default",
        vectortype character varying(24) COLLATE pg_catalog."default",
        baggingmethod character varying(24) COLLATE pg_catalog."default",
        calculatedvectorspace bytea
    )
    WITH (
        OIDS = FALSE
    )
    TABLESPACE pg_default;

    ALTER TABLE public.storedvectors OWNER to hippa_wr;

    GRANT SELECT ON TABLE public.storedvectors TO {reader};

    GRANT ALL ON TABLE public.storedvectors TO {writer};
    """

    query = query.format(reader=hipparchia.config['DBUSER'], writer=hipparchia.config['DBWRITEUSER'])

    dbcursor.execute(query)

    dbconnection.connectioncleanup()

    return
def monobreaktextsintosentences(searchlist: list, searchobject) -> List[tuple]:
    """

    a wrapper for breaktextsintosentences() since Windows can't MP it...

    findsentences() results[0]
    ('line/gr0014w001/1', 'ἀντὶ πολλῶν ἄν ὦ ἄνδρεϲ ἀθηναῖοι χρημάτων ὑμᾶϲ ἑλέϲθαι νομίζω εἰ φανερὸν γένοιτο τὸ μέλλον ϲυνοίϲειν τῇ πόλει περὶ ὧν νυνὶ ϲκοπεῖτε')

    :param searchlist:
    :param searchobject:
    :return:
    """
    foundsentences = list()

    dbconnection = ConnectionObject(readonlyconnection=False)
    foundsentences = breaktextsintosentences(foundsentences, searchlist, searchobject, dbconnection)
    dbconnection.connectioncleanup()

    fs = list(foundsentences)
    return fs
def loadlemmataasobjects() -> dict:
    """

    return a dict of all possible lemmataobjects

    hipparchiaDB=# select * from greek_lemmata limit 1;
     dictionary_entry | xref_number |    derivative_forms
    ------------------+-------------+------------------------
     ζῳοτροφία        |    49550639 | {ζῳοτροφίᾳ,ζῳοτροφίαϲ}

    :return:
    """

    print('loading all lemmata...', end=str())
    dbconnection = ConnectionObject()
    cursor = dbconnection.cursor()

    q = """
    SELECT dictionary_entry, xref_number, derivative_forms FROM {lang}_lemmata
    """

    lemmatadict = dict()
    languages = {1: 'greek', 2: 'latin'}

    for key in languages:
        cursor.execute(q.format(lang=languages[key]))
        results = resultiterator(cursor)
        lemmatadict = {**{r[0]: dbLemmaObject(*r) for r in results}, **lemmatadict}

    print('\t', len(lemmatadict), 'lemmata loaded', end=str())
    # print('lemmatadict["molestus"]', lemmatadict['molestus'].formlist)
    # print('lemmatadict["Mausoleus"]', lemmatadict['Mausoleus'].formlist)
    # print('lemmatadict["λύω"]', lemmatadict['λύω'].formlist)
    # print('lemmatadict["Δημοϲθένηϲ"]', lemmatadict['Δημοϲθένηϲ'].formlist)

    dbconnection.connectioncleanup()

    return lemmatadict
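
# A minimal sketch reusing the lookups from the commented-out prints above:
# the dict is keyed by dictionary_entry and dbLemmaObject exposes a formlist.
def _example_loadlemmataasobjects():
    lemmatadict = loadlemmataasobjects()
    print(lemmatadict['λύω'].formlist)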
def createstoredimagestable():
    """

    zap and reconstitute the storedimages table

    :return:
    """

    consolewarning('resetting the stored images table', color='green')

    dbconnection = ConnectionObject(ctype='rw')
    dbcursor = dbconnection.cursor()

    query = """
    DROP TABLE IF EXISTS public.storedvectorimages;

    CREATE TABLE public.storedvectorimages
    (
        imagename character varying(12),
        imagedata bytea
    )
    WITH (
        OIDS = FALSE
    )
    TABLESPACE pg_default;

    ALTER TABLE public.storedvectorimages OWNER to hippa_wr;

    GRANT SELECT ON TABLE public.storedvectorimages TO {reader};

    GRANT ALL ON TABLE public.storedvectorimages TO {writer};
    """

    query = query.format(reader=hipparchia.config['DBUSER'], writer=hipparchia.config['DBWRITEUSER'])

    dbcursor.execute(query)

    dbconnection.connectioncleanup()

    return
def findworkstructure(author, work, passage=None) -> JSON_STR:
    """

    request detailed info about how a work works

    this is fed back to the js boxes: which should be active, what are the autocomplete values, etc?

    127.0.0.1 - - [04/Apr/2021 13:36:16] "GET /get/json/workstructure/lt0474/037 HTTP/1.1" 200 -
    /get/json/workstructure
    {"totallevels": 3, "level": 2, "label": "book", "low": "1", "high": "3", "range": ["1", "2", "3"]}

    :return:
    """

    dbconnection = ConnectionObject()
    dbcursor = dbconnection.cursor()

    po = StructureInputParsingObject(author, work, passage)

    wo = po.workobject

    ws = dict()

    if wo:
        lowandhigh = findvalidlevelvalues(wo, po.getcitationtuple(), dbcursor)
        # example: (4, 3, 'Book', '1', '7', ['1', '2', '3', '4', '5', '6', '7'])
        ws['totallevels'] = lowandhigh.levelsavailable
        ws['level'] = lowandhigh.currentlevel
        ws['label'] = lowandhigh.levellabel
        ws['low'] = lowandhigh.low
        ws['high'] = lowandhigh.high
        ws['range'] = lowandhigh.valuerange
    else:
        # (2, 0, 'verse', '1', '100')
        ws['totallevels'] = 1
        ws['level'] = 0
        ws['label'] = 'Error: repick the work'
        ws['low'] = 'Error:'
        ws['high'] = 'again'
        ws['range'] = ['error', 'select', 'the', 'work', 'again']

    results = json.dumps(ws)

    dbconnection.connectioncleanup()

    return results
def findparserxref(wordobject) -> str:
    """

    used in LEXDEBUGMODE to find the parser xrefvalue for a headword

    :param wordobject:
    :return:
    """

    dbconnection = ConnectionObject()
    dbcursor = dbconnection.cursor()

    if wordobject.isgreek():
        lang = 'greek'
    else:
        lang = 'latin'

    trimmedentry = re.sub(r'[¹²³⁴⁵⁶⁷⁸⁹]', '', wordobject.entry)

    q = 'SELECT * FROM {lang}_lemmata WHERE dictionary_entry=%s'.format(lang=lang)
    d = (wordobject.entry,)
    dbcursor.execute(q, d)
    results = dbcursor.fetchall()

    if not results:
        d = (trimmedentry,)
        dbcursor.execute(q, d)
        results = dbcursor.fetchall()

    # it is not clear that more than one item will ever be returned
    # but if that happened, you need to be ready to deal with it
    lemmaobjects = [dbLemmaObject(*r) for r in results]
    xrefs = [str(l.xref) for l in lemmaobjects]

    xrefvalues = ', '.join(xrefs)

    dbconnection.connectioncleanup()

    return xrefvalues
def storevectorgraph(figureasbytes):
    """

    store a graph in the image table so that you can subsequently display it in the browser

    note that images get deleted after use

    also note that we hand the data to the db and then immediately grab it out of the db
    because of constraints imposed by the way flask works

    :param figureasbytes:
    :return:
    """

    dbconnection = ConnectionObject(ctype='rw')
    dbconnection.setautocommit()
    cursor = dbconnection.cursor()

    # avoid psycopg2.DataError: value too long for type character varying(12)
    randomid = assignuniquename(12)

    q = """
    INSERT INTO public.storedvectorimages
        (imagename, imagedata)
        VALUES (%s, %s)
    """

    d = (randomid, figureasbytes)
    try:
        cursor.execute(q, d)
    except psycopg2.ProgrammingError:
        # psycopg2.ProgrammingError: relation "public.storedvectorimages" does not exist
        createstoredimagestable()
        cursor.execute(q, d)

    # print('stored {n} in vector image table'.format(n=randomid))

    dbconnection.connectioncleanup()

    return randomid
def paredowntowithinxwords(so: SearchObject, firstterm: str, secondterm: str, hitlines: List[dbWorkLine]) -> List[dbWorkLine]:
    """

    pare down the hitlines: a firstterm hit only counts as a full match if secondterm
    appears in the surrounding words (or is absent from them, for a 'not near' search)

    """

    so.poll.sethits(0)

    dbconnection = ConnectionObject()
    dbcursor = dbconnection.cursor()

    fullmatches = list()

    commitcount = 0
    while hitlines and len(fullmatches) < so.cap:
        commitcount += 1
        if commitcount == hipparchia.config['MPCOMMITCOUNT']:
            dbconnection.commit()
            commitcount = 0
        hit = hitlines.pop()
        leadandlag = grableadingandlagging(hit, so, dbcursor, firstterm)
        # debugmessage('leadandlag for {h}: {l}'.format(h=hit.uniqueid, l=leadandlag))
        lagging = leadandlag['lag']
        leading = leadandlag['lead']

        if so.near and (re.search(secondterm, leading) or re.search(secondterm, lagging)):
            fullmatches.append(hit)
            so.poll.addhits(1)
        elif not so.near and not re.search(secondterm, leading) and not re.search(secondterm, lagging):
            fullmatches.append(hit)
            so.poll.addhits(1)

    dbconnection.connectioncleanup()

    return fullmatches
def mpmorphology(terms: list, furtherdeabbreviate: bool, dictofmorphobjects, dbconnection: ConnectionObject) -> dict:
    """

    build a dict of morphology objects

    :param terms:
    :param furtherdeabbreviate:
    :param dictofmorphobjects:
    :param dbconnection:
    :return:
    """

    if not dbconnection:
        dbconnection = ConnectionObject()

    dbcursor = dbconnection.cursor()

    commitcount = 0
    while terms:
        commitcount += 1
        dbconnection.checkneedtocommit(commitcount)
        try:
            term = terms.pop()
        except IndexError:
            term = None

        if term:
            mo = lookformorphologymatches(term, dbcursor, furtherdeabbreviate=furtherdeabbreviate)
            if mo:
                dictofmorphobjects[term] = mo
            else:
                dictofmorphobjects[term] = None

    if not icanpickleconnections():
        dbconnection.connectioncleanup()

    return dictofmorphobjects
def findcountsviawordcountstable(wordtocheck):
    """

    used to look up a list of specific observed forms (vs. dictionary headwords)

    :param wordtocheck:
    :return:
    """

    dbconnection = ConnectionObject()
    dbcursor = dbconnection.cursor()

    initial = stripaccents(wordtocheck[0])
    # alternatives = re.sub(r'[uv]','[uv]',c)
    # alternatives = '^'+alternatives+'$'
    if initial in 'abcdefghijklmnopqrstuvwxyzαβψδεφγηιξκλμνοπρϲτυωχθζ':
        # note that we just lost "'φερον", "'φερεν", "'φέρεν", "'φερεϲ", "'φερε",...
        # but the punctuation killer probably zapped them long ago
        # this needs to be addressed in HipparchiaBuilder
        q = 'SELECT * FROM wordcounts_{i} WHERE entry_name = %s'.format(i=initial)
    else:
        q = 'SELECT * FROM wordcounts_0 WHERE entry_name = %s'

    d = (wordtocheck,)
    try:
        dbcursor.execute(q, d)
        result = dbcursor.fetchone()
    except psycopg2.ProgrammingError:
        # psycopg2.ProgrammingError: relation "wordcounts_ε" does not exist
        # you did not build the wordcounts at all?
        result = None

    dbconnection.connectioncleanup()

    return result
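
# A minimal sketch; the raw row layout (entry_name, total_count, gr_count, ...)
# is assumed to match the wordcounts sample shown in bulkfindwordcounts() above.
def _example_findcountsviawordcountstable():
    row = findcountsviawordcountstable('κατακλεῖϲαι')
    if row:
        print(row[0], 'total:', row[1])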
def grablemmataobjectfor(db, dbcursor=None, word=None, xref=None, allowsuperscripts=False):
    """

    send a word, return a lemmaobject

    hipparchiaDB=# select * from greek_lemmata limit 0;
     dictionary_entry | xref_number | derivative_forms
    ------------------+-------------+------------------

    EITHER 'word' should be set OR 'xref' should be set: not both

    at the moment we only use 'word' in both calls to this function:
        hipparchiaobjects/lexicaloutputobjects.py
        hipparchiaobjects/morphanalysisobjects.py

    'allowsuperscripts' because sometimes you are supposed to search under δέω² and sometimes you are not...

    :param db:
    :param dbcursor:
    :param word:
    :param xref:
    :param allowsuperscripts:
    :return:
    """

    dbconnection = None
    if not dbcursor:
        dbconnection = ConnectionObject()
        dbconnection.setautocommit()
        dbcursor = dbconnection.cursor()

    field = str()
    data = None

    if xref:
        field = 'xref_number'
        data = xref

    if word:
        field = 'dictionary_entry'
        data = word
        if not allowsuperscripts:
            data = re.sub(r'[¹²³⁴⁵⁶⁷⁸⁹]', '', data)

    if not session['available'][db]:
        lo = dbLemmaObject('[parsing is impossible: lemmata data was not installed]', -1, '')
        if dbconnection:
            dbconnection.connectioncleanup()
        return lo

    if not data:
        lo = dbLemmaObject('[programming error: no word or xref set in grablemmataobjectfor()]', -1, '')
        if dbconnection:
            dbconnection.connectioncleanup()
        return lo

    q = 'SELECT * FROM {db} WHERE {f}=%s'.format(db=db, f=field)
    d = (data,)

    dbcursor.execute(q, d)
    lem = dbcursor.fetchone()

    try:
        lemmaobject = dbLemmaObject(*lem)
    except TypeError:
        # 'NoneType' object is not subscriptable
        lemmaobject = dbLemmaObject('[entry not found]', -1, '')

    if dbconnection:
        dbconnection.connectioncleanup()

    return lemmaobject
def probedictionary(usedictionary: str, usecolumn: str, seeking: str, syntax: str, dbcursor=None, trialnumber=0) -> List:
    """

    this will make several stabs at finding a word in the dictionary

    we need to do this because sometimes a find in the morphology dictionary
    does not point to something you can find in the dictionary of meanings

    sample values:
        dictionary: 'greek_dictionary'
        usecolumn: 'entry_name'
        seeking: 'προχοΐδιον'
        syntax: '=' or 'LIKE'

    still unimplemented:
        τήθη vs τηθή; the parser has the latter, the dictionary expects the former (but knows of the latter)

    :param usedictionary:
    :param usecolumn:
    :param seeking:
    :param syntax:
    :param dbcursor:
    :param trialnumber:
    :return:
    """
    # print('seeking/trial', seeking, trialnumber)

    dbconnection = None
    if not dbcursor:
        dbconnection = ConnectionObject()
        dbconnection.setautocommit()
        dbcursor = dbconnection.cursor()

    maxtrials = 8
    trialnumber += 1
    accenteddiaresis = re.compile(r'αί|εί|οί|υί|ηί|ωί')
    unaccenteddiaresis = re.compile(r'αι|ει|οι|υι|ηι|ωι')

    # nothingfound = convertdictionaryfindintoobject('nothing', 'nodict')

    if usedictionary == 'latin_dictionary':
        extracolumn = 'entry_key'
    else:
        extracolumn = 'unaccented_entry'

    qtemplate = """SELECT entry_name, metrical_entry, id_number, pos, translations,
                entry_body, {ec}
            FROM {d} WHERE {col} {sy} %s ORDER BY id_number ASC"""
    query = qtemplate.format(ec=extracolumn, d=usedictionary, col=usecolumn, sy=syntax)
    data = (seeking,)
    # print('searchdictionary()', query, '\n\t', data)

    try:
        dbcursor.execute(query, data)
        found = dbcursor.fetchall()
    except psycopg2.DataError:
        # thrown by dbcursor.execute()
        # invalid regular expression: parentheses () not balanced
        # ό)μβροϲ is a (bogus) headword; how many others are there?
        found = list()

    # we might be at trial 2+ and so we need to strip the supplement we used at trial #1
    if trialnumber > 2:
        seeking = re.sub(r'\[¹²³⁴⁵⁶⁷⁸⁹\]', '', seeking)
        seeking = re.sub(r'\^', '', seeking)

    foundobjects = None

    if len(found) > 0:
        foundobjects = [convertdictionaryfindintowordobject(f, usedictionary, dbcursor) for f in found]
    elif trialnumber == 1:
        # failure...
        # the word is probably there, we have just been given the wrong search term; try some other solutions
        # [1] first guess: there were multiple possible entries, not just one
        newword = re.sub(r'[¹²³⁴⁵⁶⁷⁸⁹]', '', seeking.lower())
        foundobjects = probedictionary(usedictionary, usecolumn, newword, '=', dbcursor, trialnumber)
    elif trialnumber == 2:
        # grab any/all variants: ⁰¹²³⁴⁵⁶⁷⁸⁹
        newword = '^{sk}[¹²³⁴⁵⁶⁷⁸⁹]'.format(sk=seeking)
        foundobjects = probedictionary(usedictionary, usecolumn, newword, '~', dbcursor, trialnumber)
    # elif trialnumber < maxtrials and '-' in seeking:
    #     newword = attemptelision(seeking)
    #     foundobject = searchdictionary(cursor, dictionary, usecolumn, newword, '=', trialnumber)
    elif trialnumber < maxtrials and seeking[-1] == 'ω':
        # ὑποϲυναλείφομαι is in the dictionary, but greek_lemmata says to look for ὑπό-ϲυναλείφω
        newword = seeking[:-1] + 'ομαι'
        foundobjects = probedictionary(usedictionary, usecolumn, newword, '=', dbcursor, trialnumber)
    elif trialnumber < maxtrials and re.search(r'ομαι$', seeking):
        # χαρίζω is in the dictionary, but greek_lemmata says to look for χαρίζομαι
        newword = seeking[:-4] + 'ω'
        foundobjects = probedictionary(usedictionary, usecolumn, newword, '=', dbcursor, trialnumber)
    elif trialnumber < maxtrials and re.search(accenteddiaresis, seeking):
        # false positives very easy here, but we are getting desperate and have nothing to lose
        diaresis = re.search(accenteddiaresis, seeking)
        head = seeking[:diaresis.start()]
        tail = seeking[diaresis.end():]
        vowels = diaresis.group(0)
        vowels = vowels[0] + 'ΐ'
        newword = head + vowels + tail
        foundobjects = probedictionary(usedictionary, usecolumn, newword, '=', dbcursor, trialnumber)
    elif trialnumber < maxtrials and re.search(unaccenteddiaresis, seeking):
        diaresis = re.search(unaccenteddiaresis, seeking)
        head = seeking[:diaresis.start()]
        tail = seeking[diaresis.end():]
        vowels = diaresis.group(0)
        vowels = vowels[0] + 'ϊ'
        newword = head + vowels + tail
        foundobjects = probedictionary(usedictionary, usecolumn, newword, '=', dbcursor, trialnumber)
    elif trialnumber < maxtrials:
        # τήθη vs τηθή; the parser has the latter, the dictionary expects the former (but knows of the latter)
        trialnumber = maxtrials - 1
        newword = re.sub(r'\[¹²³⁴⁵⁶⁷⁸⁹\]', '', seeking)
        newword = stripaccents(newword)
        newword = universalregexequivalent(newword)
        # strip '(' and ')'
        newword = '^{wd}$'.format(wd=newword[1:-1])
        foundobjects = probedictionary(usedictionary, usecolumn, newword, '~', dbcursor, trialnumber)

    if dbconnection:
        dbconnection.connectioncleanup()

    return foundobjects
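
# A minimal sketch using the docstring's sample values: an exact-match probe of
# the Greek dictionary for προχοΐδιον. Note that probedictionary() can return
# None when every trial fails.
def _example_probedictionary():
    words = probedictionary('greek_dictionary', 'entry_name', 'προχοΐδιον', '=')
    print(len(words or []), 'entries found')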