Пример #1
0
def browserfindlinenumberfromperseus(citationlist: list, workobject: dbOpus,
                                     resultmessage: str, dbcursor) -> tuple:
    """
    Turn a Perseus-style citation into a database line number.

    here comes the fun part: alien format; inconsistent citation style; incorrect data...

    sample url:

        /browse/perseus/gr0016w001/7:130

    is
        Herodotus, Historiae, Book 7, section 130

    :param citationlist: the citation elements; items may still carry labels
        ('life=cal.', 'section=32') or a stray prefix in the last item ('J. 79').
        When no labels are present the last item of the caller's list may be
        rewritten in place.
    :param workobject: the work the citation is supposed to belong to
    :param resultmessage: kept for signature compatibility; the incoming value
        is never read and is replaced by the status code of the lookup
    :param dbcursor: handed through to the lookup helpers
    :return: (linenumber, resultmessage) taken from the 'line' and 'code' keys
        of what finddblinefromincompletelocus() returns
    """

    try:
        # dict does not always agree with our ids...
        # do an imperfect test for this by inviting the exception
        # you can still get a valid but wrong work, of course,
        # but if you ask for w001 and only w003 exists, this is supposed to take care of that
        returnfirstorlastlinenumber(workobject.universalid, dbcursor)
    except Exception:
        # 'Exception' rather than a bare 'except:' so that KeyboardInterrupt and
        # SystemExit are not swallowed by this probe
        # dict did not agree with our ids...: euripides, esp
        # what follows is a 'hope for the best' approach
        workid = perseusidmismatch(workobject.universalid, dbcursor)
        workobject = workdict[workid]

    # life=cal. or section=32
    # this has already been nuked?
    if any(len(c.split('=')) > 1 for c in citationlist):
        citationlist = perseusdelabeler(citationlist, workobject)

    # another problem 'J. ' in sallust <bibl id="perseus/lt0631w001/J. 79:3" default="NO" valid="yes"><author>Sall.</author> J. 79, 3</bibl>
    # 'lt0631w002/79:3' is what you need to send to finddblinefromincompletelocus()
    # note that the work number is wrong, so the next is only a partial fix and valid only if wNNN has been set right

    # an empty citation has no last item to clean: pass it along untouched and
    # let the lookup decide what to report instead of dying on an IndexError here
    if citationlist and ' ' in citationlist[-1]:
        citationlist[-1] = citationlist[-1].split(' ')[-1]

    p = finddblinefromincompletelocus(workobject, citationlist, dbcursor)
    resultmessage = p['code']
    thelinenumber = p['line']

    return thelinenumber, resultmessage
def textsegmentfindstartandstop(authorobject, workobject, passageaslist,
                                cursor) -> dict:
    """
    Find the first and last database lines of one segment of a work.

    The first line comes from resolving the (possibly incomplete) locus; the
    last line is the highest index in the author's table that matches the
    same locus restriction.

    Side effect: passageaslist is reversed IN PLACE. Callers in this file join
    the list for display after calling this function, so do not "fix" that
    without checking them.

    :param authorobject: supplies universalid, used as the table to query
    :param workobject: supplies universalid, used to restrict the query
    :param passageaslist: locus elements in the order that
        finddblinefromincompletelocus() accepts
    :param cursor: database cursor used for the lookups and the final query
    :return: {'startline': ..., 'endline': ...}
    """

    # the locus has to be resolved before the list is flipped below
    locus = tuple(passageaslist)
    located = finddblinefromincompletelocus(workobject, locus, cursor)
    # located['code'] is not inspected: it is assumed to be 'success'
    # (the code is allegedly only relevant to the Perseus lookup problem where a bad locus can be sent)
    rawline = grabonelinefromwork(authorobject.universalid, located['line'], cursor)
    firstline = dblineintolineobject(rawline)

    # let's say you looked for 'book 2' of something that has 'book, chapter, line'
    # that means that you want everything that has the same level2 value as the lineobject
    # so build a where clause out of an '_AT_' selection
    passageaslist.reverse()
    selection = '{uid}_AT_{line}'.format(uid=workobject.universalid,
                                         line='|'.join(passageaslist))

    restrictions = atsignwhereclauses(selection, '=',
                                      {authorobject.universalid: authorobject})

    # each restriction is indexed as [0] = SQL fragment, [1] = its query parameter
    parameters = [workobject.universalid]
    fragments = list()
    for restriction in restrictions:
        fragments.append('AND (' + restriction[0] + ') ')
        parameters.append(restriction[1])

    query = 'SELECT index FROM {au} WHERE wkuniversalid=%s {whr} ORDER BY index DESC LIMIT 1'.format(
        au=authorobject.universalid, whr=''.join(fragments))

    cursor.execute(query, tuple(parameters))
    # NOTE(review): a query that matches nothing leaves this as None and the
    # subscript below will raise
    lastrow = cursor.fetchone()

    return {'startline': firstline.index, 'endline': lastrow[0]}
Пример #3
0
def browserfindlinenumberfromlocus(citationlist: list, workobject: dbOpus,
                                   resultmessage: str, dbcursor) -> tuple:
    """
    Resolve a locus that came from the citation builder autofill boxes.

    note that
        browse/locus/gr0016w001/3|11|5
    is
        Herodotus, Historiae, Book 3, section 11, line 5

    and
        locus/lt1056w001/3|5|_0
    is
        Vitruvius, De Architectura, book 3, chapter 5, section 1, line 1

    unfortunately you might need to find '300,19' as in 'Democritus, Fragmenta: Fragment 300,19, line 4'
    '-' is used for something like Ar., Nubes 1510-1511
    '_' is used for '_0' (which really means 'first available line at this level')
    ( ) and / should be converted to equivalents in the builder: they do us no good here
    see dbswapoutbadcharsfromciations() in HipparchiaBuilder

    see also citationcharacterset() in InputParsingObject()

    :param citationlist: the elements of the locus
    :param workobject: the work; its availablelevels decides which lookup is used
    :param resultmessage: returned untouched when the locus is complete; replaced
        by the lookup's 'code' when the locus is incomplete
    :param dbcursor: handed through to the lookup helpers
    :return: (linenumber, resultmessage)
    """

    locus = tuple(citationlist)

    if len(locus) != workobject.availablelevels:
        # not one element per level: the incomplete-locus lookup reports its
        # own status code alongside the line
        partial = finddblinefromincompletelocus(workobject, citationlist,
                                                dbcursor)
        code = partial['code']
        return partial['line'], code

    # one element for every level: a direct lookup; the message passes through
    return finddblinefromlocus(workobject, locus, dbcursor), resultmessage
Пример #4
0
def textmaker(author: str,
              work=None,
              passage=None,
              endpoint=None,
              citationdelimiter='|') -> JSON_STR:
    """
    build a text suitable for display

        "GET /textof/lt0474/024/20/30"

    Three shapes of request are handled once an author and a work are known:
        passage + endpoint: a span running from one locus to another
        no passage:         the whole work
        passage only:       the segment of the work found at that locus

    :param author: author portion of the request
    :param work: work portion of the request
    :param passage: locus at which the text should start
    :param endpoint: locus at which a span should end
    :param citationdelimiter: separator used inside passage and endpoint
    :return: a JSON string with the keys authorname, title, structure,
        worksegment and texthtml
    """

    probeforsessionvariables()

    dbconnection = ConnectionObject('autocommit')

    # everything that happens while the connection is open lives inside
    # try/finally: a failed lookup or build must not strand the connection
    try:
        dbcursor = dbconnection.cursor()

        linesevery = hipparchia.config['SHOWLINENUMBERSEVERY']

        po = TextmakerInputParsingObject(author, work, passage, endpoint,
                                         citationdelimiter)

        ao = po.authorobject
        wo = po.workobject

        segmenttext = str()

        if ao and wo:
            # we have both an author and a work, maybe we also have a subset of the work
            if endpoint:
                firstlinenumber = finddblinefromincompletelocus(
                    wo, po.passageaslist, dbcursor)
                lastlinenumber = finddblinefromincompletelocus(wo,
                                                               po.endpointlist,
                                                               dbcursor,
                                                               findlastline=True)
                if firstlinenumber['code'] == 'success' and lastlinenumber[
                        'code'] == 'success':
                    startline = firstlinenumber['line']
                    endline = lastlinenumber['line']
                    startlnobj = dblineintolineobject(
                        grabonelinefromwork(ao.universalid, startline, dbcursor))
                    stoplnobj = dblineintolineobject(
                        grabonelinefromwork(ao.universalid, endline, dbcursor))
                else:
                    # could not resolve the span: warn and fall back to blank
                    # lines and the 0-1 range
                    msg = '"buildtexttospan/" could not find first and last: {a}w{b} - {c} TO {d}'
                    consolewarning(
                        msg.format(a=author, b=work, c=passage, d=endpoint))
                    startlnobj = makeablankline(work, 0)
                    stoplnobj = makeablankline(work, 1)
                    startline = 0
                    endline = 1
                segmenttext = 'from {a} to {b}'.format(a=startlnobj.shortlocus(),
                                                       b=stoplnobj.shortlocus())
            elif not po.passageaslist:
                # whole work
                startline = wo.starts
                endline = wo.ends
            else:
                startandstop = textsegmentfindstartandstop(ao, wo,
                                                           po.passageaslist,
                                                           dbcursor)
                startline = startandstop['startline']
                endline = startandstop['endline']
            texthtml = buildtext(wo.universalid, startline, endline, linesevery,
                                 dbcursor)
        else:
            texthtml = str()

        if hipparchia.config['INSISTUPONSTANDARDANGLEBRACKETS']:
            texthtml = gtltsubstitutes(texthtml)

        if not segmenttext:
            # not a span: label the segment with the passage elements themselves
            segmenttext = '.'.join(po.passageaslist)

        if not ao or not wo:
            # placeholders so that the result keys below can still be filled in
            ao = makeanemptyauthor('gr0000')
            wo = makeanemptywork('gr0000w000')

        results = dict()
        results['authorname'] = avoidsmallvariants(ao.shortname)
        results['title'] = avoidsmallvariants(wo.title)
        results['structure'] = avoidsmallvariants(wo.citation())
        results['worksegment'] = segmenttext
        results['texthtml'] = texthtml

        return json.dumps(results)
    finally:
        dbconnection.connectioncleanup()
Пример #5
0
def buildindexto(searchid: str,
                 author: str,
                 work=None,
                 passage=None,
                 endpoint=None,
                 citationdelimiter='|',
                 justvocab=False) -> JSON_STR:
    """
    build a complete index to an author, work, or segment of a work

    Which of these is built depends on how much of the request is present:
        author + work + passage + endpoint: a span of a work
        author + work + passage:            one segment of a work
        author + work:                      one work
        author:                             every work of the author

    :param searchid: id used (after validation) to register a ProgressPoll
    :param author: author portion of the request
    :param work: work portion of the request
    :param passage: locus at which the indexed segment starts
    :param endpoint: locus at which an indexed span ends
    :param citationdelimiter: separator used inside passage and endpoint
    :param justvocab: if True and the selection is valid, skip the index itself
        and return the {workid: (firstline, lastline)} dict rather than JSON
    :return: a JSON string with the keys authorname, title, structure,
        worksegment, elapsed, wordsfound, indexhtml, keytoworks and newjs
        (or the raw dict described under justvocab)
    """

    probeforsessionvariables()

    pollid = validatepollid(searchid)

    starttime = time.time()

    progresspolldict[pollid] = ProgressPoll(pollid)
    progresspolldict[pollid].activate()

    dbconnection = None

    # one try/finally owns the cleanup for every exit: the early 'justvocab'
    # return, the normal return, and any exception raised along the way;
    # otherwise a failure would strand both the connection and the poll entry
    try:
        dbconnection = ConnectionObject('autocommit')
        dbcursor = dbconnection.cursor()

        po = IndexmakerInputParsingObject(author, work, passage, endpoint,
                                          citationdelimiter)

        ao = po.authorobject
        wo = po.workobject
        psg = po.passageaslist
        stop = po.endpointlist

        if not work:
            # placeholder so that wo.title and wo.citation() are available below
            wo = makeanemptywork('gr0000w000')

        # bool
        useheadwords = session['headwordindexing']

        allworks = list()
        output = list()
        cdict = dict()
        segmenttext = str()
        valid = True

        if ao and work and psg and stop:
            # a span of a work
            start = psg
            firstlinenumber = finddblinefromincompletelocus(wo, start, dbcursor)
            lastlinenumber = finddblinefromincompletelocus(wo,
                                                           stop,
                                                           dbcursor,
                                                           findlastline=True)
            if firstlinenumber['code'] == 'success' and lastlinenumber[
                    'code'] == 'success':
                cdict = {
                    wo.universalid:
                    (firstlinenumber['line'], lastlinenumber['line'])
                }
                startln = dblineintolineobject(
                    grabonelinefromwork(ao.universalid, firstlinenumber['line'],
                                        dbcursor))
                stopln = dblineintolineobject(
                    grabonelinefromwork(ao.universalid, lastlinenumber['line'],
                                        dbcursor))
            else:
                msg = '"indexspan/" could not find first and last: {a}w{b} - {c} TO {d}'
                consolewarning(msg.format(a=author, b=work, c=passage, d=endpoint))
                startln = makeablankline(work, 0)
                stopln = makeablankline(work, 1)
                valid = False
            segmenttext = 'from {a} to {b}'.format(a=startln.shortlocus(),
                                                   b=stopln.shortlocus())
        elif ao and work and psg:
            # subsection of a work of an author
            progresspolldict[pollid].statusis(
                'Preparing a partial index to {t}'.format(t=wo.title))
            startandstop = textsegmentfindstartandstop(ao, wo, psg, dbcursor)
            startline = startandstop['startline']
            endline = startandstop['endline']
            cdict = {wo.universalid: (startline, endline)}
        elif ao and work:
            # one work
            progresspolldict[pollid].statusis(
                'Preparing an index to {t}'.format(t=wo.title))
            startline = wo.starts
            endline = wo.ends
            cdict = {wo.universalid: (startline, endline)}
        elif ao:
            # whole author
            allworks = [
                '{w}  ⇒ {t}'.format(w=w.universalid[6:10], t=w.title)
                for w in ao.listofworks
            ]
            allworks.sort()
            progresspolldict[pollid].statusis(
                'Preparing an index to the works of {a}'.format(a=ao.shortname))
            for wkid in ao.listworkids():
                cdict[wkid] = (workdict[wkid].starts, workdict[wkid].ends)
        else:
            # we do not have a valid selection
            valid = False
            output = ['invalid input']

        if not stop:
            segmenttext = '.'.join(psg)

        if valid and justvocab:
            # the caller only wants the line ranges; cleanup happens in 'finally'
            return cdict

        if valid:
            output = buildindextowork(cdict, progresspolldict[pollid],
                                      useheadwords, dbcursor)

        # get ready to send stuff to the page
        count = len(output)

        try:
            locale.setlocale(locale.LC_ALL, 'en_US')
            count = locale.format_string('%d', count, grouping=True)
        except locale.Error:
            # no 'en_US' locale on this system: settle for an ungrouped number
            count = str(count)

        progresspolldict[pollid].statusis('Preparing the index HTML')
        indexhtml = wordindextohtmltable(output, useheadwords)

        buildtime = time.time() - starttime
        buildtime = round(buildtime, 2)
        progresspolldict[pollid].deactivate()

        if not ao:
            ao = makeanemptyauthor('gr0000')

        results = dict()
        results['authorname'] = avoidsmallvariants(ao.shortname)
        results['title'] = avoidsmallvariants(wo.title)
        results['structure'] = avoidsmallvariants(wo.citation())
        results['worksegment'] = segmenttext
        results['elapsed'] = buildtime
        results['wordsfound'] = count
        results['indexhtml'] = indexhtml
        results['keytoworks'] = allworks
        results['newjs'] = supplementalindexjs()

        return json.dumps(results)
    finally:
        # dbconnection is still None if opening the connection itself failed
        if dbconnection is not None:
            dbconnection.connectioncleanup()
        del progresspolldict[pollid]
Пример #6
0
def selectionmade(requestargs: MultiDict) -> JSON_STR:
	"""

	register a freshly made choice inside the session's selection / exclusion lists
	and then hand back the human readable version of the current selections

	'_AT_' syntax is used to restrict the scope of a search

	"GET /selection/make/_?auth=lt0474&work=001&locus=13|4&endpoint= HTTP/1.1"
	request.args ImmutableMultiDict([('auth', 'lt0474'), ('work', '001'), ('locus', '13|4'), ('endpoint', '')])

	"GET /selection/make/_?auth=lt0474&work=001&locus=10&endpoint=20&raw=t HTTP/1.1"
	request.args ImmutableMultiDict([('auth', 'lt0474'), ('work', '001'), ('locus', '10'), ('endpoint', '20'), ('raw', 't')])

	"GET /selection/make/_?auth=lt0474&work=001&exclude=t HTTP/1.1"
	request.args ImmutableMultiDict([('auth', 'lt0474'), ('work', '001'), ('exclude', 't')])

	:param requestargs: the query arguments of the request
	:return: whatever getcurrentselections() reports once the session is updated
	"""

	probeforsessionvariables()

	def cleaned(key, allowed=None):
		# one request argument with its punctuation stripped; 'allowed' names the
		# punctuation that depunct() should let through
		raw = requestargs.get(key, str())
		if allowed is None:
			return depunct(raw)
		return depunct(raw, allowed)

	uid = cleaned('auth')
	workid = cleaned('work')
	genre = cleaned('genre')
	auloc = cleaned('auloc')

	# the two flags may only be made of 't' and 'f'
	rawdataentry = re.sub('[^tf]', str(), requestargs.get('raw', str()))
	exclude = re.sub('[^tf]', str(), requestargs.get('exclude', str()))

	locus = cleaned('locus', '|,.')
	endpoint = cleaned('endpoint', '|,.')
	wkprov = cleaned('wkprov', '.-?():')
	wkgenre = cleaned('wkgenre', '.')

	# 'suffix' picks the family of session lists we add to; 'other' is its opposite
	if exclude == 't':
		suffix, other = 'exclusions', 'selections'
	else:
		suffix, other = 'selections', 'exclusions'

	if rawdataentry == 't':
		# hand-typed loci use '.' between levels: convert to '|'
		locus = re.sub(r'\.', '|', locus)
		endpoint = re.sub(r'\.', '|', endpoint)

	# the selection box might contain stale info if you deselect a corpus while items are still in the box
	uid = selectionisactive(uid)

	if genre and genre not in returnactivelist(authorgenresdict):
		genre = str()

	if wkgenre and wkgenre not in returnactivelist(workgenresdict):
		wkgenre = str()

	if auloc and auloc not in returnactivelist(authorlocationdict):
		auloc = str()

	if wkprov and wkprov not in returnactivelist(workprovenancedict):
		wkprov = str()

	def register(prefix, item):
		# append to the active list for this category and then tidy that list
		key = prefix + suffix
		session[key].append(item)
		session[key] = tidyuplist(session[key])

	# you have validated the input, now do something with it...
	if uid and workid:
		if locus and endpoint:
			# a span in an author: 3 verrine orations, e.g. [note that the selection is 'greedy': 1start - 3end]
			# http://127.0.0.1:5000/makeselection?auth=lt0474&work=005&locus=2|1&endpoint=2|3
			# convert this into a 'firstline' through 'lastline' format
			emptycursor = None
			try:
				workobject = workdict['{a}w{b}'.format(a=uid, b=workid)]
			except KeyError:
				workobject = None
				consolewarning('"/selection/make/" sent a bad workuniversalid: {a}w{b}'.format(a=uid, b=workid))
			# the lookup is handed the locus elements in reversed order
			start = locus.split('|')[::-1]
			stop = endpoint.split('|')[::-1]
			if workobject:
				firstline = finddblinefromincompletelocus(workobject, start, emptycursor)
				lastline = finddblinefromincompletelocus(workobject, stop, emptycursor, findlastline=True)
				citationtemplate = '{a}w{b}_FROM_{c}_TO_{d}'
				foundboth = firstline['code'] == 'success' and lastline['code'] == 'success'
				if not foundboth:
					msg = '"makeselection/" could not find first and last: {a}w{b} - {c} TO {d}'
					consolewarning(msg.format(a=uid, b=workid, c=locus, d=endpoint))
				else:
					fl = firstline['line']
					ll = lastline['line']
					# e.g. span selected: lt0474w005_FROM_4501_TO_11915
					# Cicero, In Verrem: 2.1.t.1
					# Cicero, In Verrem: 2.3.228.15
					loc = citationtemplate.format(a=uid, b=workid, c=fl, d=ll)
					if ll > fl:
						register('psg', loc)
					else:
						msg = '"makeselection/" sent a firstline greater than the lastine value: {a} > {b} [{c}; {d}]'
						consolewarning(msg.format(a=fl, b=ll, c=locus, d=endpoint))
					# NOTE(review): this runs even when the span was rejected just above
					rationalizeselections(loc, suffix)
		elif locus:
			# a specific passage
			passage = uid + 'w' + workid + '_AT_' + locus
			register('psg', passage)
			rationalizeselections(passage, suffix)
		else:
			# a specific work
			thework = uid + 'w' + workid
			register('wk', thework)
			rationalizeselections(thework, suffix)
	elif uid:
		# a specific author [no work id was supplied]
		register('au', uid)
		rationalizeselections(uid, suffix)

	# independent checks, not an elif chain: several of these may arrive at once
	# each one is added to the +/- list and then subtracted from the -/+ list
	categories = (
		('agn', genre),
		('wkgn', wkgenre),
		('aloc', auloc),
		('wloc', wkprov),
	)
	for prefix, value in categories:
		if value:
			register(prefix, value)
			session[prefix + other] = dropdupes(session[prefix + other], session[prefix + suffix])

	# after the update to the session, you need to update the page html to reflect the changes
	return getcurrentselections()