Exemplo n.º 1
0
def dispatchvectorsearch(vectortype: str,
                         searchid: str,
                         one=None,
                         two=None,
                         three=None) -> JSON_STR:
    """

    dispatcher for "/vectors/..." requests

    Checks that semantic vectors and the requested query type are enabled in
    hipparchia.config, builds a search object from the supplied terms,
    attaches a ProgressPoll to it, and hands it to externalvectors() or
    pythonvectors() depending on the EXTERNALVECTORHELPER setting.

    :param vectortype: name of the vector query; looked up in the local
        knownfunctions table. A name that is not in the table is answered
        with the same 'not enabled' message as a disabled query type
        (it used to raise KeyError).
    :param searchid: poll id from the request; passed through validatepollid()
    :param one: first search term; passed through depunct()
    :param two: second search term; only used by 'analogies'
    :param three: third search term; only used by 'analogies'
    :return: whatever externalvectors()/pythonvectors() return (presumably a
        JSON string, per the annotation), or a 'not enabled' message
    """

    if not hipparchia.config['SEMANTICVECTORSENABLED']:
        so = SearchObject(str(), str(), str(), str(), str(), session)
        oo = SearchOutputObject(so)
        return oo.generatenulloutput(
            itemname='searchsummary',
            itemval='[semantic vectors have not been enabled]')

    pollid = validatepollid(searchid)
    one = depunct(one)
    two = depunct(two)
    three = depunct(three)

    # argument lists for the two search-object builders
    simple = [pollid, one]
    triple = [pollid, one, two, three]

    # 'bso': arguments for building the search object
    # 'pref': the config flag that must be set for this query type to run
    knownfunctions = {
        'nearestneighborsquery': {
            'bso': simple,
            'pref': 'CONCEPTMAPPINGENABLED'
        },
        'analogies': {
            'bso': triple,
            'pref': 'VECTORANALOGIESENABLED'
        },
        'topicmodel': {
            'bso': simple,
            'pref': 'TOPICMODELINGENABLED'
        },
        'vectortestfunction': {
            'bso': simple,
            'pref': 'TESTINGVECTORBUTTONENABLED'
        },
        'unused': {
            'bso': None,
            'pref': None
        },
    }

    # vectortype arrives from the request: an unknown name must not raise
    # KeyError, so treat it exactly like a query type with no preference
    requested = knownfunctions.get(vectortype)
    pref = requested['pref'] if requested else None

    if not pref or not hipparchia.config[pref]:
        return json.dumps('this type of search has not been enabled')

    # every entry with a non-empty 'pref' carries either `simple` or `triple`
    bso = requested['bso']

    if len(bso) == 4:
        so = buildtriplelemmasearchobject(*bso)
    else:
        so = buildsinglelemmasearchobject(*bso)

    so.vectorquerytype = vectortype

    progresspolldict[pollid] = ProgressPoll(pollid)
    so.poll = progresspolldict[pollid]
    so.poll.activate()
    so.poll.statusis('Preparing to vectorize')

    if hipparchia.config['EXTERNALVECTORHELPER']:
        j = externalvectors(so)
    else:
        j = pythonvectors(so)

    if hipparchia.config['JSONDEBUGMODE']:
        print('/vectors/{f}\n\t{j}'.format(f=vectortype, j=j))

    # best effort: the poll attribute may already be gone by the time the
    # vector code returns
    try:
        del so.poll
    except AttributeError:
        pass

    return j
Exemplo n.º 2
0
def buildfakesearchobject(qtype='nearestneighborsquery') -> SearchObject:
    """

    assemble a hollow SearchObject around a synthetic session dictionary

    the session values are either copied from hipparchia.config or set to
    fixed placeholders; nothing is read from a live session

    :param qtype: stored on the result as its vectorquerytype
    :return: the SearchObject built from the synthetic session
    """

    config = hipparchia.config

    # session key -> name of the config setting that supplies its value
    configsource = {
        'vdim': 'VECTORDIMENSIONS',
        'vwindow': 'VECTORWINDOW',
        'viterat': 'VECTORTRAININGITERATIONS',
        'vminpres': 'VECTORMINIMALPRESENCE',
        'vdsamp': 'VECTORDOWNSAMPLE',
        'vcutloc': 'VECTORDISTANCECUTOFFLOCAL',
        'vcutneighb': 'VECTORDISTANCECUTOFFNEARESTNEIGHBOR',
        'vcutlem': 'VECTORDISTANCECUTOFFLEMMAPAIR',
        'vnncap': 'NEARESTNEIGHBORSCAP',
        'vsentperdoc': 'SENTENCESPERDOCUMENT',
        'ldamaxfeatures': 'LDAMAXFEATURES',
        'ldacomponents': 'LDACOMPONENTS',
        'ldamaxfreq': 'LDAMAXFREQ',
        'ldaminfreq': 'LDAMINFREQ',
        'ldaiterations': 'LDAITERATIONS',
        'ldamustbelongerthan': 'LDAMUSTBELONGERTHAN',
        'baggingmethod': 'DEFAULTBAGGINGMETHOD',
    }
    fakesession = {key: config[setting]
                   for key, setting in configsource.items()}

    # valueless placeholders ('onehit' is replaced by False further down)
    fakesession.update(dict.fromkeys(('searchscope', 'nearornot', 'onehit')))

    # passage selections: each key gets its own empty list
    fakesession.update(
        (key, []) for key in ('psgselections', 'psgexclusions'))

    fakesession.update(
        dict.fromkeys(('proximity', 'maxresults', 'linesofcontext'), 0))

    fakesession.update(
        dict.fromkeys(('onehit', 'icandodates', 'nearestneighborsquery',
                       'searchinsidemarkup'), False))

    # remaining selection/exclusion lists: again one fresh list per key
    emptylists = (
        'agnexclusions', 'agnselections', 'alocexclusions', 'alocselections',
        'analogyfinder', 'auexclusions', 'auselections',
        'wkexclusions', 'wkgnexclusions', 'wkgnselections', 'wkselections',
        'wlocexclusions', 'wlocselections',
    )
    fakesession.update((key, []) for key in emptylists)

    # every corpus is switched on
    fakesession.update(
        dict.fromkeys(('christiancorpus', 'latincorpus', 'greekcorpus',
                       'inscriptioncorpus'), True))

    fakesession['latestdate'] = 1500
    fakesession['earliestdate'] = -850

    searchobject = SearchObject('vectorbot', '', '', None, None, fakesession)

    # parsevectorsentences() needs the following:
    searchobject.vectorquerytype = qtype
    searchobject.usecolumn = 'marked_up_line'
    searchobject.sortorder = 'shortname'
    searchobject.iamarobot = True

    return searchobject