def uscsdist(proto, cs='', usermvis=False):
    """
    Compare the prototypical vector of a conditioned stimulus with the
    prototypical vector of its paired unconditioned stimulus.

    'plus' is compared against proto['ensure'] and 'minus' against
    proto['quinine'].

    :param proto: mapping of prototypical vectors; 'ensure' and 'plus' are read
        when cs is 'plus', 'quinine' and 'minus' when cs is 'minus'
    :param cs: 'plus' or 'minus'; any other value short-circuits to 1
    :param usermvis: if false, return the cosine distance between the two
        vectors; if true, return the absolute value of the scalar that
        best maps (least squares) the US vector onto the CS vector
    :return: cosine distance, or absolute fitted scale factor, or 1 when
        cs is neither 'plus' nor 'minus'
    """
    if cs == 'plus':
        usvector, csvector = proto['ensure'], proto['plus']
    elif cs == 'minus':
        usvector, csvector = proto['quinine'], proto['minus']
    else:
        # unrecognized stimulus: nothing to compare
        return 1

    if not usermvis:
        return cosinedist(usvector, csvector)

    def residual(scale, x, y):
        # error of the one-parameter model y ~ scale[0] * x
        return scale[0] * x - y

    # start the fit at 0.5; the second return value (status flag) is not used
    fitted, _ = optimize.leastsq(residual, [0.5], args=(usvector, csvector))
    return abs(fitted[0])
def vectorcosineworker(headwords, workpiledict, resultdict):
    """
    Pop headwords until the list is empty and record a cosine distance
    for each one.

    Every entry of workpiledict is indexed as
        (avalues, lemmavalues, lemmalength)

    :param headwords: list of keys into workpiledict; consumed (popped) here
    :param workpiledict: headword -> (avalues, lemmavalues, lemmalength)
    :param resultdict: mapping that receives headword -> cosine distance
    :return: resultdict, after it has been filled in
    """
    while headwords:
        try:
            word = headwords.pop()
        except IndexError:
            # the list emptied between the truth test and the pop
            # (presumably it is shared with other workers -- verify against caller)
            continue

        if not word:
            # falsy headwords are skipped, not looked up
            continue

        entry = workpiledict[word]
        avalues, lemmavalues, lemmalength = entry[0], entry[1], entry[2]

        try:
            distance = cosinedist(avalues, lemmavalues)
        except NameError:
            # scipy not available: use the local implementation
            distance = findcosinedist(avalues, lemmavalues, lemmalength)

        resultdict[word] = distance

    return resultdict
def qdist(proto, event, cs='', usermvis=True):
    """
    Return the cosine distance between the quinine prototype vector and
    the activity vector of a single event.

    The event is divided by the nan-ignoring sum of its absolute values
    before it is compared.

    :param proto: mapping of prototypical vectors; the key '<cs>-quinine'
        is used when usermvis is true, otherwise the key 'quinine'
    :param event: activity vector to compare with the prototype
    :param cs: stimulus name used to build the '<cs>-quinine' key; only
        consulted when usermvis is true
    :param usermvis: select the '<cs>-quinine' vector rather than the plain
        'quinine' vector
    :return: cosine distance to the selected quinine vector, or nan when
        that vector is missing from proto
    """
    # L1-style normalization of the event
    normalized = event / np.nansum(np.abs(event))

    if usermvis:
        key = '%s-quinine' % cs
        if key in proto:
            return cosinedist(proto[key], normalized)
        return np.nan

    if 'quinine' in proto:
        return cosinedist(proto['quinine'], normalized)
    return np.nan
def caclulatecosinevalues(focusword, vectorspace, headwords):
    """
    Compute, for every headword, the cosine value between its vector and
    the vector of the focus word.

    cos(α,β) = α · β / ||α|| ||β||

    ||α|| = sqrt(α · α)

    α = sum([a1, a2, ... ai])

    Each vector has one component per key of vectorspace: the value
    stored under that key for the word in question.

    returns a dict:
        {'facultas': 0.7418011102528389, 'frater': 0.7418011102528389, 'superus': 0.6535898384862247, ...}

    :param focusword: the word every headword is measured against
    :param vectorspace: mapping of sentence number -> {word: value}
    :param headwords: the words to score; each must be present in every
        sentence entry of vectorspace
    :return: dict of headword -> value returned by cosinedist (or by
        findcosinedist when cosinedist is not defined)
    """
    sentenceids = vectorspace.keys()

    focusvector = list()
    for sid in sentenceids:
        try:
            value = vectorspace[sid][focusword]
        except KeyError:
            # we know that the word appears, but it might have been there more than once...
            # that information is gone by the time the KeyError fires, so count it once
            value = 1
        focusvector.append(value)

    try:
        cosinedist
    except NameError:
        # numpy/scipy were not imported: the fallback needs the vector length
        focuslength = findvectorlength(focusvector)
    else:
        focuslength = None

    distances = dict()
    for word in headwords:
        wordvector = [vectorspace[sid][word] for sid in sentenceids]
        # scipy will choke if you send it div-by-zero data:
        #   RuntimeWarning: invalid value encountered in true_divide
        #   dist = 1.0 - np.dot(u, v) / (norm(u) * norm(v))
        # be careful not to end up with 0 in the focus vector with 'voluptas'
        # because of the 'v-for-u' issue
        try:
            distances[word] = cosinedist(wordvector, focusvector)
        except NameError:
            # numpy/scipy were not imported
            distances[word] = findcosinedist(wordvector, focusvector, focuslength)

    return distances