def getQ(t, s):
    """Pull a (subject, verb, object) triple out of a question sentence.

    Parameters:
        t -- question type selector; only ``0`` is implemented.
        s -- parsed sentence; indexed with ``idx(node)`` to reach a token
             mapping that carries a 'POS' entry.

    Returns:
        ``(subj, vb, obj)``; any element may be ``None`` when it was not
        found.  For ``t != 0`` the text 'not yet implemented' is printed and
        ``(None, None, None)`` is returned.  When ``t == 0`` and neither the
        'aux' nor the 'cop' search from 'ROOT-0' yields a node, a diagnostic
        is printed and ``None`` (not a tuple) is returned.
    """
    graph = getGraph(s)
    # Look for the primary node with an aux edge.  This search runs for
    # every question type, before the type is inspected.
    candidates = gSearchW(graph, 'ROOT-0', 'aux')

    if t != 0:
        print('not yet implemented')
        return (None, None, None)

    if len(candidates) == 0:
        # No aux match: fall back to a copula search.
        candidates = gSearchW(graph, 'ROOT-0', 'cop')
        if len(candidates) == 0:
            print('Could not decipher question: ' + api.toString(s))
            return None

    head = candidates[0]
    subj = vb = obj = None

    # A verbal head supplies the verb itself; a nominal head supplies the
    # object phrase.
    pos_tag = s[idx(head)]['POS']
    if 'VB' in pos_tag:
        vb = word(head)
    elif 'NN' in pos_tag:
        obj = getPhrase(graph, head, OBJPE)

    # Fill the remaining slots from the head's outgoing (node, relation)
    # edges.  A verb or object that is already set is never overwritten;
    # the subject is taken from the last 'nsubj' edge seen.
    for edge in graph[head]:
        target = edge[0]
        relation = edge[1]
        if relation == 'nsubj':
            subj = getPhrase(graph, target, GPE)
        elif relation == 'cop' and not vb:
            vb = word(target)
        elif not obj and relation in OBJ:
            obj = getPhrase(graph, target, GPE)

    return (subj, vb, obj)
def main(fileloc, out):
    """Write every pronoun-free sentence of a parsed article to a file.

    Parameters:
        fileloc -- location handed to ``api.parsefile`` to obtain the
                   article (an iterable of sentences).
        out     -- path of the output file; it is opened in 'w' mode, so
                   any existing content is replaced.

    Each sentence for which ``containsPronoun`` is false is written as
    ``api.toString(sentence)`` followed by a newline.  Returns ``None``.
    """
    article = api.parsefile(fileloc)
    # The context manager guarantees the handle is closed even when
    # containsPronoun / api.toString raise part-way through the article.
    with open(out, 'w') as fout:
        for s in article:
            if not containsPronoun(s):
                fout.write(api.toString(s) + "\n")
def mostRelevant(q, article, V, N):
    """Rank the sentences of ``article`` against the question ``q``.

    NOTE(review): this file defines ``mostRelevant`` a second time further
    down, with an extra ``sents`` parameter; that later definition rebinds
    the name, so this four-argument version is not the one callers reach
    after the module has loaded.

    Parameters:
        q       -- iterable of token mappings with 'lemma' and 'POS' keys.
        article -- sequence of sentences.
        V, N    -- forwarded unchanged to ``cosDist``.

    Returns:
        A list of ``(sentence_text, cosDist_result)`` pairs sorted in
        descending order of the second element.
    """
    # lemma -> POS lookup for the question; a repeated lemma keeps the POS
    # of its last occurrence.
    dictq = dict((tok['lemma'], tok['POS']) for tok in q)

    ranked = [(api.toString(sentence), cosDist(dictq, sentence, V, N))
              for sentence in article]
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked
def simScore2(questions, article):
    """Score questions against sentences with the external 'semilar' tool.

    The questions are written one per line to 'semilar/q' and the article
    sentences one per line to 'semilar/s' (both via ``api.toString``).  The
    command ``run q s`` is then executed inside the 'semilar' directory
    with its stdout and stderr discarded, and 'semilar/out' is read back.

    Parameters:
        questions -- iterable of questions.
        article   -- iterable of sentences.

    Returns:
        A list of floats: the third tab-separated field of every line of
        'semilar/out', in file order.  The first two fields (named
        ``qindex`` / ``sindex`` by the original code) are not used.

    Raises:
        IOError if any of the files cannot be opened; ValueError or
        IndexError if a result line has no parsable third field.
    """
    # Context managers make sure the input files are flushed and closed
    # before the tool starts, and closed even if api.toString raises.
    with open('semilar/q', 'w') as qs:
        for q in questions:
            qs.write(api.toString(q) + "\n")
    with open('semilar/s', 'w') as sents:
        for s in article:
            sents.write(api.toString(s) + "\n")

    # Run the tool with 'semilar' as the child's working directory instead
    # of chdir-ing this process there and back: the parent's cwd is never
    # touched, so it cannot be left pointing at the wrong place on error.
    # The exit status is deliberately ignored, as before; callers compare
    # the number of scores with the number of sentences.
    subprocess.call('run q s > /dev/null 2>/dev/null', shell=True,
                    cwd='semilar')

    scores = []
    with open('semilar/out') as results:
        for line in results:
            fields = line.split('\t')
            scores.append(float(fields[2]))
    return scores
def mostRelevant(q, article, V, N, sents):
    """Rank article sentences by combined cosDist and simScore2 relevance.

    Parameters:
        q       -- sequence of token mappings with 'lemma' and 'POS' keys;
                   the first token is dropped before anything else is done.
        article -- sequence of sentences.
        V, N    -- forwarded unchanged to ``cosDist``.
        sents   -- not used by this function.

    Returns:
        A list of ``(sentence_text, score, simscore, matched)`` tuples
        sorted by ``score`` in descending order, where ``score`` is the
        cosDist score plus six times ``simscore``.
    """
    q = q[1:]
    # lemma -> POS lookup for the question; a repeated lemma keeps the POS
    # of its last occurrence.
    dictq = dict((tok['lemma'], tok['POS']) for tok in q)

    simscores = simScore2([q], article)
    # The external scores are only trusted when there is exactly one per
    # sentence; otherwise every sentence gets a similarity of 0.
    aligned = len(article) == len(simscores)

    ranked = []
    for position, sentence in enumerate(article):
        base, matched = cosDist(dictq, sentence, V, N)
        simscore = simscores[position] if aligned else 0
        combined = base + 6 * simscore
        ranked.append((api.toString(sentence), combined, simscore, matched))

    ranked.sort(key=lambda entry: entry[1], reverse=True)
    return ranked
def getGraph(s):
    """Build an adjacency mapping from the dependency parse of ``s``.

    The sentence is rendered with ``api.toString`` and parsed with
    ``api.corenlp.raw_parse``; the 'indexeddependencies' of the first parsed
    sentence are used.  Each dependency is read as ``(rel, x, y)``, meaning
    "y is a rel of x".

    Returns:
        A dict mapping every word that occurs in a dependency to a list of
        ``(y, rel)`` pairs for the edges leaving it.  Words that only occur
        as dependents map to an empty list, and a repeated ``(y, rel)``
        pair under the same head is stored once.
    """
    text = api.toString(s)
    dependencies = api.corenlp.raw_parse(text)['sentences'][0]['indexeddependencies']

    graph = {}
    for dependency in dependencies:
        relation = dependency[0]   # relationship between the two words
        head = dependency[1]       # the word the dependent relates to
        dependent = dependency[2]  # the word whose relation is described

        # Every dependent gets a node, even if it has no outgoing edges.
        graph.setdefault(dependent, [])

        edges = graph.setdefault(head, [])
        edge = (dependent, relation)
        if edge not in edges:
            edges.append(edge)
    return graph