Exemplo n.º 1
0
def getQ(t,s):
    """Pull a (subject, verb, object) triple out of the question sentence s.

    Only question type t == 0 is handled; for any other t the function
    prints 'not yet implemented' and returns (None, None, None).

    The graph from getGraph(s) is queried with gSearchW from 'ROOT-0' for
    'aux' and, when that yields nothing, for 'cop'.  If both queries come
    back empty a message is printed and None is returned.  Otherwise the
    first node found is treated as the primary node and the result is the
    tuple (subj, vb, obj); any element may still be None.

    NOTE(review): s is assumed to be indexable by idx(node) and to hold
    token mappings with a 'POS' entry -- confirm against the api module.
    """
    subject = None
    verb = None
    target = None
    graph = getGraph(s)
    # Primary-node candidates: nodes with an 'aux' edge, searched from the root.
    candidates = gSearchW(graph, 'ROOT-0', 'aux')

    if t != 0:
        print('not yet implemented')
        return (subject, verb, target)

    if len(candidates) == 0:
        # No 'aux' match: fall back to a 'cop' search.
        candidates = gSearchW(graph, 'ROOT-0', 'cop')
    if len(candidates) == 0:
        print('Could not decipher question: ' + api.toString(s))
        return None

    primary = candidates[0]
    primary_tag = s[idx(primary)]['POS']
    # The tag of the primary node decides whether it supplies the verb
    # or the object phrase.
    if 'VB' in primary_tag:
        verb = word(primary)
    elif 'NN' in primary_tag:
        target = getPhrase(graph, primary, OBJPE)

    # Fill the remaining slots from the edges leaving the primary node.
    for edge in graph[primary]:
        neighbour, relation = edge[0], edge[1]
        if relation == 'nsubj':
            subject = getPhrase(graph, neighbour, GPE)
        elif not verb and relation == 'cop':
            verb = word(neighbour)
        elif not target and relation in OBJ:
            target = getPhrase(graph, neighbour, GPE)

    return (subject, verb, target)
Exemplo n.º 2
0
def main(fileloc, out):
    """Write every pronoun-free sentence of the parsed article to a file.

    fileloc is handed to api.parsefile.  Each resulting sentence for which
    containsPronoun is false is written to the file named by out as
    api.toString(sentence) followed by a newline.  out is opened in 'w'
    mode, so any existing content is replaced.  Returns None.
    """
    article = api.parsefile(fileloc)
    # The with-block closes the output file even if a helper raises mid-loop.
    with open(out, 'w') as fout:
        for s in article:
            if not containsPronoun(s):
                fout.write(api.toString(s)+"\n")
Exemplo n.º 3
0
def mostRelevant(q, article, V, N):
    """Rank the sentences of article against the question q.

    A lemma -> POS mapping is built from the tokens of q (a later token
    with the same lemma overrides an earlier one).  Every sentence is then
    paired with the value of cosDist(mapping, sentence, V, N).

    Returns a list of (sentence string, cosDist value) tuples ordered by
    the cosDist value, largest first.
    """
    lemma_pos = dict((token['lemma'], token['POS']) for token in q)
    ranked = [
        (api.toString(sentence), cosDist(lemma_pos, sentence, V, N))
        for sentence in article
    ]
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked
Exemplo n.º 4
0
def simScore2(questions, article):
    """Collect similarity values produced by the external 'run' command.

    The questions are written one per line to 'semilar/q' and the article
    sentences to 'semilar/s' (each rendered with api.toString).  The shell
    command 'run q s' is then executed inside the 'semilar' directory with
    its output discarded; its exit status is ignored.  Finally
    'semilar/out' is read (presumably written by that command -- confirm).

    Every line of 'semilar/out' must hold at least three tab-separated
    fields: two integers followed by a float.  Returns the list of the
    float fields in file order.

    Raises ValueError or IndexError when a line of 'semilar/out' does not
    have that shape.
    """
    with open('semilar/q', 'w') as qs:
        for q in questions:
            qs.write(api.toString(q)+"\n")
    with open('semilar/s', 'w') as sents:
        for s in article:
            sents.write(api.toString(s)+"\n")

    # cwd= runs the tool inside 'semilar' without touching this process's
    # working directory, so nothing needs restoring if the call raises.
    subprocess.call('run q s > /dev/null 2>/dev/null', shell=True,
                    cwd='semilar')

    scores = []
    with open('semilar/out') as results:
        for l in results:
            toks = l.split('\t')
            # The first two fields (presumably question and sentence
            # indices) are parsed only so that a malformed row raises.
            int(toks[0])
            int(toks[1])
            scores.append(float(toks[2]))
    return scores
Exemplo n.º 5
0
def mostRelevant(q, article, V, N, sents):
    """Rank article sentences against question q using two combined scores.

    The first token of q is dropped.  The remaining tokens feed a
    lemma -> POS mapping (passed to cosDist) and are also passed, as a
    one-element list, to simScore2 together with article.

    Each sentence's final score is its cosDist score plus 6 times its
    simScore2 value.  The simScore2 value is taken as 0 for every sentence
    unless simScore2 returned exactly one value per sentence.  sents is
    accepted but not used.

    Returns a list of (sentence string, score, simscore, matched) tuples
    ordered by score, largest first.
    """
    question = q[1:]
    lemma_pos = dict((token['lemma'], token['POS']) for token in question)
    similarities = simScore2([question], article)
    # Only use the external values when there is exactly one per sentence.
    usable = len(article) == len(similarities)

    ranked = []
    for position, sentence in enumerate(article):
        base, matched = cosDist(lemma_pos, sentence, V, N)
        similarity = similarities[position] if usable else 0
        total = base + 6*similarity
        ranked.append((api.toString(sentence), total, similarity, matched))

    ranked.sort(key=lambda entry: entry[1], reverse=True)
    return ranked
Exemplo n.º 6
0
def getGraph(s):
    """Build an adjacency mapping from the dependency parse of sentence s.

    s is rendered with api.toString and parsed with api.corenlp.raw_parse;
    only the 'indexeddependencies' of the first returned sentence are used.
    Each dependency is read as (rel, x, y), meaning y is a rel of x.

    Returns a dict in which every word seen maps to a list of (y, rel)
    pairs, one for each distinct dependency where that word is x.  A word
    that only ever appears as y maps to an empty list.
    """
    parse = api.corenlp.raw_parse(api.toString(s))
    dependencies = parse['sentences'][0]['indexeddependencies']

    graph = {}
    for dependency in dependencies:
        relation = dependency[0]   # relationship between the two words
        head = dependency[1]       # the word that the other is related to
        dependent = dependency[2]  # the word whose relation is described

        # Make sure the dependent has an entry even if it has no edges.
        graph.setdefault(dependent, [])
        outgoing = graph.setdefault(head, [])
        edge = (dependent, relation)
        if edge not in outgoing:
            outgoing.append(edge)
    return graph