class linguistic_operations():
    def __init__(self):
        self.depRelations = []
        self.lastrelation = []
        self.question = ""
        self.ind = {}
        self.visited = []
        self.path_to_jar = "/home/user/stanford-parser/stanford-parser-3.4.1.jar"
        self.path_to_models_jar = "/home/user/stanford-parser/stanford-parser-3.4.1-models.jar"
        self.dependency_parser = StanfordDependencyParser(
            path_to_jar=self.path_to_jar,
            path_to_models_jar=self.path_to_models_jar)
        return

    def dependencyParse(self, sentence):
        self.question = sentence
        result = self.dependency_parser.raw_parse(sentence)
        dep = result.__next__()
        self.depRelations = list(dep.triples())
        print('Stanford Parsing: \n', self.depRelations)
        return self.depRelations

    def numNoun(self):
        allrelations = {}
        for relation in self.depRelations:
            if relation[1] == 'nummod':
                seed = ps.stem(relation[0][0])
                if seed not in allrelations:
                    allrelations[seed] = []
                allrelations[seed].append(w2n.word_to_num(str(relation[2][0])))
        self.allrelations = allrelations
        return allrelations

    def LastRelation(self):
        last = nltk.sent_tokenize(self.question)[-1]
        allwords = self.makeseedvocab(last)
        result = self.dependency_parser.raw_parse(last)
        dep = result.__next__()
        self.lastrelation = list(dep.triples())
        return

    def makeseedvocab(self, last):
        allwords = set()
        sentences = nltk.word_tokenize(last)
        for word in sentences:
            allwords.add(ps.stem(word))
        return allwords

    def whoseQuantity(self):
        for relation in self.lastrelation:
            if (relation[0][1] == "NNS" or relation[0][1] == "NN") and ps.stem(relation[0][0]) in self.allrelations:
                print('Whose_quantity:', ps.stem(relation[0][0]))
                return ps.stem(relation[0][0])
            elif (relation[2][1] == "NNS" or relation[2][1] == "NN") and ps.stem(relation[2][0]) in self.allrelations:
                print('Whose_quantity:', ps.stem(relation[2][0]))
                return ps.stem(relation[2][0])
def sent_to_dep(sent):
    """return a dictionary containing governor words and their dependency"""
    # set up StanfordNLP parser
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    # parse a sentence and get the dependency
    result = dependency_parser.raw_parse(sent)
    dep = result.__next__()
    output = set(dep.triples())
    dic = {}
    # adjust the pattern
    for element in output:
        governor = element[0][0]
        dep = element[1]
        dependent = element[2][0]
        pos_tag = element[0][1]
        if governor not in set(dic.keys()):
            dic[governor] = {'pos_tag': pos_tag, dep: dependent}
        else:
            dic[governor][dep] = dependent
    # generate pos_tag for words without pos_tag
    tokens = nltk.word_tokenize(sent)
    pos_tag = nltk.pos_tag(tokens)
    for t in pos_tag:
        word = t[0]
        tag = t[1]
        if word not in dic.keys():
            dic[word] = {'pos_tag': tag}
    return dic
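# Usage sketch (illustrative, not from the original source): assumes the
# path_to_jar / path_to_models_jar globals point at a local Stanford parser
# install and that nltk and StanfordDependencyParser are already imported.
# Exact relations and tags depend on the parser model; the dictionary shape
# follows the loop in sent_to_dep().
deps = sent_to_dep("The dog chased the cat")
# e.g. {'chased': {'pos_tag': 'VBD', 'nsubj': 'dog', 'dobj': 'cat'},
#       'dog': {'pos_tag': 'NN', 'det': 'The'}, ...}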
def performDependencyParsing(filename, output_dir):
    path_to_jar = '/Users/sagnik/Documents/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0.jar'
    path_to_models_jar = '/Users/sagnik/Documents/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0-models.jar'
    path_to_visual_jar = "/Users/sagnik/Documents/stanford-corenlp-full-2017-06-09/dependensee-3.7.0.jar"
    path_to_another_jar = "/Users/sagnik/Documents/stanford-corenlp-full-2017-06-09/slf4j-api.jar"
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    text = open(filename, "r").read()
    sent_tokenize_list = sent_tokenize(text)
    fhw = open(output_dir + "/dependency_parsed.txt", "w")
    for sentence in sent_tokenize_list:
        fhw.write(sentence)
        fhw.write("\n")
        #print(nltk.word_tokenize( sentence ))
        regex = re.compile(".*?\((.*?)\)")
        if "[" in sentence:
            result = re.findall(regex, sentence)
            sentence = re.sub("[\(\[].*?[\)\]]", "", sentence)
            #print("Removed []",sentence)
        result = dependency_parser.raw_parse(sentence)
        dep = result.__next__()
        result = list(dep.triples())
        for row in result:
            fhw.write(str(row))
            fhw.write("\n")
        #print("="*200)
        fhw.write("=====")
        fhw.write("\n")
    """result = dependency_parser.raw_parse(text)
    dep = result.__next__()
    result=list(dep.triples())
    for row in result:
        print(row)"""
    """cmd="java -cp "+path_to_visual_jar+":"+path_to_jar+":"+path_to_models_jar+":"+path_to_another_jar+" com.chaoticity.dependensee.Main "
def format(sentence):
    filename = 'stanford-parser.jar'
    command = ['locate', filename]
    output = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0]
    path_to_jar = output.decode().strip()
    filename = 'models.jar'
    command = ['locate', filename]
    output = subprocess.Popen(
        command, stdout=subprocess.PIPE).communicate()[0].decode().strip()
    output = output.split('\n')
    for op in output:
        if 'parse' in op:
            path_to_models_jar = op
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    tokens = word_tokenize(sentence)
    result = dependency_parser.raw_parse(sentence)
    for dep in result:
        # print(dep.tree())
        cf = CanvasFrame()
        t = dep.tree()
        tc = TreeWidget(cf.canvas(), t)
        cf.add_widget(tc, 10, 10)  # (10,10) offsets
        cf.print_to_file('tree.ps')
        cf.destroy()
    return (dep, tokens)
def genrate_triplet(i, sents, dependency_parser, filenames):
    from nltk.parse.stanford import StanfordDependencyParser
    path_to_jar = '/home/cs17mtech11004/stanford-parser-full-2018-02-27/stanford-parser.jar'
    path_to_models_jar = '/home/cs17mtech11004/stanford-parser-full-2018-02-27/stanford-parser-3.9.1-models.jar'
    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    triplets = []
    count = 0
    # print(len(sents))
    # for sent in sents:
    #     print(len(sent),count)
    #     try:
    #         result = dependency_parser.raw_parse(sent)
    #         dep = result.__next__()
    #         triplets.append(list(dep.triples()))
    #         # print(triplets)
    #     except:
    #         print("HERE",len(sent),count)
    #         pass
    #     if count%500==499:
    #         save_to_file('dp_data_pos/dp_'+str(i)+"_"+str(int(count/500)),triplets,filenames.output_folder)
    #         triplets=[]
    #     count += 1
    try:
        result = dependency_parser.raw_parse('. '.join(sents))
        dep = result.__next__()
        triplets.append(list(dep.triples()))
        # print(triplets)
    except:
        print("HERE", len(sents), count)
        pass
    print(triplets)
    save_to_file('dp_data_pos/dp_' + str(i) + "_last", triplets, filenames.output_folder)
def main():
    papersent = []
    with open(sys.argv[1], 'r') as input:
        for item in input:
            papersent.append(item)
    input.close()
    print "okay"
    path_to_jar = '/util/academic/snlp/parser_v3.8.0/stanford-parser.jar'
    path_to_models_jar = '/util/academic/snlp/parser_v3.8.0/stanford-parser-3.8.0-models.jar'
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    with open(sys.argv[2], 'a') as output:
        for item in papersent:
            output.write("%s\n" % item)
            try:
                result = dependency_parser.raw_parse(item)
                for e in result:
                    result = e
                    break
                output.write(result.to_dot())
            except UnicodeDecodeError:
                output.write("UnicodeDecodeError\n\n")
                continue
            except OSError:
                output.write("OSError\n\n")
                continue
            output.write("\n")
    print sys.argv[1]
def dependencyParser(inputSentence):
    depParser = StanfordDependencyParser(
        model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    depSentence = [
        parse.tree() for parse in depParser.raw_parse(inputSentence)
    ]
    printSentence(depSentence)
def depParse(self, inStr):
    dependency_parser = StanfordDependencyParser(
        path_to_jar=self.path_to_jar,
        path_to_models_jar=self.path_to_models_jar)
    result = dependency_parser.raw_parse(inStr)
    dep = next(result)
    return list(dep.triples())
def dependency_parse(sentence):
    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar,
                                                 path_to_models_jar=path_to_models_jar)
    dependencies = dependency_parser.raw_parse(sentence).__next__()
    rel = list()
    for dependency in list(dependencies.triples()):
        rel.append([dependency[0][0].lower(), dependency[2][0].lower()])
    return rel
def get_word_dependencies(text):
    dependencies = {}
    dep_parser = StanfordDependencyParser(
        model_path=osp.join(
            datadir,
            "stanford_data/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
        ),
        java_options="-mx4g -XX:-UseGCOverheadLimit")
    st = StanfordPOSTagger(osp.join(datadir, "stanford_pos/stanford-postagger-3.9.1.jar"),
                           osp.join(datadir, 'stanford_pos/models/english-bidirectional-distsim.tagger'),
                           java_options='-mx4g, XX:-UseGCOverheadLimit')
    stanford_dir = st._stanford_jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_dir)
    st.stanford_jar = ':'.join(stanford_jars)
    result = dep_parser.raw_parse(text)
    dep = result.__next__()
    #print(list(dep.triples()))
    for i in list(dep.triples()):
        w1 = i[0][0]
        w2 = i[2][0]
        if w1 in dependencies:
            dependencies[w1].append((w2, i[1]))
        else:
            dependencies[w1] = [(w2, i[1])]
    #print(dependencies)
    return dependencies
def main():
    """ main function """
    fl = open('input')
    #dumpfile = open('dumpfile','wb')
    path_to_jar = '../exp/stanford-corenlp-full-2015-12-09/stanford-corenlp-3.6.0.jar'
    path_to_models_jar = '../exp/stanford-corenlp-full-2015-12-09/stanford-\
english-corenlp-2016-01-10-models.jar'
    dep_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    pars_res = [[parse for parse in dep_parser.raw_parse(
        Myobject.string_analys(i))] for i in fl]  # doctest: +NORMALIZE_WHITESPACE
    # pickle.dump(pars_res,dumpfile)
    fl.seek(0)
    #val = Validator()
    #trip_pars([smp.tree() for smp in i])
    for i, j in zip(pars_res, fl):
        print([list(smp.triples()) for smp in i])
        print("-----------------------------------------------")
        print([smp.tree() for smp in i])
        #trip_pars([smp.tree() for smp in i], i)
        print("-----------------------------------------------")
        objlist = get_obj([list(smp.triples()) for smp in i][0])
        print(objlist_analise(objlist))
        print("-----------------------------------------------")
        print(j)
        print("###############################################")
def dStructure():
    print 'Dependency Structure'
    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar,
                                                 path_to_models_jar=path_to_models_jar)
    result = dependency_parser.raw_parse('Who were the CEO of IBM')
    dep = result.next()
    print list(dep.triples())
def is_negated(self, word, words_in_sentence):
    # negation check with window and dependency graph
    path_to_jar = 'data/externalData/stanford-parser-full-2018-02-27/stanford-parser.jar'
    path_to_models_jar = 'data/externalData/stanford-parser-full-2018-02-27/stanford-parser-3.9.1-models.jar'
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    index = words_in_sentence.index(word)
    negated = False
    if index < 3:
        for i in range(index):
            temp = words_in_sentence[i]
            if "not" in temp or "n't" in temp or "never" in temp:
                negated = True
    else:
        for i in range(index - 3, index):
            temp = words_in_sentence[i]
            if "not" in temp or "n't" in temp or "never" in temp:
                negated = True
    negations = ["not", "n't", "never"]  # negation cues checked outside the window
    if negated == False and any(x in s for x in negations for s in words_in_sentence):
        print('negation parser')
        print(' '.join(words_in_sentence))
        result = dependency_parser.raw_parse(' '.join(words_in_sentence))
        dep = result.__next__()
        result = list(dep.triples())
        for triple in result:
            if triple[0][0] == word and triple[1] == 'neg':
                negated = True
                break
    return negated
def __dep2Tree(self, sentence):
    path_to_jar = 'D:/myPlugin/stanford-parser-full-2018-10-17/stanford-parser.jar'
    path_to_models_jar = 'D:/myPlugin/stanford-parser-full-2018-10-17/stanford-parser-3.9.2-models.jar'
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    result = dependency_parser.raw_parse(sentence)
    t = result.__next__().tree()
    return t
def stanParse(self, sent):
    os.environ['STANFORD_PARSER'] = self.cwd + '/stanford-parser'
    os.environ[
        'CLASSPATH'] = self.cwd + '/stanford-parser/stanford-parser-3.7.0-models.jar'
    dep_parser = StanfordDependencyParser(
        model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    return [list(parse.triples()) for parse in dep_parser.raw_parse(sent)][0]
def parseSentenceWithDependencyParser(sentence):
    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar,
                                                 path_to_models_jar=path_to_models_jar)
    result = dependency_parser.raw_parse(sentence=" I would " + sentence)
    dep = result.next()
    arr = list(dep.triples())
    arr = [((w1, t1), dep, (w2, t2)) for ((w1, t1), dep, (w2, t2)) in arr
           if w1 != "I" and w2 != "would" and w1 != "would" and w2 != "I"]
    return arr
def parsing(sent):
    parser = StanfordDependencyParser(path_to_models_jar=my_path_to_models_jar,
                                      path_to_jar=my_path_to_parser_jar)
    result = parser.raw_parse(sent)
    dep = next(result)
    parsed = list(dep.triples())
    return parsed
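# Usage sketch (illustrative, not from the original source): my_path_to_parser_jar
# and my_path_to_models_jar are assumed to point at a local Stanford parser
# install. Each triple has the form ((governor, tag), relation, (dependent, tag));
# the exact relations and tags depend on the parser model.
triples = parsing("I shot an elephant")
# e.g. [(('shot', 'VBD'), 'nsubj', ('I', 'PRP')),
#       (('shot', 'VBD'), 'dobj', ('elephant', 'NN')),
#       (('elephant', 'NN'), 'det', ('an', 'DT'))]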
def NLTKparserfordependancies(sentnece):
    path_to_jar = '/home/jalaj/stanford-corenlp-full-2016-10-31/stanford-corenlp-3.7.0.jar'
    path_to_models_jar = '/home/jalaj/stanford-corenlp-full-2016-10-31/stanford-corenlp-3.7.0-models.jar'
    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar,
                                                 path_to_models_jar=path_to_models_jar)
    result = dependency_parser.raw_parse(sentnece)
    dep = result.next()
    print "\n------Dependencies------\n"
    print list(dep.triples())
def generate_deps(self):
    path_to_jar = '/Users/bobrusha/Downloads/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0.jar'
    path_to_models_jar = '/Users/bobrusha/Downloads/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0-models.jar'
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    parse = dependency_parser.raw_parse(self.get_text())
    dep = parse.next()
    # dependencies in instance e.g. [((u'recieved', u'VBD'), u'nsubj', (u'Hailey', u'NNP')),...]
    self.deps = list(dep.triples())
def parseTree(sent):
    path_to_jar = '/home/knight/Downloads/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0.jar'
    path_to_models_jar = '/home/knight/Downloads/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0-models.jar'
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    result = dependency_parser.raw_parse(sent)
    depTree = result.next()
    return list(depTree.triples())
def syntacticParse(s):
    print("\n\nParsing:")
    stanford_parser_dir = 'libraries/'
    my_path_to_models_jar = stanford_parser_dir + "stanford-corenlp/stanford-corenlp-3.9.2-models.jar"
    my_path_to_jar = stanford_parser_dir + "stanford-parser/stanford-parser.jar"
    dependency_parser = StanfordDependencyParser(
        path_to_jar=my_path_to_jar, path_to_models_jar=my_path_to_models_jar)
    result = dependency_parser.raw_parse(s)
    print(list((result.__next__()).triples()))
def get_parse_tree():
    path_to_jar = 'path_to/stanford-parser-full-2014-08-27/stanford-parser.jar'
    path_to_models_jar = 'path_to/stanford-parser-full-2014-08-27/stanford-parser-3.4.1-models.jar'
    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar,
                                                 path_to_models_jar=path_to_models_jar)
    result = dependency_parser.raw_parse('I shot an elephant in my sleep')
    dep = result.next()
    list(dep.triples())
def NLTKparserfordependancies(sentnece):
    path_to_jar = '/home/jalaj/stanford-corenlp-full-2016-10-31/stanford-corenlp-3.7.0.jar'
    path_to_models_jar = '/home/jalaj/stanford-corenlp-full-2016-10-31/stanford-corenlp-3.7.0-models.jar'
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    result = dependency_parser.raw_parse(sentnece)
    dep = next(result)
    print("\n------Dependencies------\n")
    print(list(dep.triples()))
def parse_sentence(user_input):  # returns root word, triples of StanfordDependencyParser
    path_to_jar = path + 'stanford-corenlp-3.8.0.jar'
    path_to_models_jar = path + 'stanford-corenlp-3.8.0-models.jar'
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    os.environ['JAVA_HOME'] = 'C:\\ProgramData\\Oracle\\Java\\javapath'
    result = dependency_parser.raw_parse(user_input)
    dep = next(result)  # get next item from the iterator result
    return dep.triples(), dep.root["word"]
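# Usage sketch (illustrative, not from the original source): assumes `path`
# points at a local CoreNLP distribution and Java is configured as above.
# Note that dep.triples() is returned here as a generator, so it is usually
# materialised with list() before reuse.
triples, root = parse_sentence("The cat sat on the mat")
print(root)           # root word of the parse, e.g. 'sat'
print(list(triples))  # [((governor, tag), relation, (dependent, tag)), ...]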
def construct(hello):
    num = 0
    sdp = StanfordDependencyParser()
    result = list(sdp.raw_parse(hello))
    dep_tree_dot_repr = [parse for parse in result][0].to_dot()
    num = num + 1
    source = Source(dep_tree_dot_repr,
                    filename="dep_tree" + str(main.index(hello)),
                    format="png")
    source.view()
def parse_sentence(user_input):  # returns root word, triples of StanfordDependencyParser  # noqa: E501
    import os
    from nltk.parse.stanford import StanfordDependencyParser
    import config
    path_to_jar = config.stanford_path_to_jar
    path_to_models_jar = config.stanford_path_to_models_jar
    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar,
                                                 path_to_models_jar=path_to_models_jar)  # noqa: E501
    os.environ['JAVAHOME'] = config.javahome
    result = dependency_parser.raw_parse(user_input)
    dep = next(result)  # get next item from the iterator result
    return dep.triples(), dep.root["word"]
def impp(input_question):
    try:
        import numpy as np
        import os
        os.getcwd()
        import pandas as pd
        import spacy
        from . import formula
        nlp = spacy.load('en_core_web_sm')
        from difflib import SequenceMatcher
        import re
        import nltk
        import pprint
        pp = pprint.PrettyPrinter(indent=4)
        from nltk import word_tokenize
        from nltk.corpus import stopwords
        path_to_jar = '/usr/local/lib/python2.7/dist-packages/nltk/tag/stanford-parser-3.8.0.jar'
        path_to_models_jar = '/usr/local/lib/python2.7/dist-packages/nltk/tag/stanford-parser-3.8.0-models.jar'
        jar = '/usr/local/lib/python2.7/dist-packages/nltk/tag/stanford-postagger-3.8.0.jar'
        model = '/usr/local/lib/python2.7/dist-packages/nltk/tag/models/english-left3words-distsim.tagger'
        from nltk.parse.corenlp import CoreNLPParser
        from nltk.tag import StanfordNERTagger
        from nltk.parse.stanford import StanfordParser
        from nltk.parse.stanford import StanfordDependencyParser
        from nltk.stem import PorterStemmer
        from nltk.tokenize import sent_tokenize
        from nltk.tag import StanfordPOSTagger
        pos_tagger = StanfordPOSTagger(model, jar, encoding='utf8')
        dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar,
                                                     path_to_models_jar=path_to_models_jar)
        #print ("1")
        #print (os.path.exists('/home/piut/django-apps/wps/wps/patterns.csv'))
        #print ("2")
        pattern = read('patterns.csv')
        #print ("1")
        #print pattern
        question = input_question
        tagged_question = pos_tagger.tag(nltk.word_tokenize(question))
        doc = nlp(question)
        #print "###################################################################"
        #print doc
        #print ("2")
        result = dependency_parser.raw_parse(question)
        #pp.pprint(tagged_question)
        #print ("3")
        #return str(moreMoney(dependency,doc,pattern,unknown))
        unknown = find(tagged_question, question, doc, input_question)
        if unknown == 0:
            return 0
        return unknown
    # fe
    except:
        return 0
def entpoint(querystring):
    path_to_jar = '../exp/stanford-corenlp-full-2015-12-09/stanford-corenlp-3.6.0.jar'
    path_to_models_jar = '../exp/stanford-corenlp-full-2015-12-09/stanford-\
english-corenlp-2016-01-10-models.jar'
    dep_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    pars_res = [parse for parse in dep_parser.raw_parse(
        Myobject.string_analys(querystring))]
    objlist = get_obj([list(smp.triples()) for smp in pars_res][0])
    return objlist_analise(objlist)
def dependencyParser(inputSentence):
    depParser = StanfordDependencyParser(
        model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    depSentence = [
        parse.tree() for parse in depParser.raw_parse(inputSentence)
    ]
    sent = printSentence(depSentence)
    ret = str(sent).replace("\n", "").replace(" ", "").replace(
        " (", "{").replace("(", "{").replace(")", "}").replace(
        " ", "{").replace("}{", "}}{") + "}"
    return ret
def proceed(self, textDataFile):
    javaHomePath = self.configs.get('Java', 'JAVA_HOME')
    sdpPath = self.configs.get('StanfordNLP', 'SDP_HOME_PATH')
    # verify the java's home
    os.environ['JAVAHOME'] = javaHomePath
    # verify the stanford dependency parser
    os.environ['STANFORD_PARSER'] = sdpPath
    os.environ['STANFORD_MODELS'] = sdpPath
    dep_parser = StanfordDependencyParser(
        model_path=self.configs.get('StanfordNLP', 'SDP_MODEL_PATH'))
    depGraph = nx.DiGraph()
    #textDataFile = unicode(textDataFile, errors='ignore')
    sentences = sent_tokenize(textDataFile)
    print('Sentence spliting total -> [{}] sentences !'.format(len(sentences)))
    for index, sentence in enumerate(sentences):
        result = dep_parser.raw_parse(sentence)
        for dep in result:
            for index, triple in enumerate(list(dep.triples())):
                # print('{} -> {}'.format(index, triple))
                startVertex = '{}_[{}]'.format(triple[0][0], triple[0][1])
                endVertex = '{}_[{}]'.format(triple[2][0], triple[2][1])
                depGraph.add_edge(startVertex, endVertex, semantic_label=triple[1])
    # visualizing the graph
    # drawGraph = depGraph
    # plt.figure(figsize=(10,10))
    #
    # graph_pos = nx.spring_layout(drawGraph)
    # nx.draw_networkx_nodes(drawGraph,
    #                        graph_pos, node_size=2000,
    #                        node_color='blue', alpha=0.9, label=None)
    #
    # nx.draw_networkx_edges(drawGraph, graph_pos, arrows=True)
    #
    # edge_labels = nx.get_edge_attributes(drawGraph,'semantic_label')
    # nx.draw_networkx_edge_labels(drawGraph, graph_pos, font_size=15,
    #                              edge_labels = edge_labels)
    # nx.draw_networkx_labels(drawGraph, graph_pos, font_size=9,
    #                         font_color='white', font_family='sans-serif')
    return depGraph
def parse_sentence(user_input):  # returns root word, triples of StanfordDependencyParser
    from nltk.parse.stanford import StanfordDependencyParser
    path_to_jar = config.CORENLP_JAR_PATH
    path_to_models_jar = config.CORENLP_MODELS_PATH
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    # os.environ['JAVA_HOME'] = 'C:\\ProgramData\\Oracle\\Java\\javapath'
    result = dependency_parser.raw_parse(user_input)
    dep = next(result)  # get next item from the iterator result
    return dep.triples(), dep.root["word"]
def lambda_function(event, context):
    #STANFORD
    from nltk.parse.stanford import StanfordDependencyParser
    path_to_jar = '../lib/stanford-parser/stanford-parser.jar'
    path_to_models_jar = '../lib/stanford-parser/stanford-parser-3.6.0-models.jar'
    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar,
                                                 path_to_models_jar=path_to_models_jar)
    result = dependency_parser.raw_parse(event)
    dep = result.next()
    a = list(dep.triples())
    #print a
    #print len(a)
    a = get_b_q(a)
    make_graph(a[0], a[1])
def get_links(queries):
    os.environ['CLASSPATH'] = "/infolab/node4/lukuang/Stanford/stanford-parser-full-2016-10-31/stanford-parser.jar:"
    os.environ['CLASSPATH'] += "/infolab/node4/lukuang/Stanford/stanford-parser-full-2016-10-31/stanford-parser-3.7.0-models.jar"
    parser = StanfordDependencyParser(model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
    links = {}
    for day in queries:
        links[day] = {}
        print "Process day %s" % (day)
        for qid in queries[day]:
            print "\tProcess query %s" % (qid)
            query_text = queries[day][qid]
            # print query_text
            triples = [list(parse.triples()) for parse in parser.raw_parse(query_text)][0]
            # print triples
            query_links = []
            for t in triples:
                a_link = "%s %s" % (procss_unit(t[0][0]), procss_unit(t[2][0]))
                query_links.append(a_link)
                # print "add link %s to query %s" %(a_link,qid)
            links[day][qid] = query_links
    return links
def get_dependency_tree(self):
    sentence = if_then_parsing(self.text)
    self.logic_text = sentence
    #path_to_jar = '/Users/jane_C/Documents/CMU/Courses/10701-MachineLearning/project/KnowledgeLearning/lib/stanford-parser/stanford-parser.jar'
    #path_to_models_jar = '/Users/jane_C/Documents/CMU/Courses/10701-MachineLearning/project/KnowledgeLearning/lib/stanford-parser/stanford-parser-3.5.2-models.jar'
    path_to_jar = '../lib/stanford-parser/stanford-parser.jar'
    path_to_models_jar = '../lib/stanford-parser/stanford-parser-3.5.2-models.jar'
    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar,
                                                 path_to_models_jar=path_to_models_jar)
    sentence_parse = dependency_parser.raw_parse(sentence)

    tokenList = []
    tokenInfo = {}
    tokenInfo["content"] = "ROOT"
    tokenInfo["pos"] = "ROOT"
    tokenInfo["head"] = -1
    tokenInfo["children"] = []
    tokenInfo["if_then"] = -1
    root = Token(0, tokenInfo)
    tokenList.append(root)

    left2right = True
    left2right_point = -1
    index = 0
    for sent in sentence_parse:
        sent_conll = sent.to_conll(10)
        tokens = sent_conll.split("\n")
        index = 0
        for term in tokens:
            index += 1
            tokenInfo = {}
            parse = term.strip().split("\t")
            if term == "" or len(parse) < 10:
                continue
            if parse[1] == ">" or parse[1] == "<":
                if parse[1] == "<":
                    left2right = False
                    left2right_point = index
                #continue
            tokenInfo["content"] = parse[1]
            tokenInfo["pos"] = parse[4]
            tokenInfo["head"] = int(parse[6])
            tokenInfo["children"] = []
            tokenInfo["if_then"] = 0
            t = Token(index, tokenInfo)
            tokenList.append(t)

    if left2right:
        for i in range(left2right_point, len(tokenList)):
            tokenList[i].if_then = 1
    else:
        for i in range(1, left2right_point):
            tokenList[i].if_then = 1
        tokenList[left2right_point].if_then = -1

    for i in range(1, len(tokenList)):
        token = tokenList[i]
        tokenList[token.head].children.append(i)
    self.tokens = tokenList
        words[i] = tmp
    else:
        Distinct[words[i]] = tmp
#print Distinct

sentence = ""
for word in words:
    if word in string.punctuation:
        continue
    sentence += word + " "
sentence = sentence.strip()

entityList = re.findall(regex, sentence)
N = len(entityList)
if N > 1:
    #print sentence
    edges = [list(parse.triples()) for parse in dep_parser.raw_parse(sentence)]
    #print edges
    G = {}
    relation = {}
    case = {}
    POS = {}
    Pa = {}
    for edge in edges[0]:
        POS[edge[0][0]] = edge[0][1]
        POS[edge[2][0]] = edge[2][1]
        if edge[1] == 'det':
            continue
        if edge[1] == 'case':
            case[edge[0][0]] = edge[2][0]
            continue
        relation[(edge[0][0], edge[2][0])] = edge[1];
'''
Created on Mar 11, 2016

@author: zhongzhu
'''
import os

from nltk.parse.stanford import StanfordDependencyParser
from nltk.parse.stanford import StanfordParser
from nltk.tag import StanfordNERTagger
from nltk.tag.stanford import StanfordPOSTagger


st = StanfordPOSTagger('english-bidirectional-distsim.tagger')
st.tag('What is the airspeed of an unladen swallow ?'.split())

st = StanfordNERTagger('english.all.3class.distsim.crf.ser.gz')
st.tag('Rami Eid is studying at Stony Brook University in NY'.split())

parser = StanfordParser(model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
list(parser.raw_parse("the quick brown fox jumps over the lazy dog"))

dep_parser = StanfordDependencyParser(model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
print [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")]
from nltk.parse import malt
mp = malt.MaltParser('../lib/maltparser-1.9.0', '../lib/engmalt.linear-1.7.mco')
print mp.parse_one('I shot an elephant in my pajamas .'.split()).tree()
millis2 = int(round(time.time() * 1000))
print millis2-millis1
'''

millis2 = int(round(time.time() * 1000))

#STANFORD
from nltk.parse.stanford import StanfordDependencyParser
path_to_jar = '../lib/stanford-parser/stanford-parser.jar'
path_to_models_jar = '../lib/stanford-parser/stanford-parser-3.6.0-models.jar'
dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)

result = dependency_parser.raw_parse('I shot an elephant in my sleep')
dep = result.next()
a = list(dep.triples())
print a
print a[0]
print a[0][0]
print a[0][0][0]
millis3 = int(round(time.time() * 1000))
print millis3-millis2

millis4 = int(round(time.time() * 1000))
print millis4-millis3
           for t in token.lefts], right=[t.orth_ for t in token.rights])

# set java path
import os
java_path = r'C:\Program Files\Java\jdk1.8.0_102\bin\java.exe'
os.environ['JAVAHOME'] = java_path

from nltk.parse.stanford import StanfordDependencyParser
sdp = StanfordDependencyParser(path_to_jar='E:/stanford/stanford-parser-full-2015-04-20/stanford-parser.jar',
                               path_to_models_jar='E:/stanford/stanford-parser-full-2015-04-20/stanford-parser-3.5.2-models.jar')

result = list(sdp.raw_parse(sentence))
result[0]

[item for item in result[0].triples()]

dep_tree = [parse.tree() for parse in result][0]
print dep_tree
dep_tree.draw()

# generation of annotated dependency tree shown in Figure 3-4
from graphviz import Source
dep_tree_dot_repr = [parse for parse in result][0].to_dot()
source = Source(dep_tree_dot_repr, filename="dep_tree", format="png")
source.view()
class DepParser:
    def __init__(self):
        self.parser = StanfordDependencyParser(path_to_jar=config.STANFORD_PARSER_JAR,
                                               path_to_models_jar=config.STANFORD_PARSER_MODEL)

    def get_entity_pairs(self, text):
        pairs = []
        sents = nltk.sent_tokenize(text)
        for sent in sents:
            pairs.extend(self._get_entity_pairs(sent))
        return pairs

    def _get_entity_pairs(self, sent):
        #words = nltk.word_tokenize(sent)
        relations = [list(parse.triples()) for parse in self.parser.raw_parse(sent)]
        """
        print '***RELATIONS***'
        for r in relations[0]:
            print r
        """
        nnp_relations = self.filter_for_NNP(relations)
        print '***ONLY NAMED ENTITIES***'
        for r in nnp_relations:
            print r
        pairs = self.build_relation_pairs(nnp_relations, sent)
        return pairs

    def build_compound_dict(self, relations, words):
        compound_dict = collections.defaultdict(list)
        # works on the assumption that there are usually not many shared last names
        # so we can use the last name as the anchor for a compound NNP
        in_progress = False
        current = ''
        for r in relations:
            if r[1] == 'compound':
                # To prevent "Taipei, Taiwan" from being considered a compound entity
                if r[0][0] in words and words[words.index(r[0][0]) - 1] == ',':
                    continue
                if r[2][0] in TITLES:
                    continue
                current = r[0]
                compound_dict[r[0]].append(r[2][0])
                in_progress = True
            elif in_progress:
                in_progress = False
                if current[1] != 'NNS':
                    # We want to keep NNS entities because the compound modifiers preceding them
                    # could be important, but we don't want them being a part of set of named entities
                    compound_dict[current].append(current[0])
                current = ''
        # To catch ending compound entities
        if in_progress:
            if current[1] != 'NNS':
                compound_dict[current].append(current[0])
        return compound_dict

    def normalize(self, entity, compound_dict):
        if entity in compound_dict:
            return ' '.join(compound_dict[entity])
        if type(entity) is tuple:
            entity = entity[0]
        return entity

    def build_relation_dict(self, relations, words):
        relation_dict = collections.defaultdict(set)
        related = set()
        for r in relations:
            if r[1] == 'compound' and r[0][0] in words:
                i = words.index(r[0][0])
                if words[i-1] == ',':
                    relation_dict[r[0]].add(r[2])
                    relation_dict[r[2]].add(r[0])
                continue
            #if r[1] in KEY_RELATIONS:
            relation_dict[r[0]].add(r[2])
            relation_dict[r[2]].add(r[0])
            related.add(r[2])
        return relation_dict

    def build_relation_pairs(self, relations, sent):
        pairs = set()
        words = nltk.word_tokenize(sent)
        relation_dict = self.build_relation_dict(relations, words)
        compound_dict = self.build_compound_dict(relations, words)
        subj = self.get_subj(relations)
        subj_norm = self.normalize(subj,compound_dict)
        obj = self.get_obj(relations)
        obj_norm = self.normalize(obj,compound_dict)
        print 'SUBJECT', subj_norm
        print 'OBJECT', obj_norm
        for entity in relation_dict:
            if not self.is_NNP(entity) or entity in STOP_ENTITIES:
                continue
            if subj and subj != entity:
                pairs.add((self.normalize(entity,compound_dict),subj_norm))
                pairs.add((subj_norm,self.normalize(entity,compound_dict)))
            if obj and obj != entity:
                pairs.add((self.normalize(entity,compound_dict),obj_norm))
                pairs.add((obj_norm,self.normalize(entity,compound_dict)))
            for one_deg_sep in relation_dict[entity]:
                if self.is_NNP(one_deg_sep):
                    if entity == one_deg_sep:
                        continue
                    pairs.add((self.normalize(entity,compound_dict),
                               self.normalize(one_deg_sep,compound_dict)))
                for two_deg_sep in relation_dict[one_deg_sep]:
                    if self.is_NNP(two_deg_sep):
                        if entity == two_deg_sep:
                            continue
                        pairs.add((self.normalize(entity,compound_dict),
                                   self.normalize(two_deg_sep,compound_dict)))
        return pairs

    def is_NNP(self, ent):
        return ent[1] in ['NNP','NNPS','NNS']

    def filter_for_NNP(self, relations):
        return [r for r in relations[0] if self.is_NNP(r[0]) or self.is_NNP(r[2])]

    def get_subj(self, relations):
        for r in relations:
            if 'subj' in r[1] or r[1] == 'agent':
                subj = r[2]
                if self.is_NNP(r[2]):
                    return r[2]
        for r in relations:
            if r[0] == subj and self.is_NNP(r[2]):
                return r[2]

    def get_obj(self, relations):
        for r in relations:
            if 'obj' in r[1]:
                obj = r[2]
                if self.is_NNP(r[2]):
                    return r[2]
        for r in relations:
            if r[0] == obj and self.is_NNP(r[2]):
                return r[2]
class Evaluator(object):
    def __init__(self):
        self.data = None
        self.rules = []
        self.tree = None
        self.nodeList = []
        self.landmarks = []
        self.s = None
        self.t = None
        self.dependencies = []
        self.rebuiltDependencies = []
        self.minPath = []
        self.metaPath = []
        self.minPathLength = 999
        self.path = '.\InspirationSet\Paths.txt'
        self.ruleList = []
        self.rulePath = '.\InspirationSet\Rules.txt'
        self.learnedPaths = self.parsePaths(self.path)
        self.pathCountsPath = '.\InspirationSet\PathCounts.txt'
        f = open(self.pathCountsPath,'r')
        self.trainingPathCounts = cPickle.load(f)
        self.pathCounts = np.zeros(len(self.learnedPaths))

        # load in rules
        f = open(self.rulePath, 'r')
        self.knownRules = cPickle.load(f)
        f.close()

        # dependency parsers to build parse tree
        #os.environ['JAVA_HOME'] = 'C:/Program Files (x86)/Java/jre1.8.0_65/bin/java.exe'
        self.path_to_jar = 'stanford-parser-full-2015-12-09/stanford-parser.jar'
        self.path_to_models_jar = 'stanford-parser-full-2015-12-09/stanford-parser-3.6.0-models.jar'
        self.dependencyParser = StanfordDependencyParser(path_to_jar=self.path_to_jar,
                                                         path_to_models_jar=self.path_to_models_jar)

    # evaluates the line
    def evaluateLine(self, line):
        # clear previous data
        self.ruleList = []
        self.processLine(line)
        #for i in self.dependencies:
        #    print i

        # reset the path count numbers
        self.pathCounts = np.zeros(len(self.learnedPaths))
        for path in self.learnedPaths:
            #print path
            self.parseRules(path)
        score = (self.pathCounts * self.trainingPathCounts).sum()

        # upload known rules
        # observe that we do not need to upload these rules. They were never stored to memory
        f = open(self.rulePath, 'r')
        knownRules = cPickle.load(f)
        f.close()
        for i in self.ruleList:
            if i in self.knownRules:
                #print i
                score += 100
        return score

    # builds and modifies the dependencies
    def processLine(self, line):
        # first derive the tree
        result = self.dependencyParser.raw_parse(line)
        dependencies = result.next()
        self.dependencies = list(dependencies.triples())

        # build the tree
        self.buildTrees(self.dependencies)

        # now combine compounds
        self.combineCompounds()
        self.prependAdjectiveWrapper()
        try:
            self.unificationWrapper()
        except:
            print 'unification crashed!'

        # creates the new list of dependencies
        self.treeToDependencies()
        #for i in self.dependencies:
        #    print i

    # creates the list of dependencies from the tree
    def treeToDependencies(self):
        self.rebuiltDependencies = []
        # start at root and move down
        self.nodeToTuple(self.tree.root)
        self.dependencies = self.rebuiltDependencies

    # creates a list tuple for the node
    def nodeToTuple(self, Node):
        if len(Node.children) == 0:
            # we are done with this node
            return
        # create governor values
        g = (Node.value, Node.type)
        # depends on the children
        for child in Node.children:
            r = child.edge.relationship
            d = (child.value, child.type)
            self.rebuiltDependencies.append((g, r, d))
            self.nodeToTuple(child)

    def parsePaths(self, rulesPath):
        paths = []
        f = open(rulesPath, 'r')
        eof = False
        while not eof:
            try:
                path = cPickle.load(f)
                if path not in paths:
                    paths.append(path)
            except:
                eof = True
        f.close()
        return paths

    # uploads data from different sources
    def parseData(self, path):
        f = open(path, 'r')
        text = f.read()

        # delete out hyperlinks and references
        procText = ''
        ignore = False
        punctuation = ['.', ',', ';', '-', "'"]
        for i in text:
            if (i.isalnum() or i.isspace() or i in punctuation) and not ignore:
                procText += i
            # need to ignore references
            if i == '[' or i =='(':
                ignore = True
            elif i == ']' or i == ')':
                ignore = False

        text = procText.split('. ')
        data = []
        for line in text:
            # double end of lines means there is a break in sentences
            line = line.split('\n\n')
            for sent in line:
                sent = sent.replace('\n', '')
                if sent != '':
                    data.append(sent)
        return data

    def createTree(self, dependencies):
        # find the root first
        idx, root = self.findRoot(dependencies)
        # build the tree
        self.tree = Tree.Tree(root, dependencies, idx)
        self.tree.buildTree()

    def findRoot(self, dependencies):
        # finds the root of the tree by find the head that has no dependencies
        for i, (g1, r1, d1) in enumerate(dependencies):
            isDependent = False
            for (g2, r2, d2) in dependencies:
                if g1[0] == d2[0]:
                    isDependent = True
            if not isDependent:
                return i, g1[0]

    def textToRules(self, rawText):
        valuations = []
        # 3 step process
        # 1. Convert raw text to dependency graph
        # 2. Convert dependency graph to cfg
        # 3. Extract valuations
        # 4. Convert valuations to 1st order logic

        # 1. Convert raw text to dependency graph
        # http://stackoverflow.com/questions/7443330/how-do-i-do-dependency-parsing-in-nltk/33808164#33808164
        # First parse text into atomic dependencies
        result = self.dependencyParser.raw_parse(rawText)
        # list of dependency for each word
        dependencies = result.next()
        self.dependencies = list(dependencies.triples())
        #return valuations, dependencyList
        #print dependencyList

        self.buildTrees(self.dependencies)
        self.combineCompounds()
        self.prependAdjectiveWrapper()

        # creates the new list of dependencies
        self.treeToDependencies()

        # a series of joining common areas of the graph.
        # we can learn these!!! (learn common combinations from training data)
        self.parseRules(self.dependencies)
        #self.rootParse(dependencyList)

        # Extract valuations
        #valuations = self.extractVerbs(dependencyList)

    # combines all compounds
    def combineCompounds(self):
        # the final compound will take the POS tag of the parent
        self.addCompound(self.tree.root)

    # the node takes value from its children with compound relationships
    def addCompound(self, Node):
        if len(Node.children) == 0:
            # nothing to do here
            return
        popL = []
        s = ''
        for i,child in enumerate(Node.children):
            # check to see if it is a compound
            if child.edge.relationship == 'compound':
                s += child.value + '_'
                popL.append(i)
            else:
                self.addCompound(child)
        popL.reverse()
        # remove compound children
        for i in popL:
            Node.children.pop(i)
        # give the node its full name
        Node.value = s + Node.value

    # prepends adjectives
    def prependAdjectiveWrapper(self):
        self.prependAdjective(self.tree.root)

    # prepends JJ to each node from its children
    def prependAdjective(self, Node):
        if len(Node.children) == 0:
            # nothing to do here
            return
        popL = []
        s = ''
        for i,child in enumerate(Node.children):
            # check to see if it is a compound
            if child.type == 'JJ':
                s += child.value + '_'
                popL.append(i)
            else:
                self.prependAdjective(child)
        popL.reverse()
        # remove compound children
        for i in popL:
            Node.children.pop(i)
        # give the node its full name
        Node.value = s + Node.value

    # unifies the {W*} PoS to a noun ancestor and PRP
    def unificationWrapper(self):
        self.unificationPronoun(self.tree.root)
        self.unificationW(self.tree.root)

    def unificationPronoun(self, Node):
        pass

    def unificationW(self, Node):
        if Node.type == 'WP':
            # return node of ancestor whose parent is connected by acl:relcl
            value, type = self.findRelationship(Node, 'acl:relcl')
            Node.value = value; Node.type = type
        elif len(Node.children) == 0:
            pass
        else:
            for child in Node.children:
                self.unificationW(child)

    # returns the type and value of a node that is connected to a parent by the specified relationship
    def findRelationship(self, Node, relationship):
        if Node.edge.relationship == relationship:
            return Node.parent.value, Node.parent.type
        else:
            return self.findRelationship(Node.parent, relationship)

    def concatenateCompounds(self, dependencies, governor, parent):
        # we want to return the last compound
        window = False
        compound = False
        for i,(g, r, d) in enumerate(dependencies):
            if window == False and g[0] == parent and d[0] == governor:
                # we can start to consider compounds
                window = True
            elif window == True and g[0] != parent and d[0] == governor:
                # we have come across a different node with the same value
                window = False
                # we are done
                break
            elif window == True and g[0] == governor and r == 'nummod':
                compound = d[0]
            elif window == True and g[0] == governor and r == 'compound':
                compound = d[0]
            # adjective
            elif window == True and g[0] == governor and r == 'amod':
                compound = d[0]
        return compound

    # builds both the main tree and the substructures
    def buildTrees(self, dependencies):
        # find the root
        self.createTree(dependencies)
        # build substructures for xcomp
        #self.parseXComp(dependencies)

    def rootParse(self, dependencies):
        # write rules to a document
        f = open('C:\Users\jkjohnson\Documents\CS 673\Alvin-master\Star Wars Data\Rules.txt', 'ab')
        # loop through and find triangles
        for i, (g, r, d) in enumerate(dependencies):
            if g[1][0] == 'V':
                # verb nodes
                vNodes = set([])
                # noun nodes
                nNodes = set([])
                self.tree.findNodeWrapper(g[0], g[1], '', '', 'buildtree')
                n = self.tree.foundNode
                # this is the case where the node has already been evaluated
                if n == None:
                    continue
                # look for rules with children
                for child in n.children:
                    #print 'looking for children of', g[0]
                    if child.type[:2] == 'NN' or child.type == 'PRP' or child.type == 'WP':
                        # we can never use this node for another purpose
                        #child.checked = True
                        nNodes.add(child)
                    elif child.type[:1] == 'V':
                        # these are very interesting
                        vNodes.add(child)
                print g[0], len(nNodes), len(vNodes)
                # pull data from nodes
                nNL, vNL, tNL, rNL = self.organizeNodes(nNodes, dependencies)
                nVL, vVL, tVL, rVL = self.organizeNodes(vNodes, dependencies)
                if len(nNL) == 1:
                    # extract the node
                    #n = nodes.pop()
                    pass
                    #print g[0] + "(" + n.value + ")", n.edge.relationship
                # we can look for certain combinations of nouns and relationships
                elif len(nNL) >= 2:
                    # classic structure of a subject and direct object
                    if 'nsubj' in rNL and 'dobj' in rNL:
                        rule = g[0] + "(" + vNL[rNL.index('nsubj')] + ", " + vNL[rNL.index('dobj')] + ")"
                        f.write(rule + '\n')
                        print rule
                    elif 'nsubj' in rNL and 'xcomp' in rNL:
                        rule = g[0] + "(" + vNL[rNL.index('nsubj')] + ", " + vNL[rNL.index('xcomp')] + ")"
                        f.write(rule + '\n')
                        print rule
                    elif 'nsubj' in rNL and 'nmod' in rNL:
                        rule = g[0] + "(" + vNL[rNL.index('nsubj')] + ", " + vNL[rNL.index('nmod')] + ")"
                        f.write(rule + '\n')
                        print rule
                    elif 'nsubjpass' in rNL and 'nmod' in rNL:
                        '''
                        if 'auxpass' in rVL:
                            rule = vVL[rVL.index('auxpass')] + '_' + g[0] + "(" + vNL[rNL.index('nsubjpass')] + ", " + vNL[rNL.index('nmod')] + ")"
                            f.write(rule + '\n')
                            print rule
                        '''
                        rule = g[0] + "(" + vNL[rNL.index('nmod')] + ", " + vNL[rNL.index('nsubjpass')] + ")"
                        f.write(rule + '\n')
                        print rule
                if len(nVL) > 0:
                    # right now, we are just looking for conjunctions
                    # conjunction
                    if 'conj' not in rVL:
                        # save the trouble of looking for anything else for now. Maybe need something later!!!
                        continue
                    # there may be multiple conjunctions
                    for verbNode in nVL:
                        if verbNode.edge.relationship == 'xcomp':
                            if 'nsubj' in rNL:
                                rule = g[0] + "_" + self.tree.xcompD[verbNode.value]['verbConj'] + \
                                    "(" + vNL[rNL.index('nsubj')] + ", " + self.tree.xcompD[verbNode.value]['dobjConj'] + ")"
                        elif verbNode.edge.relationship == 'conj':
                            #print 'right here', verbNode.value
                            #print rNL
                            value = ''; adverb = ''
                            for child in verbNode.children:
                                if child.edge.relationship == 'dobj' or child.edge.relationship == 'xcomp':
                                    value = child.value
                                    compound = self.concatenateCompounds(dependencies, value, child.parent)
                                    if compound != False:
                                        value = compound + ' ' + value
                                elif child.edge.relationship == 'advmod':
                                    adverb = child.value
                            # go back and use the parent nmod
                            if value == '':
                                if 'nmod' in rNL:
                                    value = vNL[rNL.index('nmod')]
                                elif 'xcomp' in rNL:
                                    value = vNL[rNL.index('xcomp')]
                            if 'nsubj' in rNL:
                                # verb joined to head subject of head verb
                                rule = verbNode.value + "(" + vNL[rNL.index('nsubj')] + ", " + value + ")"
                                f.write(rule + '\n')
                                print rule
                            elif 'nsubjpass' in rNL:
                                # verb joined to head subject of head verb
                                rule = verbNode.value + "(" + value + ", " + vNL[rNL.index('nsubjpass')] + ")"
                                f.write(rule + '\n')
                                print rule
            # very simple rule for adjectives
            '''
            elif d[1] == 'JJ':
                # find any compounds
                newValue = ''
                comp = self.concatenateCompounds(dependencies, g[0])
                if comp == False:
                    newValue = g[0]
                else:
                    newValue = comp + " " + g[0]
                rule = d[0] + "(" + newValue + ")"
                f.write(rule + '\n')
                print rule
            '''
        f.close()

    # pops the nodes out of the set and also creates lists of their data
    def organizeNodes(self, nodeSet, dependencies):
        # structures to hold node data
        nodeL = []; valueL = []; typeL = []; relationL = []
        while len(nodeSet) > 0:
            n = nodeSet.pop()
            # find any compounds
            comp = self.concatenateCompounds(dependencies, n.value, n.parent)
            if comp == False:
                pass
            else:
                n.value = comp + " " + n.value
            # switch out proper nouns
            # !!!
            valueL.append(n.value)
            typeL.append(n.type)
            relationL.append(n.edge.relationship)
            nodeL.append(n)
        return nodeL, valueL, typeL, relationL

    def findParent(self, dependencies, (gV, gT), i):
        for j, (g, r, d) in enumerate(dependencies[:i]):
            # it can only be the parent
            if d[0] == gV and d[1] == gT:
                return g[0], g[1], r