Python Tree.fromstring示例，nltk.tree.Tree.fromstring Python示例

示例#1

0

显示文件

文件： parser.py 项目： jonasrothfuss/equity_news_thesis

    def parse_tree(self, text, binary=False, preprocessed=False):
        """Parse ``text`` and return one tree covering all of its sentences.

        The text is sent to ``self.nlp.annotate`` with the ``parse`` annotator.
        Each sentence's bracketed parse is read with ``Tree.fromstring`` and its
        outermost node is dropped.  When there is more than one sentence the
        remaining subtrees are re-attached under a new ``Top`` node.

        Args:
            text: the text to parse.
            binary: if true, convert the result to Chomsky normal form in place.
            preprocessed: if true, pass the result through
                ``preprocess_parse_tree`` before returning it.

        Returns:
            The merged (and optionally transformed) tree.

        Raises:
            IndexError: if the annotator returns no sentences.
        """
        nlp_output = self.nlp.annotate(text, properties={
            'annotators': 'tokenize,ssplit,pos,parse',
            'outputFormat': 'json',
            'parse.binaryTrees': 'true'
        })
        # annotate() may hand back undecoded JSON text; decode it in that case.
        if isinstance(nlp_output, str):
            nlp_output = json.loads(nlp_output, strict=False)

        sentences = nlp_output['sentences']
        if len(sentences) > 1:
            # Merge: strip every sentence's root and hang what is left under
            # a single "Top" node.
            rootless = (str(Tree.fromstring(s['parse'])[0]) for s in sentences)
            merged_tree = Tree.fromstring("(Top " + "".join(rootless) + ")")
        else:
            # Single sentence: only the root has to be removed.
            merged_tree = Tree.fromstring(sentences[0]['parse'])[0]

        if binary:
            nltk.treetransforms.chomsky_normal_form(merged_tree)

        if preprocessed:
            merged_tree = preprocess_parse_tree(merged_tree)

        return merged_tree

示例#2

0

显示文件

文件： simplify.py 项目： DerrickZhu1/11611teamproject-YenYuan-

def removeNounMods(tree):
    """Apply the internal-modifier and participle-modifier removals in turn.

    Each tsurgeon operation returns a bracketed tree string; an empty string
    means there is no replacement, in which case the current tree is kept.
    The second operation runs on the result of the first.
    """
    for strip_mods in (tsurgeon.remove_internal_mods,
                       tsurgeon.remove_participle_mods):
        rewritten = strip_mods(tree)
        if rewritten != '':
            tree = Tree.fromstring(rewritten)
    return tree

示例#3

0

显示文件

文件： old_parser_scorer.py 项目： jonpiffle/ltag_parser

def parser_output_to_parse_deriv_trees(output):
    """Split interleaved parser output into parse trees and derivation trees.

    After stripping, even-indexed lines are read as derivation trees and
    odd-indexed lines as parse trees; empty lines are ignored.  In parse-tree
    lines the ``\\x06`` character is replaced by ``epsilon_`` before parsing.

    Returns:
        A ``(parse_trees, deriv_trees)`` tuple of lists.
    """
    rows = output.strip().split("\n")

    parse_trees = []
    for row in rows[1::2]:
        if row != '':
            parse_trees.append(
                Tree.fromstring(row.replace('\x06', 'epsilon_')))

    deriv_trees = []
    for row in rows[0::2]:
        if row != '':
            deriv_trees.append(Tree.fromstring(row))

    return parse_trees, deriv_trees

示例#4

0

显示文件

文件： test_baselines.py 项目： acapello/PLN-2015

    def test_flat_parse(self):
        """A Flat model built from no trees parses each sentence to a flat tree."""
        flat_model = Flat([], 'S')  # empty training set

        expected = [
            Tree.fromstring(bracketing) for bracketing in (
                "(S (D El) (N gato) (V come) (N pescado) (P .))",
                "(S (D La) (N gata) (V come) (N salmón) (P .))",
            )
        ]
        parsed = [flat_model.parse(sent) for sent in self.tagged_sents]

        self.assertEqual(parsed, expected)

示例#5

0

显示文件

文件： test_baselines.py 项目： acapello/PLN-2015

    def test_lbranch_parse(self):
        """An LBranch model built from no trees yields left-branching trees."""
        lbranch_model = LBranch([], 'S')  # empty training set

        expected = [
            Tree.fromstring(bracketing) for bracketing in (
                "(S (S|<> (S|<> (S|<> (D El) (N gato)) (V come)) (N pescado)) (P .))",
                "(S (S|<> (S|<> (S|<> (D La) (N gata)) (V come)) (N salmón)) (P .))",
            )
        ]
        parsed = [lbranch_model.parse(sent) for sent in self.tagged_sents]

        self.assertEqual(parsed, expected)

示例#6

0

显示文件

文件： simplify.py 项目： DerrickZhu1/11611teamproject-YenYuan-

def extractParticiple(tree):
    """Build a "<subject> is <participle phrase>." sentence from ``tree``.

    Returns the sentence string, or ``None`` when
    ``tsurgeon.hasParticipleMod`` reports no participle modifier (empty
    string).
    """
    part_mod = tsurgeon.hasParticipleMod(tree)
    if part_mod == '':
        return None

    subject = tsurgeon.findSubject(tree)
    subject_words = Tree.fromstring(subject).leaves()
    participle_words = Tree.fromstring(part_mod).leaves()
    # Ignoring inflection: the first word of the modifier is dropped and a
    # fixed "is" is inserted after the subject.
    words = subject_words + ['is'] + participle_words[1:]
    return ' '.join(words).strip() + '.'

示例#7

0

显示文件

文件： try_parse_1.py 项目： folagit/resumatcher

def test_tree4():   
    """Annotate one sample sentence, print its syntax tree and draw it.

    NOTE: this block uses Python 2 print statements.
    """
    
    annotator=Annotator()
    # Scratch list of candidate sentences: every assignment overwrites the
    # previous one, so only the last value of ``sent`` is actually annotated.
    sent = "There are people dying make this world a better place for you and for me."
    sent = "Biplab is a good boy." 
    sent = "He created the robot and broke it after making it."
    sent = "Bachelor 's degree in computer science , design or related field."    
    sent = "B.S. in Computer Science , a related degree or its equivalent"    
    sent = "BS , MS , or PhD in Computer Science or a similar field preferred"
    sent = "Computer Science or related technical degree from an accredited four year university "
    sent = "Degree in Computer Science or Engineering with a high GPA ."    
    sent = "A Master's degree in Computer Science or Engineering is mandatory ."
    
    sent = "A Computer Science or related degree "
    sent = "I love science and SciFi book"
    sent = "I love music and SciFi book"
   
    result = annotator.getAnnotations(sent)
    tree_str = result['syntax_tree']
    print     
    print tree_str
    
    # [0] selects the first child of the parsed tree, i.e. drops the outermost node.
    tree = Tree.fromstring(tree_str)[0]
    print
    print "Root label=",tree.label()
    tree.draw()

示例#8

0

显示文件

文件： 7_3_SVArule.py 项目： eachsaj/Python-Natural-Language-Processing

def rulelogic(sentnece):
    """Print a subject-verb agreement verdict for the first parsed sentence.

    The sentence is annotated through the module-level ``nlp`` client; the
    constituency parse of the first sentence is scanned for pronouns (PRP)
    and for verbs tagged VBP or VBZ, and a message is printed accordingly.
    Nothing is returned.

    NOTE: this block uses Python 2 print statements.
    """
    leaves_list = []
    text = (sentnece)

    output = nlp.annotate(text, properties={
        'annotators': 'tokenize,ssplit,pos,depparse,parse',
        'outputFormat': 'json'
    })
    # Only the first sentence of the annotation is inspected.
    parsetree = output['sentences'][0]['parse']
    #print parsetree
    for i in Tree.fromstring(parsetree).subtrees():
        if i.label() == 'PRP':
            #print i.leaves(), i.label()
            # Pronouns are stored as their leaf list, e.g. ['He'].
            leaves_list.append(i.leaves())
        if i.label() == 'VBP' or i.label() == 'VBZ':
            #print i.leaves(), i.label()
            # Verbs are stored as the tag string itself, not the word.
            leaves_list.append(i.label())
    #print leaves_list
    # leaves_list mixes lists (pronouns) and strings (verb tags); ``in`` is a
    # membership test on the former and a substring test on the latter.
    # The pronoun comparison is case-sensitive (capitalised forms only).
    if (any("We" in x for x in leaves_list) or any("I" in x for x in leaves_list) or any(
                    "You" in x for x in leaves_list) or any("They" in x for x in leaves_list)) and any("VBZ" in x for x in leaves_list):
        print "Alert: \nPlease check Subject and verb in the sentence.\nYou may have plural subject and singular verb. "
    elif(any("He" in x for x in leaves_list) or any("She" in x for x in leaves_list) or any(
                    "It" in x for x in leaves_list)) and any("VBP" in x for x in leaves_list):
        print "Alert: \nPlease check subject and verb in the sentence.\n" \
              "You may have singular subject and plural verb."
    else:
        print "You have correct sentence."

示例#9

0

显示文件

文件： simplify.py 项目： DerrickZhu1/11611teamproject-YenYuan-

def removeLeadingMods(tree):
    """Repeatedly strip leading modifiers until the tree stops changing.

    ``tsurgeon.remove_leading_mods`` is applied again and again; the loop
    ends when it returns an empty string or a tree equal to its input, and
    the last tree obtained is returned.
    """
    current = tree
    while True:
        rewritten = tsurgeon.remove_leading_mods(current)
        if rewritten == '':
            return current
        candidate = Tree.fromstring(rewritten)
        if candidate != current:
            current = candidate
        else:
            return current

示例#10

0

显示文件

文件： gen_question.py 项目： DerrickZhu1/11611teamproject-YenYuan-

def question(inputstr):
    """Generate a list of questions from one input sentence.

    Entities are collected from ``supersense_tag`` and ``named_entities``,
    the sentence is parsed and cleaned, the verb is inverted, and
    ``gen_question_recur`` fills the question list.  Every generated question
    goes through ``cleanup_question``; the output of ``fix_output`` on the
    inverted tree is appended last.

    Args:
        inputstr: the sentence to generate questions from.

    Returns:
        A list of questions.
    """
    entities = supersense_tag(inputstr)
    entities.update(named_entities(inputstr))
    # raw_parse returns an iterator of parses; take the first one.  The
    # builtin next() works on Python 2.6+ and 3, whereas the .next() method
    # exists only on Python 2 iterators.
    main_tree = next(parser.raw_parse(inputstr))
    main_tree_str = clean_sentence(main_tree)

    # TODO: mark_unmovable_tags

    main_tree = inverse_verb(main_tree_str)
    sentence = str(' '.join(Tree.fromstring(main_tree_str).leaves()))
    sentence_inversed = str(' '.join(main_tree.leaves()))
    questions = []
    prep = []  # use to store prep when traverse the tree
    gen_question_recur(main_tree, sentence_inversed, sentence, questions, entities, prep)
    questions = [cleanup_question(q) for q in questions]
    questions.append(fix_output(main_tree))
    return questions

示例#11

0

显示文件

文件： rels.py 项目： OC-NTNU/baleen-python

def tag_var_nodes(vars_dir, trees_dir, tagged_dir):
    """
    Tag variable nodes in tree

    For every ``*.json`` file in ``vars_dir``, read its variable records and
    relabel the corresponding tree nodes with a ``_VAR_<key>`` suffix, where
    ``<key>`` is the record's ``key`` field.
    Will only output those trees containing at least two variables.
    Tagged trees are written, one per line, to a file in ``tagged_dir``
    whose name is derived from the records' ``filename`` field.
    """
    # At first I used the tregex's '-f' option to print the filename,
    # but when traversing the files in a directory,
    # it prints the wrong filenames (after the first one?),
    # so now the filename is encoded in the node label too.
    tagged_dir = Path(tagged_dir)
    tagged_dir.makedirs_p()

    for vars_fname in Path(vars_dir).glob('*.json'):
        # Close the JSON file deterministically instead of leaking the handle.
        with vars_fname.open() as vars_file:
            records = json.load(vars_file)

        # Without records there is no tree filename to look up; previously
        # this case used an unbound (or stale, from the prior file) record.
        if not records:
            continue

        d = defaultdict(list)

        # create a dict mapping each tree number to a list of
        # (nodeNumber, extractName) tuples for its variables
        for record in records:
            pair = record['nodeNumber'], record['key']
            d[record['treeNumber']].append(pair)

        # The filename is taken from the last record of this vars file.
        lemtree_fname = record['filename']
        parses = (Path(trees_dir) / lemtree_fname).lines()
        tagged_parses = []

        for tree_number, pairs in d.items():
            if len(pairs) > 1:
                # tree numbers in records count from one
                tree = Tree.fromstring(parses[tree_number - 1])
                # get NLTK-style indices for all nodes in a preorder
                # traversal of the tree
                positions = tree.treepositions()
                vars_count = 0

                for node_number, key in pairs:
                    # node numbers in records count from one
                    position = positions[node_number - 1]
                    subtree = tree[position]
                    try:
                        subtree.set_label(
                            '{}_VAR_{}'.format(subtree.label(), key))
                    except AttributeError:
                        # Leaves are plain strings and have no label to set.
                        log.error('skipping variable "{}" because it is a leaf '
                                  'node ({})'.format(subtree, key))
                    else:
                        vars_count += 1

                # Leaf variables were skipped, so re-check the count.
                if vars_count > 1:
                    tagged_parses.append(tree.pformat(margin=99999))

        if tagged_parses:
            tagged_fname = derive_path(lemtree_fname, new_dir=tagged_dir)
            log.info('writing tagged trees to ' + tagged_fname)
            tagged_fname.write_lines(tagged_parses)

示例#12

0

显示文件

文件： predict.py 项目： Jasmeet107/serapis

    def add_tree(self, datum):
        """Binarize ``datum["raw_tree"]`` and write its parent/token lines.

        One line of space-separated parent indices is written to
        ``self.parents_file`` and one line of space-separated leaf tokens to
        ``self.sents_file``; ``datum`` is then appended to ``self.trees``.

        Args:
            datum: mapping with a bracketed tree string under ``"raw_tree"``.

        Returns:
            The index of ``datum`` in ``self.trees`` (its ID).
        """
        # parse tree and binarize
        tree = Tree.fromstring(datum["raw_tree"])
        tree.chomsky_normal_form()
        tree.collapse_unary(collapsePOS=True)
        tree = ParentedTree.convert(tree)

        # ``unicode`` only exists on Python 2; referencing it unguarded raises
        # NameError on Python 3 as soon as a node's first child is not a str.
        try:
            text_types = (str, unicode)
        except NameError:
            text_types = (str,)

        # assign indices to subtrees, in traversal order
        indices = {}
        subtree_count = 0
        for subtree_count, t in enumerate(tree.subtrees(), 1):
            indices[t.treeposition()] = subtree_count - 1

        # generate parent pointers and labels
        # (labels = one instance of sent in sents by treelstm terminology)
        # Every subtree except the root has a parent, hence count - 1 slots.
        parents = [0] * (subtree_count - 1)
        labels = []
        slot = 0
        for t in tree.subtrees():
            parent = t.parent()
            if parent is not None:
                parents[slot] = indices[parent.treeposition()]
                slot += 1
            # A node whose first child is a string contributes that token.
            if type(t[0]) in text_types:
                labels.append(t[0])

        self.parents_file.write(" ".join(map(str, parents)) + "\n")
        self.sents_file.write(" ".join(labels) + "\n")
        self.trees.append(datum)
        return len(self.trees) - 1 # ID

示例#13

0

显示文件

文件： test_upcfg.py 项目： acapello/PLN-2015

    def test_productions(self):
        """UPCFG trained on one tree exposes the expected unlexicalized rules."""
        training_tree = Tree.fromstring(
            """
                (S
                    (NP (Det el) (Noun gato))
                    (VP (Verb come) (NP (Noun pescado) (Adj crudo)))
                )
            """)

        # Bugfix from official test (, start='S')
        model = UPCFG([training_tree], start='S')

        actual = model.productions()

        # (left-hand side, right-hand side, probability)
        expected_rules = [
            ('S', [N('NP'), N('VP')], 1.0),
            ('NP', [N('Det'), N('Noun')], 0.5),
            ('Det', ['Det'], 1.0),
            ('Noun', ['Noun'], 1.0),
            ('VP', [N('Verb'), N('NP')], 1.0),
            ('Verb', ['Verb'], 1.0),
            ('NP', [N('Noun'), N('Adj')], 0.5),
            ('Adj', ['Adj'], 1.0),
        ]
        expected = [
            ProbabilisticProduction(N(lhs), rhs, prob=prob)
            for lhs, rhs, prob in expected_rules
        ]

        self.assertEqual(set(actual), set(expected))

示例#14

0

显示文件

文件： segmentation_tree.py 项目： WladimirSidorenko/DiscourseSegmenter

def read_segtree_file(fn):
    """Read a UTF-8 discourse-tree file and return the root's children.

    The file content is parsed with ``Tree.fromstring`` using
    ``prefix_number_seg_token`` as the leaf reader; the result is the list
    of the top-level node's direct children.
    """
    with codecs.open(fn, 'r', 'utf-8') as stream:
        raw = stream.read()
    root = Tree.fromstring(raw, read_leaf=prefix_number_seg_token)
    return list(root)

示例#15

0

显示文件

文件： dseq.py 项目： karins/CoherenceFramework

def find_subtrees(tree, depth):
    """
    Returns all subtrees at a given depth

    Arguments
    ---------
    tree: either an nltk.tree.Tree or a PTB-formatted string
    depth: the target depth

    Returns
    -------
    list of nltk.tree.Tree objects representing the selected subtrees

    >>> ptb_str = "(ROOT (S (NP (DT The) (VBG following)) (VP (VBP are) (NP (NP (JJ major) (NN news) (NNS items)) (PP (IN in) (NP (NP (VBG leading) (JJ Turkish) (NNS newspapers)) (PP (IN on) (NP (NNP Monday))))))) (. .)))"
    >>> ptb_tree = Tree.fromstring(ptb_str)
    >>> subtrees = find_subtrees(ptb_str, 2)  # find_subtrees accepts strings
    >>> [t.label() for t in subtrees]         # and it returns a list of subtrees (objects of the kind nltk.tree.Tree)
    ['NP', 'VP', '.']
    >>> subtrees = find_subtrees(ptb_tree, 3) # and trees
    >>> [t.label() for t in subtrees]
    ['DT', 'VBG', 'VBP', 'NP']
    >>> subtrees = find_subtrees(ptb_tree, 4)
    >>> [t.label() for t in subtrees]
    ['NP', 'PP']
    """
    # Strings are parsed first; anything else is used as given.
    root = Tree.fromstring(tree) if isinstance(tree, str) else tree
    found = []
    # The helper appends matches to ``found`` in place, starting at depth 0.
    _find_subtrees(root, 0, depth, found)
    return found

示例#16

0

显示文件

文件： sentence_parser.py 项目： shuoh/question-processor

    def parse(self, text):
        """
        Parse a single-sentence text through the CoreNLP JSON-RPC server.

        NOTE: since the Stanford tagger and parser libraries are case-sensitive, the casing of the output of this
              method is preserved. Caller must remember to normalize the casing when conducting comparison
        :param text: text to be parsed
        :return: a SentenceParseResult object
        :raises Exception: if the server returns more than one sentence
        """
        protocol = jsonrpc.JsonRpc20()
        transport = jsonrpc.TransportTcpIp(addr=(CORENLP_SERVER_HOST, CORENLP_SERVER_PORT))
        server = jsonrpc.ServerProxy(protocol, transport)

        sentences = loads(server.parse(text))['sentences']
        if len(sentences) > 1:
            raise Exception('Multi-sentence query is not supported')
        only_sentence = sentences[0]

        tokens = [ParsedWordToken(wire_word) for wire_word in only_sentence['words']]
        # tokens = self._recover_contractions(tokens)

        # The "normalized" sentence is the token texts joined by single spaces.
        joined_text = ' '.join([token.text for token in tokens])
        tree = Tree.fromstring(only_sentence['parsetree'])
        dependencies = SentenceWordDependency(only_sentence['dependencies'])

        return SentenceParseResult(word_tokens=tokens,
                                   normalized_sentence=joined_text,
                                   parsed_tree=tree,
                                   word_dependency=dependencies)

示例#17

0

显示文件

文件： client.py 项目： SmartText/EntityExtraction

def extract_entities(pos_server, assimilator, mode, text, link):
    """
    Extract tokens in the buckets of nouns and other entities

    This is a generator.  In ``"meta"`` mode it yields a single
    pretty-printed JSON string of the assimilator's response.  In any other
    mode it yields, for each line produced by ``process_pos``, the result of
    ``read_dep`` on that line's parsed ``tree`` field.

    pos_server: part of speech tagger address
    assimilator: assimilator address
    mode: metadata or content
    text: passed through to ``get_assimilator_data``
    link: passed through to ``get_assimilator_data``
    """
    import json

    content = get_assimilator_data(mode=mode, assimilator=assimilator, text=text, link=link)
    if mode == "meta":
        yield json.dumps(json.loads(content.decode()), indent=4)
    else:
        # Imported lazily so the metadata path does not need these modules.
        from .semantic_parser import read_dep
        from nltk.tree import Tree

        pos_generator = process_pos(pos_server, content=content)
        for line in pos_generator:
            data = json.loads(line.decode())
            tree = Tree.fromstring(data['tree'])

            tokens = read_dep(tree)
            yield tokens

示例#18

0

显示文件

文件： treeprettyprinter.py 项目： CaptainAL/Spyder

def test():
    """Do some tree drawing tests."""
    def print_tree(n, tree, sentence=None, ansi=True, **xargs):
        # Header line: the identifier and the sentence (or the tree's leaves).
        print()
        words = sentence or tree.leaves()
        print('{0}: "{1}"'.format(n, ' '.join(words)))
        print(tree)
        print()
        printer = TreePrettyPrinter(tree, sentence)
        try:
            print(printer.text(unicodelines=ansi, ansi=ansi, **xargs))
        except (UnicodeDecodeError, UnicodeEncodeError):
            # Fall back to plain ASCII output when the terminal cannot
            # encode the fancy rendering.
            print(printer.text(unicodelines=False, ansi=False, **xargs))

    from nltk.corpus import treebank
    sample_indices = [0, 1440, 1591, 2771, 2170]
    for n in sample_indices:
        tree = treebank.parsed_sents()[n]
        print_tree(n, tree, nodedist=2, maxwidth=8)
    print()
    print('ASCII version:')
    # ``tree`` is still the last treebank sample from the loop above.
    print(TreePrettyPrinter(tree).text(nodedist=2))

    # Leaves are integer indices into ``sentence`` (read_leaf=int).
    bracketing = (
        '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
        '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
        '(vg 10) (inf (verb 11)))))) (punct 12))')
    tree = Tree.fromstring(bracketing, read_leaf=int)
    sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
                ' zwemmen of terrassen .'.split())
    print_tree('Discontinuous tree', tree, sentence, nodedist=2)

示例#19

0

显示文件

文件： __init__.py 项目： meyersbs/SPLAT

def yngve_redux(treestring):
	""" For the given parse-tree string, return [yngve score, word count] as floats. """
	parsed = Tree.fromstring(treestring)
	yngve_total = float(calc_yngve_score(parsed, 0))
	word_total = float(get_word_score(parsed))
	return [yngve_total, word_total]

示例#20

0

显示文件

文件： discourse_parsing.py 项目： BinbinBian/discourse-parsing

    def initialize_edu_data(edus):
        '''
        Create a representation of the list of EDUS that make up the input.

        Each EDU is a sequence of pairs whose first element is lowercased
        into ``head`` and whose second element goes into ``hpos``.  Every
        EDU becomes one dict; its head/start/end indices all equal the
        EDU's position in the input, and its tree is ``(text <position>)``.
        '''
        edu_items = []
        # The position doubles as the counter for the distance features.
        for position, edu in enumerate(edus):
            # lowercase all words
            words = [token[0].lower() for token in edu]
            tags = [token[1] for token in edu]

            # make a dictionary for each EDU
            edu_tree = Tree.fromstring('(text)')
            edu_tree.append(str(position))
            edu_items.append({"head_idx": position,
                              "start_idx": position,
                              "end_idx": position,
                              "nt": "text",
                              "head": words,
                              "hpos": tags,
                              "tree": edu_tree})
        return edu_items

示例#21

0

显示文件

文件： simplify.py 项目： DerrickZhu1/11611teamproject-YenYuan-

def removeVerbMods(tree):
    """Repeatedly strip verb modifiers until the tree stops changing.

    ``tsurgeon.remove_verb_modifiers`` is applied again and again; the loop
    ends when it returns an empty string or a tree equal to its input, and
    the last tree obtained is returned.
    """
    current = tree
    while True:
        rewritten = tsurgeon.remove_verb_modifiers(current)
        if rewritten == '':
            return current
        candidate = Tree.fromstring(rewritten)
        if candidate != current:
            current = candidate
        else:
            return current

示例#22

0

显示文件

文件： Util.py 项目： meyersbs/SPLAT

def draw_trees(treestrings):
	""" Print each parse-tree string, then draw it with Tree.draw(). Always returns ''. """
	for bracketed in treestrings:
		print(bracketed)
		Tree.fromstring(bracketed).draw()

	return ''

示例#23

0

显示文件

文件： tree_diff.py 项目： timpalpant/KaggleBillionWordImputation

def main(tree_file1, tree_file2):
    """Compare two files of bracketed trees line by line and print the pairs that differ.

    NOTE: this block uses Python 2 syntax (print statements,
    ``except Exception, e``).
    """
    # Running tallies; nothing in this function prints or returns them.
    same = 0
    different = 0
    # izip stops at the end of the shorter input.
    for line1, line2 in izip(tree_file1, tree_file2):
        try:
            tree1 = Tree.fromstring(line1)
            tree2 = Tree.fromstring(line2)
            d = tree_diff(tree1, tree2)
            if d:
                different += 1
                print tree1
                print tree2
            else: same += 1
        except Exception, e:
            # Best effort: report the error and the offending lines, then
            # carry on with the next pair.
            print e
            print line1
            print line2

示例#24

0

显示文件

文件： test_upcfg.py 项目： acapello/PLN-2015

    def test_parse_no_parse_returns_flat(self):
        """A sentence the grammar cannot parse must come back as a flat tree."""
        training_tree = Tree.fromstring(
            """
                (S
                    (NP (Det el) (Noun gato))
                    (VP (Verb come) (NP (Noun pescado) (Adj crudo)))
                )
            """)
        model = UPCFG([training_tree], start='S')

        # Same words as the training tree, but "gato el" is in swapped order.
        words = 'gato el come pescado crudo'.split()
        pos_tags = 'Noun Det Verb Noun Adj'.split()
        parsed = model.parse(list(zip(words, pos_tags)))

        expected = Tree.fromstring("(S (Noun gato) (Det el) (Verb come) (Noun pescado) (Adj crudo))")
        self.assertEqual(parsed, expected)

示例#25

0

显示文件

文件： TTree.py 项目： tuur/STPS

def tuples_to_tree(tuples):
    """Build a ``TTree`` from an iterable of tuples.

    The first tuple seeds the tree as ``(<tup[0]> <tup[1]>)``; the tree of
    every tuple (the first one included) is then merged in with
    ``add_proj_tree``.  The result is round-tripped through the NLTK tree
    parser before being wrapped in a ``TTree`` again.

    Args:
        tuples: iterable of tuples with at least two elements each.

    Returns:
        The assembled ``TTree``; ``TTree('(_ empty)')`` for empty input.
    """
    tups = list(tuples)
    # Test the materialized list so that every empty iterable (list, tuple,
    # generator, ...) is handled, not only an empty set; the others used to
    # fall through to an IndexError on tups[0].
    if not tups:
        return TTree('(_ empty)')
    first = tups[0]
    t_init = TTree("(" + str(first[0]) + ' ' + str(first[1]) + ")")
    for tup in tups:
        add_proj_tree(t_init, tuple_to_tree(tup))
    return TTree(str(nltktree.fromstring(str(t_init))))

示例#26

0

显示文件

文件： simplify.py 项目： DerrickZhu1/11611teamproject-YenYuan-

def movePP(tree):
    """Return the tree with its leading PP moved, or ``None`` if nothing moved.

    ``tsurgeon.moveLeadingPP`` returns a bracketed tree string; an empty
    string means no replacement is available.
    """
    moved = tsurgeon.moveLeadingPP(tree)
    return Tree.fromstring(moved) if moved != '' else None

示例#27

0

显示文件

文件： penn_tree_bank_reader_test.py 项目： apmoore1/allennlp

    def test_get_gold_spans_correctly_extracts_spans(self):
        """_get_gold_spans records every labelled constituent span of the tree."""
        reader = PennTreeBankConstituencySpanDatasetReader()
        parsed = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")

        gold_spans = {}
        reader._get_gold_spans(parsed, 0, gold_spans)  # pylint: disable=protected-access

        # Compared as an ordered list, so the dict's insertion order matters.
        expected = [((0, 1), 'NP'), ((3, 4), 'NP'), ((2, 4), 'VP'), ((0, 4), 'S')]
        assert list(gold_spans.items()) == expected

示例#28

0

显示文件

文件： xin.py 项目： DerrickZhu1/11611teamproject-YenYuan-

def inverse_verb(main_tree_str):
    """Return the tree for ``main_tree_str`` with its verb moved.

    When ``tsurgeon.test_aux`` accepts the tree string, the auxiliary is
    marked and moved with tsurgeon and the result is parsed; otherwise the
    work is delegated to ``move_no_aux``.
    """
    if not tsurgeon.test_aux(main_tree_str):
        return move_no_aux(main_tree_str)

    marked = tsurgeon.mark_aux(main_tree_str)
    moved = tsurgeon.move_aux(marked)
    return Tree.fromstring(moved)

示例#29

0

显示文件

文件： util.py 项目： nephill/Discourse-Parser

def get_production_rule_by_parse_tree(parsetree):
	"""Return the productions of a bracketed parse as normalized strings.

	Each production is rendered with ``str``, whitespace around ``->`` is
	removed, and single-quote characters are dropped.
	"""
	def _normalize(rule_text, strip_char='\''):
		# Trim both sides of the arrow, re-join, then filter unwanted chars.
		sides = [side.strip() for side in rule_text.split('->')]
		return ''.join([ch for ch in '->'.join(sides) if ch not in strip_char])

	syntax_tree = Tree.fromstring(parsetree)
	return [_normalize(str(rule)) for rule in syntax_tree.productions()]

示例#30

0

显示文件

文件： constgraph.py 项目： cmps143-nlp/homework_8

def read_story_parses(parfile):
    """Read the constituency parses stored one per line in ``parfile``.

    Lines containing ``QuestionId`` and lines of at most two characters
    (newline included) are skipped.

    Args:
        parfile: path of the parse file.

    Returns:
        A list with one tree per remaining line, in file order.
    """
    # ``with`` closes the file even when reading fails.
    with open(parfile, 'r') as fh:
        lines = fh.readlines()
    # skip lines that are not constituency parses
    return [Tree.fromstring(line) for line in lines
            if 'QuestionId' not in line and len(line) > 2]

示例#31

0

显示文件

文件： sst_classifier_1-0.py 项目： sanjeevhalyal/realworldnlp

 def _read(self, file_path):
     """Yield one instance per non-empty line of the tree file.

     Each line is parsed as a bracketed tree; its leaves, joined by spaces,
     are tokenized with ``self._tokenizer`` and the root label is used as
     the label.  Lines for which ``text_to_instance`` returns ``None`` are
     skipped.
     """
     with open(cached_path(file_path), "r") as data_file:
         for raw_line in data_file.readlines():
             text = raw_line.strip("\n")
             if text:
                 tree = Tree.fromstring(text)
                 joined = ' '.join(tree.leaves())
                 tokens = self._tokenizer.tokenize(joined)
                 instance = self.text_to_instance(tokens, tree.label())
                 if instance is not None:
                     yield instance

示例#32

0

显示文件

文件： text_datasets.py 项目： ihsgnef/rawr_transfer_chainer

def read_sst(sst_dir, split, shrink=1, char_based=False):
    """Load ``<sst_dir>/<split>.txt`` as a list of ``(tokens, label)`` pairs.

    Every kept line is parsed as a bracketed tree.  Its leaves are joined by
    spaces, normalized with ``normalize_text`` and split with ``split_text``;
    the root label is converted to ``int``.

    Args:
        sst_dir: directory holding the split files.
        split: split name, used as the file stem.
        shrink: keep only lines whose zero-based index is a multiple of this.
        char_based: forwarded to ``split_text``.

    Returns:
        A list of ``(tokens, label)`` tuples in file order.
    """
    dataset = []
    # ``with`` closes the file even when a line fails to parse.
    with open(os.path.join(sst_dir, '{}.txt'.format(split))) as f:
        for i, line in enumerate(f):
            if i % shrink != 0:
                continue
            tree = Tree.fromstring(line)
            tokens = ' '.join(tree.leaves())
            tokens = split_text(normalize_text(tokens), char_based)
            label = int(tree.label())
            dataset.append((tokens, label))
    return dataset

示例#33

0

显示文件

文件： snlp.py 项目： LanceNorskog/deep_meter_2

 def walk(t):
     """Recursively collapse ``lab`` nodes onto their first ``sublab`` child, in place.

     ``lab`` and ``sublab`` are not defined in this function; they are read
     from an enclosing scope.
     """
     # Stop at leaves (plain strings) and at nodes whose first child is a string.
     if type(t) == type('') or type(t[0]) == type(''):
         return
     for i in range(len(t)):
         # NOTE(review): only t[0] was checked to be a non-string above;
         # confirm that later children can never be strings here, since
         # .label() would fail on them.
         if t[i].label() == lab:
             for j in range(len(t[i])):
                 if t[i][j].label() == sublab:
                     # Yes, it really does have to work this way!
                     # The child is rebuilt from its string form as a new
                     # (lab <first matching sublab subtree>) node.
                     t[i] = Tree.fromstring('(' + lab + ' ' + str(t[i][j]) +
                                            ')')
                     break
         # Recurse into the (possibly replaced) child.
         if type(t[i]) != type('str'):
             walk(t[i])

示例#34

0

显示文件

文件： cdtb.py 项目： yuzhongyuanwuqu/ChineseDiscourseParser

 def load_ctb(ctb_dir, encoding="UTF-8"):
     """Load every ``<S ID=...>...</S>`` parse found in the files of ``ctb_dir``.

     Args:
         ctb_dir: directory whose entries are all read as text files.
         encoding: text encoding used to open each file.

     Returns:
         A dict mapping each sentence ID to the ``ParseTree`` parsed from the
         body of its ``<S>`` element.  A later occurrence of an ID replaces
         an earlier one.
     """
     ctb = {}
     # Raw string: "\S" in a normal string literal is an invalid escape
     # sequence that newer Python versions warn about.
     s_pat = re.compile(r"<S ID=(?P<sid>\S+?)>(?P<sparse>.*?)</S>",
                        re.M | re.DOTALL)
     for filename in os.listdir(ctb_dir):
         with open(os.path.join(ctb_dir, filename), "r",
                   encoding=encoding) as fd:
             doc = fd.read()
         for match in s_pat.finditer(doc):
             sid = match.group("sid")
             ctb[sid] = ParseTree.fromstring(match.group("sparse"))
     return ctb

示例#35

0

显示文件

 def get_relation_chomsky_syntax_tree(self, i):
     """
     Build the Chomsky-normal-form syntax tree(s) for relation ``i``.

     Args:
             i: relation number
     Returns:
         if arg1 and arg2 are in the same sentence:
             a Syntax_tree for that sentence
         if arg1's sentence precedes arg2's sentence:
             {'Arg1': Syntax_tree, 'Arg2': Syntax_tree}
         otherwise (an argument spans more than one sentence, or arg1's
         sentence comes after arg2's):
             None
     """
     def chomsky_string(sent_id):
         # Parse the sentence, binarize in place, and render it back to text.
         nltk_tree = Tree.fromstring(
             self.get_parse_tree(self.parse_data[i]['DocID'], sent_id))
         nltk_tree.chomsky_normal_form()
         return str(nltk_tree)

     arg1_sent_id = self.get_arg_sent_id(i, 'Arg1')
     arg2_sent_id = self.get_arg_sent_id(i, 'Arg2')
     if not (len(arg1_sent_id) == len(arg2_sent_id) == 1):
         return None

     first_id = arg1_sent_id[0]
     second_id = arg2_sent_id[0]
     # SS case
     if first_id == second_id:
         return Syntax_tree(chomsky_string(first_id))
     # PS case
     if first_id < second_id:
         chomsky_arg1_tree = chomsky_string(first_id)
         chomsky_arg2_tree = chomsky_string(second_id)
         return {'Arg1': Syntax_tree(chomsky_arg1_tree),
                 'Arg2': Syntax_tree(chomsky_arg2_tree)}
     return None

示例#36

0

显示文件

文件： Stanford_CoreNLP_clause_util.py 项目： EricFan24/NLP-Suite

def clausal_info_extract_from_string(parse_tree_str):
    """Parse ``parse_tree_str`` and run ``clausal_info_extract`` on the tree.

    Args:
        parse_tree_str: bracketed parse-tree string.

    Returns:
        The result of ``clausal_info_extract``, or ``None`` if parsing or
        extraction raised; in that case the problem is printed and a warning
        dialog is shown.
    """
    # Pre-bind so the handler can tell "parsing failed" from "extraction
    # failed"; previously a parse failure made the handler itself crash on
    # the unbound name.
    parse_tree = None
    try:
        parse_tree = Tree.fromstring(parse_tree_str)
        return clausal_info_extract(parse_tree)
    except Exception:
        if parse_tree is None:
            print("\nERROR IN NLTK PARSE-TREE\n", parse_tree_str)
        else:
            print("\nERROR IN NLTK PARSE-TREE\n", parse_tree_str,
                  parse_tree.flatten())
        mb.showwarning(
            title='ERROR IN PARSE-TREE',
            message=
            "There was an error in NLTK parsing of the sentence tree displayed in command line.\n\nSearch in your document for the words displayed in command line, edit your document for characters that may lead to this error, and try again."
        )
        return

示例#37

0

显示文件

文件： unit_tests.py 项目： Shiladitya2002/CoVaSEAv2.0

 def test_getVerbtrees(self):
     """getVerbtrees must report the annotated verb as the first entry of ``obj``."""
     parsed = Tree.fromstring(
         "(S(NP (DT The@$/$@1) (NN teacher@$/$@2))(VP (VBZ likes@$/$@3) (NP (NNS apples@$/$@4)))(. .@$/$@5))"
     )
     # Output lists filled in place by getVerbtrees.
     verb, obj, ttriples = [], [], []
     triple_extraction.getVerbtrees(parsed, verb, obj, ttriples)

     expected = "likes@$/$@3"
     # Only the part before the first ";" is compared.
     status = "OK" if expected == obj[0].split(";")[0] else "ERROR"
     print("getVerbtrees - " + status)
     self.assertEqual(obj[0].split(";")[0], expected)

示例#38

0

显示文件

文件： data_formatter_utils.py 项目： helioxgroup/limits-cross-domain-transfer

def sst_reader(src_filename, class_func=None, include_subtrees=True):
    """Yield ``(text, label)`` pairs from a file with one bracketed tree per line.

    Args:
        src_filename: path of the tree file.
        class_func: function applied to every node label; the identity when
            ``None``.
        include_subtrees: when true, yield a pair for every subtree of each
            line's tree; otherwise only for the whole tree.

    Yields:
        ``(_sst_detokenize(node), class_func(node.label()))`` tuples.
    """
    relabel = class_func if class_func is not None else (lambda value: value)
    with open(src_filename) as handle:
        for line in handle:
            tree = Tree.fromstring(line)
            nodes = tree.subtrees() if include_subtrees else [tree]
            for node in nodes:
                label = relabel(node.label())
                yield (_sst_detokenize(node), label)

示例#39

0

显示文件

def deleaf(parse_string):
    """Return ``parse_string`` as a one-line bracketing without words or trace-only nodes.

    Every leaf is read as an empty string, so only the labelled structure
    remains.  A child is removed when the number of ``-NONE-`` subtrees
    inside it equals its number of leaves.  The result is formatted on one
    line with space-padded parentheses and runs of spaces collapsed.

    Args:
        parse_string: bracketed parse-tree string.

    Returns:
        The one-line bracketing as a string.
    """
    tree = Tree.fromstring(parse_string.strip(), read_leaf=lambda s: "")
    for sub in tree.subtrees():
        # Walk the children from last to first: deleting while iterating
        # forward shifts the remaining children left and skips the sibling
        # that follows each deleted node.
        for n in reversed(range(len(sub))):
            child = sub[n]
            if isinstance(child, str):
                continue
            none_nodes = list(child.subtrees(
                filter=lambda x: x.label() == '-NONE-'))
            if len(none_nodes) == len(child.leaves()):
                del sub[n]
    oneline = tree.pformat(margin=10000, parens=[" ( ", " ) "])
    oneline = re.sub(' +', ' ', oneline)
    return oneline

示例#40

0

显示文件

文件： StanfordParserdemo.py 项目： zhaoxuangithub/Python-Natural-Language-Processing

def stanfordparserdemo(sentnece):
    """Print the parse of the first sentence and every NP found in it.

    The sentence is annotated through the module-level ``nlp`` client.
    Nothing is returned.

    NOTE: this block uses Python 2 print statements.
    """
    text = (sentnece)

    output = nlp.annotate(text,
                          properties={
                              'annotators':
                              'tokenize,ssplit,pos,depparse,parse',
                              'outputFormat': 'json'
                          })

    print "\n------------Stanford Parser Parseing Result------------"
    # Only the first sentence of the annotation is used.
    parsetree = output['sentences'][0]['parse']
    print "\n------parsing------\n"
    print parsetree
    print "\n------ Words inside NP ------\n"
    # First pass: the words (leaves) of every NP subtree plus its label.
    for i in Tree.fromstring(parsetree).subtrees():
        if i.label() == 'NP':
            print i.leaves(), i.label()
    print "\n------ Words inside NP with POS tags ------\n"
    # Second pass: the full NP subtrees; the parse string is parsed again.
    for i in Tree.fromstring(parsetree).subtrees():
        if i.label() == 'NP':
            print i

示例#41

0

显示文件

文件： nlp_stanford_by_java_model.py 项目： Innerface/innerface

def generate_partial(segment):
    """
    Split a segment into a phrase-structure tree.

    A ``StanfordParser`` is created from the Chinese PCFG model under the
    vendored CoreNLP directory with the 'penn' output type, and the result
    of its ``tagfile`` call is parsed into a ``Tree``.

    :param segment: value handed to ``StanfordParser.tagfile``
    :return: the ``Tree`` built from the parser output
    """
    corenlp_home = BASE_DIR + "/vendor/dataset/stanford/stanford-corenlp-full-2017-06-09/"
    parser = StanfordParser(
        corenlp_home + "models/lexparser/chinesePCFG.ser.gz",
        corenlp_home,
        'penn',
    )
    return Tree.fromstring(parser.tagfile(segment))

示例#42

0

显示文件

 def fromtree(cls, data, fields, subtrees=False):
     """Build example(s) from a bracketed tree string.

     Each example is created through ``cls.fromlist`` from the
     space-joined leaves of a tree and that tree's label.

     :param data: bracketed tree string parsed with ``Tree.fromstring``
     :param fields: forwarded unchanged to ``cls.fromlist``
     :param subtrees: when true, return a list with one example per subtree
         (the whole tree included); otherwise return a single example
     :raises ImportError: re-raised, after printing a hint, if NLTK is missing
     """
     warnings.warn('Example class will be retired in the 0.8.0 release and moved to torchtext.legacy. Please see 0.7.0 release notes for further information.', UserWarning)
     try:
         from nltk.tree import Tree
     except ImportError:
         print("Please install NLTK. "
               "See the docs at http://nltk.org for more information.")
         raise

     def to_example(node):
         # One example pairs the node's sentence text with the node's label.
         return cls.fromlist([' '.join(node.leaves()), node.label()], fields)

     parsed = Tree.fromstring(data)
     if not subtrees:
         return to_example(parsed)
     return [to_example(node) for node in parsed.subtrees()]

示例#43

0

显示文件

    def test_calc_frazier_score(self):
        """Check calc_frazier_score on a parsed sentence and on a non-tree input."""
        # Bracketed parse of "Colorless green ideas sleep furiously".
        bracketed = '( (S (NP (NNP Colorless) (JJ green) (NNS ideas)) (VP (VBP sleep) (ADVP (RB furiously)))) )'
        self.assertEqual(
            4.5, calc_frazier_score(Tree.fromstring(bracketed), 0, ''))

        # A plain string in place of a tree is expected to score -1.
        self.assertEqual(-1, calc_frazier_score("Hi!", 0, ''))

示例#44

0

显示文件

文件： data_helper.py 项目： fcihraeipnusnacwh/MRC-CE

    def _pre_processing(self):
        """Count features in 'train.stanford.json' and dump them as JSON.

        Four count tables are built from the annotated sentences:
        token texts, POS tags (alone and as ``text_pos``), dependency
        relations (alone and as ``dependentGloss_dep``), and chunk labels
        from the constituency parse (alone and as ``leaf_label``) for
        subtrees whose label is in ``chunk_pos``. The tables are printed in
        summary and written to 'feature2count.json' in ``self.data_dir``.
        """
        train_path = path.join(self.data_dir, 'train.stanford.json')
        all_data = self.read_json(train_path)
        gram2count, pos_tag2count, chunk_tag2count, dep_tag2count = (
            defaultdict(int) for _ in range(4))

        for data in all_data:
            print(type(data))
            for sentence_l in data['sentences']:
                for token in sentence_l['tokens']:
                    surface = token['originalText']
                    gram2count[surface] += 1
                    pos = token['pos']
                    pos_tag2count[pos] += 1
                    pos_tag2count[surface + '_' + pos] += 1

                for word in sentence_l['basicDependencies']:
                    relation = word['dep']
                    dep_tag2count[relation] += 1
                    dep_tag2count[word['dependentGloss'] + '_' +
                                  relation] += 1

                coparse = Tree.fromstring(sentence_l['parse'])
                for s in coparse.subtrees(lambda t: t.label() in chunk_pos):
                    node = s.label()
                    chunk_tag2count[node] += 1
                    for leaf in s.leaves():
                        chunk_tag2count[leaf + '_' + node] += 1
                # ROOT is pinned to a fixed count after every sentence.
                chunk_tag2count['ROOT'] = 100

        print('feature stat')
        for message, table in (
                ('# of gram: %d', gram2count),
                ('# of pos: %d', pos_tag2count),
                ('# of chunk_tag: %d', chunk_tag2count),
                ('# of dep: %d', dep_tag2count)):
            print(message % len(table))

        feature2id = {
            'gram2count': gram2count,
            'pos_tag2count': pos_tag2count,
            'chunk_tag2count': chunk_tag2count,
            'dep_tag2count': dep_tag2count
        }
        out_path = path.join(self.data_dir, 'feature2count.json')
        with open(out_path, 'w', encoding='utf8') as f:
            json.dump(feature2id, f, ensure_ascii=False)
            f.write('\n')

示例#45

0

显示文件

    def why_answer(self, question, relevant):
        """Extract a "why" answer for a question from a relevant sentence.

        The relevant sentence is parsed, ``self.find_S`` selects phrases
        from it, and each phrase's children are scanned. A VP child that
        contains one of ``self.why_words`` contributes the VP text from that
        word onwards (capitalized) as a candidate answer.

        :param question: question text; its nouns are compared with NP nouns
        :param relevant: sentence the answer is extracted from
        :return: the last candidate answer followed by '.', or "" if none
        """
        def nouns_in(text):
            # Words whose POS tag starts with 'N'.
            return [tup[0] for tup in self.nlp.pos(text) if tup[1][0] == 'N']

        Q_nouns = nouns_in(question)

        r_out = Tree.fromstring(self.nlp.parse(relevant))
        phrases = self.find_S(r_out)
        phrase_ans = []

        for tree in phrases:
            found = False
            for subtree in tree:
                if subtree.label() == 'NP':
                    nounP = " ".join(subtree.leaves())
                    # An NP noun missing from the question marks the phrase
                    # as wrong; the remaining children are still scanned.
                    if any(noun not in Q_nouns for noun in nouns_in(nounP)):
                        phrase_ans.append('WrongPhrase')

                if subtree.label() == 'VP':
                    verbP = " ".join(subtree.leaves())
                else:
                    verbP = ''

                # Keep the VP text starting at the first matching why-word.
                for word in self.why_words:
                    if word not in verbP:
                        continue
                    found = True
                    phrase_ans.append(verbP[verbP.find(word):].capitalize())
                    break

            # A phrase without any why-word match counts as wrong.
            if not found:
                phrase_ans.append('WrongPhrase')

        # The answer is the last entry that does not come from a wrong phrase.
        for answer in reversed(phrase_ans):
            if answer != 'WrongPhrase':
                return answer + '.'
        return ""

示例#46

0

显示文件

 def test_get_gold_spans_correctly_extracts_spans_with_nested_labels(self):
     """Nested unary labels are merged into one hyphenated gold-span label."""
     reader = PennTreeBankConstituencySpanDatasetReader()
     # This parse has several nested labels, in particular the
     # (WHNP (WHNP (WP what))) fragment, which get_gold_spans is expected to
     # concatenate into a single label.
     bracketed_parse = """
         (S
     (`` ``)
     (S-TPC
     (NP-SBJ (PRP We))
     (VP
         (VBP have)
         (S
         (VP
             (TO to)
             (VP
             (VP
                 (VB clear)
                 (PRT (RP up))
                 (NP (DT these) (NNS issues)))
             (CC and)
             (VP
                 (VB find)
                 (PRT (RP out))
                 (SBAR-NOM
                 (WHNP (WHNP (WP what)))
                 (S
                     (VP
                     (VBZ is)
                     (ADJP-PRD (JJ present))
                     (SBAR
                         (WHNP (WDT that))
                         (S
                         (VP
                             (VBZ is)
                             (VP
                             (VBG creating)
                             (NP (JJ artificial) (NN volatility)))))))))))))))
     (, ,)
     ('' '')
     (NP-SBJ (NNP Mr.) (NNP Fisher))
     (VP (VBD said))
     (. .))
     """
     expected_spans = {
         (1, 1): 'NP', (5, 5): 'PRT', (6, 7): 'NP', (4, 7): 'VP',
         (10, 10): 'PRT', (11, 11): 'WHNP-WHNP', (13, 13): 'ADJP',
         (14, 14): 'WHNP', (17, 18): 'NP', (16, 18): 'VP',
         (15, 18): 'S-VP', (14, 18): 'SBAR', (12, 18): 'S-VP',
         (11, 18): 'SBAR', (9, 18): 'VP', (4, 18): 'VP', (3, 18): 'S-VP',
         (2, 18): 'VP', (1, 18): 'S', (21, 22): 'NP', (23, 23): 'VP',
         (0, 24): 'S',
     }

     tree = Tree.fromstring(bracketed_parse)
     gold_spans = {}
     reader._strip_functional_tags(tree) # pylint: disable=protected-access
     reader._get_gold_spans(tree, 0, gold_spans) # pylint: disable=protected-access
     assert gold_spans == expected_spans

示例#47

0

显示文件

文件： simplify.py 项目： needonature/11611teamproject-YenYuan-

def extractNonResMod(tree):
    """Turn a comma-delimited modifier on the subject into its own sentence.

    The subject found by ``tsurgeon.findSubject`` is split at commas. The
    text after the first comma is classified with ``getTag``:

    * ``'NP'`` (appositive): returns a ``Tree`` for
      "<subject> is <appositive> .".
    * ``'SBAR'`` (relative clause): returns a plain sentence string made of
      the main subject followed by the clause without its first word.

    :param tree: parse handed to ``tsurgeon.findSubject``
    :return: a ``Tree``, a sentence string, or ``None`` when there is no
        subject, no modifier after the first comma, an unsupported phrase
        type, or no subtree matching the subject/appositive tokens
    """
    subject = tsurgeon.findSubject(tree)
    if not subject:
        return None
    subj_tree = Tree.fromstring(subject)
    tokens = subj_tree.leaves()
    parts = ' '.join(tokens).split(',')
    main_subject = parts[0]
    if len(parts) < 2 or parts[1] == '':
        return None

    phrase_type = getTag(parts[1].strip(), subj_tree)
    # check if it is an appositive
    if phrase_type == 'NP':
        # adding 'is' temporarily - might be able to get inflection correct
        # by examining get_top_questions verb.
        appos = parts[1].split()
        subj = main_subject.split()
        appos_tree = None
        newsubj_tree = None
        # subtrees() visits parents before children, so the first subtree
        # whose leaves match is the outermost one; keep that one. (The old
        # size test compared a child count with the length of a bracket
        # string, which in practice never replaced the first match.)
        for sub in subj_tree.subtrees():
            leaves = sub.leaves()
            if leaves == appos and appos_tree is None:
                appos_tree = str(sub)
            elif leaves == subj and newsubj_tree is None:
                newsubj_tree = str(sub)
        if appos_tree is None or newsubj_tree is None:
            # Without both constituents the template below would be filled
            # with the text "None" and yield a bogus tree.
            return None
        new_treestr = "(ROOT (S %s (VP (VBZ is) %s) (. .)))" % (
            newsubj_tree, appos_tree)
        return Tree.fromstring(new_treestr)

    # check if it is a relative clause
    if phrase_type == 'SBAR':
        # CONSTRAINTS:
        # fails for relative clauses with adjunct gaps
        # assumes we don't have a subordinate clause - need case for this
        substitution = [main_subject.rstrip()] + parts[1].split()[1:]
        return ' '.join(substitution).rstrip() + '.'
    return None

示例#48

0

显示文件

文件： example.py 项目： windweller/discourse

 def fromtree(cls, data, fields, subtrees=False):
     """Build example(s) from a bracketed tree string.

     Each example is created through ``cls.fromlist`` from a tree's list of
     leaves and its label.

     :param data: bracketed tree string parsed with ``Tree.fromstring``
     :param fields: forwarded unchanged to ``cls.fromlist``
     :param subtrees: when true, return a list with one example per subtree
         (the whole tree included); otherwise return a single example
     :raises ImportError: re-raised, after printing a hint, if NLTK is missing
     """
     try:
         from nltk.tree import Tree
     except ImportError:
         print('''Please install NLTK:
 $ pip install nltk''')
         raise
     root = Tree.fromstring(data)
     nodes = root.subtrees() if subtrees else [root]
     examples = [
         cls.fromlist([node.leaves(), node.label()], fields)
         for node in nodes
     ]
     return examples if subtrees else examples[0]

示例#49

0

显示文件

文件： parse_tree.py 项目： anantupadhyay/NLP-Project

def getParseTreeAnalysis(output):
    """Print a mapping from each noun/pronoun to its attributes.

    Only the first sentence of ``output`` is used. Its parse is converted
    to a ``ParentedTree``, and every subtree whose label starts with 'NN'
    or equals 'PRP' is collected. The first child of each such subtree
    becomes a key whose value is ``find_attributes(subtree, 1, [])``.

    :param output: annotation result with ``output['sentences'][0]['parse']``
    """
    parse_tree = output['sentences'][0]['parse']
    tree = ParentedTree.convert(Tree.fromstring(parse_tree))

    def is_nominal(node):
        label = node.label()
        return label.startswith('NN') or label == 'PRP'

    nouns = list(tree.subtrees(is_nominal))
    rel2 = dict()
    # Register every key first so the key order follows the tree order.
    for s in nouns:
        rel2.setdefault(s[0], [])
    for s in nouns:
        rel2[s[0]] = find_attributes(s, 1, [])
    # print(...) with one argument works on both Python 2 and Python 3.
    print(rel2)

示例#50

0

显示文件

def X_tree():
    """Return a toy set of parity-labelled trees and its vocabulary.

    :return: ``(X_train, vocab)`` where ``X_train`` is a list of ``Tree``
        objects parsed from bracketed strings and ``vocab`` is the token list
    """
    vocab = ["1", "+", "2", "$UNK"]
    # NOTE(review): the labels 'pdd' and 'neutralB' look like typos for
    # 'odd' and 'neutral' - confirm before relying on the label set.
    bracketed = (
        "(odd 1)",
        "(even 2)",
        "(odd (pdd 1))",
        "(even (even 2))",
        "(even (odd 1) (neutral (neutral +) (odd 1)))",
        "(odd (odd 1) (neutral (neutral +) (even 2)))",
        "(odd (even 2) (neutral (neutral +) (odd 1)))",
        "(even (even 2) (neutral (neutral +) (even 2)))",
        "(even (odd 1) (neutralB (neutral +) (odd (odd 1) (neutral (neutral +) (even 2)))))",
    )
    return list(map(Tree.fromstring, bracketed)), vocab

示例#51

0

显示文件

文件： sentiment.py 项目： viking-sudo-rm/saturated-sgd

    def _read(self, file_path):
        """Yield one instance per bracketed tree line in ``file_path``.

        The root label of each tree is its sentiment. When
        ``self._binary_sentiment`` is set, the label is mapped through
        ``_binarize_sentiment`` and lines mapped to ``None`` are skipped.

        :param file_path: text file with one bracketed tree per line
        :return: generator of ``self.text_to_instance(leaves, sentiment)``
        """
        with open(file_path) as in_file:
            for line in in_file:
                # Lines keep their trailing newline, so a blank line is only
                # recognised as empty after stripping; an unstripped "\n"
                # would reach Tree.fromstring and raise.
                line = line.strip()
                if not line:
                    continue

                tree = Tree.fromstring(line)
                sentiment = tree.label()
                if self._binary_sentiment:
                    sentiment = _binarize_sentiment(sentiment)
                    if sentiment is None:
                        continue

                yield self.text_to_instance(tree.leaves(), sentiment)

示例#52

0

显示文件

文件： map_node.py 项目： zclore/helloworld

	def __spilt_sentence(self, sentence):
		"""Return the set of short phrases found in a sentence's parse tree.

		The sentence is parsed through a CoreNLP server at
		http://localhost:12331. Every subtree with fewer than four leaves
		contributes its leaves joined by single spaces.

		:param sentence: text to parse
		:return: set of phrase strings
		"""
		nlp = StanfordCoreNLP('http://localhost', port=12331)
		try:
			# Constituency parse tree of the sentence.
			rootTree = Tree.fromstring(nlp.parse(sentence))
		finally:
			# Close the client even when parsing fails.
			nlp.close()
		# All subtrees give the full set of candidate phrases.
		return {
			" ".join(tleaves)
			for tleaves in (t.leaves() for t in rootTree.subtrees())
			if len(tleaves) < 4
		}

示例#53

0

显示文件

文件： model.py 项目： alankyuen/SentimentAnalysis_CoLaLab

    def create(self, corenlp):
        """Annotate the raw review and initialise the per-sentence containers.

        ``self.review_string`` is tokenized, sentence-split and parsed. For
        each sentence the token words, the NLTK tree, the token tree from
        ``map_token_tree`` and its indices from ``getTreeIndices`` are
        appended to the matching lists, together with empty valence
        placeholders. Snapshots of the original state are then stored in the
        ``orig_*`` attributes.

        :param corenlp: client exposing ``annotate(text, properties=...)``;
            must not be ``None``
        """
        assert corenlp is not None

        output = corenlp.annotate(
            self.review_string,
            properties={
                'annotators':
                'tokenize, ssplit, parse',
                'outputFormat':
                'json',
                'parse.model':
                'edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz'
            })

        if isinstance(output, str):
            # The server response is JSON text. It used to be passed to
            # eval(), which executes arbitrary code and breaks on JSON
            # literals such as true/false/null. strict=False tolerates raw
            # control characters inside string values.
            output = json.loads(output, strict=False)
        sentences = output['sentences']
        self.size = len(sentences)

        # organize into 1D and MD (multi-dimensional --> Tree)
        for sentence in sentences:
            self.list_tokenized_1D.append(
                [token_json['word'] for token_json in sentence['tokens']])

            parsetree = Tree.fromstring(sentence['parse'])
            token_tree = map_token_tree(parsetree)
            self.list_NLTK_trees.append(parsetree)  # NLTK Tree objects
            self.list_token_trees.append(token_tree)  # MD tokens
            # Use the tree just built rather than indexing the list, which
            # is only correct when the list started out empty.
            self.list_tree_indices.append(getTreeIndices(token_tree))
            self.list_valence_1D.append([])
            self.list_valence_trees.append([])

        # save original as string json
        self.orig_list_token_trees = json.dumps(
            {"Tree": self.list_token_trees})
        self.orig_list_tokenized_1D = json.dumps(
            {"1D": self.list_tokenized_1D})
        self.orig_list_tree_indices = json.dumps(
            {"Tree Indices": self.list_tree_indices})
        self.orig_list_NLTK_trees = [
            tree.copy(deep=True) for tree in self.list_NLTK_trees
        ]

示例#54

0

显示文件

 def process_data_file(self, file_path):
     """Load the non-neutral trees of a file into ``self.X`` / ``self.Y``.

     Each line is parsed as a bracketed tree. The root label is mapped
     through ``self.label_level``; unless the result is "neutral", the
     space-joined leaves and the label are stored under the next index.

     :param file_path: text file with one bracketed tree per line
     :return: number of entries added
     """
     cnt = 0
     with open(file_path, "r") as f:
         for line in f:
             line = line.strip()
             if not line:
                 # A blank line is not a tree; Tree.fromstring raises on it.
                 continue
             tree = Tree.fromstring(line)
             label = self.label_level(tree.label())
             if label == "neutral":
                 continue
             assert len(self.X) == len(self.Y)
             idx = len(self.X)
             self.X[idx] = " ".join(tree.leaves())
             self.Y[idx] = label
             cnt += 1
     return cnt

示例#55

0

显示文件

def extract_phrase(tree_str, label):
    """Collect the text of every subtree carrying ``label``.

    The search runs over the subtrees of each child of the parsed root, so
    the root node itself is never compared with ``label``.

    :param tree_str: bracketed tree string
    :param label: node label to look for
    :return: list of space-joined leaf strings, in traversal order
    """
    return [
        ' '.join(subtree.leaves())
        for child in Tree.fromstring(tree_str)
        for subtree in child.subtrees()
        if subtree.label() == label
    ]

示例#56

0

显示文件

文件： answer_bin.py 项目： zhongchaoze/nlp_qa_project

 def get_raw_answer(self, question, answer):
     """Check whether a question's top-level structure matches an answer's.

     Both texts are parsed (the answer after passing through
     ``Binary.main``). For each top-level label of the question, the answer
     must contain the same label and ``self.partial_matching`` must accept
     the two corresponding parts.

     :return: ``True`` if every question part matches, ``False`` otherwise
     """
     q_tree = Tree.fromstring(str(sNLP.parse(question)))
     a_tree = Tree.fromstring(str(sNLP.parse(Binary.main(answer))))
     q_labels, q_parts = self.get_top_level_structure(q_tree)
     a_labels, a_parts = self.get_top_level_structure(a_tree)

     for i, q_label in enumerate(q_labels):
         if q_label not in a_labels:
             print("label not found")
             return False
         a_index = a_labels.index(q_label)
         if not self.partial_matching(q_parts[i], a_parts[a_index]):
             return False
     return True

示例#57

0

显示文件

def getspan_fromtree(t: 'str of tree') \
        -> 'span of each tag:dictionary{tag_num:(pos,start,end)})':
    """Return the word span covered by every node of a bracketed tree.

    Nodes are numbered from 1 in pre-order, the order ``Tree.subtrees()``
    uses. Each entry is ``(label, start, end)`` with 1-based, inclusive
    word positions.

    Spans come from the running leaf offset of each node instead of a
    search for the node's first word, so sentences with repeated words get
    correct spans.
    """
    tree = Tree.fromstring(t)
    span = {}
    tag_num = 1
    # Each stack entry is (node, 0-based position of its first leaf).
    stack = [(tree, 0)]
    while stack:
        node, start = stack.pop()
        span[tag_num] = (node.label(), start + 1, start + len(node.leaves()))
        tag_num += 1

        offset = start
        children = []
        for child in node:
            if isinstance(child, Tree):
                children.append((child, offset))
                offset += len(child.leaves())
            else:
                # A bare leaf occupies one word position.
                offset += 1
        # Push in reverse so the leftmost child is visited next (pre-order).
        stack.extend(reversed(children))
    return span

示例#58

0

显示文件

文件： why_question.py 项目： jay1999ke/PureQPA

 def main(self, text, parser):
     """Rewrite a statement as a "Why ..." question and return it.

     The text is parsed, ``self.remove_SBAR`` returns the remaining parts,
     and their space-joined text is passed through ``Binary.main`` before
     "Why " is prepended. The input and the result are printed.

     :param text: sentence to convert
     :param parser: object exposing ``parse(text)``; also passed to
         ``Binary.main``
     :return: the generated question string
     """
     print(text)
     tree = Tree.fromstring(str(parser.parse(text)))
     # NOTE(review): a failed is_why check is only reported; conversion
     # still proceeds - confirm this is intended.
     if not self.is_why(tree):
         print("It could not be converted to why question.")
     _, pieces = self.remove_SBAR(tree)
     question = "Why " + Binary.main(" ".join(pieces), parser)
     print(question)
     return question

示例#59

0

显示文件

文件： example.py 项目： a-little-story/dccnn

 def fromtree(cls, data, fields, subtrees=False):
     """Create example(s) from a bracketed tree string.

     An example is built by ``cls.fromlist`` from a tree's leaves and label.

     :param data: bracketed tree string parsed with ``Tree.fromstring``
     :param fields: forwarded unchanged to ``cls.fromlist``
     :param subtrees: when true, return one example per subtree (the whole
         tree included) as a list; otherwise return a single example
     :raises ImportError: re-raised, after printing a hint, if NLTK is missing
     """
     try:
         from nltk.tree import Tree
     except ImportError:
         print("Please install NLTK. "
               "See the docs at http://nltk.org for more information.")
         raise

     def build(node):
         return cls.fromlist([node.leaves(), node.label()], fields)

     parsed = Tree.fromstring(data)
     if not subtrees:
         return build(parsed)
     return [build(node) for node in parsed.subtrees()]

示例#60

0

显示文件

文件： what_who.py 项目： jay1999ke/PureQPA

 def main(self, text, NE, parser):
     """Rewrite a statement as a "who"/"what" question and return it.

     The part at the first "NP" position of the top-level structure is
     replaced by "who" when ``self.is_who`` accepts it, otherwise by
     "what"; the last part is replaced by "?". The result is printed.

     :param text: sentence to convert
     :param NE: passed through to ``self.is_who``
     :param parser: object exposing ``parse(text)``
     :return: the generated question string
     :raises ValueError: if the top-level structure has no "NP" entry
     """
     tree = Tree.fromstring(str(parser.parse(text)))
     labels, pieces = Binary.get_top_level_structure(tree)
     np_index = labels.index("NP")
     pieces[np_index] = "who" if self.is_who(pieces[np_index], NE) else "what"
     pieces[-1] = "?"
     sent = " ".join(pieces)
     print(sent)
     return sent