示例#1
0
 def process_paracl(self, sentence, idx):
     """Split the clause rooted at ``idx`` off into a sentence of its own.

     The new clause consists of the subtree words (``mark`` dependents
     excluded), prefixed with the conjunction built around the parent of
     ``idx``. When the incoming relation of ``idx`` is ``nmod`` and it has a
     ``case`` child tagged ``VBG``, that child is inspected instead of
     ``idx``. If the inspected word is tagged ``VBG`` its surface form is
     replaced by ``stem(...)``; otherwise a ``be`` token is placed right
     after the conjunction prefix. The built sentence is appended to
     ``sentence.left`` with an ACL message and the subtree is deleted.

     :param sentence: object exposing ``tree`` and ``left``
     :param idx: tree index of the clause head
     :return: None
     """
     tree = sentence.tree
     head = tree.parents[idx]
     nodes = sorted(tree.get_subtree(idx))

     # drop mark word
     clause = []
     for node in nodes:
         if tree.dependent_relation(idx, node) == 'mark':
             continue
         clause.append(tree.words[node])

     # The parent leads its own conjunction; the built tokens go in front.
     prefix = tree.get_conjunction(head)
     prefix.insert(0, head)
     prefix = tree.build_conjunction(prefix)
     clause[:0] = prefix

     if tree.in_coming_relation(idx) == "nmod":
         for child in tree.children(idx):
             is_case = tree.dependent_relation(idx, child) == "case"
             if is_case and tree.words[child].pos == "VBG":
                 idx = child
                 break

     governor = tree.words[idx]
     if governor.pos == "VBG":
         governor.word = stem(governor.word)
     else:
         clause.insert(len(prefix), Token("be", "VBZ", -1, -1))

     built = self.sentence_builder.from_un_parsed_tokens(clause)
     message = ClauseMessage(clause_detection.ACL, None, None)
     sentence.left.append((message, built))
     tree.delete_subtree(nodes)
 def compound_first(self, sentence, restructurer):
     """Yield clauses, decomposing the compound structure first.

     The sentence is tokenized, built, and split with
     ``decompose_compound``. Only when that produces more than one part is
     each part rebuilt and handed to ``appositive_first``, whose clauses are
     yielded in order.

     :param sentence: input accepted by ``tokenize``
     :param restructurer: forwarded unchanged to ``appositive_first``
     :return: generator of clauses
     """
     words = [
         Token(text, None, pos, pos)
         for pos, text in enumerate(tokenize(sentence))
     ]
     parsed = self.builder.from_un_parsed_tokens(words)
     parts = list(self.decompose_compound(parsed))
     if len(parts) <= 1:
         # NOTE(review): a sentence that does not split yields nothing at
         # all -- confirm callers expect an empty generator here.
         return
     for part in parts:
         rebuilt = self.builder.from_un_parsed_tokens(part)
         # NOTE(review): appositive_first tokenizes its first argument;
         # here it receives an already built sentence -- confirm.
         for clause in self.appositive_first(rebuilt, restructurer):
             yield clause
 def appositive_first(self, sentence, restructurer):
     """Yield clauses, applying the appositive step before decomposition.

     The raw sentence is tokenized and built, ``restructurer`` runs
     ``apposition_first`` on the result, and every sub-sentence is then
     passed through ``decompose_compound``.

     :param sentence: raw sentence
     :param restructurer: object providing ``apposition_first``
     :return: generator of clauses
     """
     words = [
         Token(text, None, pos, pos)
         for pos, text in enumerate(tokenize(sentence))
     ]
     parsed = self.builder.from_un_parsed_tokens(words)
     restructurer.apposition_first(parsed)
     for part in parsed.iter_subsentence():
         for piece in self.decompose_compound(part):
             yield piece
示例#4
0
 def extract_appositive_clause(self, tree, idx, normal=True):
     """Collect the tokens of the appositive rooted at ``idx``.

     With ``normal`` true, the clause is prefixed with those tokens of the
     parent's built conjunction that satisfy ``token.idx + 1 < idx``,
     followed by a ``be`` token. Otherwise the words before ``idx`` are
     scanned backwards: once a comma has been passed, the first word whose
     tag starts with ``NN`` is put in front of the clause together with a
     ``be`` token.

     :param tree: tree exposing ``get_subtree``, ``words``, ``parents``,
         ``get_conjunction`` and ``build_conjunction``
     :param idx: tree index of the appositive head
     :param normal: choose the conjunction-based prefix (default) or the
         nearest-noun search
     :return: ``(subtree, clause)`` -- the sorted subtree indices and the
         token list
     """
     subtree = sorted(tree.get_subtree(idx))
     clause = [tree.words[node] for node in subtree]
     if normal:
         head = tree.parents[idx]
         conjunction = tree.get_conjunction(head)
         conjunction.insert(0, head)
         conjunction = tree.build_conjunction(conjunction)
         n = 0
         for token in conjunction:
             if token.idx + 1 < idx:
                 # Insert at the running count, not at the position inside
                 # ``conjunction``: a skipped token must not push a kept one
                 # in among the appositive words, and "be" below relies on
                 # the first n entries being exactly the kept tokens.
                 clause.insert(n, token)
                 n += 1
         clause.insert(n, Token("be", "VBZ", -1, -1))
     else:  # get the nearest noun
         seen_comma = False
         # Index 0 is never inspected by this scan.
         for i in range(idx - 1, 0, -1):
             if i not in tree.words:
                 continue
             candidate = tree.words[i]
             if candidate.word == ",":
                 seen_comma = True
             elif candidate.pos.startswith("NN") and seen_comma:
                 clause.insert(0, Token("be", "VBZ", -1, -1))
                 clause.insert(0, candidate)
                 break
     return subtree, clause
示例#5
0
 def _parseline(self, line):
     """ Parse one line from *.merge file

     The line is split on tabs and its first ten fields are copied, in
     order, into a fresh ``Token``: sidx, tidx, word, lemma, pos, deplabel,
     hidx, ner, partialparse, eduidx.

     :param line: one tab-separated line of the merge file
     :return: the populated ``Token``
     :raises ValueError: if the sidx or tidx field is not an integer
     :raises IndexError: if the line has fewer than ten fields
     """
     items = line.split("\t")
     tok = Token()
     tok.sidx, tok.tidx = int(items[0]), int(items[1])
     # Without changing the case
     tok.word, tok.lemma = items[2], items[3]
     tok.pos = items[4]
     tok.deplabel = items[5]
     try:
         tok.hidx = int(items[6])
     except ValueError:
         # Non-numeric head index: hidx keeps whatever Token() gave it.
         pass
     tok.ner, tok.partialparse = items[7], items[8]
     try:
         tok.eduidx = int(items[9])
     except ValueError:
         # Best effort: report the token and the merge file, then carry on
         # with eduidx unassigned. A single pre-formatted argument prints
         # the same text under both the Python 2 statement and the
         # Python 3 function form of print.
         print("%s %s" % (tok.word, self.fmerge))
     return tok
示例#6
0
 def process_noun_adjective_phrase(self, sentence, idx):
     """Turn the phrase rooted at ``idx`` into a "<subjects> be ..." clause.

     The first word of the phrase is lower-cased. If the parent of ``idx``
     has subjects, their built conjunction plus a ``be`` token is put in
     front of the phrase, the resulting sentence is appended to
     ``sentence.left`` with an ADVCL message, and the subtree is deleted
     from the tree. Without subjects nothing else happens.

     :param sentence: object exposing ``tree`` and ``left``
     :param idx: tree index of the phrase head
     :return: None
     """
     tree = sentence.tree
     head = tree.parents[idx]
     nodes = sorted(tree.get_subtree(idx))
     clause = [tree.words[node] for node in nodes]

     # NOTE(review): the lower-casing happens before the subject check, so
     # the token is modified even when nothing is extracted -- confirm.
     first = clause[0]
     first.word = first.word.lower()  # lower case

     subjects = tree.get_subjects(head)
     if subjects:
         prefix = tree.build_conjunction(subjects)
         clause[:0] = prefix
         clause.insert(len(prefix), Token("be", "VBZ", -1, -1))
         built = self.sentence_builder.from_un_parsed_tokens(clause)
         message = ClauseMessage(clause_detection.ADVCL, None, None)
         sentence.left.append((message, built))
         tree.delete_subtree(nodes)
示例#7
0
    def process_paradvcl(self, sentence, idx):
        """Split the adverbial clause rooted at ``idx`` into its own sentence.

        The clause is the subtree of ``idx`` without ``mark`` dependents and
        with its first word lower-cased. If the first subtree node is not
        ``idx`` itself, the first remaining word is removed from the clause
        and its text is reported as the mark.

        When the head is tagged ``clause_detection.VBN`` the clause becomes
        "<subjects> be ...". Otherwise the head word is replaced by
        ``stem(...)`` and the parent's subjects are prepended, except when
        the parent has objects and the word directly before ``idx`` is a
        preposition other than "by": then the parent's objects are prepended
        instead.

        The built sentence is appended to ``sentence.left`` with an ADVCL
        message carrying the mark and the parent word; the subtree is then
        deleted from the tree.

        :param sentence: object exposing ``tree`` and ``left``
        :param idx: tree index of the clause head
        :return: None
        """
        tree = sentence.tree
        head = tree.parents[idx]
        nodes = sorted(tree.get_subtree(idx))

        # drop mark word
        clause = []
        for node in nodes:
            if tree.dependent_relation(idx, node) != 'mark':
                clause.append(tree.words[node])

        lead = clause[0]
        lead.word = lead.word.lower()  # lower case
        mark = None
        if nodes[0] != idx:
            mark = clause.pop(0).word

        subjects = tree.build_conjunction(tree.get_subjects(head))
        if tree.words[idx].pos == clause_detection.VBN:
            clause.insert(0, Token("be", "VBZ", -1, -1))
            prefix = subjects
        else:
            tree.words[idx].word = stem(tree.words[idx].word)
            objects = tree.get_objects(head)
            before = idx - 1
            # Objects take the subject slot only behind a preposition that
            # is not "by"; every other case falls back to the subjects.
            use_objects = (
                objects
                and before in tree.words
                and tree.words[before].pos == clause_detection.PREP
                and tree.words[before].word != "by"
            )
            if use_objects:
                prefix = tree.build_conjunction(objects)
            else:
                prefix = subjects
        clause[:0] = prefix

        built = self.sentence_builder.from_un_parsed_tokens(clause)
        message = ClauseMessage(clause_detection.ADVCL, mark,
                                tree.words[head])
        sentence.left.append((message, built))
        tree.delete_subtree(nodes)
示例#8
0
文件: docreader.py 项目: OlafLee/DPLP
 def _parseline(self, line):
     """ Parse one line from *.merge file

     The line is split on tabs and its first ten fields are copied, in
     order, into a fresh ``Token``: sidx, tidx, word, lemma, pos, deplabel,
     hidx, ner, partialparse, eduidx.

     :param line: one tab-separated line of the merge file
     :return: the populated ``Token``
     :raises ValueError: if the sidx or tidx field is not an integer
     :raises IndexError: if the line has fewer than ten fields
     """
     items = line.split("\t")
     tok = Token()
     tok.sidx, tok.tidx = int(items[0]), int(items[1])
     # Without changing the case
     tok.word, tok.lemma = items[2], items[3]
     tok.pos = items[4]
     tok.deplabel = items[5]
     try:
         tok.hidx = int(items[6])
     except ValueError:
         # Non-numeric head index: hidx keeps whatever Token() gave it.
         pass
     tok.ner, tok.partialparse = items[7], items[8]
     try:
         tok.eduidx = int(items[9])
     except ValueError:
         # Best effort: report the token and the merge file, then carry on
         # with eduidx unassigned. A single pre-formatted argument prints
         # the same text under both the Python 2 statement and the
         # Python 3 function form of print.
         print("%s %s" % (tok.word, self.fmerge))
     return tok
 def lexical_simplification_first(self, sentence, restructurer):
     """Run the simplification pipeline and return its intermediate results.

     Noun chunks are extracted from the raw sentence, the resulting tokens
     are built into a sentence, appositives are extracted, noun nmods and
     extra adverbials are collected (with a reparse after each of those two
     steps), the sentence is restructured, and every sub-sentence is
     decomposed into clauses.

     :param sentence: raw sentence
     :param restructurer: object providing ``extract_appositive`` and
         ``restructure``
     :return: ``(original_tokens, chunks, nmods, adverbials, clauses)``
     """
     original_tokens, chunk_tokens, chunks = extract_noun_chunks(sentence)
     # Each chunk token is indexed as a pair: [0] becomes the first Token
     # argument and [1] the last.
     built_tokens = [
         Token(pair[0], None, pos, pair[1])
         for pos, pair in enumerate(chunk_tokens)
     ]
     parsed = self.builder.from_un_parsed_tokens(built_tokens, False)

     restructurer.extract_appositive(parsed)
     nmods = parsed.tree.get_noun_nmods()
     parsed = self.reparse(parsed, False)

     adverbials = parsed.tree.get_extra_adverbial()
     parsed = self.reparse(parsed, False)

     restructurer.restructure(parsed)
     clauses = [
         clause
         for part in parsed.iter_subsentence()
         for clause in self.decompose_compound(part)
     ]
     return original_tokens, chunks, nmods, adverbials, clauses