def convert_to_nltk_tree(tree):
    """Recursively turn a nested sequence into a ``Tree``.

    A string is treated as a leaf and handed back unchanged. Any other
    input is read as ``[label, child, child, ...]``: element 0 becomes the
    node label and every remaining element is converted recursively and
    attached, in order, as a child of the new node.
    """
    # Leaves are plain strings and need no conversion.
    if isinstance(tree, str):
        return tree

    node = Tree(tree[0], [])
    node.extend(convert_to_nltk_tree(item) for item in tree[1:])
    return node
def _remove_tag(self, tree): if isinstance(tree, str): return re.sub('[0-9]+', 'n', tree.lower()) if len(tree) == 1: return self._remove_tag(tree[0]) new_t = Tree(tree.label(), []) for subtree in tree: if isinstance(subtree, Tree): subtree = self._remove_tag(subtree) new_t.insert(len(new_t), subtree) return new_t
def _remove_puntc(self, tree):
    """Return a copy of ``tree`` with filtered-out leaves removed.

    For a node whose single child is a string (a tagged word):

    * when ``self.lang == 'il'`` the node is dropped (``None``) if its
      label occurs in ``string.punctuation``, or the word has no ``'-'``,
      or the word contains ``'*'``; otherwise the word is truncated, in
      place on the input node, to the text before its first ``'-'``;
    * for any other language that has an entry in ``word_tags``, the node
      is dropped if its label is not listed in that entry.

    A node whose single child is not a string is replaced by the processed
    child. Every remaining node is rebuilt under the same label, keeping
    only children that are neither ``None`` nor empty.
    """
    if len(tree) == 1:
        only_child = tree[0]

        # Unary chain: descend and let the child decide.
        if not isinstance(only_child, str):
            return self._remove_puntc(only_child)

        if self.lang == 'il':
            if (tree.label() in string.punctuation
                    or '-' not in only_child
                    or '*' in only_child):
                return None
            # Keep only the part of the token before the first hyphen.
            tree[0] = only_child.split('-')[0]
        elif (word_tags.get(self.lang) is not None
              and tree.label() not in word_tags[self.lang]):
            return None

    pruned = Tree(tree.label(), [])
    for child in tree:
        kept = self._remove_puntc(child) if isinstance(child, Tree) else child
        if kept is not None and len(kept) > 0:
            pruned.append(kept)
    return pruned
def modify_tree_structure(parent_tree):
    """Build a new ``ROOT`` tree by re-ordering the clauses of ``parent_tree``.

    The work happens in two passes over ``parent_tree.subtrees()``:

    1. Every ``NP`` subtree is handed to ``handle_noun_clause`` and every
       ``VP`` / ``PRP`` subtree to ``handle_verb_prop_clause``. Both helpers
       receive and return the running insert index and the tree being built.
    2. Every single-leaf subtree that is still unflagged, and whose parent is
       unflagged too, is flagged and appended at the running index.

    Args:
        parent_tree: a parented tree (its nodes must provide ``parent()``
            and ``treeposition()``).

    Returns:
        The newly built tree rooted at a ``'ROOT'`` node.
    """
    # Per the original comment, this marks every subtree position as 0.
    # NOTE(review): presumably a dict keyed by treeposition() -- confirm
    # against label_parse_subtrees.
    tree_traversal_flag = label_parse_subtrees(parent_tree)

    modified_parse_tree = Tree('ROOT', [])
    i = 0
    for sub_tree in parent_tree.subtrees():
        label = sub_tree.label()
        if label == "NP":
            i, modified_parse_tree = handle_noun_clause(
                i, tree_traversal_flag, modified_parse_tree, sub_tree)
        if label in ("VP", "PRP"):
            i, modified_parse_tree = handle_verb_prop_clause(
                i, tree_traversal_flag, modified_parse_tree, sub_tree)

    # Pick up the words that the clause handlers left out.
    # A single pass is enough: subtrees() already yields every descendant,
    # and flags only ever move from 0 to 1, so re-scanning the descendants of
    # each subtree (as a nested loop would) can never insert anything new.
    for sub_tree in parent_tree.subtrees():
        if len(sub_tree.leaves()) != 1:
            continue
        parent = sub_tree.parent()
        if parent is None:
            # The root has no parent to consult; without this guard a
            # one-leaf tree raises AttributeError on None.treeposition().
            continue
        position = sub_tree.treeposition()
        if (tree_traversal_flag[position] == 0
                and tree_traversal_flag[parent.treeposition()] == 0):
            tree_traversal_flag[position] = 1
            modified_parse_tree.insert(i, sub_tree)
            i += 1
    return modified_parse_tree
def convert_sentence(input_string: str):
    """Re-order an English sentence and render it with ``create_video``.

    Steps:

    1. Whitespace-split the input. Empty input returns ``None``; a single
       word is passed straight to ``create_video``.
    2. Parse the tokens with a CoreNLP server at ``http://localhost:9000``.
    3. Build a new ``ROOT`` tree: first ``NP`` subtrees, plus ``NP``/``PRP``
       subtrees found inside ``VP``/``PRP`` subtrees, then every remaining
       single-word subtree. A subtree is only taken when neither it nor its
       parent has been taken already.
    4. Lemmatize the resulting leaves, drop English stop words, and pass the
       space-terminated words to ``create_video``.

    Args:
        input_string: the sentence to convert.

    Returns:
        Whatever ``create_video`` returns (named ``path`` by the original
        code), or ``None`` when the input contains no words.
    """
    # NOTE(review): CLASSPATH is set to the java binary path; kept unchanged
    # because other code may rely on it -- confirm this is intended.
    java_path = '/usr/bin/java'
    os.environ['CLASSPATH'] = java_path

    words = input_string.split()
    if not words:
        # The original guard compared the token list to the integer 1 and
        # could never fire; nothing to parse means nothing to render.
        return None
    if len(words) == 1:
        return create_video(input_string)

    parser = CoreNLPParser(url='http://localhost:9000')
    parse_tree = list(parser.parse(words))[0]
    # Rebuild as a ParentedTree so nodes expose parent() and treeposition().
    parent_tree = ParentedTree.fromstring(str(parse_tree))
    print("\n\n")

    # 0 = subtree not yet placed in the output tree, 1 = already placed.
    placed = {sub.treeposition(): 0 for sub in parent_tree.subtrees()}
    isl_tree = Tree('ROOT', [])

    def place_if_free(node):
        """Append ``node`` unless it or its parent has already been placed."""
        position = node.treeposition()
        if placed[position] == 0 and placed[node.parent().treeposition()] == 0:
            placed[position] = 1
            isl_tree.append(node)

    for sub in parent_tree.subtrees():
        label = sub.label()
        if label == "NP":
            place_if_free(sub)
        if label in ("VP", "PRP"):
            for inner in sub.subtrees():
                if inner.label() in ("NP", "PRP"):
                    place_if_free(inner)

    # One pass covers every subtree; flags only go 0 -> 1, so re-walking the
    # descendants of each subtree would not place anything further.
    for sub in parent_tree.subtrees():
        if len(sub.leaves()) == 1:
            place_if_free(sub)

    stop_words = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()
    lemmas = [lemmatizer.lemmatize(word) for word in isl_tree.leaves()]

    # Each kept word is followed by a space (including the last one), which
    # matches the string the original loop built.
    isl_sentence = "".join(
        lemma + " " for lemma in lemmas if lemma not in stop_words)
    return create_video(isl_sentence)