def drawing(tree):
    """Draw ``tree`` if one was supplied.

    Args:
        tree: An object exposing a ``draw()`` method, or ``None``.

    Returns:
        None. When ``tree`` is ``None`` the call is a no-op; otherwise the
        work is delegated to ``tree.draw()``.
    """
    # Guard clause: callers may pass None when there is nothing to draw.
    if tree is None:
        return
    tree.draw()
def drawing(tree):
    """Draw ``tree`` by delegating to its ``draw()`` method.

    Args:
        tree: An object exposing a ``draw()`` method. No ``None`` check is
            performed, so passing ``None`` raises ``AttributeError``.
    """
    tree.draw()
print("Example sentences") print(str_sentence1) print(str_sentence2) tokens1 = nltk.word_tokenize(str_sentence1) tokens2 = nltk.word_tokenize(str_sentence2) # Create the Chart and Viterbi parsers, with the input grammar chart_parser = nltk.ChartParser(grammar) viterbi_parser = nltk.ViterbiParser(grammar) # Results for the Chart Parser print("Parse trees obtained with the Chart parser") print("Sentence 1") for tree in chart_parser.parse(tokens1): print(tree) tree.draw() print("Sentence 2") for tree in chart_parser.parse(tokens2): print(tree) tree.draw() # Results for the Viterbi Parser print("Parse trees obtained with the Viterbi parser") print("Sentence 1") for tree in viterbi_parser.parse(tokens1): print(tree) tree.draw() print("Sentence 2") for tree in viterbi_parser.parse(tokens2):
def namedEntityRecognition(self,sentence):
    """Extract candidate titles (proper-noun phrases) from ``sentence``.

    The sentence is tokenized and POS-tagged with NLTK, then chunked with a
    regular-expression grammar whose single ``NP`` rule matches an optional
    determiner/possessive, any number of adjectives and one or more proper
    nouns. The children of the resulting chunk tree are scanned in order:

    * an ``NP`` chunk with more than one element is recorded as a candidate;
    * ``NP`` chunks separated by non-chunk words are additionally joined,
      together with those words, into a longer candidate.

    The collected candidates are then post-processed (all-caps check, text
    after a period dropped, ``" 's"`` collapsed to ``"'s"``).

    Side effect: ``tree.draw()`` is called on the chunk tree before the scan.

    Args:
        sentence: Text passed to ``nltk.word_tokenize``.

    Returns:
        The list of candidate title strings.

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-collapsed source. Confirm the nesting against the original
    file, in particular where ``possibleTitle = ""`` is reset.
    NOTE(review): ``numWrdDelimiter`` is not defined in this function;
    presumably a module-level constant -- verify.
    """
    tokens = nltk.word_tokenize(sentence)
    pos_tags = nltk.pos_tag(tokens)
    #print nltk.ne_chunk(pos_tags, binary=True)
    # NOTE(review): "\$" in a non-raw string is an unrecognised escape
    # sequence; the backslash is kept, but newer Python versions warn about it.
    grammar = "NP: {<DT|PP\$>?<JJ>*<NNP>+}"
    # Inert string expression holding an alternative grammar; it is never used.
    """ NP: {<DT|PP\$>?<JJ>*<NN>} # chunk determiner/possessive, adjectives and proper nouns {<NNP>+} # chunk sequences of proper nouns """
    cp = nltk.RegexpParser(grammar)
    tree = cp.parse(pos_tags)
    possibleTitles = []
    # Text of the NP chunk currently being assembled.
    possibleTitle = ""
    # Text of a candidate spanning several NP chunks plus the words between them.
    possibleDelimTitle = ""
    # Number of non-chunk words seen since the last NP chunk.
    delimCounter = 0
    # True once an NP chunk has been seen and a joined candidate is still open.
    delimBool = False
    tree.draw()
    for subtree in tree:
        # NOTE(review): this comparison only matches the Python 2 repr of the
        # tuple type; on Python 3 the repr is "<class 'tuple'>", so this
        # branch would never be taken there.
        if str(type (subtree)) == "<type 'tuple'>":
            # A plain (word, tag) pair, i.e. a token outside any NP chunk.
            if delimBool ==True:
                delimCounter+=1
                possibleDelimTitle +=subtree[0]+" "
                if delimCounter > numWrdDelimiter:
                    # Too many words since the last NP chunk: abandon the
                    # joined candidate.
                    delimBool = False
                    delimCounter = 0
                    possibleDelimTitle = ""
            possibleTitle = ""
        else:
            # NOTE(review): ``.node`` is the chunk-label attribute of older
            # NLTK releases; newer releases appear to use ``label()`` -- confirm
            # against the installed version.
            if subtree.node == "NP":
                firstLeaf = subtree.leaves()[0]
                firstString = firstLeaf[0]
                #print subtree
                if firstString.endswith("."):
                    # Check whether the first leaf of the chunk is a string
                    # ending in a period. This sometimes happens when a proper
                    # noun begins a sentence: the previous tuple gets included
                    # in the chunk, so drop it.
                    subtree.pop(0)
                if len(subtree)>1:
                    for leaf in subtree.leaves():
                        possibleTitle +=leaf[0]+" "
                    possibleTitles.append(possibleTitle.strip())
                if len(subtree)>=1:
                    # Try to concatenate a movie title that is separated by
                    # non-nouns.
                    for leaf in subtree.leaves():
                        possibleDelimTitle +=leaf[0]+" "
                    if delimCounter > 0:
                        # This chunk is the tail end of a split proper noun.
                        possibleTitles.append(possibleDelimTitle.strip())
                    # Reset the counter and (re)open the joined candidate.
                    delimBool = True
                    delimCounter = 0
    # NOTE(review): the loop below rebinds and edits ``possibleTitles`` while
    # iterating over it. After ``possibleTitles = []`` the loop still walks the
    # old list, and ``possibleTitles.index(title)`` raises ValueError whenever
    # ``title`` is no longer present (also when a title contains both "." and
    # " 's", because the first replacement removes it).
    for title in possibleTitles:
        # A title in all caps can be discarded with high confidence.
        titleClean = re.sub(r'\W+', ' ', title)
        if titleClean.isupper():
            possibleTitles = []
        if "." in title:
            # Sometimes NLTK yields "Jump Street. I" instead of "Jump Street";
            # keep only the text before the first period.
            possibleTitles[possibleTitles.index(title)]= title.split(".")[0]
        if " 's" in title:
            possibleTitles[possibleTitles.index(title)]= title.replace(" 's","'s")
    return possibleTitles
def drawTree():
    """Build a small hard-coded example tree and draw it.

    The tree is parsed from a fixed bracketed string ("old men and women"
    under a ``Tweet`` root) and rendered via its ``draw()`` method.

    Returns:
        None.
    """
    # NOTE(review): ``Tree.parse`` is the constructor name used by older NLTK
    # releases; newer releases appear to expose ``Tree.fromstring`` instead --
    # confirm against the installed version.
    tree = nltk.Tree.parse('(Tweet (Adj old) (NP (N men) (Conj and) (N women)))')
    tree.draw()
def main(tigerFile):
    """Convert ``tigerFile`` to a tree with ``ptbToTree`` and draw it.

    Args:
        tigerFile: Value handed unchanged to ``ptbToTree``.
    """
    ptbToTree(tigerFile).draw()