def extract_junction(parse_tree, word):
    """
    Report which junction ('and' / 'or') is associated with `word`.

    Every leaf of `parse_tree` whose value equals `word` is examined in
    order. For such a leaf the junction node is looked up with
    extract_junction_node():

    * no junction node found -> 'and' (the default),
    * junction text contains 'and' -> 'and',
    * junction text contains 'or' -> 'or',
    * otherwise the search continues with the next matching leaf.

    Returns None when no leaf yields an answer (for instance when `word`
    does not occur in the tree at all).

    >>> import nltk
    >>> raw_input_string = "What can I make with carrots and children or celery?"
    >>> tokenizer = nltk.WordPunctTokenizer()
    >>> tokenized_string = tokenizer.tokenize(raw_input_string)
    >>> tree = get_parse_tree(tokenized_string)
    >>> extract_junction(tree, 'carrots')
    'and'
    >>> extract_junction(tree, 'children')
    'and'
    >>> extract_junction(tree, 'celery')
    'or'
    >>> extract_junction(tree, 'rats') is None
    True
    """
    for leaf in parse_tree.getLeaves():
        # Only leaves spelling the requested word are of interest.
        if leaf.value() != word:
            continue

        junction_node = extract_junction_node(parse_tree, leaf)
        if not junction_node:
            # Word is present but has no junction: fall back to 'and'.
            return 'and'

        # Substring containment test; 'and' is deliberately tried first.
        junction_text = get_node_string(junction_node)
        for junction in ('and', 'or'):
            if junction in junction_text:
                return junction

    return None
def extract_subjects(parse_tree, enum=True):
    """
    Lazily yield the subject words of `parse_tree`.

    This is a generator, not a list: one item is produced per node
    returned by extract_subject_nodes(parse_tree).

    With `enum` truthy (the default) each item is a tuple
    ``(parse_tree.indexOf(node), word)``; otherwise it is just ``word``,
    where ``word`` is get_node_string(node).
    """
    for subject_node in extract_subject_nodes(parse_tree):
        subject_word = get_node_string(subject_node)
        yield (
            (parse_tree.indexOf(subject_node), subject_word)
            if enum
            else subject_word
        )
def _parse(self, raw_input_string):
    """
    Fills out message meta and frame attributes.

    Tokenizes `raw_input_string` with nltk's WordPunctTokenizer and writes
    the following keys into ``self.frame``:

    * ``'subject'``  - list of subject strings (only set when subjects
      were found),
    * ``'temporal'`` - 'temporary', 'permanent', or None when no
      preference keyword matched,
    * ``'word'``     - the first matched keyword, or None when none
      matched,
    * ``'prefer'``   - True (positive keywords only), False (negative
      keywords only), or None when the signal is conflicting or absent.

    Returns None; the result is communicated through ``self.frame``.
    """
    tokenizer = nltk.WordPunctTokenizer()
    tokenized_string = tokenizer.tokenize(raw_input_string)

    # NOTE(review): the raw token list is handed to extract_subject_nodes
    # here - confirm that helper accepts tokens rather than a parse tree.
    subjects = extract_subject_nodes(tokenized_string)
    if subjects:
        self.frame['subject'] = [
            get_node_string(subject) for subject in subjects
        ]

    words_temporary_pos = extract_close_keywords(
        PreferenceMessage.keywords_temporary_pos, tokenized_string, 2)
    words_temporary_neg = extract_close_keywords(
        PreferenceMessage.keywords_temporary_neg, tokenized_string, 2)
    words_permanent_pos = extract_close_keywords(
        PreferenceMessage.keywords_permanent_pos, tokenized_string, 2)
    words_permanent_neg = extract_close_keywords(
        PreferenceMessage.keywords_permanent_neg, tokenized_string, 2)

    words_temporary = words_temporary_pos + words_temporary_neg
    words_permanent = words_permanent_pos + words_permanent_neg

    # When both temporary and permanent keywords match, the message is
    # really ambiguous, but that case is intentionally NOT reported:
    # synsets are looked up without the POS, which produces false matches.
    # TODO: Fix (example: fish). Until then temporary keywords win.
    if words_temporary:
        self.frame['temporal'] = 'temporary'
        self.frame['word'] = words_temporary[0]
    elif words_permanent:
        self.frame['temporal'] = 'permanent'
        self.frame['word'] = words_permanent[0]
    else:
        # No keyword matched at all; previously this path raised
        # IndexError on words_permanent[0].
        self.frame['temporal'] = None
        self.frame['word'] = None

    words_pos = words_temporary_pos + words_permanent_pos
    words_neg = words_temporary_neg + words_permanent_neg

    if words_pos and words_neg:
        # Confused: both polarities matched. This must be part of the
        # same chain as the branches below, otherwise the None is
        # immediately overwritten with True.
        self.frame['prefer'] = None
    elif words_pos:
        self.frame['prefer'] = True
    elif words_neg:
        self.frame['prefer'] = False
    else:
        # No polarity keyword matched.
        self.frame['prefer'] = None
def _parse(self, raw_input_string, g):
    """
    Fills out message meta and frame attributes.

    `g` must provide ``generate_tokenized_string(raw_input_string)`` and
    ``generate_stanford_parse_tree(raw_input_string)``; the tokens feed
    the keyword search and the parse tree feeds subject extraction.

    The following keys are written into ``self.frame``:

    * ``'subject'``  - list of subject strings (only set when subjects
      were found),
    * ``'temporal'`` - 'temporary', 'permanent', or None when no
      preference keyword matched,
    * ``'word'``     - the first matched keyword, or None when none
      matched,
    * ``'prefer'``   - True (positive keywords only), False (negative
      keywords only), or None when the signal is conflicting or absent.

    Returns None; the result is communicated through ``self.frame``.
    """
    tokenized_string = g.generate_tokenized_string(raw_input_string)
    parseTree = g.generate_stanford_parse_tree(raw_input_string)

    subjects = extract_subject_nodes(parseTree)
    if subjects:
        self.frame['subject'] = [
            get_node_string(subject) for subject in subjects
        ]

    words_temporary_pos = extract_close_keywords(
        PreferenceMessage.keywords_temporary_pos, tokenized_string, 2)
    words_temporary_neg = extract_close_keywords(
        PreferenceMessage.keywords_temporary_neg, tokenized_string, 2)
    words_permanent_pos = extract_close_keywords(
        PreferenceMessage.keywords_permanent_pos, tokenized_string, 2)
    words_permanent_neg = extract_close_keywords(
        PreferenceMessage.keywords_permanent_neg, tokenized_string, 2)

    words_temporary = words_temporary_pos + words_temporary_neg
    words_permanent = words_permanent_pos + words_permanent_neg

    # When both temporary and permanent keywords match, the message is
    # really ambiguous, but that case is intentionally NOT reported:
    # synsets are looked up without the POS, which produces false matches.
    # TODO: Fix (example: fish). Until then temporary keywords win.
    if words_temporary:
        self.frame['temporal'] = 'temporary'
        self.frame['word'] = words_temporary[0]
    elif words_permanent:
        self.frame['temporal'] = 'permanent'
        self.frame['word'] = words_permanent[0]
    else:
        # No keyword matched at all; previously this path raised
        # IndexError on words_permanent[0].
        self.frame['temporal'] = None
        self.frame['word'] = None

    words_pos = words_temporary_pos + words_permanent_pos
    words_neg = words_temporary_neg + words_permanent_neg

    if words_pos and words_neg:
        # Confused: both polarities matched. This must be part of the
        # same chain as the branches below, otherwise the None is
        # immediately overwritten with True.
        self.frame['prefer'] = None
    elif words_pos:
        self.frame['prefer'] = True
    elif words_neg:
        self.frame['prefer'] = False
    else:
        # No polarity keyword matched.
        self.frame['prefer'] = None