def extract_subjects(parse_tree, enum=True):
    """
    Yields the subject words found in parse_tree, one at a time.

    Every node produced by extract_subject_nodes(parse_tree) is converted
    to its string with get_node_string().  When enum is true each item is
    an (index, word) tuple whose index comes from
    parse_tree.indexOf(node); otherwise only the word is yielded.

    This is a generator: nothing is evaluated until it is iterated.
    """
    for subject_node in extract_subject_nodes(parse_tree):
        text = get_node_string(subject_node)
        # The index lookup is only performed when the caller asked for it.
        yield (parse_tree.indexOf(subject_node), text) if enum else text
def _parse(self, raw_input_string):
    """
    Fills out message meta and frame attributes.

    Tokenizes raw_input_string with nltk's WordPunctTokenizer and writes
    the following keys of self.frame:

    * 'subject'  -- list of get_node_string() results for the nodes
                    returned by extract_subject_nodes(), when that
                    result is truthy.
    * 'temporal' -- 'temporary' when any temporary keyword matched,
                    otherwise 'permanent'.
    * 'word'     -- the first matched keyword of the chosen temporal
                    class (positive matches come before negative ones).
    * 'prefer'   -- True for positive keywords only, False for negative
                    keywords only, None when both kinds matched.

    When no preference keyword matches at all, 'temporal', 'word' and
    'prefer' are left untouched instead of raising IndexError.
    """
    tokenizer = nltk.WordPunctTokenizer()
    tokenized_string = tokenizer.tokenize(raw_input_string)

    subjects = extract_subject_nodes(tokenized_string)
    if subjects:
        self.frame['subject'] = [get_node_string(subject)
                                 for subject in subjects]

    # The trailing 2 is forwarded unchanged to extract_close_keywords();
    # presumably a closeness threshold -- TODO confirm against the helper.
    words_temporary_pos = extract_close_keywords(
        PreferenceMessage.keywords_temporary_pos, tokenized_string, 2)
    words_temporary_neg = extract_close_keywords(
        PreferenceMessage.keywords_temporary_neg, tokenized_string, 2)
    words_permanent_pos = extract_close_keywords(
        PreferenceMessage.keywords_permanent_pos, tokenized_string, 2)
    words_permanent_neg = extract_close_keywords(
        PreferenceMessage.keywords_permanent_neg, tokenized_string, 2)

    words_temporary = words_temporary_pos + words_temporary_neg
    words_permanent = words_permanent_pos + words_permanent_neg

    # A message that matches both temporary and permanent keywords is
    # deliberately NOT treated as confused: synsets are looked up without
    # the POS, which produces false matches.
    # TODO: Fix (example: fish)
    if words_temporary:
        self.frame['temporal'] = 'temporary'
        self.frame['word'] = words_temporary[0]
    elif words_permanent:
        self.frame['temporal'] = 'permanent'
        self.frame['word'] = words_permanent[0]
    else:
        # No keyword matched, so there is nothing to index into and no
        # polarity to decide; leave the remaining frame keys as they are.
        return

    words_pos = words_temporary_pos + words_permanent_pos
    words_neg = words_temporary_neg + words_permanent_neg

    # At least one of the two lists is non-empty past the guard above.
    if words_pos and words_neg:
        # Confused: must be an exclusive branch, otherwise the None is
        # immediately overwritten by the positive case below.
        self.frame['prefer'] = None
    elif words_pos:
        self.frame['prefer'] = True
    else:  # words_neg
        self.frame['prefer'] = False
def _parse(self, raw_input_string):
    """
    Fills out message meta and frame attributes.

    Tokenizes raw_input_string and appends one entry per detected item
    to the 'ingredient', 'meal', 'cuisine' and 'dish' lists of
    self.frame.  Every entry is a dict with the keys 'id' (the index
    reported by the extracting helper), 'name', 'descriptor',
    'preference' and 'relationship'; the last three currently hold
    placeholder values (see the TODOs).

    A subject is recorded as a dish only when its word is not already
    the name of an ingredient or meal entry in the frame.
    """
    tokenizer = nltk.WordPunctTokenizer()
    tokenized_string = tokenizer.tokenize(raw_input_string)
    tagger = utils.combined_taggers
    # NOTE(review): the tags are not consumed yet; the call is kept as in
    # the original, presumably for the JJ / RB TODOs below -- confirm.
    tagged_string = tagger.tag(tokenized_string)

    def new_entry(position, name):
        # A fresh dict (and a fresh 'descriptor' list) for every item so
        # entries never share mutable state.
        return {
            'id': position,
            'name': name,
            'descriptor': [],  # TODO: siblings JJ
            'preference': 0,  # TODO: RB = not or n't
            'relationship': 'and',  # TODO: Implement
        }

    # Ingredients
    for i, ingredient in get_ingredients(tokenized_string, enum=True):
        self.frame['ingredient'].append(new_entry(i, ingredient))

    # Meals
    for i, _meal in get_meals(tokenized_string, enum=True):
        # The stored name is the token at the reported index, not the
        # value yielded by get_meals().
        self.frame['meal'].append(new_entry(i, tokenized_string[i]))

    # Cuisine
    for i, cuisine in get_cuisines(tokenized_string, enum=True):
        self.frame['cuisine'].append(new_entry(i, cuisine))

    # Dish
    # TODO: Get the subject of the sentence (aka what the verb is
    # referring to)
    # The frame lists hold dicts, so a word has to be compared against the
    # entries' names; testing the word against the lists themselves never
    # excludes anything.
    taken_names = {entry['name'] for entry in self.frame['ingredient']}
    taken_names.update(entry['name'] for entry in self.frame['meal'])

    # NOTE(review): elsewhere extract_subject_nodes() is called without
    # enum and its items are passed to get_node_string(); the
    # (index, word) pairs unpacked here match what
    # extract_subjects(..., enum=True) yields -- confirm which helper is
    # intended.
    for i, dish in extract_subject_nodes(tokenized_string, enum=True):
        if dish in taken_names:
            continue
        self.frame['dish'].append(new_entry(i, dish))
def _parse(self, raw_input_string, g):
    """
    Fills out message meta and frame attributes.

    Uses g to obtain both the token list
    (g.generate_tokenized_string) and the Stanford parse tree
    (g.generate_stanford_parse_tree) of raw_input_string, then writes
    the following keys of self.frame:

    * 'subject'  -- list of get_node_string() results for the nodes
                    returned by extract_subject_nodes() on the parse
                    tree, when that result is truthy.
    * 'temporal' -- 'temporary' when any temporary keyword matched,
                    otherwise 'permanent'.
    * 'word'     -- the first matched keyword of the chosen temporal
                    class (positive matches come before negative ones).
    * 'prefer'   -- True for positive keywords only, False for negative
                    keywords only, None when both kinds matched.

    When no preference keyword matches at all, 'temporal', 'word' and
    'prefer' are left untouched instead of raising IndexError.
    """
    tokenized_string = g.generate_tokenized_string(raw_input_string)
    parse_tree = g.generate_stanford_parse_tree(raw_input_string)

    subjects = extract_subject_nodes(parse_tree)
    if subjects:
        self.frame['subject'] = [
            get_node_string(subject) for subject in subjects
        ]

    # The trailing 2 is forwarded unchanged to extract_close_keywords();
    # presumably a closeness threshold -- TODO confirm against the helper.
    words_temporary_pos = extract_close_keywords(
        PreferenceMessage.keywords_temporary_pos, tokenized_string, 2)
    words_temporary_neg = extract_close_keywords(
        PreferenceMessage.keywords_temporary_neg, tokenized_string, 2)
    words_permanent_pos = extract_close_keywords(
        PreferenceMessage.keywords_permanent_pos, tokenized_string, 2)
    words_permanent_neg = extract_close_keywords(
        PreferenceMessage.keywords_permanent_neg, tokenized_string, 2)

    words_temporary = words_temporary_pos + words_temporary_neg
    words_permanent = words_permanent_pos + words_permanent_neg

    # A message that matches both temporary and permanent keywords is
    # deliberately NOT treated as confused: synsets are looked up without
    # the POS, which produces false matches.
    # TODO: Fix (example: fish)
    if words_temporary:
        self.frame['temporal'] = 'temporary'
        self.frame['word'] = words_temporary[0]
    elif words_permanent:
        self.frame['temporal'] = 'permanent'
        self.frame['word'] = words_permanent[0]
    else:
        # No keyword matched, so there is nothing to index into and no
        # polarity to decide; leave the remaining frame keys as they are.
        return

    words_pos = words_temporary_pos + words_permanent_pos
    words_neg = words_temporary_neg + words_permanent_neg

    # At least one of the two lists is non-empty past the guard above.
    if words_pos and words_neg:
        # Confused: must be an exclusive branch, otherwise the None is
        # immediately overwritten by the positive case below.
        self.frame['prefer'] = None
    elif words_pos:
        self.frame['prefer'] = True
    else:  # words_neg
        self.frame['prefer'] = False