def get_meals(tokenized_string, enum=False):
    """
    Pick out the meal-type words (e.g. breakfast, dinner) in a tokenized string.

    Both the input tokens and ``wordlists.meal_types`` are stemmed with
    ``utils.stem_words`` before being handed to ``extract_words_from_list``,
    but the values returned are always the original, unstemmed tokens.

    Args:
        tokenized_string: indexable sequence of word tokens.
        enum: when true, return ``(index, token)`` pairs instead of bare
            tokens, where ``index`` is the position in ``tokenized_string``.

    Returns:
        A list of ``(index, token)`` tuples if ``enum`` is true, otherwise a
        list of tokens.

    >>> raw_input_string = "I want cats for breakfast and dogs for dinner."
    >>> tokenizer = nltk.WordPunctTokenizer()
    >>> tokenized_string = tokenizer.tokenize(raw_input_string)
    >>> for i,w in get_meals(tokenized_string, enum=True):
    ...     print i,w
    4 breakfast
    8 dinner
    """
    stemmed_tokens = utils.stem_words(tokenized_string)
    meal_stems = utils.stem_words(wordlists.meal_types)

    # NOTE(review): extract_words_from_list is defined elsewhere; it is
    # consumed here as an iterable of (index, stem) pairs whose index also
    # addresses tokenized_string -- confirm against the helper.
    matches = extract_words_from_list(meal_stems, stemmed_tokens, True)

    # Translate each match back to the original (unstemmed) token.
    indexed_hits = [
        (index, tokenized_string[index]) for index, _stem in matches
    ]
    if not enum:
        return [token for _index, token in indexed_hits]
    return indexed_hits
def get_cuisines(tokenized_string, enum=False):
    """
    Pick out the cuisine words (e.g. chinese, mexican) in a tokenized string.

    The candidate vocabulary is ``wordlists.cuisines`` with every entry of
    ``wordlists.meal_types`` removed, plus ``wordlists.list_of_adjectivals``.
    Vocabulary and input tokens are both stemmed with ``utils.stem_words``
    before matching, but the values returned are always the original,
    unstemmed tokens.

    Args:
        tokenized_string: indexable sequence of word tokens.
        enum: when true, return ``(index, token)`` pairs instead of bare
            tokens, where ``index`` is the position in ``tokenized_string``.

    Returns:
        A list of ``(index, token)`` tuples if ``enum`` is true, otherwise a
        list of tokens.

    >>> raw_input_string = "I want a chinese or mexican dish."
    >>> tokenizer = nltk.WordPunctTokenizer()
    >>> tokenized_string = tokenizer.tokenize(raw_input_string)
    >>> for i,w in get_cuisines(tokenized_string, enum=True):
    ...     print i,w
    3 chinese
    5 mexican
    """
    stemmed_tokens = utils.stem_words(tokenized_string)

    # Meal types are excluded from the cuisine list so they are not reported
    # as cuisines; adjectivals are then added to the vocabulary.
    vocabulary = set.difference(
        wordlists.cuisines, wordlists.meal_types
    ).union(wordlists.list_of_adjectivals)
    cuisine_stems = utils.stem_words(vocabulary)

    # NOTE(review): extract_words_from_list is defined elsewhere; it is
    # consumed here as an iterable of (index, stem) pairs whose index also
    # addresses tokenized_string -- confirm against the helper.
    matches = extract_words_from_list(cuisine_stems, stemmed_tokens, True)

    # Translate each match back to the original (unstemmed) token.
    return [
        (index, tokenized_string[index]) if enum else tokenized_string[index]
        for index, _stem in matches
    ]