Exemplo n.º 1
0
 def get_ontology_node(self, name):
     """
     Look up the OntologyNode that best corresponds to ``name``.

     The name is first run through normalize_ingredient_name and the
     normalized form is handed to self._get_closest_ontology_node, so the
     result is a closest-match lookup rather than an exact-name lookup.
     """
     normalized_name = normalize_ingredient_name(name)
     return self._get_closest_ontology_node(normalized_name)
 def get_ontology_node(self, name):
     """
     Look up the OntologyNode that best corresponds to ``name``.

     The name is first run through normalize_ingredient_name and the
     normalized form is handed to self._get_closest_ontology_node, so the
     result is a closest-match lookup rather than an exact-name lookup.
     """
     normalized_name = normalize_ingredient_name(name)
     return self._get_closest_ontology_node(normalized_name)
 def get_ingredients(self, name=None):
     """
     Get ingredients matching the given criteria.

     name -- optional ingredient name.  When given, it is normalized with
             normalize_ingredient_name and the query is restricted to
             ingredients whose name equals the normalized value.  When
             None, no name filter is applied.

     Returns the (possibly filtered) query built from
     self._session.query(Ingredient); .all() is not called on it here.
     """
     query = self._session.query(Ingredient)
     # None is a singleton: test identity rather than equality, so an
     # overloaded comparison operator on ``name`` cannot interfere.
     if name is not None:
         query = query.filter_by(name=normalize_ingredient_name(name))
     return query
Exemplo n.º 4
0
 def get_ingredients(self, name=None):
     """
     Get ingredients matching the given criteria.

     name -- optional ingredient name.  When given, it is normalized with
             normalize_ingredient_name and the query is restricted to
             ingredients whose name equals the normalized value.  When
             None, no name filter is applied.

     Returns the (possibly filtered) query built from
     self._session.query(Ingredient); .all() is not called on it here.
     """
     query = self._session.query(Ingredient)
     # None is a singleton: test identity rather than equality, so an
     # overloaded comparison operator on ``name`` cannot interfere.
     if name is not None:
         query = query.filter_by(name=normalize_ingredient_name(name))
     return query
def get_ingredients(tokenized_string, enum=False):
    """
    Pick the ingredient tokens out of a tokenized string.

    Each token is normalized with normalize_ingredient_name and kept when
    is_ingredient accepts the normalized form.  The values returned are the
    original tokens from tokenized_string, not their normalized forms.

    Returns a list of (index, token) pairs when enum is true, otherwise a
    plain list of tokens.

    >>> raw_input_string = "I like apples, cinnamon, and pepper."
    >>> tokenizer = nltk.WordPunctTokenizer()
    >>> tokenized_string = tokenizer.tokenize(raw_input_string)
    >>> for i,w in get_ingredients(tokenized_string, enum=True): print i,w
    2 apples
    4 cinnamon
    7 pepper
    """
    normalized = [normalize_ingredient_name(token)
                  for token in tokenized_string]
    # Positions of the tokens whose normalized form is an ingredient.
    hits = [idx for idx, word in enumerate(normalized) if is_ingredient(word)]
    if not enum:
        return [tokenized_string[idx] for idx in hits]
    return [(idx, tokenized_string[idx]) for idx in hits]
def get_ingredients(tokenized_string, enum=False):
    """
    Pick the ingredient tokens out of a tokenized string.

    Each token is normalized with normalize_ingredient_name and kept when
    is_ingredient accepts the normalized form.  The values returned are the
    original tokens from tokenized_string, not their normalized forms.

    Returns a list of (index, token) pairs when enum is true, otherwise a
    plain list of tokens.

    >>> raw_input_string = "I like apples, cinnamon, and pepper."
    >>> tokenizer = nltk.WordPunctTokenizer()
    >>> tokenized_string = tokenizer.tokenize(raw_input_string)
    >>> for i,w in get_ingredients(tokenized_string, enum=True): print i,w
    2 apples
    4 cinnamon
    7 pepper
    """
    normalized = [normalize_ingredient_name(token)
                  for token in tokenized_string]
    # Positions of the tokens whose normalized form is an ingredient.
    hits = [idx for idx, word in enumerate(normalized) if is_ingredient(word)]
    if not enum:
        return [tokenized_string[idx] for idx in hits]
    return [(idx, tokenized_string[idx]) for idx in hits]
Exemplo n.º 7
0
def get_cuisine(title,
                description,
                ingredients,
                title_weight=10,
                description_weight=5,
                ingredient_weight=1):
    """
    Score the cuisines suggested by a recipe's title, description and
    ingredient list.

    title and description are strings; ingredients is a list of strings.
    Every whitespace-separated word of the title that appears in
    list_of_adjectivals adds title_weight to that word's score, and every
    such word of the description adds description_weight.  Each ingredient
    is normalized with normalize_ingredient_name; when the normalized name
    is a key of ingredient_cuisine_mapping, every cuisine listed for it
    gains ingredient_weight.

    Returns a defaultdict(int) mapping cuisine to its accumulated score.
    The cuisine with the highest score is the most probable one.

    >>> get_cuisine("Japanese Pork Fried Rice","My Japanese grandmother gave me this recipe",["Pork","rice","vinegar","cabbage"])
    defaultdict(<type 'int'>, {'Sausage': 1, 'Japanese': 15})

    >>> get_cuisine("Thai Chicken","This recipe was created when we felt like having spicy, Oriental tasting food. It's delicious, uses bold ingredients such as peanut butter, fresh ginger sesame oil. Try serving over rice.",["soy sauce", "garlic", "ginger", "chicken", "sesame oil", "peanut butter", "green onions"])
    defaultdict(<type 'int'>, {'Thai': 10, 'Indonesian': 1, 'Sausage': 1})

    >>> get_cuisine("Italian Rice Balls","Crispy meatball-sized appetizers with deep fried outsides and moist herb and cheese insides",["water","brown rice","garlic","bay leaf","prosciutto", "basil","olive oil","egg whites","Parmesan","bread crumbs", "vegetable oil"])
    defaultdict(<type 'int'>, {'Pizza': 2, 'Italian': 11})

    """
    scores = defaultdict(int)  # cuisine -> accumulated weight

    # Title first, then description: both are scanned word by word for
    # cuisine adjectivals and differ only in the weight they contribute.
    for text, weight in ((title, title_weight),
                         (description, description_weight)):
        for token in text.split():
            if token in list_of_adjectivals:
                scores[token] += weight

    # Ingredients vote for the cuisines they are strongly associated with.
    for ingredient in ingredients:
        key = normalize_ingredient_name(ingredient)
        if key not in ingredient_cuisine_mapping:
            continue
        for cuisine in ingredient_cuisine_mapping[key]:
            scores[cuisine] += ingredient_weight
    return scores
def get_cuisine(title, description, ingredients, title_weight=10, description_weight=5, ingredient_weight=1):
    """
    Score the cuisines suggested by a recipe's title, description and
    ingredient list.

    title and description are strings; ingredients is a list of strings.
    Every whitespace-separated word of the title that appears in
    list_of_adjectivals adds title_weight to that word's score, and every
    such word of the description adds description_weight.  Each ingredient
    is normalized with normalize_ingredient_name; when the normalized name
    is a key of ingredient_cuisine_mapping, every cuisine listed for it
    gains ingredient_weight.

    Returns a defaultdict(int) mapping cuisine to its accumulated score.
    The cuisine with the highest score is the most probable one.

    >>> get_cuisine("Japanese Pork Fried Rice","My Japanese grandmother gave me this recipe",["Pork","rice","vinegar","cabbage"])
    defaultdict(<type 'int'>, {'Sausage': 1, 'Japanese': 15})

    >>> get_cuisine("Thai Chicken","This recipe was created when we felt like having spicy, Oriental tasting food. It's delicious, uses bold ingredients such as peanut butter, fresh ginger sesame oil. Try serving over rice.",["soy sauce", "garlic", "ginger", "chicken", "sesame oil", "peanut butter", "green onions"])
    defaultdict(<type 'int'>, {'Thai': 10, 'Indonesian': 1, 'Sausage': 1})

    >>> get_cuisine("Italian Rice Balls","Crispy meatball-sized appetizers with deep fried outsides and moist herb and cheese insides",["water","brown rice","garlic","bay leaf","prosciutto", "basil","olive oil","egg whites","Parmesan","bread crumbs", "vegetable oil"])
    defaultdict(<type 'int'>, {'Pizza': 2, 'Italian': 11})

    """
    scores = defaultdict(int)  # cuisine -> accumulated weight

    # Title first, then description: both are scanned word by word for
    # cuisine adjectivals and differ only in the weight they contribute.
    for text, weight in ((title, title_weight),
                         (description, description_weight)):
        for token in text.split():
            if token in list_of_adjectivals:
                scores[token] += weight

    # Ingredients vote for the cuisines they are strongly associated with.
    for ingredient in ingredients:
        key = normalize_ingredient_name(ingredient)
        if key not in ingredient_cuisine_mapping:
            continue
        for cuisine in ingredient_cuisine_mapping[key]:
            scores[cuisine] += ingredient_weight
    return scores
Exemplo n.º 9
0
def normalize_ontology_name(name):
    """
    Return ``name`` normalized with normalize_ingredient_name, with every
    underscore in the result replaced by a space.
    """
    normalized = normalize_ingredient_name(name)
    return normalized.replace('_', ' ')
    def get_recipes(self, include_ingredients=(), exclude_ingredients=(),
                    include_cuisines=(), exclude_cuisines=(),
                    prep_time=None, cook_time=None, total_time=None,
                    num_steps=None, num_ingredients=None):
        """
        Get recipes matching the given criteria.

        include_ingredients and exclude_ingredients are iterables of
        ingredient names; each name is normalized with
        normalize_ingredient_name before querying.  include_cuisines and
        exclude_cuisines are iterables of cuisine names, which are compared
        to Cuisine.name without any normalization.  Passing a bare string
        for any of these four arguments raises ValueError.

        Numeric attributes, like total_time, can be specified as single values
        (to retrieve exact matches) or (min, max) tuples that define ranges
        which include their endpoints.  To specify just a maximum or minimum,
        set the other value to None.

        Returns the result of calling .all() on the constructed query.

        For example, to find recipes with a total time of 1/2 to 1 hours:
        >>> db = Database("sqlite:///:memory:")
        >>> recipes = db.get_recipes(total_time=(30, 60))

        Or, to find recipes that take up to 15 minutes to prepare:
        >>> recipes = db.get_recipes(prep_time=(None, 15))

        To find recipes that have exactly 5 steps:
        >>> recipes = db.get_recipes(num_steps=5)

        To find Italian recipes:
        >>> recipes = db.get_recipes(include_cuisines=["Italian"])
        """
        # Make sure that include_* and exclude_* arguments are not strings:
        # a string is itself iterable and would silently be treated as a
        # sequence of one-character names.
        for argument in (include_ingredients, exclude_ingredients,
                         include_cuisines, exclude_cuisines):
            if isinstance(argument, types.StringTypes):
                raise ValueError('include_* and exclude_* must be iterables of'
                                 ' strings, not strings.')
        # Normalize ingredient names, so that they match the names stored in
        # the database.
        include_ingredients = \
            [normalize_ingredient_name(i) for i in include_ingredients]
        exclude_ingredients = \
            [normalize_ingredient_name(i) for i in exclude_ingredients]
        # Construct the query
        query = self._session.query(Recipe)
        # Handle ingredient inclusion and exclusion
        if include_ingredients or exclude_ingredients:
            double_join = join(RecipeIngredientAssociation, Recipe)
            triple_join = join(double_join, Ingredient)
            join_query = query.select_from(triple_join)
            query = join_query
            for ingredient_name in include_ingredients:
                query = query.intersect(
                    join_query.filter(Ingredient.name == ingredient_name))
            for ingredient_name in exclude_ingredients:
                query = query.except_(
                    join_query.filter(Ingredient.name == ingredient_name))
        # Handle cuisine inclusion and exclusion:
        # TODO: cuisine names should probably be normalized before querying, so
        # lowercase 'italian' matches 'Italian'.
        for cuisine_name in include_cuisines:
            query = query.filter(
                Recipe.cuisines.any(Cuisine.name == cuisine_name))
        for cuisine_name in exclude_cuisines:
            # Keep only recipes that do NOT have this cuisine.  Filtering on
            # any(Cuisine.name != cuisine_name) would instead keep every
            # recipe that has at least one *other* cuisine (even if it also
            # has the excluded one) and drop recipes with no cuisines.
            query = query.filter(
                ~Recipe.cuisines.any(Cuisine.name == cuisine_name))
        # Handle ranges searches over simple numeric attributes, like
        # total_time or num_steps
        if total_time is not None:
            query = query.filter(
                _range_predicate(Recipe.total_time, total_time))
        if cook_time is not None:
            query = query.filter(_range_predicate(Recipe.cook_time, cook_time))
        if prep_time is not None:
            query = query.filter(_range_predicate(Recipe.prep_time, prep_time))
        if num_steps is not None:
            query = query.filter(_range_predicate(Recipe.num_steps, num_steps))
        if num_ingredients is not None:
            query = query.filter(
                _range_predicate(Recipe.num_ingredients, num_ingredients))
        return query.all()
Exemplo n.º 11
0
    def get_recipes(self,
                    include_ingredients=(),
                    exclude_ingredients=(),
                    include_cuisines=(),
                    exclude_cuisines=(),
                    prep_time=None,
                    cook_time=None,
                    total_time=None,
                    num_steps=None,
                    num_ingredients=None):
        """
        Get recipes matching the given criteria.

        include_ingredients and exclude_ingredients are iterables of
        ingredient names; each name is normalized with
        normalize_ingredient_name before querying.  include_cuisines and
        exclude_cuisines are iterables of cuisine names, which are compared
        to Cuisine.name without any normalization.  Passing a bare string
        for any of these four arguments raises ValueError.

        Numeric attributes, like total_time, can be specified as single values
        (to retrieve exact matches) or (min, max) tuples that define ranges
        which include their endpoints.  To specify just a maximum or minimum,
        set the other value to None.

        Returns the result of calling .all() on the constructed query.

        For example, to find recipes with a total time of 1/2 to 1 hours:
        >>> db = Database("sqlite:///:memory:")
        >>> recipes = db.get_recipes(total_time=(30, 60))

        Or, to find recipes that take up to 15 minutes to prepare:
        >>> recipes = db.get_recipes(prep_time=(None, 15))

        To find recipes that have exactly 5 steps:
        >>> recipes = db.get_recipes(num_steps=5)

        To find Italian recipes:
        >>> recipes = db.get_recipes(include_cuisines=["Italian"])
        """
        # Make sure that include_* and exclude_* arguments are not strings:
        # a string is itself iterable and would silently be treated as a
        # sequence of one-character names.
        for argument in (include_ingredients, exclude_ingredients,
                         include_cuisines, exclude_cuisines):
            if isinstance(argument, types.StringTypes):
                raise ValueError('include_* and exclude_* must be iterables of'
                                 ' strings, not strings.')
        # Normalize ingredient names, so that they match the names stored in
        # the database.
        include_ingredients = \
            [normalize_ingredient_name(i) for i in include_ingredients]
        exclude_ingredients = \
            [normalize_ingredient_name(i) for i in exclude_ingredients]
        # Construct the query
        query = self._session.query(Recipe)
        # Handle ingredient inclusion and exclusion
        if include_ingredients or exclude_ingredients:
            double_join = join(RecipeIngredientAssociation, Recipe)
            triple_join = join(double_join, Ingredient)
            join_query = query.select_from(triple_join)
            query = join_query
            for ingredient_name in include_ingredients:
                query = query.intersect(
                    join_query.filter(Ingredient.name == ingredient_name))
            for ingredient_name in exclude_ingredients:
                query = query.except_(
                    join_query.filter(Ingredient.name == ingredient_name))
        # Handle cuisine inclusion and exclusion:
        # TODO: cuisine names should probably be normalized before querying, so
        # lowercase 'italian' matches 'Italian'.
        for cuisine_name in include_cuisines:
            query = query.filter(
                Recipe.cuisines.any(Cuisine.name == cuisine_name))
        for cuisine_name in exclude_cuisines:
            # Keep only recipes that do NOT have this cuisine.  Filtering on
            # any(Cuisine.name != cuisine_name) would instead keep every
            # recipe that has at least one *other* cuisine (even if it also
            # has the excluded one) and drop recipes with no cuisines.
            query = query.filter(
                ~Recipe.cuisines.any(Cuisine.name == cuisine_name))
        # Handle ranges searches over simple numeric attributes, like
        # total_time or num_steps
        if total_time is not None:
            query = query.filter(
                _range_predicate(Recipe.total_time, total_time))
        if cook_time is not None:
            query = query.filter(_range_predicate(Recipe.cook_time, cook_time))
        if prep_time is not None:
            query = query.filter(_range_predicate(Recipe.prep_time, prep_time))
        if num_steps is not None:
            query = query.filter(_range_predicate(Recipe.num_steps, num_steps))
        if num_ingredients is not None:
            query = query.filter(
                _range_predicate(Recipe.num_ingredients, num_ingredients))
        return query.all()
def normalize_ontology_name(name):
    """
    Return ``name`` normalized with normalize_ingredient_name, with every
    underscore in the result replaced by a space.
    """
    normalized = normalize_ingredient_name(name)
    return normalized.replace('_', ' ')