def main(data_dir):
    """Load the food/nutrient text tables found in ``data_dir`` into the database.

    Reads ``FOOD_DES.txt``, ``NUTR_DEF.txt``, ``NUT_DATA.txt`` and ``WEIGHT.txt``
    from ``data_dir``, builds the matching model objects and commits them through
    ``db.session`` in two steps: ingredients and nutrients first, then the
    measures and per-ingredient nutrient values, which refer to them by id.

    Foods whose group appears in ``ignored_food_groups`` are skipped, and so is
    every weight or nutrient row that belongs to a skipped food.

    :param data_dir: directory containing the four text files named above
    """
    ureg = UnitRegistry()

    def read_table(file_name, column_names):
        # The files are caret-delimited, quote text with tildes and have no header row.
        return pandas.read_csv(os.path.join(data_dir, file_name), quotechar='~', delimiter='^',
                               encoding='latin-1', header=None, names=column_names)

    food_descriptions = read_table("FOOD_DES.txt", food_description_columns)
    nutrient_definitions = read_table("NUTR_DEF.txt", nutrient_definition_columns)
    nutrition_data = read_table("NUT_DATA.txt", nutrition_data_columns)
    weight_data = read_table("WEIGHT.txt", weight_data_columns)

    # Empty cells are read as NaN; swap them for empty strings so text fields stay strings.
    for table in (food_descriptions, nutrient_definitions, nutrition_data, weight_data):
        table.fillna('', inplace=True)

    with app.test_request_context():
        # Ingredients, keyed by the ndb_id value exactly as it appears in the table.
        ingredients = {}
        for food in food_descriptions.itertuples():
            if food.food_group in ignored_food_groups:
                continue
            ingredient = models.Ingredient(ingredient_id=int(food.ndb_id))
            canonical_name = models.IngredientName(name=food.description, canonical=True)
            ingredient.names.append(canonical_name)
            ingredients[food.ndb_id] = ingredient

        # Nutrients, keyed by integer nutrient id; presentation data comes from the lookup tables.
        nutrients = {}
        for definition in nutrient_definitions.itertuples():
            key = int(definition.nutrient_id)
            nutrients[key] = models.Nutrient(nutrient_id=key,
                                             display_name=nutrient_display_names.get(key),
                                             scientific_name=nutrient_scientific_names.get(key),
                                             measurement_unit=definition.units,
                                             recommended_daily_intake=nutrient_rdi.get(key),
                                             display=display_nutrient.get(key, False))

        # Typical measure text: "<unit> (<note>), <preparation>" with the last two parts optional.
        measure_pattern = re.compile(r"([\w\s]+?)(?:\s+\(.*\))?(?:,\s+(.*))?\Z")
        ingredient_preparations = []
        for row in weight_data.itertuples():
            if row.ndb_id not in ingredients:
                continue
            measure = row.measure_description
            # Default outcome: keep the raw measure text and record no density.
            label, density = measure, None
            if measure == "fl oz":
                # Handled explicitly because Pint reads "fl oz" as femtolitre * ounce.
                label = "fluid ounces"
                # US nutrition labeling treats one fluid ounce as 30 mL.
                volume = row.amount * ureg.parse_expression("30 ml").to_base_units()
                # Grams -> kilograms so the density comes out in base units.
                mass = row.gram_weight / 1000 * ureg.kilogram
                density = float((mass / volume).magnitude)
            elif not measure.startswith("pat "):
                # ("pat ..." would parse as a unit, but is not meant as one here, so it keeps the default.)
                found = measure_pattern.match(measure)
                if found is not None:
                    unit_name, preparation = found.groups()
                    try:
                        # Raises UndefinedUnitError for non-units such as "serving".
                        quantity = row.amount * ureg.parse_expression(unit_name)
                        # Base units express a volume in cubic meters.
                        volume = quantity.to_base_units()
                        is_volume = volume.units.get("meter") == 3
                        if not is_volume:
                            # Real unit, but not a volume: drop the row entirely.
                            continue
                        mass = row.gram_weight / 1000 * ureg.kilogram
                        computed_density = float((mass / volume).magnitude)
                    except UndefinedUnitError:
                        # Not a unit after all: the defaults set above apply.
                        pass
                    else:
                        label, density = preparation, computed_density
            ingredient_preparations.append(
                models.IngredientMeasure(ingredient_id=int(row.ndb_id), description=label, density=density,
                                         amount=float(row.amount), weight=float(row.gram_weight)))

        # Nutrient values, restricted to the ingredients that were kept.
        ingredient_nutrients = [
            models.IngredientNutrient(nutrient_id=int(value.nutrient_id),
                                      ingredient_id=int(value.ndb_id),
                                      quantity=float(value.nutrient_value))
            for value in nutrition_data.itertuples()
            if value.ndb_id in ingredients
        ]

        # First commit: the ingredient and nutrient rows themselves.
        session = db.session
        session.add_all(ingredients.values())
        session.add_all(nutrients.values())
        session.commit()

        # Second commit: the rows that point at them by id.
        session.add_all(ingredient_preparations)
        session.add_all(ingredient_nutrients)
        session.commit()
# Example no. 2
# 0
import metarecipe.models as m
from metarecipe.app import app, db

if __name__ == "__main__":
    # Reset the schema: call drop_all() and then create_all() on the app's metadata.
    with app.test_request_context():
        schema = db.metadata
        schema.drop_all()
        schema.create_all()
        # Sample similarity query, kept for reference only:
        # m.Ingredient.query.order_by(db.desc(db.func.similarity("chicken", m.Ingredient.ingredient_name))).limit(10).all()

def main(data_dir):
    """Import the food and nutrient tables stored in ``data_dir``.

    Four files are read from ``data_dir``: ``FOOD_DES.txt``, ``NUTR_DEF.txt``,
    ``NUT_DATA.txt`` and ``WEIGHT.txt``. Model objects are built from their
    rows and written via ``db.session`` with two commits: ingredients and
    nutrients first, then ingredient measures and ingredient nutrient values.

    Any food whose group is in ``ignored_food_groups`` is left out, together
    with all weight and nutrient rows that refer to it.

    :param data_dir: directory holding the four text files listed above
    """
    ureg = UnitRegistry()

    # Reader settings shared by all four files: '^' separates fields, '~'
    # quotes text, and there is no header line.
    csv_options = dict(quotechar='~',
                       delimiter='^',
                       encoding='latin-1',
                       header=None)
    food_descriptions = pandas.read_csv(
        os.path.join(data_dir, "FOOD_DES.txt"),
        names=food_description_columns,
        **csv_options)
    nutrient_definitions = pandas.read_csv(
        os.path.join(data_dir, "NUTR_DEF.txt"),
        names=nutrient_definition_columns,
        **csv_options)
    nutrition_data = pandas.read_csv(
        os.path.join(data_dir, "NUT_DATA.txt"),
        names=nutrition_data_columns,
        **csv_options)
    weight_data = pandas.read_csv(
        os.path.join(data_dir, "WEIGHT.txt"),
        names=weight_data_columns,
        **csv_options)

    # Blank cells load as NaN; replace them with '' so text columns contain
    # only strings.
    for frame in (food_descriptions, nutrient_definitions, nutrition_data,
                  weight_data):
        frame.fillna('', inplace=True)

    with app.test_request_context():
        # ndb_id (as read from the table) -> Ingredient
        ingredients = {}
        for food_row in food_descriptions.itertuples():
            if food_row.food_group in ignored_food_groups:
                continue
            new_ingredient = models.Ingredient(
                ingredient_id=int(food_row.ndb_id))
            new_ingredient.names.append(
                models.IngredientName(name=food_row.description,
                                      canonical=True))
            ingredients[food_row.ndb_id] = new_ingredient

        # integer nutrient id -> Nutrient
        nutrients = {}
        for nutrient_row in nutrient_definitions.itertuples():
            nid = int(nutrient_row.nutrient_id)
            nutrients[nid] = models.Nutrient(
                nutrient_id=nid,
                display_name=nutrient_display_names.get(nid),
                scientific_name=nutrient_scientific_names.get(nid),
                measurement_unit=nutrient_row.units,
                recommended_daily_intake=nutrient_rdi.get(nid),
                display=display_nutrient.get(nid, False))

        # Most measure descriptions look like
        # "<unit> (<note>), <preparation>"; the note and preparation are optional.
        weight_re = re.compile(r"([\w\s]+?)(?:\s+\(.*\))?(?:,\s+(.*))?\Z")
        ingredient_preparations = []
        for weight_row in weight_data.itertuples():
            if weight_row.ndb_id not in ingredients:
                continue
            raw_measure = weight_row.measure_description
            # Unless a volume unit is recognised below, the measure keeps its
            # raw text and has no density.
            description = raw_measure
            density = None
            if raw_measure == "fl oz":
                # Special-cased: Pint interprets "fl oz" as femtolitre ounces.
                description = "fluid ounces"
                # For nutrition labeling, US regulation equates a fluid ounce with 30 mL.
                per_unit_volume = ureg.parse_expression(
                    "30 ml").to_base_units()
                volume = weight_row.amount * per_unit_volume
                # Kilograms rather than grams, so density is in base units.
                mass = weight_row.gram_weight / 1000 * ureg.kilogram
                density = float((mass / volume).magnitude)
            elif not raw_measure.startswith("pat "):
                # "pat ..." is excluded above: it parses as a unit but is not
                # meant as one in this data.
                hit = weight_re.match(raw_measure)
                if hit is not None:
                    unit_name, preparation = hit.groups()
                    try:
                        # Fails with UndefinedUnitError when the text is not a
                        # unit (e.g. "serving").
                        quantity = weight_row.amount * ureg.parse_expression(
                            unit_name)
                        # In base units a volume is expressed in cubic meters.
                        volume = quantity.to_base_units()
                        measures_volume = volume.units.get("meter") == 3
                        if not measures_volume:
                            # A unit, but not a volume: skip this row.
                            continue
                        mass = weight_row.gram_weight / 1000 * ureg.kilogram
                        volume_density = float((mass / volume).magnitude)
                    except UndefinedUnitError:
                        # Keep the raw-text description and empty density.
                        pass
                    else:
                        description = preparation
                        density = volume_density
            ingredient_preparations.append(
                models.IngredientMeasure(
                    ingredient_id=int(weight_row.ndb_id),
                    description=description,
                    density=density,
                    amount=float(weight_row.amount),
                    weight=float(weight_row.gram_weight)))

        # Only nutrient values of retained ingredients are imported.
        ingredient_nutrients = [
            models.IngredientNutrient(
                nutrient_id=int(data_row.nutrient_id),
                ingredient_id=int(data_row.ndb_id),
                quantity=float(data_row.nutrient_value))
            for data_row in nutrition_data.itertuples()
            if data_row.ndb_id in ingredients
        ]

        # Commit the ingredients and nutrients on their own first.
        db.session.add_all(ingredients.values())
        db.session.add_all(nutrients.values())
        db.session.commit()

        # Then commit the rows that carry their ids.
        db.session.add_all(ingredient_preparations)
        db.session.add_all(ingredient_nutrients)
        db.session.commit()