def data_transform():
    """Copy the food ranking documents from MongoDB into MySQL.

    Every document of the ``food.food_rank`` collection becomes one
    ``FoodArticle`` row. ``like`` and ``fav`` are read from the
    ``evaluation`` field of the ``food.food_data`` document that carries the
    same ``random_id``; ``click_number`` was not crawled and is filled with a
    random integer in [2000, 5000].
    """
    conn = mongo_client()
    try:
        # Extract the documents from the food_rank collection.
        for item in conn.food.food_rank.find({}).batch_size(30):
            food_article = FoodArticle()
            food_article.article_id = item["random_id"]
            food_article.name = item["name"]
            food_article.image = item["image_path"]
            food_article.author = item["author"]
            food_article.ingredient_list = item["ingredient_list"]
            food_article.count = item["count"]

            # Look up the article's rating in the food_data collection.
            # The already-open connection is reused: opening a fresh client
            # per document (and never closing it) leaked one client per row.
            # NOTE(review): if no food_data document matches, the lookup
            # presumably yields None (PyMongo find_one semantics) and the
            # subscript below raises TypeError - confirm whether such
            # articles should be skipped instead.
            evaluation_tag = conn.food.food_data.find_one(
                {"random_id": item["random_id"]})
            evaluation = evaluation_tag["evaluation"]
            food_article.like = evaluation['like']
            food_article.fav = evaluation['fav']
            food_article.click_number = random.randint(2000, 5000)
            food_article.save()
    finally:
        # Release the connection even when a document is malformed or the
        # ORM save fails half-way through.
        conn.close()
def food_data_transform_1():
    """Copy description and tips from MongoDB onto existing articles.

    For every document of the ``food.food_data`` collection, the
    ``FoodArticle`` whose ``article_id`` equals the document's ``random_id``
    gets its ``description`` set from ``desc`` and its ``tips`` set from
    ``tip_info``, and is saved.

    Any error raised by ``FoodArticle.objects.get`` (for example when no
    matching article exists) propagates to the caller.
    """
    conn = mongo_client()
    try:
        for item in conn.food.food_data.find({}).batch_size(30):
            food = FoodArticle.objects.get(article_id=item['random_id'])
            food.description = item['desc']
            food.tips = item['tip_info']
            food.save()
    finally:
        # Close the connection even if a lookup or save raises.
        conn.close()
def get_tags():
    """Extract the food article tags from MongoDB.

    Walks the ``tags_list`` of every ``food.food_data`` document and creates
    a ``Tags`` row for each tag name that does not exist yet.
    """
    conn = mongo_client()
    try:
        for item in conn.food.food_data.find({}).batch_size(30):
            for tag in item["tags_list"]:
                # exists() only asks the database whether a row is present;
                # truth-testing the queryset itself would load every match.
                if not Tags.objects.filter(name=tag).exists():
                    Tags.objects.create(name=tag)
    finally:
        # Close the connection even if a document is malformed.
        conn.close()
def image_transform_1():
    """Dump the image data from MongoDB into MySQL.

    For every document of the ``food.food_image`` collection, the
    ``FoodImage`` whose ``name`` equals the document's ``random_id`` gets its
    ``image`` field set to ``food_image/<random_id>/<random_id>-small.jpg``
    (joined with ``os.path.join``) and is saved.

    Any error raised by ``FoodImage.objects.get`` propagates to the caller.
    """
    conn = mongo_client()
    try:
        for item in conn.food.food_image.find({}).batch_size(30):
            image_id = item["random_id"]
            image = FoodImage.objects.get(name=image_id)
            image.image = os.path.join(
                'food_image', image_id, f'{image_id}-small.jpg')
            image.save()
    finally:
        # Close the connection even if a lookup or save raises.
        conn.close()
def image_like():
    """Fill the counters of every food image with random values.

    For each ``food.food_image`` document the matching ``FoodImage`` (looked
    up by ``name == random_id``) gets ``like`` in [200, 500], ``fav`` in
    [500, 2000] and ``click_number`` in [2000, 5000].

    Original author's note: the random data cannot be defined on the model
    itself, because it would be evaluated once when the program starts and
    every row would then share the same "random" value.
    """
    conn = mongo_client()
    try:
        for item in conn.food.food_image.find({}).batch_size(30):
            image = FoodImage.objects.get(name=item["random_id"])
            image.like = random.randint(200, 500)
            image.fav = random.randint(500, 2000)
            image.click_number = random.randint(2000, 5000)
            image.save()
    finally:
        # Close the connection even if a lookup or save raises.
        conn.close()
def add_image_tags():
    """Attach tag data to the images.

    For each ``food.food_image`` document the matching ``FoodImage`` (looked
    up by ``name == random_id``) is linked to the ``ImageTags`` row of every
    name in the document's ``tags_list`` and then saved.

    Any error raised by the ``objects.get`` lookups (for example for a tag
    that has not been created yet) propagates to the caller.
    """
    conn = mongo_client()
    try:
        for item in conn.food.food_image.find({}).batch_size(30):
            image = FoodImage.objects.get(name=item['random_id'])
            for tag in item["tags_list"]:
                image_tag = ImageTags.objects.get(name=tag)
                image.tags.add(image_tag)
            image.save()
    finally:
        # Close the connection even if a lookup or save raises.
        conn.close()
def image_transform():
    """Dump the image data from MongoDB into MySQL.

    For each ``food.food_image`` document this first creates an
    ``ImageTags`` row for every name in ``tags_list`` that does not exist
    yet, then creates a ``FoodImage`` whose ``name`` is the document's
    ``random_id`` and whose ``image`` is
    ``food_image/<random_id>/<random_id>-small.jpg``.
    """
    conn = mongo_client()
    try:
        for item in conn.food.food_image.find({}).batch_size(30):
            for tag in item["tags_list"]:
                # exists() only asks the database whether a row is present;
                # truth-testing the queryset itself would load every match.
                if not ImageTags.objects.filter(name=tag).exists():
                    ImageTags.objects.create(name=tag)

            image_id = item["random_id"]
            image = FoodImage()
            image.name = image_id
            image.image = os.path.join(
                'food_image', image_id, f'{image_id}-small.jpg')
            image.save()
    finally:
        # Close the connection even if a document is malformed or a save
        # fails.
        conn.close()
def add_article_tags():
    """Attach tags to the food articles.

    Original author's note: this could not be done inside the function that
    creates the articles, so a separate function adds the article tags.

    For each ``food.food_data`` document the matching ``FoodArticle``
    (looked up by ``article_id == random_id``) is linked to the ``Tags`` row
    of every name in the document's ``tags_list`` and then saved.

    Any error raised by the ``objects.get`` lookups propagates to the
    caller.
    """
    conn = mongo_client()
    try:
        # Extract the documents from the food_data collection.
        for item in conn.food.food_data.find({}).batch_size(30):
            food_article = FoodArticle.objects.get(
                article_id=item['random_id'])
            for tag in item['tags_list']:
                food_tag = Tags.objects.get(name=tag)
                food_article.tags.add(food_tag)
            food_article.save()
    finally:
        # Close the connection even if a lookup or save raises.
        conn.close()
def delete_error_image():
    """Delete image directories that have no matching MongoDB document.

    Every entry of ``<BASE_DIR>/media/food_image`` whose name is not the
    ``random_id`` of some ``food.food_image`` document is removed with
    ``shutil.rmtree``. The number of entries about to be removed is printed
    first.
    """
    import shutil

    path = os.path.join(BASE_DIR, 'media', 'food_image')
    # Names present on disk; the ones without a database record are wrong.
    wrong_set = set(os.listdir(path))

    right_set = set()
    conn = mongo_client()
    try:
        for item in conn.food.food_image.find({}).batch_size(30):
            right_set.add(item["random_id"])
    finally:
        # Close the connection even if a document lacks "random_id".
        conn.close()

    diff = wrong_set - right_set
    print(len(diff))
    for name in diff:
        shutil.rmtree(os.path.join(path, name))
def get_steps():
    """Extract the cooking steps of the food articles from MongoDB.

    For each ``food.food_data`` document one ``FoodSteps`` row is created per
    entry of ``steps_list``, linked to the ``FoodArticle`` whose
    ``article_id`` equals the document's ``random_id``.

    Any error raised by ``FoodArticle.objects.get`` propagates to the
    caller.
    """
    conn = mongo_client()
    try:
        # Extract the documents from the food_data collection.
        for item in conn.food.food_data.find({}).batch_size(30):
            steps = item["steps_list"]
            if not steps:
                # No steps: nothing to store, so no article lookup either.
                continue

            # The article is the same for every step of a document, so it is
            # fetched once instead of once per step.
            article = FoodArticle.objects.get(article_id=item["random_id"])
            for step in steps:
                food_step = FoodSteps()
                # This must be a FoodArticle instance for the one-to-many
                # relation to be established; assigning item["random_id"]
                # directly raises an error.
                food_step.article_id = article
                food_step.step_number = step["step_number"]
                food_step.image = step['step_image_path']
                food_step.description = step['step_info']
                food_step.save()
    finally:
        # The connection used to be left open; always release it.
        conn.close()
def resize_image():
    """Create a 480-pixel-wide thumbnail for every food image.

    For each ``food.food_image`` document the file
    ``<BASE_DIR>/media/food_image/<random_id>/<random_id>-full.jpg`` is
    opened, scaled to a width of 480 pixels with the height following the
    original aspect ratio, converted to RGB when it is in another mode, and
    written next to the original as ``<random_id>-small.jpg``.
    """
    conn = mongo_client()
    try:
        for item in conn.food.food_image.find({}).batch_size(30):
            image_id = item['random_id']
            path = os.path.join(BASE_DIR, "media", 'food_image', image_id)

            # The context manager closes the source file even when resizing
            # or saving fails.
            with Image.open(
                    os.path.join(path, f'{image_id}-full.jpg')) as full_image:
                (x, y) = full_image.size
                new_x = 480
                # Keep the aspect ratio; never let the height round down to
                # 0, which Pillow rejects.
                new_y = max(1, int(y * new_x / x))
                # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is
                # the same filter under its current name.
                small_image = full_image.resize((new_x, new_y), Image.LANCZOS)
                if small_image.mode != "RGB":
                    small_image = small_image.convert("RGB")
                small_image.save(os.path.join(path, f'{image_id}-small.jpg'))
    finally:
        # Close the connection even if an image is missing or unreadable.
        conn.close()
def food_data_transform():
    """Store the ingredients of every food article in MySQL.

    Each ``food.food_data`` document carries an ``ingredients_list``; every
    ``name -> dosage`` pair found in an ingredient group's ``formulas``
    mapping becomes one ``FoodIngredients`` row linked to the ``FoodArticle``
    whose ``article_id`` equals the document's ``random_id``.

    The row's ``classification`` is derived from the group name:
    '主料' -> '1', '辅料' -> '2', anything else -> '3'.

    Any error raised by ``FoodArticle.objects.get`` propagates to the
    caller.
    """
    classification_by_group = {'主料': '1', '辅料': '2'}

    conn = mongo_client()
    try:
        for item in conn.food.food_data.find({}).batch_size(30):
            # Fetched lazily, once per document: documents without any
            # formula trigger no lookup, and the same article is no longer
            # re-queried for every single formula.
            food = None
            for ingredient in item['ingredients_list']:
                formulas = ingredient['formulas']
                if not formulas:
                    continue

                # The classification depends only on the group, not on the
                # individual formula.
                key_name = classification_by_group.get(
                    ingredient['name'], '3')
                if food is None:
                    food = FoodArticle.objects.get(
                        article_id=item['random_id'])

                for key, value in formulas.items():
                    food_in = FoodIngredients()
                    food_in.article_id = food
                    food_in.name = key
                    food_in.dosage = value
                    food_in.classification = key_name
                    food_in.save()
    finally:
        # Close the connection even if a lookup or save raises.
        conn.close()