def insert_mishop_comments(comment_list: list):
    """Store Mi Shop comments, refreshing the star rating of known ones.

    For every entry the Comment whose id is ``'MI' + comment_id`` is looked
    up. When it exists only its ``star`` is overwritten from ``total_grade``
    and a notice is printed. When it does not exist, a new official Comment
    is created with colour/RAM/ROM copied from the MiSku whose id is
    ``str(goods_id)``; entries whose SKU is missing are skipped with a
    printed notice.

    Args:
        comment_list: Comment dicts. Keys read: ``comment_id`` (concatenated
            onto ``'MI'``, so it has to be a str), ``total_grade``,
            ``is_youpin``, ``goods_id``, ``add_time``, ``comment_content``.
    """
    for comment in comment_list:
        stored_id = 'MI' + comment['comment_id']

        try:
            existing = Comment.get_by_id(stored_id)
        except Comment.DoesNotExist:
            # Not stored yet: fall through to the creation path below.
            pass
        else:
            existing.star = int(comment['total_grade'])
            existing.save()
            print(f'评论ID为{comment["comment_id"]}的评论已存在, 已更新用户评分星级')
            continue

        # is_youpin == 0 is labelled Xiaomi Mall, anything else Xiaomi Youpin.
        proper_source = '小米商城' if comment['is_youpin'] == 0 else '小米有品'

        try:
            mi_sku = MiSku.get_by_id(str(comment['goods_id']))
        except MiSku.DoesNotExist:
            print('---此条评论对应的SKU不存在---')
            continue

        Comment.create(
            source=proper_source,
            is_official=True,
            comment_id=stored_id,
            create_time=comment['add_time'],
            content=comment['comment_content'],
            star=int(comment['total_grade']),
            product_color=mi_sku.product_color,
            product_ram=mi_sku.product_ram,
            product_rom=mi_sku.product_rom,
        )
def insert_jd_comments(comment_list: list, shop: Shop):
    """Store JD comments, enriching newly created rows with extra details.

    Each entry is passed to ``Comment.get_or_create`` with every base field.
    Only when the row was newly created are the follow-up comment (if the
    entry has an ``afterUserComment`` key) and the user device filled in and
    saved; rows that already existed are left untouched.

    Args:
        comment_list: Comment dicts. Keys read: ``productColor``,
            ``productSize``, ``id``, ``creationTime``, ``content``,
            ``score``, ``referenceTime``, ``days``, ``userClient`` and,
            optionally, ``afterUserComment`` together with ``afterDays``.
        shop: Supplies ``source`` and ``is_official`` for every stored row.
    """
    for comment in comment_list:
        color, ram, rom = parse_mi10_product_info(
            comment['productColor'], comment['productSize'])

        record, is_new = Comment.get_or_create(
            source=shop.source,
            is_official=shop.is_official,
            comment_id='JD' + str(comment['id']),
            create_time=comment['creationTime'],
            content=comment['content'],
            star=comment['score'],
            order_time=comment['referenceTime'],
            order_days=comment['days'],
            product_color=color,
            product_ram=ram,
            product_rom=rom,
        )
        if is_new is not True:
            continue

        if 'afterUserComment' in comment:
            follow_up = comment['afterUserComment']
            record.after_time = follow_up['created']
            record.after_content = follow_up['content']
            # afterDays lives on the outer comment dict, not on the follow-up.
            record.after_days = comment['afterDays']

        # userClient 4 is stored as Android, 2 as iOS, everything else 'other'.
        client = comment['userClient']
        if client == 4:
            device = 'Android'
        elif client == 2:
            device = 'iOS'
        else:
            device = 'other'
        record.user_device = device

        record.save()
def get_order_days_count():
    """Tally comments per ``order_days`` value and record each bucket's share.

    First pass: every Comment with a non-null ``order_days`` adds one to the
    OrderDaysCount row keyed by that value, creating the row with ``total=1``
    when it is missing. Existing rows are incremented, not reset.

    Second pass: every OrderDaysCount row gets ``percentage`` set to
    ``calculate_percentage(order_total, row.total)``, where ``order_total``
    is the number of Comments with a non-null ``order_days``.
    """
    has_order_days = Comment.order_days.is_null(False)

    for row in Comment.select().where(has_order_days):
        days = row.order_days
        try:
            bucket = OrderDaysCount.get_by_id(days)
        except OrderDaysCount.DoesNotExist:
            OrderDaysCount.create(order_days=days, total=1)
        else:
            bucket.total += 1
            bucket.save()

    order_total = Comment.select().where(has_order_days).count()
    for bucket in OrderDaysCount.select():
        bucket.percentage = calculate_percentage(order_total, bucket.total)
        bucket.save()
def get_order_date_count():
    """Tally comments per order month and record each bucket's share.

    The bucket key is the first seven characters of ``str(order_time)``
    (presumably ``YYYY-MM`` -- this relies on how ``order_time`` renders as a
    string). Every Comment with a non-null ``order_time`` adds one to the
    matching OrderDateCount row, creating it with ``total=1`` when missing;
    existing rows are incremented, not reset.

    Afterwards each OrderDateCount row gets ``percentage`` set to
    ``calculate_percentage(order_total, row.total)``, where ``order_total``
    is the number of Comments with a non-null ``order_time``.
    """
    has_order_time = Comment.order_time.is_null(False)

    for row in Comment.select().where(has_order_time):
        month_key = str(row.order_time)[:7]
        try:
            bucket = OrderDateCount.get_by_id(month_key)
        except OrderDateCount.DoesNotExist:
            OrderDateCount.create(year_month=month_key, total=1)
        else:
            bucket.total += 1
            bucket.save()

    order_total = Comment.select().where(has_order_time).count()
    for bucket in OrderDateCount.select():
        bucket.percentage = calculate_percentage(order_total, bucket.total)
        bucket.save()
def get_after_days_count():
    """Tally comments per ``after_days`` value and record each bucket's share.

    Every Comment with a non-null ``after_days`` adds one to the
    AfterDaysCount row keyed by that value, creating the row with ``total=1``
    when it is missing; existing rows are incremented, not reset.

    Afterwards each AfterDaysCount row gets ``percentage`` set to
    ``calculate_percentage(after_total, row.total)``, where ``after_total``
    is the number of Comments with a non-null ``after_days``.
    """
    has_after_days = Comment.after_days.is_null(False)

    for row in Comment.select().where(has_after_days):
        days = row.after_days
        try:
            bucket = AfterDaysCount.get_by_id(days)
        except AfterDaysCount.DoesNotExist:
            AfterDaysCount.create(after_days=days, total=1)
        else:
            bucket.total += 1
            bucket.save()

    after_total = Comment.select().where(has_after_days).count()
    for bucket in AfterDaysCount.select():
        bucket.percentage = calculate_percentage(after_total, bucket.total)
        bucket.save()
def get_comment_date_count():
    """Tally all comments per creation month and record each bucket's share.

    The bucket key is the first seven characters of ``str(create_time)``
    (presumably ``YYYY-MM`` -- this relies on how ``create_time`` renders as
    a string). Every Comment adds one to the matching CommentDateCount row,
    creating it with ``total=1`` when missing; existing rows are incremented,
    not reset.

    Afterwards each CommentDateCount row gets ``percentage`` set to
    ``calculate_percentage(comments_total, row.total)``, where
    ``comments_total`` is the number of Comment rows.
    """
    for row in Comment.select():
        month_key = str(row.create_time)[:7]
        try:
            bucket = CommentDateCount.get_by_id(month_key)
        except CommentDateCount.DoesNotExist:
            CommentDateCount.create(year_month=month_key, total=1)
        else:
            bucket.total += 1
            bucket.save()

    comments_total = Comment.select().count()
    for bucket in CommentDateCount.select():
        bucket.percentage = calculate_percentage(comments_total, bucket.total)
        bucket.save()
def get_after_comments_words():
    """Extract the top 200 keywords of all follow-up comments and store them.

    The ``after_content`` of every Comment that has one is concatenated, one
    comment per line, and handed to ``jieba.analyse.extract_tags`` with the
    stop-word list at ``DATA_ANALYZE_DIR + '/custom_cn_stopwords.txt'``.
    One AfterCommentsWords row is created per returned tag; rows from
    earlier runs are not removed here.
    """
    follow_ups = Comment.select().where(Comment.after_content.is_null(False))
    # Build the corpus in a single join rather than growing a string with
    # += per row, which can degrade to quadratic time on large tables.
    content = ''.join(row.after_content + '\n' for row in follow_ups)

    # Keyword extraction based on the TF-IDF algorithm.
    jieba.analyse.set_stop_words(DATA_ANALYZE_DIR + '/custom_cn_stopwords.txt')
    for tag in jieba.analyse.extract_tags(content, topK=200):
        AfterCommentsWords.create(word=tag)
def get_user_device_count():
    """Count comments per user device and store one summary row.

    ``total`` is the number of Comments with a non-null ``user_device``; the
    Android / iOS / other counts are the Comments whose ``user_device``
    equals that label. Each percentage is
    ``calculate_percentage(total, count)``. A new UserDeviceCount row is
    created on every call.
    """
    total = Comment.select().where(Comment.user_device.is_null(False)).count()

    # Dict order matches the query order: Android, iOS, other.
    per_device = {
        label: Comment.select().where(Comment.user_device == label).count()
        for label in ('Android', 'iOS', 'other')
    }

    UserDeviceCount.create(
        total=total,
        android=per_device['Android'],
        ios=per_device['iOS'],
        other=per_device['other'],
        android_percentage=calculate_percentage(total, per_device['Android']),
        ios_percentage=calculate_percentage(total, per_device['iOS']),
        other_percentage=calculate_percentage(total, per_device['other']),
    )
def get_non_five_star_comments_words():
    """Extract the top 200 keywords of 1-4 star comments and store them.

    For every Comment whose ``star`` is 1, 2, 3 or 4, its ``content`` and,
    when present, its ``after_content`` are concatenated one per line and
    handed to ``jieba.analyse.extract_tags`` with the stop-word list at
    ``DATA_ANALYZE_DIR + '/custom_cn_stopwords.txt'``. One
    NonFiveStarCommentsWords row is created per returned tag; rows from
    earlier runs are not removed here.
    """
    # Collect the pieces and join once rather than growing a string with
    # += per row, which can degrade to quadratic time on large tables.
    pieces = []
    for row in Comment.select().where(Comment.star.in_([1, 2, 3, 4])):
        pieces.append(row.content + '\n')
        if row.after_content is not None:
            pieces.append(row.after_content + '\n')
    content = ''.join(pieces)

    # Keyword extraction based on the TF-IDF algorithm.
    jieba.analyse.set_stop_words(DATA_ANALYZE_DIR + '/custom_cn_stopwords.txt')
    for tag in jieba.analyse.extract_tags(content, topK=200):
        NonFiveStarCommentsWords.create(word=tag)
def get_ios_comments_words():
    """Extract the top 200 keywords of comments posted from iOS and store them.

    For every Comment whose ``user_device`` equals ``'iOS'``, its ``content``
    and, when present, its ``after_content`` are concatenated one per line
    and handed to ``jieba.analyse.extract_tags`` with the stop-word list at
    ``DATA_ANALYZE_DIR + '/custom_cn_stopwords.txt'``. One IosCommentsWords
    row is created per returned tag; rows from earlier runs are not removed
    here.
    """
    # Collect the pieces and join once rather than growing a string with
    # += per row, which can degrade to quadratic time on large tables.
    pieces = []
    for row in Comment.select().where(Comment.user_device == 'iOS'):
        pieces.append(row.content + '\n')
        if row.after_content is not None:
            pieces.append(row.after_content + '\n')
    content = ''.join(pieces)

    # Keyword extraction based on the TF-IDF algorithm.
    jieba.analyse.set_stop_words(DATA_ANALYZE_DIR + '/custom_cn_stopwords.txt')
    for tag in jieba.analyse.extract_tags(content, topK=200):
        IosCommentsWords.create(word=tag)
def insert_youpin_comments(comment_list: list, shop: Shop):
    """Store Youpin comments via ``Comment.get_or_create``.

    Colour/RAM/ROM come from the MiSku whose id is ``str(pid)``; entries
    whose SKU is missing are skipped with a printed notice. ``star`` is
    always stored as 0 -- the entry's ``score`` value is not used.

    Args:
        comment_list: Comment dicts. Keys read: ``comment_source``, ``pid``,
            ``comment_id``, ``ctime`` (converted with
            ``parse_timestamp_13bit``) and ``text``.
        shop: Only ``shop.is_official`` is read.
    """
    for comment in comment_list:
        # 'mishop' is labelled Xiaomi Mall, anything else Xiaomi Youpin.
        from_mishop = comment['comment_source'] == 'mishop'
        proper_source = '小米商城' if from_mishop else '小米有品'

        try:
            mi_sku = MiSku.get_by_id(str(comment['pid']))
        except MiSku.DoesNotExist:
            print('---此条评论对应的SKU不存在---')
            continue

        fields = {
            'source': proper_source,
            'is_official': shop.is_official,
            'comment_id': 'MI' + str(comment['comment_id']),
            'create_time': parse_timestamp_13bit(comment['ctime']),
            'content': comment['text'],
            'star': 0,  # comment['score'] is intentionally not stored
            'product_color': mi_sku.product_color,
            'product_ram': mi_sku.product_ram,
            'product_rom': mi_sku.product_rom,
        }
        Comment.get_or_create(**fields)
def get_all_comments_wordcloud():
    """Render a word cloud image from the content of every comment.

    The ``content`` of all Comment rows is concatenated one per line,
    segmented with ``jieba.lcut``, and the space-joined tokens are fed to a
    7200x2400 white-background WordCloud that uses the Noto Sans CJK font
    under ``FONT_DIR`` and the stop words from ``get_stopwords_set()``. The
    image is written to ``IMAGE_DIR + '/mi10_all_comments_wordcloud.png'``.
    """
    # Build the corpus in a single join rather than growing a string with
    # += per row, which can degrade to quadratic time on large tables.
    content = ''.join(row.content + '\n' for row in Comment.select())

    wordcloud = WordCloud(
        font_path=FONT_DIR + '/NotoSansCJKsc-Regular.otf',
        width=7200,
        height=2400,
        stopwords=get_stopwords_set(),
        background_color='white',
        collocations=False,
    )
    # Use jieba's default (precise) segmentation mode.
    wordcloud.generate(' '.join(jieba.lcut(content)))
    wordcloud.to_file(IMAGE_DIR + '/mi10_all_comments_wordcloud.png')
def get_non_five_star_comments_wordcloud():
    """Render a word cloud image from 1-4 star comments.

    For every Comment whose ``star`` is 1, 2, 3 or 4, its ``content`` and,
    when present, its ``after_content`` are concatenated one per line,
    segmented with ``jieba.lcut``, and the space-joined tokens are fed to a
    4000x2400 white-background WordCloud that uses the Noto Sans CJK font
    under ``FONT_DIR`` and the stop words from ``get_stopwords_set()``. The
    image is written to
    ``IMAGE_DIR + '/mi10_non_five_star_comments_wordcloud.png'``.
    """
    # Collect the pieces and join once rather than growing a string with
    # += per row, which can degrade to quadratic time on large tables.
    pieces = []
    for row in Comment.select().where(Comment.star.in_([1, 2, 3, 4])):
        pieces.append(row.content + '\n')
        if row.after_content is not None:
            pieces.append(row.after_content + '\n')
    content = ''.join(pieces)

    wordcloud = WordCloud(
        font_path=FONT_DIR + '/NotoSansCJKsc-Regular.otf',
        width=4000,
        height=2400,
        stopwords=get_stopwords_set(),
        background_color='white',
        collocations=False,
    )
    # Use jieba's default (precise) segmentation mode.
    wordcloud.generate(' '.join(jieba.lcut(content)))
    wordcloud.to_file(IMAGE_DIR + '/mi10_non_five_star_comments_wordcloud.png')
def _parse_sn_after_days(after_days_str):
    """Turn the follow-up delay text of an SN comment into a day count.

    Returns 0 for the literal '当天追加' ("added the same day"), the leading
    integer when the text starts with digits, and None when it does neither.
    """
    if after_days_str == '当天追加':
        return 0
    leading_digits = re.match(r'^\d+', after_days_str)
    if leading_digits is None:
        return None
    return int(leading_digits.group())


def insert_sn_comments(comment_list: list, shop: Shop):
    """Store SN comments, enriching newly created rows with extra details.

    Entries whose product information is missing or cannot be parsed
    (KeyError / AttributeError while reading ``commodityInfo``) are skipped
    with a printed notice. Every other entry is passed to
    ``Comment.get_or_create``; only when the row was newly created are the
    follow-up comment (when ``againFlag`` is True) and the user device
    filled in and saved.

    A follow-up delay text that is neither '当天追加' nor starts with digits
    no longer aborts the import: ``after_days`` is left unset for that row
    and a notice is printed.

    Args:
        comment_list: Comment dicts. Keys read: ``commodityInfo`` (with
            ``charaterDesc1`` / ``charaterDesc2``), ``commodityReviewId``,
            ``publishTime``, ``content``, ``qualityStar``, ``againFlag``,
            ``sourceSystem`` and, for follow-ups, ``againReview`` (with
            ``publishTime``, ``againContent``, ``publishTimeStr``).
        shop: Supplies ``source`` and ``is_official`` for every stored row.
    """
    for comment in comment_list:
        try:
            commodity_info = comment['commodityInfo']
            color, ram, rom = parse_mi10_product_info(
                commodity_info['charaterDesc1'],
                commodity_info['charaterDesc2'])
        except (AttributeError, KeyError):
            print('---有一条评论对应的产品信息不规范, 跳过此条评论---')
            continue

        new_comment, created = Comment.get_or_create(
            source=shop.source,
            is_official=shop.is_official,
            comment_id='SN' + str(comment['commodityReviewId']),
            create_time=comment['publishTime'],
            content=comment['content'],
            star=comment['qualityStar'],
            product_color=color,
            product_ram=ram,
            product_rom=rom,
        )
        if created is not True:
            continue

        if comment['againFlag'] is True:
            after_comment = comment['againReview']
            new_comment.after_time = after_comment['publishTime']
            new_comment.after_content = after_comment['againContent']
            after_days_num = _parse_sn_after_days(
                after_comment['publishTimeStr'])
            if after_days_num is None:
                # Previously this raised AttributeError on the failed regex
                # match, after the row had already been created.
                print('---追评天数无法解析, 已留空---')
            else:
                new_comment.after_days = after_days_num

        source_system = comment['sourceSystem']
        if source_system == 'android':
            new_comment.user_device = 'Android'
        elif source_system == 'ios':
            new_comment.user_device = 'iOS'
        else:
            new_comment.user_device = 'other'

        new_comment.save()