예제 #1
0
def get_limit_appchina_comments(limit, json_path):
    """Load at most ``limit`` de-duplicated comments from a JSON dump.

    The file at ``json_path`` must contain a JSON object with a
    ``comments`` list. Only the first ``limit`` entries of that list are
    examined; every entry must provide ``comment``, ``nickname`` and
    ``date`` keys. An entry is skipped when an earlier entry has the same
    comment text + nickname combination.

    Args:
        limit: Maximum number of entries to examine from the start of the
            list. Values larger than the list are clamped; values <= 0
            yield an empty result.
        json_path: Path of the JSON file to read.

    Returns:
        A list of comment dicts (after the ``remove_illegal_characters``
        round-trip), each with an extra ``datetime`` key holding the
        result of ``arrow.get`` applied to the entry's ``date`` value.
    """
    # The context manager closes the file even if parsing fails.
    with open(json_path, 'r') as json_file:
        comments = json.load(json_file)['comments']

    result = []
    seen_digests = set()
    # Slicing clamps to the available entries, so a limit larger than the
    # list no longer raises IndexError. max() keeps a negative limit
    # meaning "nothing" instead of "all but the last N".
    for raw_comment in comments[:max(limit, 0)]:
        refined_str = remove_illegal_characters(json.dumps(raw_comment))
        refined_comment = json.loads(refined_str)
        # MD5 fingerprint of text + nickname, used to drop duplicates.
        # Encode explicitly so hashing does not depend on the implicit
        # ASCII conversion of unicode text.
        fingerprint = refined_comment['comment'] + refined_comment['nickname']
        comment_md5 = hashlib.md5(fingerprint.encode('utf-8')).hexdigest()
        if comment_md5 in seen_digests:
            continue
        seen_digests.add(comment_md5)
        refined_comment[u'datetime'] = arrow.get(refined_comment['date'])
        result.append(refined_comment)
    return result
예제 #2
0
def get_limit_taobao_comments(limit, json_path, is_datetime=True):
    """Load at most ``limit`` de-duplicated comments from a JSON dump.

    The file at ``json_path`` must contain a JSON object with a
    ``rateList`` list. Only the first ``limit`` entries are examined;
    every entry must provide ``date`` and ``user.nick``, while ``content``
    may be missing or empty. An entry is skipped when an earlier entry has
    the same content + nick combination.

    Args:
        limit: Maximum number of entries to examine from the start of the
            list. Values larger than the list are clamped; values <= 0
            yield an empty result.
        json_path: Path of the JSON file to read.
        is_datetime: When true, the ``date`` value is parsed with
            ``arrow.get`` using the ``YYYY年MM月DD日 HH:mm`` format and the
            local timezone; otherwise the raw ``date`` value is returned.

    Returns:
        A list of dicts with the keys ``comment`` (the content, or ``''``
        when absent/empty), ``datetime`` (parsed or raw, see above) and
        ``isBelievable`` (a random flag, true when ``random.random()``
        exceeds 0.7).
    """
    # The context manager closes the file even if parsing fails.
    with open(json_path, 'r') as json_file:
        comments = json.load(json_file)['rateList']

    result = []
    seen_digests = set()
    # Slicing clamps to the available entries; max() keeps a negative
    # limit meaning "nothing", as the original index loop did.
    for raw_comment in comments[:max(limit, 0)]:
        refined_str = remove_illegal_characters(json.dumps(raw_comment))
        refined_comment = json.loads(refined_str)
        # Missing, null and empty content are all normalised to ''.
        com_content = refined_comment.get('content') or ''
        # MD5 fingerprint of content + nick, used to drop duplicates.
        # Format into unicode and encode explicitly so hashing does not
        # depend on the implicit ASCII conversion of unicode text.
        fingerprint = u'{0}{1}'.format(
            com_content, refined_comment['user']['nick'])
        comment_md5 = hashlib.md5(fingerprint.encode('utf-8')).hexdigest()
        if comment_md5 in seen_digests:
            continue
        seen_digests.add(comment_md5)

        if is_datetime:
            comment_time = arrow.get(
                refined_comment['date'], u'YYYY年MM月DD日 HH:mm',
                tzinfo=tz.tzlocal())
        else:
            comment_time = refined_comment['date']
        result.append({
            'comment': com_content,
            'datetime': comment_time,
            'isBelievable': random.random() > 0.7,
        })

        # Dump entries without content for inspection. Uses the normalised
        # value so a missing 'content' key cannot raise KeyError here.
        if not com_content:
            print(refined_str)
    return result