Пример #1
0
    def get_comments_from_maoyan(self, offset):
        """Fetch one page of comments and convert it into ``Comment`` objects.

        The page is obtained from
        ``self.get_one_page_comment_json_from_maoyan(offset)``; the entries
        under its ``'cmts'`` and ``'hcmts'`` keys are concatenated (in that
        order) and each entry is copied onto a fresh ``Comment(self.movie)``.

        Args:
            offset: passed through unchanged to the page fetcher.

        Returns:
            A list of ``Comment`` objects, or ``None`` when the fetcher
            returned ``None`` or the page lacks ``'cmts'`` / ``'hcmts'``.
        """
        page = self.get_one_page_comment_json_from_maoyan(offset)
        if page is None:
            return None

        try:
            # Both comment lists are required; a missing key aborts the page.
            entries = page['cmts'] + page['hcmts']
        except KeyError:
            return None

        # (Comment attribute, key in the page entry), copied in this order.
        field_map = (
            ('user_loc', 'cityName'),
            ('user_avatar', 'avatarurl'),
            ('user_name', 'nickName'),
            ('user_id', 'userId'),
            ('comment', 'content'),
            ('create_time', 'time'),
            ('vote_count', 'approve'),
        )

        collected = []
        for entry in entries:
            record = Comment(self.movie)
            for attr_name, source_key in field_map:
                # .get() yields None for absent keys instead of raising KeyError.
                setattr(record, attr_name, entry.get(source_key))
            collected.append(record)
        return collected
Пример #2
0
def parseComments(data):
    """
    Parse review comments out of the page lines in ``data``.

    Scans the line range returned by ``commentsStartStopLineNmbr(data)``
    (both ends inclusive). Every line containing the review-start marker
    opens a new ``Comment``; the following lines are read until the
    review-end marker, and the star rating, header, helpfulness summary and
    body text found on the way are stored on that comment. The ``repr`` of
    each comment is appended to the module-level ``comments`` list.

    If the end of ``data`` is reached before a review-end marker, the
    partially filled comment is still recorded instead of raising
    ``IndexError``.

    Args:
        data: mutable, indexable sequence of page lines (strings). The line
            holding a review's body text is overwritten in place with its
            tag-stripped version.

    Returns:
        None. Results are accumulated in the global ``comments``.
    """
    global comments
    review_begins = '<div style="margin-left:0.5em;">'
    review_ends = '<div style="padding-top: 10px; clear: both; width: 100%;">'
    stars_line = 'margin-right:5px;'
    header_line = '<span style="vertical-align:middle;"'
    helpful_line = 'people found the following review helpful'
    review_text = '<span class="h3color tiny">'  # Precedes the actual review
    small_span = '<span class="small"'

    # Raw strings: "\d" and "\s" are regex escapes, not valid string escapes.
    # NOTE: the "." in the stars pattern is unescaped, so it matches any
    # single character between the two digit groups.
    stars = re.compile(r'\d+.\d+ out of 5 stars')
    helpful = re.compile(
        r'\d+ of \d+ people found the following review helpful')
    whitespace = re.compile(r'\s+')

    boundaries = commentsStartStopLineNmbr(data)
    total = len(data)
    for start in range(boundaries[0], boundaries[1] + 1):
        if review_begins not in data[start]:
            continue

        curcomment = Comment()
        # Dedicated cursor for walking the review; the outer loop keeps
        # advancing one line at a time regardless of how far this moves.
        pos = start
        while pos < total and review_ends not in data[pos]:
            line = data[pos]
            # Parse stars
            if stars_line in line:
                stars_found = stars.search(line)
                if stars_found is not None:
                    curcomment.stars = stars_found.group()
            # Parse header: the text between <b> and </b>
            elif header_line in line:
                begin = line.find('<b>') + 3
                end = line.find('</b>')
                curcomment.header = line[begin:end]
            # Parse helpfulness (commas are dropped so the digits match \d+)
            elif helpful_line in line:
                helpful_found = helpful.search(line.replace(",", ""))
                if helpful_found is not None:
                    curcomment.helpful = helpful_found.group()
            # Parse body text: it sits 3 lines below the marker, or 6 when a
            # '<span class="small"' line is found at the 3-line position.
            elif review_text in line:
                pos += 3
                if pos < total and small_span in data[pos]:
                    pos += 3
                if pos >= total:
                    # Page ended before the body line; keep what was parsed.
                    break
                data[pos] = stripHtmlTags(data[pos])
                curcomment.comment = whitespace.sub(" ", data[pos])
            pos += 1
        comments.append(repr(curcomment))
Пример #3
0
    def decode_json(self, json_str):
        """Turn one decoded response page into ``Comment`` objects and save them.

        Reads the root keys ``'count'``, ``'start'``, ``'interests'`` and
        ``'total'`` from ``json_str`` (all four must be present), builds one
        ``Comment(self.movie)`` per entry in ``'interests'``, and hands the
        resulting list to ``self.save_comments(comments, 0)``.

        Args:
            json_str: mapping-like decoded response; despite the name it is
                indexed by key, not parsed from text.

        Returns:
            A tuple ``(start, number_of_interests, total)``.

        Raises:
            KeyError: if a required key is missing at the root or in an entry.
        """
        # Read the root nodes up front so a malformed page fails immediately.
        fetched_count = json_str['count']
        first_index = json_str['start']
        entries = json_str['interests']
        overall_total = json_str['total']

        parsed = []
        for entry in entries:
            record = Comment(self.movie)
            author, score = entry['user'], entry['rating']

            # Location is optional; leave user_loc untouched when it is null.
            location = author['loc']
            if location is not None:
                record.user_loc = location['name']

            for attr_name, source_key in (
                ('user_avatar', 'avatar'),
                ('user_name', 'name'),
                ('user_id', 'id'),
            ):
                setattr(record, attr_name, author[source_key])

            # Rating is optional as well.
            if score is not None:
                record.rate = score['value']

            # These Comment attributes share their name with the entry key.
            for shared_name in ('comment', 'create_time', 'vote_count'):
                setattr(record, shared_name, entry[shared_name])

            parsed.append(record)

        # Persist the parsed comments.
        self.save_comments(parsed, 0)
        return first_index, len(entries), overall_total