def get_comments_from_maoyan(self, offset):
    """Fetch one page of Maoyan comments and wrap each entry in a ``Comment``.

    The page is obtained through
    ``self.get_one_page_comment_json_from_maoyan(offset)``. The ``'cmts'`` and
    ``'hcmts'`` lists of that page are concatenated (in that order) and every
    entry is converted into a ``Comment`` built from ``self.movie``.

    Args:
        offset: Value forwarded unchanged to the page-fetching method.

    Returns:
        A list of ``Comment`` objects (possibly empty), or ``None`` when the
        page could not be fetched or lacks either the ``'cmts'`` or the
        ``'hcmts'`` key.
    """
    parsed = []

    payload = self.get_one_page_comment_json_from_maoyan(offset)
    if payload is None:
        return None

    try:
        # Both comment lists are required; a page missing either key is
        # reported as "no data" rather than partially processed.
        raw_items = payload['cmts'] + payload['hcmts']
    except KeyError:
        return None

    # (Comment attribute, key in the raw entry). Values are read with .get()
    # so an entry lacking a key yields None instead of raising KeyError.
    field_map = (
        ('user_loc', 'cityName'),
        ('user_avatar', 'avatarurl'),
        ('user_name', 'nickName'),
        ('user_id', 'userId'),
        ('comment', 'content'),
        ('create_time', 'time'),
        ('vote_count', 'approve'),
    )

    for raw in raw_items:
        entry = Comment(self.movie)
        for attribute, key in field_map:
            setattr(entry, attribute, raw.get(key))
        parsed.append(entry)

    return parsed
def parseComments(data):
    """
    Parse comments from site

    Scans the lines of ``data`` between the indices returned by
    ``commentsStartStopLineNmbr(data)``. For every line that opens a review
    block, the following lines are examined up to the review-end marker and a
    ``Comment`` is filled with the star rating, header, helpfulness text and
    body that were found. ``repr()`` of that ``Comment`` is appended to the
    module-level ``comments`` list.

    Args:
        data: Mutable sequence of page lines (must support ``len()`` and
            item assignment). The line holding a review body is overwritten
            in place with its tag-stripped text.

    Returns:
        None. Results are accumulated in the global ``comments`` list.
    """
    global comments

    review_begins = '<div style="margin-left:0.5em;">'
    review_ends = '<div style="padding-top: 10px; clear: both; width: 100%;">'
    stars_line = 'margin-right:5px;'
    header_line = '<span style="vertical-align:middle;"'
    helpful_line = 'people found the following review helpful'
    review_text = '<span class="h3color tiny">'  # Actual review
    small_span = '<span class="small"'

    # Raw strings keep the backslashes intact without relying on invalid
    # escape sequences; the decimal point is escaped so only a literal '.'
    # is accepted inside the rating.
    stars = re.compile(r'\d+\.\d+ out of 5 stars')
    helpful = re.compile(r'\d+ of \d+ people found the following review helpful')
    whitespace = re.compile(r'\s+')

    line_count = len(data)
    boundaries = commentsStartStopLineNmbr(data)
    for start in range(boundaries[0], boundaries[1] + 1):
        if review_begins not in data[start]:
            continue

        curcomment = Comment()
        # Separate cursor: the scan advances independently of the outer loop,
        # which still visits every line in the range exactly once.
        cursor = start
        # Stop at the end of the data as well, so an unterminated review
        # yields a partial comment instead of an IndexError.
        while cursor < line_count and review_ends not in data[cursor]:
            line = data[cursor]

            # Parse stars
            if stars_line in line:
                stars_found = stars.search(line)
                if stars_found is not None:
                    curcomment.stars = stars_found.group()

            # Parse header
            elif header_line in line:
                begin = line.find('<b>') + 3
                end = line.find('</b>')
                curcomment.header = line[begin:end]

            # Parse helpfulness
            elif helpful_line in line:
                # Commas are removed first so counts such as "1,234" match \d+.
                helpful_found = helpful.search(line.replace(",", ""))
                if helpful_found is not None:
                    curcomment.helpful = helpful_found.group()

            # Parse body text
            elif review_text in line:
                # The body is read 3 lines below the marker, or 6 lines below
                # when the line at +3 carries a "small" span.
                cursor += 3
                if cursor < line_count and small_span in data[cursor]:
                    cursor += 3
                if cursor >= line_count:
                    break
                data[cursor] = stripHtmlTags(data[cursor])
                curcomment.comment = whitespace.sub(" ", data[cursor])

            cursor += 1

        comments.append(repr(curcomment))
def decode_json(self, json_str):
    """Convert one decoded comments response into ``Comment`` objects and save them.

    Reads the root keys ``'count'``, ``'start'``, ``'interests'`` and
    ``'total'`` from ``json_str``, builds a ``Comment`` (from ``self.movie``)
    for every entry in ``'interests'`` and hands the resulting list to
    ``self.save_comments(comments, 0)``.

    Args:
        json_str: Mapping-like decoded response. Keys are read with direct
            indexing, so a missing key raises ``KeyError``.

    Returns:
        A tuple ``(start, number_of_interests, total)``.
    """
    # Root nodes. 'count' is not used further, but the lookup is kept so a
    # response lacking it still fails in the same way.
    _page_count = json_str['count']
    first_index = json_str['start']
    entries = json_str['interests']
    grand_total = json_str['total']

    # Extract the fields needed from every entry.
    parsed = []
    for entry in entries:
        record = Comment(self.movie)
        author = entry['user']
        score = entry['rating']

        place = author['loc']
        if place is not None:
            record.user_loc = place['name']

        record.user_avatar = author['avatar']
        record.user_name = author['name']
        record.user_id = author['id']

        if score is not None:
            record.rate = score['value']

        record.comment = entry['comment']
        record.create_time = entry['create_time']
        record.vote_count = entry['vote_count']
        parsed.append(record)

    # Persist the collected comments.
    self.save_comments(parsed, 0)

    return first_index, len(entries), grand_total