Пример #1
0
 def getComment(self, articleId, maxCommentCount):
     """Collect formatted comments for one article, paging by offset.

     Requests ``self.commentUrl + articleId + '/app/comments/newList'``
     repeatedly with an increasing ``offset`` query parameter and turns each
     returned comment into ``"<createTime> <nickname> <content>"``.

     Args:
         articleId: Article identifier; concatenated into the URL, so it
             must be a string.
         maxCommentCount: Paging stops once the running offset reaches this
             value (the final page may push the result slightly past it).

     Returns:
         A list of formatted comment strings; empty if nothing was fetched.
     """
     url = self.commentUrl + articleId + '/app/comments/newList'
     headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}
     limit = 10
     payload = {
         'format': 'building',
         'ibc': 'newsappios',
         'headLimit': 2,
         'tailLimit': 3,
         'showLevelThreshold': 5,
         'limit': limit,
     }
     # A timed-out window is skipped rather than retried.
     timeoutSkip = limit + payload['headLimit'] + payload['tailLimit']
     offset = 0
     commentList = []
     retryTime = 0
     maxRetryTime = 5
     while offset < maxCommentCount:
         payload['offset'] = offset
         try:
             r = requests.get(url, params=payload, headers=headers,
                              timeout=3)
             r = r.json()
         except requests.exceptions.Timeout:
             offset += timeoutSkip
             traceback.print_exc()
             continue
         except Exception:
             # Any other request/decoding failure ends the crawl; unlike a
             # bare ``except`` this lets KeyboardInterrupt propagate.
             traceback.print_exc()
             break

         commentDict = r.get('comments') if isinstance(r, dict) else None
         if not isinstance(commentDict, dict) or not commentDict:
             # A missing or empty 'comments' mapping leaves the offset
             # unchanged, so it must count against the retry budget or the
             # loop would never terminate.
             retryTime += 1
             if retryTime >= maxRetryTime:
                 break
             continue

         for comment in commentDict.values():
             publishTime = comment.get('createTime', '0000-00-00 00:00:00')
             # Test the same key that is read ('nickname'); checking a
             # differently-cased key either masked every name or raised
             # KeyError.
             nickname = (comment.get('user') or {}).get('nickname')
             if nickname is not None:
                 userName = utils.formatContent(nickname)
                 userName = utils.formatComment(userName)
             else:
                 userName = '******'
             content = utils.formatContent(comment['content'])
             content = utils.formatComment(content)
             commentList.append(publishTime + ' ' + userName + ' ' + content)
         offset += len(commentDict)
     return commentList
Пример #2
0
 def getComment(self, articleId, maxCommentCount):
     """Collect formatted comments for one article, paging by page number.

     Requests ``self.commentUrl`` with ``comments_url=articleId`` and an
     increasing ``page`` parameter, reading the comment list from the
     ``'data'`` field of each JSON response. Each comment becomes
     ``"<YYYY-mm-dd HH:MM:SS> <nickname> <content>"`` (local time).

     Args:
         articleId: Value sent as the ``comments_url`` query parameter.
         maxCommentCount: Paging stops once this many comments have been
             collected (the final page may exceed it slightly).

     Returns:
         A list of formatted comment strings; empty if nothing was fetched.
     """
     url = self.commentUrl
     headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}
     payload = {'limit': 20, 'page': 1, 'comments_url': articleId}
     commentList1 = []
     offset = 0
     retryTime = 0
     maxRetryTime = 5
     while offset < maxCommentCount:
         try:
             r = requests.get(url,
                              params=payload,
                              headers=headers,
                              timeout=3)
             commentList = r.json()['data'] or []
         except requests.exceptions.ConnectTimeout:
             # Server unreachable: give up on this article.
             traceback.print_exc()
             break
         except Exception:
             # Skip the failed page, but charge it to the retry budget:
             # the offset does not advance here, so an endpoint that keeps
             # failing would otherwise loop forever.
             traceback.print_exc()
             payload['page'] += 1
             retryTime += 1
             if retryTime >= maxRetryTime:
                 break
             continue

         payload['page'] += 1
         count = len(commentList)
         if count == 0:
             retryTime += 1
             if retryTime >= maxRetryTime:
                 break
             continue

         for comment in commentList:
             data = comment['data']
             if 'add_time' in data:
                 publishTime = time.localtime(int(data['add_time']))
                 publishTime = time.strftime("%Y-%m-%d %H:%M:%S",
                                             publishTime)
             else:
                 publishTime = '0000-00-00 00:00:00'
             nickname = comment.get('nickname')
             if nickname is not None:
                 userName = utils.formatContent(nickname)
                 userName = utils.formatComment(userName)
             else:
                 userName = '******'
             content = utils.formatContent(data['comment_contents'])
             content = utils.formatComment(content)
             commentList1.append(publishTime + ' ' + userName + ' ' + content)
         offset += count
     return commentList1
Пример #3
0
 def getComment(self, articleId, maxCommentCount):
     """Collect formatted comments for one article, paging by page number.

     Requests ``self.commentUrl`` with ``id=articleId`` and an increasing
     ``page`` parameter, reading the comment list from
     ``response['response']['commentList']``. Each comment becomes
     ``"<YYYY-mm-dd HH:MM:SS> <author> <content>"`` (local time).

     Args:
         articleId: Value sent as the ``id`` query parameter.
         maxCommentCount: Paging stops once this many comments have been
             collected (the final page may exceed it slightly).

     Returns:
         A list of formatted comment strings; empty if nothing was fetched.
     """
     url = self.commentUrl
     headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}
     payload = {
         'busiCode': 2,
         'id': articleId,
         'page': 1,
         'rollType': 2,
         'size': 10,
         'type': 3,
     }
     commentList1 = []
     offset = 0
     failCount = 0
     maxFailCount = 5  # small arbitrary cap on skipped (failed) pages
     while offset < maxCommentCount:
         try:
             r = requests.get(url,
                              params=payload,
                              headers=headers,
                              timeout=3)
             # Looked up inside the try so a malformed payload is treated
             # as a failed page instead of crashing the whole crawl.
             commentList = r.json()['response']['commentList']
         except requests.exceptions.ConnectTimeout:
             # Server unreachable: give up on this article.
             traceback.print_exc()
             break
         except Exception:
             # Skip the failed page, but only a bounded number of times;
             # the offset does not advance here, so without a cap a
             # persistently failing endpoint would loop forever.
             traceback.print_exc()
             payload['page'] += 1
             failCount += 1
             if failCount >= maxFailCount:
                 break
             continue

         payload['page'] += 1
         if not commentList:
             break

         for comment in commentList:
             if 'ctime' in comment:
                 # ctime is divided by 1000 before conversion, presumably
                 # because it is in milliseconds; confirm against the API.
                 publishTime = int(comment['ctime']) / 1000
                 publishTime = time.localtime(publishTime)
                 publishTime = time.strftime("%Y-%m-%d %H:%M:%S",
                                             publishTime)
             else:
                 publishTime = '0000-00-00 00:00:00'
             author = comment.get('author')
             if author is not None:
                 userName = utils.formatContent(author)
                 userName = utils.formatComment(userName)
             else:
                 userName = '******'
             content = utils.formatContent(comment['content'])
             content = utils.formatComment(content)
             commentList1.append(publishTime + ' ' + userName + ' ' + content)
         offset += len(commentList)
     return commentList1
Пример #4
0
 def getArticle(self, db, articleId, source):
     """Fetch one article and wrap it in a ``utils.News`` object.

     Requests ``self.articleUrl`` with ``id=articleId`` and reads the
     ``title``, ``url``, ``content`` and ``pubtime`` fields of the JSON
     response.

     Args:
         db: Handle passed through to ``utils.checkVisited``.
         articleId: Value sent as the ``id`` query parameter.
         source: Passed through unchanged to ``utils.News``.

     Returns:
         * ``None`` if the request or JSON decoding fails, or the response
           has no usable title;
         * ``-1`` if ``utils.checkVisited`` reports the article URL as
           already visited;
         * otherwise the constructed ``utils.News`` instance.
     """
     url = self.articleUrl
     headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}
     payload = {'id': articleId}
     try:
         r = requests.get(url, headers=headers, params=payload, timeout=3)
         r = r.json()
     except Exception:
         # Best-effort fetch: log and report failure. ``Exception`` (not a
         # bare ``except``) keeps KeyboardInterrupt/SystemExit propagating.
         traceback.print_exc()
         return None

     # A non-object JSON body or a missing/null title means there is no
     # article to build.
     if not isinstance(r, dict) or r.get('title') is None:
         return None
     title = r['title']
     webUrl = utils.formatUrl(r['url'])
     if utils.checkVisited(webUrl, db):
         return -1
     content = utils.formatContent(r['content'])
     appUrl = url
     publishTime = r['pubtime']
     return utils.News(title, appUrl, webUrl, content, publishTime, source)
Пример #5
0
 def getArticle(self, db, articleId, source):
     """Fetch one article and wrap it in a ``utils.News`` object.

     Requests ``self.articleUrl + articleId + '/full.html'`` and reads the
     article record stored under the ``articleId`` key of the JSON
     response.

     Args:
         db: Handle passed through to ``utils.checkVisited``.
         articleId: Article identifier; concatenated into the URL and used
             as the key into the response, so it must be a string.
         source: Passed through unchanged to ``utils.News``.

     Returns:
         * ``None`` if the request or JSON decoding fails, the response has
           no entry for ``articleId``, or that entry has no usable title;
         * ``-1`` if ``utils.checkVisited`` reports the share link as
           already visited;
         * otherwise the ``utils.News`` instance, with ``commentCount``,
           ``upCount`` and ``downCount`` attributes set from the response.
     """
     url = self.articleUrl + articleId + '/full.html'
     headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}
     try:
         r = requests.get(url, headers=headers, timeout=3)
         r = r.json()
     except Exception:
         # Best-effort fetch: log and report failure. ``Exception`` (not a
         # bare ``except``) keeps KeyboardInterrupt/SystemExit propagating.
         traceback.print_exc()
         return None

     # The payload may lack the article entry (e.g. an error body); treat
     # that like a missing title instead of raising KeyError.
     r = r.get(articleId) if isinstance(r, dict) else None
     if not isinstance(r, dict) or r.get('title') is None:
         return None
     title = r['title']
     webUrl = utils.formatUrl(r['shareLink'])
     if utils.checkVisited(webUrl, db):
         return -1
     content = utils.formatContent(r['body'])
     appUrl = url
     commentCount = r['replyCount']
     upCount = r['threadVote']
     downCount = r['threadAgainst']
     publishTime = r['ptime']
     news = utils.News(title, appUrl, webUrl, content, publishTime, source)
     news.commentCount = commentCount
     news.upCount = upCount
     news.downCount = downCount
     return news
Пример #6
0
 def getComment(self, articleId, maxCommentCount):
     """Collect formatted comments for one article, paging by page number.

     Requests ``self.commentUrl`` with ``itemid=articleId`` and an
     increasing ``page`` parameter, reading a mapping of comments from
     ``response['data']['content']``. Each comment becomes
     ``"<YYYY-mm-dd HH:MM:SS> <author> <message>"`` (local time).

     Args:
         articleId: Value sent as the ``itemid`` query parameter.
         maxCommentCount: Paging stops once this many comments have been
             collected (the final page may exceed it slightly).

     Returns:
         A list of formatted comment strings; empty if nothing was fetched.
     """
     url = self.commentUrl
     headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}
     payload = {
         'itemid': articleId,
         'page': 1,
         'prepare': 20,
         'app': 'news',
     }
     commentList1 = []
     offset = 0
     failCount = 0
     maxFailCount = 5  # small arbitrary cap on skipped (failed) pages
     while offset < maxCommentCount:
         try:
             r = requests.get(url,
                              params=payload,
                              headers=headers,
                              timeout=3)
             commentList = r.json()['data']['content']
         except requests.exceptions.ConnectTimeout:
             # Server unreachable: give up on this article.
             traceback.print_exc()
             break
         except Exception:
             # Skip the failed page, but only a bounded number of times;
             # the offset does not advance here, so without a cap a
             # persistently failing endpoint would loop forever.
             traceback.print_exc()
             payload['page'] += 1
             failCount += 1
             if failCount >= maxFailCount:
                 break
             continue

         payload['page'] += 1
         count = len(commentList)
         if count == 0:
             break

         for comment in commentList.values():
             if 'dateline' in comment:
                 publishTime = time.localtime(int(comment['dateline']))
                 publishTime = time.strftime("%Y-%m-%d %H:%M:%S",
                                             publishTime)
             else:
                 publishTime = '0000-00-00 00:00:00'
             author = comment.get('author')
             if author is not None:
                 # Same formatContent -> formatComment pipeline as the
                 # message below (previously formatComment ran twice and
                 # formatContent was skipped for the author).
                 userName = utils.formatContent(author)
                 userName = utils.formatComment(userName)
             else:
                 userName = '******'
             content = utils.formatContent(comment['message'])
             content = utils.formatComment(content)
             commentList1.append(publishTime + ' ' + userName + ' ' + content)
         offset += count
     return commentList1