Exemplo n.º 1
0
 def getPost(self,
             pageNumber: int,
             ajax: bool = True,
             useTemp: bool = True):
     """Fetch one page of the post and return its HTML source as text.

     The page is first looked up in the temporary-file cache (when
     ``useTemp`` is truthy). On a miss it is downloaded, with up to 10
     attempts and a 5 second timeout per attempt.

     Args:
         pageNumber: Page to fetch; appended to the post link.
         ajax: When ``False``, the ``ajax=1&`` fragment is removed from
             the link before requesting.
         useTemp: When truthy, a cached copy is returned if one exists;
             when exactly ``True``, a freshly downloaded page is also
             written to the cache.

     Returns:
         Whatever ``readFileByID`` yields on a cache hit, otherwise the
         response body decoded as UTF-8 with undecodable bytes dropped.

     The process exits with status 1 (via ``quit``) when all attempts
     fail or when the user interrupts the download.
     """
     self.__workPageNumber = pageNumber
     link = self.__postLink + str(pageNumber)
     existTemp = self.__tempSave.getSameTemp()
     if existTemp.get('html') and useTemp:
         for i in existTemp['html']:
             # presumably i[1] holds the cached entry's page number —
             # verify against the temp-save implementation
             if int(i[1]) == int(pageNumber):
                 Avalon.debug_info('第%d页已经在临时文件中存在,跳过' % pageNumber)
                 return self.__tempSave.readFileByID(i)
     if ajax is False:
         link = link.replace('ajax=1&', '')
     for tryTimes in range(1, 11):
         try:
             postRequest = request.Request(link)
             # Set request headers to disguise the crawler (whether this
             # is actually necessary is doubtful).
             postRequest.add_header(
                 'User-Agent',
                 random.choice(self.__userAgent).replace('\n', ''))
             postRequest.add_header('Referer', 'https://tieba.baidu.com')
             # Close the response deterministically rather than leaving
             # the socket open until garbage collection.
             with request.urlopen(postRequest, timeout=5) as response:
                 postRead = response.read()
             if self.debug:
                 Avalon.debug_info('链接:"%s"请求头:%s.' %
                                   (link, postRequest.headers))
         # Error handling: every branch below (except the interrupt)
         # falls through to the next attempt.
         except error.URLError as e:
             Avalon.warning("获取帖子正文失败!原因:%s(%s/10)" %
                            (str(e.reason), str(tryTimes)))
         except timeout as e:
             Avalon.warning("获取帖子正文失败!原因:%s(%s/10)" %
                            (str(e), str(tryTimes)))
         except KeyboardInterrupt:
             Avalon.critical("用户强制退出")
             quit(1)
         except Exception:
             # Header-building failures land here too, so they are
             # reported instead of being skipped silently. SystemExit
             # and GeneratorExit are deliberately not caught.
             Avalon.warning("获取帖子正文失败!原因:未知错误(%s/10)" % tryTimes)
         # No error: stop retrying.
         else:
             if self.debug:
                 Avalon.debug_info('Link %s Get Successed.' % link)
             break
     else:
         # The loop finished without a successful attempt.
         Avalon.error('获取失败!')
         if self.debug:
             Avalon.debug('Link:%s' % link)
         quit(1)
     # Decode once and reuse the text for both the cache and the caller.
     postText = postRead.decode(errors='ignore')
     if useTemp is True:
         self.__tempSave.savePostRaw(postText, pageNumber=pageNumber)
     return postText
Exemplo n.º 2
0
 def __getUserName(self, userID: str):
     """Resolve a user ID to a display name, falling back to the ID.

     Looks the ID up with ``self.__db.checkExistUsers``. A truthy result
     yields the string form of its element at index 1 (presumably the
     stored user name — verify against the database layer). A falsy
     result is logged at debug level and the ID itself is returned as a
     string.
     """
     record = self.__db.checkExistUsers(userID)
     if record:
         return str(record[1])
     # No record for this user: report it and use the ID in its place.
     Avalon.debug(
         'User ID: %s Can\'t Get Username,Will Use ID Instead.' % userID)
     return str(userID)
Exemplo n.º 3
0
def methodPost(url: str,
               datasEncoded: bytes,
               headers: dict = None,
               maxTryTimes: int = 10):
    """Send a POST request and return the response body as bytes.

    The request is retried up to ``maxTryTimes`` times, each attempt
    with a 10 second timeout. Failures are logged at debug level along
    with the number of attempts still left.

    Args:
        url: Target URL; passed through ``urlparse(...).geturl()``
            before the request is built.
        datasEncoded: Already-encoded request body.
        headers: Optional request headers; defaults to no extra headers.
        maxTryTimes: Maximum number of attempts.

    Returns:
        The raw response body.

    When every attempt fails an error is logged and ``__quitCheck(1)``
    is called.
    NOTE(review): if ``__quitCheck`` returns instead of terminating the
    process, the final ``return`` raises ``UnboundLocalError`` — confirm
    that it exits.
    """
    # Avoid a shared mutable default; an omitted argument means "none".
    if headers is None:
        headers = {}
    encodedUrl = parse.urlparse(url=url).geturl()
    requsetMaker = request.Request(url=encodedUrl,
                                   headers=headers,
                                   data=datasEncoded)
    for tryTimes in range(maxTryTimes):
        # tryTimes is 0-based, so the attempts left after this one are
        # maxTryTimes - tryTimes - 1 (0 on the final attempt).
        remaining = maxTryTimes - tryTimes - 1
        try:
            # Close the response deterministically rather than leaving
            # the socket open until garbage collection.
            with request.urlopen(url=requsetMaker, timeout=10) as response:
                dataPost = response.read()
        except error.URLError as e:
            Avalon.debug(
                'POST "%s" Failed,reason:%s,Program will Try %d Times Later.' %
                (url, e.reason, remaining))
        except timeout as e:
            Avalon.debug(
                'POST "%s" Timeout,reason:%s,Program will Try %d Times Later.'
                % (url, e, remaining))
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must
            # propagate instead of being retried as an unknown failure.
            Avalon.debug(
                'POST "%s" Timeout with unknown reason,Program will Try %d Times Later.'
                % (url, remaining))
        else:
            break
    else:
        # The loop finished without a successful attempt.
        Avalon.error(
            'POST "%s" Failed during all request,please check your network status.'
            % url)
        __quitCheck(1)
    return bytes(dataPost)