示例#1
0
def checkBooks(libName, libUrl):
    """Update the chapters of every searched novel that is indexed in a lib.

    Reads ``NOVEL_LIB_PATH + libName + '.txt'`` (one ``name=index`` entry per
    line). For each name in ``SearchNovels`` it calls ``checkChapters`` when
    the lib holds a non-zero index for that name, and logs a "can't find"
    message otherwise. It sleeps ``Helper.randomFloat()`` seconds after each
    book and finishes by calling ``checkNextUrl()``.

    Args:
        libName: Lib name; selects the index file and is forwarded to
            ``checkChapters``.
        libUrl: Lib URL, forwarded unchanged to ``checkChapters``.

    Raises:
        OSError: If the lib index file cannot be opened.
    """
    Helper.print("Check " + libName + " chapter")

    # Load the books of the lib into a dict mapping name -> index string.
    Novel_Book = {}
    libPath = NOVEL_LIB_PATH + libName + '.txt'
    with open(libPath, 'r', encoding='utf-8') as Lib:
        for line in Lib:
            # Split on the LAST '=' so a book name that itself contains '='
            # still yields the trailing index.
            name, sep, index = line.rstrip('\n').rpartition('=')
            if not sep:
                # Blank or malformed line: nothing to index.
                continue
            Novel_Book[name] = index

    # Check whether the lib holds an index for each requested book.
    for bookName in SearchNovels:
        Helper.print("Update {} chapters".format(bookName))
        try:
            libIndex = int(Novel_Book.get(str(bookName), "0"))
        except ValueError:
            # A corrupt index entry is treated the same as a missing one.
            libIndex = 0

        if libIndex != 0:
            checkChapters(libName, libUrl, libIndex, bookName)
        else:
            Helper.print("Can't find {} in {} lib".format(bookName, libName))

        time.sleep(Helper.randomFloat())

    checkNextUrl()
示例#2
0
def getChapterHtml(libName, libUrl, libIndex, chapterIdx):
    """Fetch one chapter page of a book and return its decoded HTML.

    The book URL is built by formatting ``libUrl`` with ``libIndex``; the
    number of format arguments comes from ``URL_LIMIT[libName]['count']``:

    * ``2`` -> ``libUrl.format(floor(libIndex / 1000), libIndex)``
    * ``1`` -> ``libUrl.format(libIndex)``

    ``chapterIdx`` is appended to that URL. The request is attempted up to
    three times, sleeping ``Helper.randomFloat()`` seconds after each failure.

    Args:
        libName: Key into ``URL_LIMIT``.
        libUrl: URL template; must contain a ``www.../`` segment, which is
            used as the ``Host`` header.
        libIndex: Numeric index of the book inside the lib.
        chapterIdx: String appended to the book URL to address the chapter.

    Returns:
        The result of ``Helper.decodeHtml`` on the response, or ``None`` when
        all three attempts raised.

    Raises:
        ValueError: If the configured placeholder count is neither 1 nor 2.
        AttributeError: If ``libUrl`` contains no ``www.../`` segment.
    """
    # Host header value: the "www.../" match with the slash removed.
    baseUrl = re.search("www(.*?)/", libUrl).group()
    baseUrl = re.sub("/", "", baseUrl)

    placeholderCount = URL_LIMIT[libName]['count']
    if placeholderCount == 2:
        url = libUrl.format(math.floor(libIndex / 1000), libIndex)
    elif placeholderCount == 1:
        url = libUrl.format(libIndex)
    else:
        # Fail with a clear message instead of an UnboundLocalError on `url`.
        raise ValueError(
            "unsupported URL_LIMIT count {!r} for lib {}".format(
                placeholderCount, libName))

    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'Host': baseUrl,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': User_Agent,
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
    }

    # NOTE(review): SESSION is presumably a requests.Session, in which case
    # verify=False disables TLS certificate verification -- confirm intended.
    for _ in range(3):
        try:
            html = SESSION.get(url + chapterIdx,
                               headers=headers,
                               params={},
                               verify=False,
                               timeout=3)
        except Exception:
            Helper.printError(string="request {} again".format(libIndex))
            time.sleep(Helper.randomFloat())
        else:
            # Decoding stays outside the retry guard, so decode errors
            # propagate to the caller rather than triggering a retry.
            return Helper.decodeHtml(html)

    return None
示例#3
0
def checkLib(libName, libUrl):
    """Extend a lib's index file by probing book indexes one at a time.

    Loads ``NOVEL_LIB_PATH + libName + '.txt'`` (``name=index`` per line) into
    an index -> name mapping, then starts at ``len(mapping) + 1`` and walks up
    to ``Lib_Max_Count``. For every index it fetches the book page with
    ``getBookChapterHtml`` and extracts the title with ``Html.getBookName``:

    * A title that differs from the stored one is appended to the file as
      ``name=index`` and recorded in the mapping.
    * A falsy title or an exception counts as one failed attempt; after
      ``Repeat_Max_Count`` consecutive failures the index is skipped.

    It sleeps ``Helper.randomFloat()`` seconds between attempts and calls
    ``checkNextUrl()`` once the scan is finished.

    Args:
        libName: Lib name; selects the index file and the URL settings.
        libUrl: URL template forwarded to ``getBookChapterHtml``.

    Raises:
        OSError: If the lib index file cannot be opened for read/write.
    """
    Helper.print("check " + libName + " lib")
    Novel_Lib = {}

    libPath = NOVEL_LIB_PATH + libName + '.txt'
    # The context manager closes the file even if the scan is interrupted.
    with open(libPath, 'r+', encoding='utf-8') as Lib:
        # Reading to EOF first leaves the position at the end, so the
        # writes below append to the existing entries.
        for line in Lib.readlines():
            # Split on the LAST '=' because book names may contain '='.
            name, sep, index = line.rstrip('\n').rpartition('=')
            if not sep:
                # Blank or malformed line: skip instead of crashing.
                continue
            Novel_Lib[index] = name

        curIndex = len(Novel_Lib) + 1
        ErrorCount = 0

        while curIndex <= Lib_Max_Count:
            failed = False
            try:
                html = getBookChapterHtml(curIndex, libName, libUrl)
                novelName = Html.getBookName(html)
                if novelName:
                    key = str(curIndex)
                    if Novel_Lib.get(key, "") != novelName:
                        # Write before updating the mapping so that a failed
                        # write is retried rather than silently lost.
                        Lib.write(novelName + "=" + key + "\n")
                        Novel_Lib[key] = novelName
                        Helper.print("{} add {} {}".format(
                            libName, curIndex, novelName))
                    # Any success ends the failure streak and moves on; the
                    # same index is not fetched a second time.
                    ErrorCount = 0
                    curIndex = curIndex + 1
                else:
                    Helper.printError(
                        string="request {} error".format(curIndex))
                    failed = True
            except Exception:
                Helper.printError()
                # Count exceptions as failures too; otherwise a persistent
                # error on one index would loop forever.
                failed = True

            if failed:
                ErrorCount = ErrorCount + 1
                if ErrorCount >= Repeat_Max_Count:
                    curIndex = curIndex + 1
                    ErrorCount = 0

            time.sleep(Helper.randomFloat())

    checkNextUrl()
示例#4
0
def getBookChapterHtml(libIndex, libName, libUrl):
    """Fetch the page of a book and return its decoded HTML.

    The URL is built by formatting ``libUrl`` with ``libIndex``; the number
    of format arguments comes from ``URL_LIMIT[libName]['count']``:

    * ``2`` -> ``libUrl.format(floor(libIndex / 1000), libIndex)``
    * ``1`` -> ``libUrl.format(libIndex)``

    The request (including decoding) is attempted up to two times, sleeping
    ``Helper.randomFloat()`` seconds after each failure.

    Args:
        libIndex: Numeric index of the book inside the lib.
        libName: Key into ``URL_LIMIT``.
        libUrl: URL template; must contain a ``www.../`` segment, which is
            used for the ``Host`` and ``Referer`` headers.

    Returns:
        The result of ``Helper.decodeHtml`` on the response, or ``""`` when
        both attempts raised.

    Raises:
        ValueError: If the configured placeholder count is neither 1 nor 2.
        AttributeError: If ``libUrl`` contains no ``www.../`` segment.
    """
    # Host header value: the "www.../" match with the slash removed.
    baseUrl = re.search("www(.*?)/", libUrl).group()
    baseUrl = re.sub("/", "", baseUrl)

    placeholderCount = URL_LIMIT[libName]['count']
    if placeholderCount == 2:
        url = libUrl.format(math.floor(libIndex / 1000), libIndex)
    elif placeholderCount == 1:
        url = libUrl.format(libIndex)
    else:
        # Fail with a clear message instead of an UnboundLocalError on `url`.
        raise ValueError(
            "unsupported URL_LIMIT count {!r} for lib {}".format(
                placeholderCount, libName))

    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language':
        'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': baseUrl,
        'Referer': 'https://' + baseUrl,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': User_Agent,
    }

    # NOTE(review): SESSION is presumably a requests.Session, in which case
    # verify=False disables TLS certificate verification -- confirm intended.
    for _ in range(2):
        try:
            html = SESSION.get(url,
                               headers=headers,
                               params={},
                               verify=False,
                               timeout=3)
            # Decoding is inside the guard: a decode failure is retried too.
            return Helper.decodeHtml(html)
        except Exception:
            # Log the retry the same way getChapterHtml does rather than
            # swallowing the failure silently.
            Helper.printError(string="request {} again".format(libIndex))
            time.sleep(Helper.randomFloat())

    return ""