Пример #1
0
def init():
    """Show the function menu, read a choice and run the chosen function.

    The menu entries are printed through ``Helper``. Numbers are then read
    with ``Helper.getNum()`` until one matches a menu entry, and the
    module-level function named by that entry is called.

    If no module-level object with that name exists, the module-level
    ``defalt`` function (when present) is called instead of failing with
    ``TypeError: 'NoneType' object is not callable``.

    Raises:
        Exception: Any error raised while reading the number or running the
            selected function is reported via ``Helper.printError()`` and
            re-raised unchanged.
    """
    funcData = {
        "1": {"idx": 1, "funcName": "UpdateNovelChapter"},
        "2": {"idx": 2, "funcName": "UpdateNovelLib"},
        "3": {"idx": 3, "funcName": "ResetNovelLib"},
        "0": {"idx": 0, "funcName": "Quit"},
    }

    Helper.printLine()
    for entry in funcData.values():
        Helper.print(string='{}: {}'.format(entry["idx"], entry["funcName"]))
    Helper.printLine()

    while True:
        try:
            choice = funcData.get(str(Helper.getNum()))
            if not choice:
                # Number is not on the menu: ask again.
                continue

            # Resolve the handler by name; fall back to `defalt` when the
            # named function is not defined in this module.
            handler = globals().get(choice["funcName"]) or globals().get("defalt")
            handler()
            return
        except Exception:
            Helper.printError()
            # Bare raise keeps the original traceback intact.
            raise
Пример #2
0
def checkNextUrl():
    """Process the next queued library, if any.

    When ``StackUrls`` is empty a completion message is printed. Otherwise
    one entry is popped from ``StackFunc`` and one from ``StackUrls`` and
    handed to ``checkBooks`` (``funcIndex == 1``) or ``checkLib``
    (``funcIndex == 2``). Any other ``funcIndex`` pops the entries and does
    nothing with them.
    """
    if StackUrls.is_empty():
        Helper.print('Check WebUrl Done')
        return

    # Both stacks are popped together so they stay in step.
    libName = StackFunc.pop()
    libUrl = StackUrls.pop()

    if funcIndex == 1:
        checkBooks(libName, libUrl)
    elif funcIndex == 2:
        checkLib(libName, libUrl)
Пример #3
0
def checkBooks(libName, libUrl):
    """Update the chapters of every book in ``SearchNovels`` for one library.

    The library index file ``NOVEL_LIB_PATH + libName + '.txt'`` is read as
    ``name=index`` lines. For each book in ``SearchNovels`` that has a
    non-zero index, ``checkChapters`` is called; otherwise a "can't find"
    message is printed. Afterwards ``checkNextUrl()`` is invoked to continue
    with the next queued library.

    Args:
        libName: Library name; used for the index file name and messages.
        libUrl: URL template forwarded to ``checkChapters``.

    Raises:
        OSError: If the library index file cannot be opened.
        ValueError: If a stored index for a searched book is not an integer.
    """
    Helper.print("Check " + libName + " chapter")

    # Load the books of this lib into a dict keyed by name (name=index lines).
    Novel_Book = {}
    with open(NOVEL_LIB_PATH + libName + '.txt', 'r', encoding='utf-8') as Lib:
        for line in Lib:
            # Split on the LAST '=' so that book names containing '=' survive;
            # lines without '=' (e.g. blank lines) are skipped instead of
            # raising IndexError.
            name, sep, index = line.rstrip('\n').rpartition('=')
            if sep:
                Novel_Book[name] = index

    # Check whether the lib has an index stored for each searched book.
    for bookName in SearchNovels:
        Helper.print("Update {} chapters".format(bookName))
        libIndex = int(Novel_Book.get(str(bookName), "0"))
        if libIndex != 0:
            checkChapters(libName, libUrl, libIndex, bookName)
        else:
            Helper.print("Can't find {} in {} lib".format(bookName, libName))

        # Random pause between books.
        time.sleep(Helper.randomFloat())

    checkNextUrl()
Пример #4
0
def getChapterHtml(libName, libUrl, libIndex, chapterIdx):
    """Request one chapter page and return its decoded HTML.

    Args:
        libName: Key into ``URL_LIMIT``; its ``'count'`` entry says how many
            placeholders ``libUrl`` has (1 or 2).
        libUrl: URL template. Must contain a ``www.../`` host part, which is
            sent as the ``Host`` header.
        libIndex: Numeric book index used to fill the template.
        chapterIdx: String appended to the formatted book URL.

    Returns:
        The result of ``Helper.decodeHtml(response)``, or ``None`` when all
        three request attempts failed.

    Raises:
        ValueError: If ``libUrl`` has no ``www.../`` host part, or if
            ``URL_LIMIT[libName]['count']`` is neither 1 nor 2.
    """
    hostMatch = re.search("www(.*?)/", libUrl)
    if hostMatch is None:
        raise ValueError("no 'www.../' host found in url {!r}".format(libUrl))
    baseUrl = hostMatch.group().replace("/", "")

    placeholderCount = URL_LIMIT[libName]['count']
    if placeholderCount == 2:
        # Two placeholders: thousands bucket of the index, then the index.
        url = libUrl.format(math.floor(libIndex / 1000), libIndex)
    elif placeholderCount == 1:
        url = libUrl.format(libIndex)
    else:
        raise ValueError("unsupported URL_LIMIT count {!r} for {}".format(
            placeholderCount, libName))

    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'Host': baseUrl,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': User_Agent,
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
    }

    # Up to three attempts, with a random pause after each failure.
    for _ in range(3):
        try:
            # NOTE(review): verify=False disables TLS certificate checks.
            response = SESSION.get(url + chapterIdx,
                                   headers=headers,
                                   params={},
                                   verify=False,
                                   timeout=3)
        except Exception:
            Helper.printError(string="request {} again".format(libIndex))
            time.sleep(Helper.randomFloat())
        else:
            # Decoding errors are not retried; they propagate to the caller.
            return Helper.decodeHtml(response)

    return None
Пример #5
0
def checkChapters(libName, libUrl, libIndex, bookName):
    """Download the chapters of one book that are not stored locally yet.

    Two files under ``NOVEL_PATH + libName`` are used: ``<bookName>.txt``
    (chapter text, appended) and ``<bookName>_目录.txt`` (one chapter title
    per line). The number of lines already in the title file is taken as the
    number of chapters already saved; chapters from that position up to the
    end of the list returned by ``Html.getChapterUrls`` are fetched and
    appended to both files.

    Args:
        libName: Library name (sub-directory and ``URL_LIMIT`` key).
        libUrl: URL template of the library.
        libIndex: Index of the book inside the library.
        bookName: Expected book title; nothing is downloaded when the page
            title returned by ``Html.getBookName`` differs.

    Raises:
        OSError: If either file cannot be opened (the title file is opened
            with ``'r+'`` and therefore must already exist).
    """
    bookPath = NOVEL_PATH + libName + os.sep + bookName

    # `with` guarantees both handles are closed, including when the second
    # open() fails or an unexpected error escapes.
    with open(bookPath + '.txt', 'a+', encoding='utf-8') as fileContent, \
            open(bookPath + '_目录.txt', 'r+', encoding='utf-8') as fileMulu:
        curIdx = -1
        allIdx = -1
        chapterUrls = []
        try:
            html = getBookChapterHtml(libIndex, libName, libUrl)
            novelName = Html.getBookName(html)
            if bookName == novelName:
                # Reading to the end also leaves the write position at EOF.
                curIdx = len(fileMulu.readlines())
                chapterUrls = Html.getChapterUrls(html)
                allIdx = len(chapterUrls)
            else:
                Helper.printError("{} lib {} index {} need update".format(
                    libName, bookName, libIndex))
        except Exception:
            Helper.printError()

        # Empty range when nothing is new or the listing step above failed.
        for chapterPos in range(curIdx, allIdx):
            # Up to three attempts per chapter.
            # NOTE(review): a chapter that fails three times is skipped, which
            # leaves the title file one line short of the real position.
            for _ in range(3):
                try:
                    # Entries are split on '=': part 0 goes to the request,
                    # part 1 is the chapter title.
                    values = chapterUrls[chapterPos].split("=")
                    chapter = Helper.formatChapterName(values[1])
                    html = getChapterHtml(libName, libUrl, libIndex, values[0])
                    content = Html.getChapterContent(html)

                    fileContent.write(chapter + "\n" + content + "\n")
                    fileMulu.write(chapter + "\n")
                    Helper.print("{} {}".format(bookName, chapter))
                    break
                except Exception:
                    Helper.printError()
Пример #6
0
def getBookChapterHtml(libIndex, libName, libUrl):
    """Request the page of one book and return its decoded HTML.

    Args:
        libIndex: Numeric book index used to fill the URL template.
        libName: Key into ``URL_LIMIT``; its ``'count'`` entry says how many
            placeholders ``libUrl`` has (1 or 2).
        libUrl: URL template. Must contain a ``www.../`` host part, which is
            sent as the ``Host`` header and used to build ``Referer``.

    Returns:
        The result of ``Helper.decodeHtml(response)``, or ``""`` when both
        attempts (request or decoding) failed.

    Raises:
        ValueError: If ``libUrl`` has no ``www.../`` host part, or if
            ``URL_LIMIT[libName]['count']`` is neither 1 nor 2.
    """
    hostMatch = re.search("www(.*?)/", libUrl)
    if hostMatch is None:
        raise ValueError("no 'www.../' host found in url {!r}".format(libUrl))
    baseUrl = hostMatch.group().replace("/", "")

    placeholderCount = URL_LIMIT[libName]['count']
    if placeholderCount == 2:
        # Two placeholders: thousands bucket of the index, then the index.
        url = libUrl.format(math.floor(libIndex / 1000), libIndex)
    elif placeholderCount == 1:
        url = libUrl.format(libIndex)
    else:
        raise ValueError("unsupported URL_LIMIT count {!r} for {}".format(
            placeholderCount, libName))

    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language':
        'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': baseUrl,
        'Referer': 'https://' + baseUrl,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': User_Agent,
    }

    # Two attempts; a failed request or decode is logged and followed by a
    # random pause.
    for _ in range(2):
        try:
            # NOTE(review): verify=False disables TLS certificate checks.
            response = SESSION.get(url,
                                   headers=headers,
                                   params={},
                                   verify=False,
                                   timeout=3)
            return Helper.decodeHtml(response)
        except Exception:
            Helper.printError(string="request {} again".format(libIndex))
            time.sleep(Helper.randomFloat())

    return ""
Пример #7
0
def checkLib(libName, libUrl):
    """Extend the index file of one library with newly found books.

    The file ``NOVEL_LIB_PATH + libName + '.txt'`` holds ``name=index``
    lines. Scanning starts at ``len(known entries) + 1`` and runs up to
    ``Lib_Max_Count``. For every index whose page yields a book name that is
    not already recorded for that index, a line is appended to the file.
    An index that fails ``Repeat_Max_Count`` times in a row (empty name or
    exception) is skipped. Finally ``checkNextUrl()`` continues with the
    next queued library.

    Args:
        libName: Library name; used for the index file name and messages.
        libUrl: URL template forwarded to ``getBookChapterHtml``.

    Raises:
        OSError: If the index file cannot be opened (``'r+'`` requires it to
            exist).
    """
    Helper.print("check " + libName + " lib")
    Novel_Lib = {}

    with open(NOVEL_LIB_PATH + libName + '.txt', 'r+', encoding='utf-8') as Lib:
        # Reading everything first also leaves the write position at EOF.
        for line in Lib.readlines():
            # Split on the LAST '=' so names containing '=' survive; lines
            # without '=' (e.g. blank lines) are ignored.
            name, sep, index = line.rstrip('\n').rpartition('=')
            if sep:
                Novel_Lib[index] = name

        curIndex = len(Novel_Lib) + 1
        ErrorCount = 0

        while curIndex <= Lib_Max_Count:
            succeeded = False
            try:
                html = getBookChapterHtml(curIndex, libName, libUrl)
                novelName = Html.getBookName(html)
                if novelName:
                    if Novel_Lib.get(str(curIndex), "") != novelName:
                        Lib.write(novelName + "=" + str(curIndex) + "\n")
                        Novel_Lib[str(curIndex)] = novelName
                        Helper.print("{} add {} {}".format(
                            libName, curIndex, novelName))
                    succeeded = True
                else:
                    Helper.printError(
                        string="request {} error".format(curIndex))
            except Exception:
                Helper.printError()

            if succeeded:
                # Advance right away; the same index is not fetched a second
                # time just to confirm what was recorded.
                ErrorCount = 0
                curIndex = curIndex + 1
            else:
                # Exceptions count as failures too, so a persistent error
                # cannot keep the loop on one index forever.
                ErrorCount = ErrorCount + 1
                if ErrorCount >= Repeat_Max_Count:
                    curIndex = curIndex + 1
                    ErrorCount = 0

            time.sleep(Helper.randomFloat())

    checkNextUrl()
Пример #8
0
def resetLib():
    """Truncate the index file of every library listed in ``URLS``.

    Each file ``NOVEL_LIB_PATH + <name> + '.txt'`` is opened in write mode
    and closed again, which empties it (or creates it when missing).
    """
    for entry in URLS.values():
        libPath = NOVEL_LIB_PATH + entry['name'] + '.txt'
        # Opening with 'w' is what truncates; nothing is written.
        with open(libPath, 'w'):
            pass
    Helper.print("reset lib done")
Пример #9
0
def defalt():
    """Report through ``Helper.printError`` that a function name was not found."""
    message = "can't find funcName"
    Helper.printError(string=message)