def init():
    """Print the function menu, read a choice and run the matching function.

    The chosen entry's ``funcName`` is looked up in this module's globals and
    called with no arguments. A number that is not in the menu resolves to
    ``defalt`` and the prompt repeats; a valid choice ends the loop after its
    function returns. Any exception is reported through ``Helper.printError``
    and then re-raised.
    """
    menu = {
        "1": {"idx": 1, "funcName": "UpdateNovelChapter"},
        "2": {"idx": 2, "funcName": "UpdateNovelLib"},
        "3": {"idx": 3, "funcName": "ResetNovelLib"},
        "0": {"idx": 0, "funcName": "Quit"},
    }

    Helper.printLine()
    for entry in menu.values():
        Helper.print(string='{}: {}'.format(entry.get("idx"), entry.get("funcName")))
    Helper.printLine()

    while True:
        try:
            choice = menu.get(str(Helper.getNum()), {})
            # An unknown number yields {} and therefore the 'defalt' handler.
            handlerName = choice.get('funcName', 'defalt')
            handler = globals().get(handlerName)
            handler()
        except Exception as e:
            Helper.printError()
            raise e
        if choice:
            # A real menu entry was executed; stop prompting.
            break
def checkNextUrl():
    """Pop the next (name, url) pair and hand it to the active checker.

    Reads the module-level ``StackUrls``, ``StackFunc`` and ``funcIndex``
    (none of them is rebound here). When ``StackUrls`` is empty a completion
    message is printed. Otherwise one entry is popped from each stack and
    passed to ``checkBooks`` (``funcIndex == 1``) or ``checkLib``
    (``funcIndex == 2``); for any other ``funcIndex`` the popped pair is
    discarded.
    """
    if StackUrls.is_empty():
        Helper.print('Check WebUrl Done')
        return

    # Pop order matters: name first, then url, exactly one of each.
    siteName = StackFunc.pop()
    siteUrl = StackUrls.pop()

    if funcIndex == 1:
        checkBooks(siteName, siteUrl)
        return
    if funcIndex == 2:
        checkLib(siteName, siteUrl)
def checkBooks(libName, libUrl):
    """Update the chapters of every book in ``SearchNovels`` for one library.

    Loads ``<NOVEL_LIB_PATH><libName>.txt`` (lines of ``name=index``) into a
    name -> index mapping, then calls ``checkChapters`` for each searched book
    that has a non-zero index in that library. Finally continues with the next
    queued site via ``checkNextUrl``.

    Args:
        libName: Library (site) name; also the base name of the library file.
        libUrl: URL template of the library, forwarded to ``checkChapters``.
    """
    Helper.print("Check " + libName + " chapter")

    # Store the books of this lib in a dict keyed by name, value = index.
    Novel_Book = {}
    # The context manager closes the file even if parsing raises.
    with open(NOVEL_LIB_PATH + libName + '.txt', 'r+', encoding='utf-8') as Lib:
        for line in Lib.readlines():
            values = line.rstrip('\n').split('=')
            if len(values) < 2:
                # Blank or malformed line: no "name=index" pair to record.
                continue
            Novel_Book[values[0]] = values[1]

    # Check whether the lib holds an index for each searched book.
    for bookName in SearchNovels:
        Helper.print("Update {} chapters".format(bookName))
        # A missing book maps to "0", which is treated as "not in this lib".
        libIndex = int(Novel_Book.get(str(bookName), "0"))
        if libIndex != 0:
            checkChapters(libName, libUrl, libIndex, bookName)
        else:
            Helper.print("Can't find {} in {} lib".format(bookName, libName))
        time.sleep(Helper.randomFloat())

    checkNextUrl()
def getChapterHtml(libName, libUrl, libIndex, chapterIdx):
    """Fetch one chapter page and return it decoded by ``Helper.decodeHtml``.

    The book URL is built from ``libUrl`` according to
    ``URL_LIMIT[libName]['count']`` (1 or 2 format fields) and ``chapterIdx``
    is appended to it. The request is attempted up to three times.

    Args:
        libName: Library (site) name, key into ``URL_LIMIT``.
        libUrl: URL template containing a ``www.../`` host part.
        libIndex: Index of the book inside the library.
        chapterIdx: Chapter path fragment appended to the book URL.

    Returns:
        The decoded page, or ``None`` when all three attempts failed.
    """
    # Host header: the "www.../" match of the template with slashes removed.
    hostMatch = re.search("www(.*?)/", libUrl).group()
    host = re.sub("/", "", hostMatch)

    fieldCount = URL_LIMIT[libName]['count']
    if fieldCount == 2:
        url = libUrl.format(math.floor(libIndex / 1000), libIndex)
    elif fieldCount == 1:
        url = libUrl.format(libIndex)

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'Host': host,
        # 'Referer': url,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': User_Agent,
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
    }

    for _ in range(3):
        try:
            # The URL is assembled inside the try on purpose, so any failure
            # while building it is handled like a failed request.
            response = SESSION.get(url + chapterIdx, headers=headers, params={}, verify=False, timeout=3)
        except Exception:
            Helper.printError(string="request {} again".format(libIndex))
            time.sleep(Helper.randomFloat())
        else:
            break
    else:
        # Every attempt failed.
        return

    return Helper.decodeHtml(response)
def checkChapters(libName, libUrl, libIndex, bookName):
    """Download the chapters of one book that are not yet stored locally.

    The number of lines already present in the book's index file
    (``<bookName>_目录.txt``) is taken as the count of stored chapters.
    Every later entry returned by ``Html.getChapterUrls`` is fetched and
    appended to the content file, and its title is appended to the index
    file. Each chapter is tried up to three times; after three failures it is
    skipped and the next chapter is processed.

    Nothing is downloaded when the book name parsed from the page differs
    from ``bookName`` or when the book page cannot be processed.

    Args:
        libName: Library (site) name; also the sub-directory of ``NOVEL_PATH``.
        libUrl: URL template of the library.
        libIndex: Index of the book inside the library.
        bookName: Expected book name; base name of the local files.

    Raises:
        OSError: If either local file cannot be opened (the index file is
            opened with ``r+`` and must already exist).
    """
    bookPath = NOVEL_PATH + libName + os.sep + bookName

    # One with-statement owns both files, so the content file is closed even
    # when opening the index file fails, and both are closed on any exit.
    with open(bookPath + '.txt', 'a+', encoding='utf-8') as fileContent, \
            open(bookPath + '_目录.txt', 'r+', encoding='utf-8') as fileMulu:
        curIdx = -1
        allIdx = -1
        chapterUrls = []
        try:
            html = getBookChapterHtml(libIndex, libName, libUrl)
            novelName = Html.getBookName(html)
            if bookName == novelName:
                # Reading the whole index leaves the file position at its end,
                # so later writes append to it.
                mulu = fileMulu.readlines()
                curIdx = len(mulu)
                chapterUrls = Html.getChapterUrls(html)
                allIdx = len(chapterUrls)
            else:
                Helper.printError("{} lib {} index {} need update".format(
                    libName, bookName, libIndex))
        except Exception:
            Helper.printError()

        while curIdx < allIdx:
            for _ in range(3):
                try:
                    # Entries are split on "=": first part is the chapter URL
                    # fragment, second part is the chapter title.
                    url_name = chapterUrls[curIdx]
                    values = re.split("=", url_name)
                    chapter = Helper.formatChapterName(values[1])
                    html = getChapterHtml(libName, libUrl, libIndex, values[0])
                    content = Html.getChapterContent(html)
                    fileContent.write(chapter + "\n" + content + "\n")
                    fileMulu.write(chapter + "\n")
                    Helper.print("{} {}".format(bookName, chapter))
                    break
                except Exception:
                    Helper.printError()
            curIdx = curIdx + 1
def getBookChapterHtml(libIndex, libName, libUrl):
    """Fetch a book's page and return it decoded by ``Helper.decodeHtml``.

    The URL is built from ``libUrl`` according to
    ``URL_LIMIT[libName]['count']`` (1 or 2 format fields). The request,
    including decoding, is attempted at most twice with a random pause after
    each failure.

    Args:
        libIndex: Index of the book inside the library.
        libName: Library (site) name, key into ``URL_LIMIT``.
        libUrl: URL template containing a ``www.../`` host part.

    Returns:
        The decoded page, or ``""`` when both attempts failed.
    """
    # Host header: the "www.../" match of the template with slashes removed.
    hostMatch = re.search("www(.*?)/", libUrl).group()
    host = re.sub("/", "", hostMatch)

    fieldCount = URL_LIMIT[libName]['count']
    if fieldCount == 2:
        url = libUrl.format(math.floor(libIndex / 1000), libIndex)
    elif fieldCount == 1:
        url = libUrl.format(libIndex)

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': host,
        'Referer': 'https://' + host,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': User_Agent,
    }

    for _ in range(2):
        try:
            # The URL is only touched inside the try, so a failure to build it
            # is handled like a failed request.
            response = SESSION.get(url, headers=headers, params={}, verify=False, timeout=3)
            return Helper.decodeHtml(response)
        except Exception:
            time.sleep(Helper.randomFloat())
    return ""
def checkLib(libName, libUrl):
    """Extend a library file with the books found at successive indices.

    Loads ``<NOVEL_LIB_PATH><libName>.txt`` (lines of ``name=index``) into an
    index -> name mapping, then probes indices starting at
    ``len(mapping) + 1`` up to ``Lib_Max_Count``. A book name that is not yet
    recorded for an index is appended to the file. An index is skipped after
    ``Repeat_Max_Count`` consecutive failures, where a failure is either an
    empty book name or an exception while processing the index. Finally
    continues with the next queued site via ``checkNextUrl``.

    Args:
        libName: Library (site) name; also the base name of the library file.
        libUrl: URL template of the library.
    """
    Helper.print("check " + libName + " lib")

    Novel_Lib = {}
    # The context manager closes the file even if the loop below raises.
    with open(NOVEL_LIB_PATH + libName + '.txt', 'r+', encoding='utf-8') as Lib:
        # readlines() leaves the position at end of file, so writes append.
        for line in Lib.readlines():
            values = line.rstrip('\n').split('=')
            if len(values) < 2:
                # Blank or malformed line: no "name=index" pair to record.
                continue
            Novel_Lib[values[1]] = values[0]

        curIndex = len(Novel_Lib) + 1
        ErrorCount = 0
        while curIndex <= Lib_Max_Count:
            failed = False
            try:
                html = getBookChapterHtml(curIndex, libName, libUrl)
                novelName = Html.getBookName(html)
                if novelName:
                    if Novel_Lib.get(str(curIndex), "") != novelName:
                        ErrorCount = 0
                        Novel_Lib[str(curIndex)] = novelName
                        Lib.write(novelName + "=" + str(curIndex) + "\n")
                        Helper.print("{} add {} {}".format(libName, curIndex, novelName))
                        # NOTE(review): curIndex is not advanced here, so the
                        # same index is requested once more and only moves on
                        # when the name matches - confirm this is intended.
                    else:
                        curIndex = curIndex + 1
                else:
                    Helper.printError(string="request {} error".format(curIndex))
                    failed = True
            except Exception:
                Helper.printError()
                # Count exceptions as failures too; otherwise a persistent
                # error on one index would keep this loop running forever.
                failed = True

            if failed:
                ErrorCount = ErrorCount + 1
                if ErrorCount >= Repeat_Max_Count:
                    curIndex = curIndex + 1
                    ErrorCount = 0

            time.sleep(Helper.randomFloat())

    checkNextUrl()
def resetLib():
    """Empty the library file of every entry in ``URLS``.

    Each ``<NOVEL_LIB_PATH><name>.txt`` is opened in ``'w'`` mode and closed
    again, which truncates it (or creates it if missing).
    """
    for site in URLS.values():
        libPath = NOVEL_LIB_PATH + site['name'] + '.txt'
        # Opening for writing is enough to truncate; nothing is written.
        open(libPath, 'w').close()
    Helper.print("reset lib done")
def defalt():
    """Report that no function name was found for the selected menu entry."""
    message = "can't find funcName"
    Helper.printError(string=message)