def handle_comic_data(self, data):
    try:
        data = json.loads(data)
    except Exception as e:
        debug(e)
        return  # data is still a string here; indexing it below would raise
    if not data["succ"]:
        debug("not succ")
        return
    debug("succ, fetch succeeded ====> ", data["succ"])
    try:
        chapter_list = data["result"]["list"]
    except Exception as e:
        debug(e)
        return
    for item in chapter_list:
        # rename "id" to "comic_id" and drop fields the table does not have
        item["comic_id"] = item["id"]
        del item["cjid"]
        del item["id"]
        item["view"] = 0
        lock.acquire()
        sql = self.db.getInsertSql(item, self.comic_table)
        result = self.db.insert(sql, is_close_db=False)
        lock.release()
        if result == 0:
            debug("data insert failed")
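# For reference, a sketch of the payload shape handle_comic_data expects,
# inferred from the key accesses above; all field values are illustrative.
SAMPLE_COMIC_PAYLOAD = {
    "succ": True,
    "result": {
        "list": [
            # each item's "id" is copied to "comic_id", then "cjid" and
            # "id" are dropped and "view" is initialised to 0
            {"id": 1, "cjid": 2, "title": "example chapter"},
        ],
    },
}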
def __handle(self, item):
    try:
        game_page_url = item.find(name="a").attrs['href']
        game_page = GamePage(game_page_url, self)
        game_page.run()
    except Exception as e:
        debug(e)
def __handle(self, game_title):
    """
    Core logic: look up the game by title and re-save it under category 9.
    :param game_title:
    :return:
    """
    data = self.game_spider.db.select(
        {
            "table": "game",
            "condition": [
                "title like '%{title}%' and game_url not like '%youtube%' "
                "and game_url not like '%google%' and game_url != ''"
                .format(title=game_title)
            ],
            # "columns": ['title', 'id']
        },
        get_all=False,
        is_close_db=False)
    try:
        del data['id']
    except (KeyError, TypeError):
        # no matching row was found
        return
    data['category'] = 9
    data['title'] = game_title
    # debug(data)
    result = self.__insert(data)
    if result != 0:
        debug("data save =====> success")
    else:
        debug("data save =====> failed")
def get_job():
    """
    Fetch the config for an operator.
    :return: json
    """
    err_data = {"data": {}, "error_code": 1}
    operator_code = request.values.get("operator_code", 0)
    if operator_code == 0:
        # fall back to the JSON request body
        try:
            post = request.get_data().decode("utf-8")
            post = json.loads(post)
            operator_code = post['operator_code']
        except Exception as e:
            debug(e)
            return Reply.json(err_data)
    operator_code = "op_{operator_code}".format(operator_code=operator_code)
    data = redis.get(operator_code)
    if data is None:
        return Reply.json(err_data)
    try:
        data = json.loads(data)
    except Exception as e:
        data = err_data
        debug(e)
    return Reply.json(data)
def __change_top_ncx(self, tmp_dir, item):
    """
    Rewrite the title on the table-of-contents page.
    :param tmp_dir:
    :return:
    """
    filename = ""
    for name in os.listdir(tmp_dir):
        if name.endswith("ncx"):
            filename = tmp_dir + name
            break
    if filename == "":
        return False
    with open(filename, "rb") as f:
        data = f.read().decode("utf-8")
    bs4 = BeautifulSoup(data, "xml")
    result = bs4.find_all("navPoint", attrs={"playOrder": "1"})
    try:
        result = result[0].find("text")
        result = str(result)
        debug(result + " ============> " +
              "<text>{title}</text>".format(title=item['title']))
        # escape the matched markup so re.sub treats it as a literal string,
        # not as a regular expression
        data = re.sub(re.escape(result),
                      "<text>{title}</text>".format(title=item['title']),
                      data)
    except Exception as e:
        debug(e)
    with open(filename, "wb") as f:
        f.write(data.encode("utf-8"))
    return True
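# Why re.escape is needed above: the matched markup is fed to re.sub as a
# pattern, and titles often contain regex metacharacters. A quick check:
import re

snippet = "<text>Q&A (part 1)</text>"
# without re.escape, "(part 1)" would be parsed as a capture group and the
# literal parentheses in the document would never match
assert re.sub(re.escape(snippet), "<text>New</text>", snippet) == "<text>New</text>"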
def run(self):
    self.db.insert({
        "table": "p_user",
        "username": "******"
    }, is_close_db=False)
    debug("ok")
def __get_game_li_list(cls, bs_html):
    game_ul = bs_html.find_all(name="ul", attrs={"class": "_2tY3C"})
    try:
        game_li = game_ul[0].find_all(name="li", attrs={"class": "_1cn3x"})
    except Exception as e:
        game_li = list()
        debug(e)
    return game_li
def test(self):
    url = "https://www.crunchyroll.com/videos/anime/popular/ajax_page?pg=3"
    data = curl_data(
        url,
        referer="https://www.crunchyroll.com/videos/anime/popular/ajax_page?pg=3",
        open_virtual_ip=True)
    debug(data)
def __get_title(cls, data):
    try:
        title_bs4 = data.find(name="div", attrs={"class": "item-header__title"})
        title_bs4 = title_bs4.find(name="h1").get_text().strip()
    except Exception as e:
        debug("get title error: {error}".format(error=e.__str__()))
        title_bs4 = ""
    return title_bs4
def __get_describe(cls, data):
    try:
        describe = data.find(name="div",
                             attrs={"class": "user-html__with-lazy-load"})
        describe = str(describe)
    except Exception as e:
        debug("get describe error: {error}".format(error=e.__str__()))
        describe = ""
    return describe
def __get_img(cls, data):
    img_bs4 = data.find(name="div", attrs={"class": "-preview-live"})
    try:
        img_bs4 = img_bs4.find(name="img")
        img_bs4 = img_bs4.attrs['src']
    except Exception as e:
        debug("get img error: {error}".format(error=e.__str__()))
        img_bs4 = ""
    return img_bs4
def __handle(self, item):
    with self.auto_handle_exception():
        debug("start download ==========> {name}".format(
            name=item["download_url"]))
        data = curl_data(item["download_url"])
        debug(data)
        # the with-statement closes the file; no explicit close() needed
        with open(
                "static/spider/game_download/{name}.apk".format(
                    name=item["id"]), "wb") as f:
            f.write(data)
def __get_next_page(cls, data):
    # attrs must be a dict; the original {"class", "k89zG"} was a set literal
    next_page = data.find(name="a", attrs={"class": "k89zG"})
    is_continue = True
    try:
        next_page = "https://codecanyon.net" + next_page.attrs['href']
    except Exception as e:
        debug("get next page error: {error}".format(error=e.__str__()))
        next_page = ""
        is_continue = False
    return next_page, is_continue
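# A sketch of the crawl loop implied by the (next_page, is_continue) pair
# returned above. fetch_page and handle_page are hypothetical stand-ins for
# this spider's fetch and parse steps, not names from this codebase.
from bs4 import BeautifulSoup

def crawl_all_pages(start_url, fetch_page, handle_page, get_next_page):
    url, is_continue = start_url, True
    while is_continue:
        bs_html = BeautifulSoup(fetch_page(url), "html.parser")
        handle_page(bs_html)
        # get_next_page returns ("", False) once no successor link is found
        url, is_continue = get_next_page(bs_html)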
def __insert(self, insert_arr):
    lock.acquire()
    sql = self.game_spider.db.getInsertSql(insert_arr, "game")
    result = self.game_spider.db.insert(sql, is_close_db=False)
    lock.release()
    if result == 0:
        # the original logged "success" in both branches; 0 means failure
        debug("game: {name} ============> insert failed".format(
            name=insert_arr['title']))
    else:
        debug("game: {name} ============> insert succeeded".format(
            name=insert_arr['title']))
    return result  # callers treat 0 as failure
def __get_game_url(self, game_url):
    data = self.__get_frame_page(game_url)
    data = BeautifulSoup(data, "html.parser")
    game_url = data.find(name="iframe",
                         attrs={"class": "full-screen-preview__frame"})
    try:
        game_url = game_url.attrs['src']
    except Exception as e:
        debug("get game_url error: {error}".format(error=e.__str__()))
        game_url = ""
    return game_url
def vote(self):
    url = "http://fyxqt.fuyuxiangqi.cn/wxtp/web/aipainew/aipainewAction!dianji.action?t={time_stamp}".format(
        time_stamp=int(time.time() * 1000))
    params = {
        "id": self.id,
        "hdid": self.wx_id,
        "yz": ""
    }
    header = self.__get_header()
    data = curl_data(url, value=params, cookie=self.cookie, header=header)
    debug(data)
def test_ip(self):
    url = "https://a-vrv.akamaized.net/evs/1631771ddd0df6e6f7c60770955fe64f/assets/p/6bbmnx58kgajfsd_,1278465.mp4,1278467.mp4,1278463.mp4,1278461.mp4,1278451.mp4,.urlset/fragment-21-f1-a1-x3.m4s?t=exp=1565753706~acl=/evs/1631771ddd0df6e6f7c60770955fe64f/assets/*~hmac=be0ef2b7b8215367e2069db78781d28627a051399f80b10240d73da945ffc162"
    # url = "https://nl.tan90.club/"
    data = curl_data(
        url=url,
        referer="https://static.crunchyroll.com/vilos/player.html",
        open_virtual_ip=True)
    with open("test.mp4", "wb") as f:
        f.write(data)
    debug(data)
def __get_tr_data(cls, bs_4, info):
    """
    Extract the text of one table cell.
    :param bs_4:
    :return:
    """
    try:
        data = bs_4.get_text().strip()
    except Exception as e:
        data = ""
        debug("{info}, error: {error}".format(info=info, error=e.__str__()))
    return data
def test(self):
    # url = "https://www.crunchyroll.com/videos/anime"
    url = "https://www.crunchyroll.com/videos/anime/popular/ajax_page?pg=1"
    res = requests.get(url)
    data = res.text
    debug(data)
    with open("./test.html", "wb") as f:
        f.write(data.encode("utf-8"))
    html = etree.parse("./test.html", etree.HTMLParser())
    result = html.xpath("//*[@id='main_content']//li/@id")
    debug(result)
def handle(self): """ :return: """ excel = xlrd.open_workbook(self.origin_path) self.new_excel = copy(excel) # self.aim_excel_data = self.get_origin_name() sheet_list = excel.sheets() for k, sheet in enumerate(sheet_list): debug(sheet.name) self.__handle(sheet, self.new_excel.get_sheet(k)) self.new_excel.save("static/excel/new_excel.xls")
def get_comic_detail(self):
    flag = True
    comic_list = self.get_comic_list_from_db()
    for item in comic_list:
        # if flag:
        #     if item["comic_id"] != 349:
        #         continue
        #     else:
        #         flag = False
        self.get_comic_data(item)
        debug("waiting 2 seconds before continuing")
        sleep(2)
def __handle(self, item, path, name, category_id):
    insert_arr = dict()
    seconds, minutes = self.__get_music_time(path + name + "/" + item)
    singer, song = self.__get_singer_and_name(item)
    insert_arr["singer"] = singer
    insert_arr["minutes"] = minutes
    insert_arr["second"] = seconds
    insert_arr["name"] = song
    insert_arr["category"] = category_id
    result = self.__insert(insert_arr, "music_list")
    if result:
        shutil.copy(path + name + "/" + item,
                    self.aim_pos + str(result) + ".mp3")
        debug(name + " => " + item)
@contextmanager  # required for the "with self.auto_handle_exception(...)" call
def auto_handle_exception(self,
                          before_callback=default_callback,
                          error_callback=default_callback,
                          after_callback=default_callback,
                          throw_exception_flag=False,
                          **kwargs):
    try:
        before_callback(**kwargs)
        yield
        after_callback(**kwargs)
    except Exception as e:
        error_callback(**kwargs)
        if throw_exception_flag:
            debug(e)
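# A standalone, simplified sketch of the pattern above, assuming the method is
# decorated with contextlib.contextmanager (its with-statement usage elsewhere
# in this file implies it). All _demo names here are hypothetical.
from contextlib import contextmanager

def _noop_callback(**kwargs):
    pass

@contextmanager
def _demo_auto_handle_exception(error_callback=_noop_callback, **kwargs):
    try:
        yield
    except Exception:
        # the extra keyword arguments are forwarded to the callback
        error_callback(**kwargs)

def _on_error(item=None, **kwargs):
    print("failed for", item)

with _demo_auto_handle_exception(error_callback=_on_error, item={"id": 1}):
    raise RuntimeError("boom")  # swallowed; _on_error({"id": 1}) runs instead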
def __get_source_url(cls, bs_4):
    """
    Get the resource link.
    :param bs_4:
    :return:
    """
    table = bs_4.find(name="table", attrs={"class": "files"})
    try:
        td = table.find(name="td", attrs={"content": "application/epub+zip"})
        source_url = td.find(name="a").attrs['href']
    except Exception as e:
        source_url = False
        debug("failed to get the source url, stopping this thread, "
              "error: {error}".format(error=e.__str__()))
    return source_url
def __get_url(cls, item):
    """
    Get the URL of the book's detail page.
    :param item:
    :return:
    """
    try:
        url = item.find(name="a").attrs['href']
        li_content = item.get_text()
        # only Dutch-language entries are kept
        if "Dutch" not in li_content:
            return False
    except Exception as e:
        url = False
        debug("failed to get the book url, stopping this thread, "
              "error: {error}".format(error=e.__str__()))
    return url
def __get_cover_img(cls, data):
    cover_img = data.find(name="div",
                          attrs={"class": "item-preview-image__gallery"})
    cover_str = ""
    try:
        cover_img = cover_img.find_all(name="a")
        for k, item in enumerate(cover_img):
            if k == 0:
                cover_str = item.attrs['href']
            else:
                cover_str = cover_str + "," + item.attrs['href']
    except Exception as e:
        debug("get cover img error: {error}".format(error=e.__str__()))
        cover_str = ""
    return cover_str
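# A more compact equivalent of the accumulation loop above, kept as a sketch;
# it assumes the same gallery div input and BeautifulSoup API used throughout.
def _join_cover_hrefs(gallery_div):
    return ",".join(a.attrs['href'] for a in gallery_div.find_all(name="a"))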
def handle(self):
    # url = "https://i.ytimg.com/vi/9OHkwJpS6u4/hqdefault.jpg?sqp=-oaymwEZCPYBEIoBSFXyq4qpAwsIARUAAIhCGAFwAQ==&rs=AOn4CLDEO8flAyYWStTIWI3aLoirwz73yg"
    # url = "https://i.ytimg.com/vi/9OHkwJpS6u4/hqdefault.jpg"
    # url = "https://www.google.com/"
    url = "http://192.168.50.177:8083/download"
    # url = "https://www386.hlsmp4.com/token=b2LDM4PEjOWh5XvREsjfdw/1567685699/0.0.0.0/67/f/9b/11b5f88fd13540ae36950a5a0daa19bf-480p.mp4"
    header = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
        # "upgrade-insecure-requests": "1"
    }
    data = curl_data(url, value={"name": "ok"}, header=header,
                     open_virtual_ip=True)
    debug(data)
def __handle(self, item):
    # enable automatic error handling
    with self.auto_handle_exception(throw_exception_flag=True):
        tmp_dir = self.tmp_path + "{dirname}/".format(dirname=item['id'])
        if not os.path.exists(tmp_dir):
            os.mkdir(tmp_dir)
        filename = self.dir + "{filename}.epub".format(filename=item['id'])
        try:
            self.__unzip(filename, tmp_dir)
            self.__get_cover(tmp_dir, item)
        except Exception as e:
            debug("this ebook ============================> has no cover "
                  "image, deleting it")
            # self.draw_cover(item)
            self.__delete(item)
            os.remove(self.dir + str(item['id']) + ".epub")
        # clean up the temp dir whether or not a cover was found
        shutil.rmtree(tmp_dir)
def search_by_code():
    """
    Look up data by operator_code.
    :return:
    """
    data = redis.keys()
    debug(data)
    operator_code = request.values.get("operator_code", 0)
    if operator_code == 0:
        return Reply.error("failed")
    data = redis.get("op_{operator_code}".format(operator_code=operator_code))
    return Reply.error("empty") if data is None else Reply.success(
        [{
            "operator_code": operator_code,
            "config": data
        }])
def __download(self, item):
    # skip files that are already on disk
    if os.path.exists("static/spider/epub/{filename}.epub".format(
            filename=item['id'])):
        debug("ebook: {title} ========> already exists, skipping".format(
            title=item['title']))
        return
    with self.auto_handle_exception(error_callback=self.__error_callback,
                                    throw_exception_flag=True,
                                    item=item):
        data = curl_data(self.url_prefix + item['source_url'])
        with open(
                "static/spider/epub/{filename}.epub".format(
                    filename=item['id']), "wb") as f:
            f.write(data)
        debug("ebook: {title} =======> downloaded successfully".format(
            title=item['title']))