class BL:
    _SITE = 'DW'
    SUBSTITUTE = r"[&#\s/]"

    def __init__(self, configfile, dbfile, device, logging, scraper, filename):
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device

        self.hostnames = CrawlerConfig('Hostnames', self.configfile)
        self.url = self.hostnames.get('dw')
        self.password = self.url.split('.')[0]

        # Movie feed or season feed, depending on the list being processed
        if "List_ContentAll_Seasons" not in filename:
            self.URL = 'https://' + self.url + "/downloads/hauptkategorie/movies/"
        else:
            self.URL = 'https://' + self.url + "/downloads/hauptkategorie/serien/"
        self.FEED_URLS = [self.URL]

        self.config = CrawlerConfig("ContentAll", self.configfile)
        self.feedcrawler = CrawlerConfig("FeedCrawler", self.configfile)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.filename = filename
        self.pattern = False
        self.db = FeedDb(self.dbfile, 'FeedCrawler')
        self.hevc_retail = self.config.get("hevc_retail")
        self.retail_only = self.config.get("retail_only")
        self.hosters = CrawlerConfig("Hosters", configfile).get_section()
        self.hoster_fallback = self.config.get("hoster_fallback")
        self.prefer_dw_mirror = self.feedcrawler.get("prefer_dw_mirror")

        # Append additional feed pages according to the configured search depth
        search = int(CrawlerConfig("ContentAll", self.configfile).get("search"))
        for i in range(2, search + 1):
            page_url = self.URL + "order/zeit/sort/D/seite/" + str(i) + "/"
            if page_url not in self.FEED_URLS:
                self.FEED_URLS.append(page_url)

        # Change-detection state (last seen set/hash and conditional request header)
        self.cdc = FeedDb(self.dbfile, 'cdc')
        self.last_set_all = self.cdc.retrieve("ALLSet-" + self.filename)
        self.headers = {
            'If-Modified-Since': str(self.cdc.retrieve(self._SITE + "Headers-" + self.filename))
        }
        self.last_sha = self.cdc.retrieve(self._SITE + "-" + self.filename)

        settings = ["quality", "search", "ignore", "regex", "cutoff", "enforcedl",
                    "crawlseasons", "seasonsquality", "seasonpacks", "seasonssource",
                    "imdbyear", "imdb", "hevc_retail", "retail_only", "hoster_fallback"]
        self.settings = []
        self.settings.append(self.feedcrawler.get("english"))
        self.settings.append(self.feedcrawler.get("surround"))
        self.settings.append(self.feedcrawler.get("prefer_dw_mirror"))
        self.settings.append(self.hosters)
        for s in settings:
            self.settings.append(self.config.get(s))

        self.search_imdb_done = False
        self.search_regular_done = False
        self.dl_unsatisfied = False

        # Site-specific hooks used by the shared crawler logic
        self.get_feed_method = dw_feed_enricher
        self.get_url_method = get_url
        self.get_url_headers_method = get_url_headers
        self.get_download_links_method = dw_get_download_links
        self.download_method = add_decrypt_instead_of_download

        try:
            self.imdb = float(self.config.get('imdb'))
        except (TypeError, ValueError):
            # An unset or non-numeric "imdb" setting disables the IMDb rating threshold
            self.imdb = 0.0

    def periodical_task(self):
        self.device = shared_blogs.periodical_task(self)
        return self.device
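
# Usage sketch (assumed values): the file names "FeedCrawler.ini"/"FeedCrawler.db",
# the list name and passing the stdlib logging module are illustrative assumptions.
#
#   import logging
#   import cloudscraper
#
#   scraper = cloudscraper.create_scraper()
#   bl = BL("FeedCrawler.ini", "FeedCrawler.db", device=None, logging=logging,
#           scraper=scraper, filename="List_ContentAll")
#   device = bl.periodical_task()
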
class SJ:
    _INTERNAL_NAME = 'SJ'
    _SITE = 'SJ'

    def __init__(self, configfile, dbfile, device, logging, scraper, filename):
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device

        self.hostnames = CrawlerConfig('Hostnames', self.configfile)
        self.url = self.hostnames.get('sj')

        self.filename = filename
        if "List_ContentAll_Seasons" in self.filename:
            self.config = CrawlerConfig("ContentAll", self.configfile)
        else:
            self.config = CrawlerConfig("ContentShows", self.configfile)
        self.feedcrawler = CrawlerConfig("FeedCrawler", self.configfile)
        self.hevc_retail = self.config.get("hevc_retail")
        self.retail_only = self.config.get("retail_only")
        self.hoster_fallback = self.config.get("hoster_fallback")
        self.hosters = CrawlerConfig("Hosters", configfile).get_section()

        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.db = FeedDb(self.dbfile, 'FeedCrawler')
        self.quality = self.config.get("quality")
        self.prefer_dw_mirror = self.feedcrawler.get("prefer_dw_mirror")

        self.cdc = FeedDb(self.dbfile, 'cdc')
        self.last_set = self.cdc.retrieve(self._INTERNAL_NAME + "Set-" + self.filename)
        self.last_sha = self.cdc.retrieve(self._INTERNAL_NAME + "-" + self.filename)
        self.headers = {
            'If-Modified-Since': str(self.cdc.retrieve(self._INTERNAL_NAME + "Headers-" + self.filename))
        }

        self.settings_array = ["quality", "rejectlist", "regex", "hevc_retail",
                               "retail_only", "hoster_fallback"]
        self.settings = []
        self.settings.append(self.feedcrawler.get("english"))
        self.settings.append(self.feedcrawler.get("surround"))
        self.settings.append(self.feedcrawler.get("prefer_dw_mirror"))
        self.settings.append(self.hosters)
        for s in self.settings_array:
            self.settings.append(self.config.get(s))

        self.mediatype = "Serien"
        self.listtype = ""
        self.empty_list = False
        if self.filename == 'List_ContentShows_Seasons_Regex':
            self.listtype = " (Staffeln/RegEx)"
        elif self.filename == 'List_ContentAll_Seasons':
            self.seasonssource = self.config.get('seasonssource').lower()
            self.listtype = " (Staffeln)"
        elif self.filename == 'List_ContentShows_Shows_Regex':
            self.listtype = " (RegEx)"

        list_content = shared_shows.get_series_list(self)
        if list_content:
            self.pattern = r'^(' + "|".join(list_content).lower() + ')'
        else:
            self.empty_list = True
        self.day = 0

        self.get_feed_method = j_releases_to_feedparser_dict
        self.parse_download_method = j_parse_download

    def periodical_task(self):
        self.device = shared_shows.periodical_task(self)
        return self.device
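
# Usage sketch (assumed values, analogous to the BL example above): SJ picks its
# config section from the list name, so a shows list name is used here.
#
#   sj = SJ("FeedCrawler.ini", "FeedCrawler.db", device=None, logging=logging,
#           scraper=scraper, filename="List_ContentShows_Shows")
#   device = sj.periodical_task()
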
def get_redirected_url(url, configfile, dbfile, scraper=False):
    config = CrawlerConfig('FeedCrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()
    db = FeedDb(dbfile, 'proxystatus')
    db_normal = FeedDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    if proxy:
        try:
            # Per-site handling: "WW" is never resolved through the proxy. For every
            # other known site, if the proxy route is flagged as blocked, resolve
            # directly only when the fallback is enabled and the normal route is
            # not blocked as well; otherwise return the URL unchanged.
            for s in ("SJ", "DJ", "SF", "BY", "DW", "FX", "NK", "WW", "DD"):
                if site and s in site:
                    if s == "WW":
                        return url
                    if db.retrieve(s):
                        if config.get("fallback") and not db_normal.retrieve(s):
                            # "location" header of the 30x response = redirect target
                            return scraper.get(url, allow_redirects=False,
                                               timeout=30).headers["location"]
                        return url
                    break
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.get(url, allow_redirects=False, proxies=proxies,
                                   timeout=30).headers["location"]
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return url
    else:
        try:
            # Without a proxy, skip resolution for sites whose normal route is
            # flagged as blocked ("WW" is always skipped).
            for s in ("SJ", "DJ", "SF", "BY", "DW", "FX", "NK", "WW", "DD"):
                if site and s in site and (s == "WW" or db_normal.retrieve(s)):
                    return url
            response = scraper.get(url, allow_redirects=False,
                                   timeout=30).headers["location"]
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return url
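
# Usage sketch (assumed values): resolves the redirect target of a link and falls
# back to the original URL when the matched site is blocked or the request fails.
#
#   target = get_redirected_url("https://example.com/redirect/12345",
#                               "FeedCrawler.ini", "FeedCrawler.db")
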
def post_url_headers(url, configfile, dbfile, headers, data, scraper=False):
    config = CrawlerConfig('FeedCrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()
    db = FeedDb(dbfile, 'proxystatus')
    db_normal = FeedDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    sites = ("SJ", "DJ", "SF", "BY", "DW", "FX", "NK", "WW", "DD")
    if proxy:
        try:
            # If the proxy route is flagged as blocked for the matched site, POST
            # directly only when the fallback is enabled and the normal route is
            # not blocked as well; otherwise return an empty response.
            for s in sites:
                if site and s in site:
                    if db.retrieve(s):
                        if config.get("fallback") and not db_normal.retrieve(s):
                            return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                        return ["", scraper]
                    break
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.post(url, data, headers=headers, proxies=proxies, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]
    else:
        try:
            # Without a proxy, skip the request entirely for sites whose normal
            # route is flagged as blocked.
            for s in sites:
                if site and s in site and db_normal.retrieve(s):
                    return ["", scraper]
            response = scraper.post(url, data, headers=headers, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]
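
# Usage sketch (assumed values): the function returns a two-element list with the
# response (or "" when the site is blocked) and the scraper instance for reuse.
#
#   response, scraper = post_url_headers("https://example.com/api/media",
#                                        "FeedCrawler.ini", "FeedCrawler.db",
#                                        headers={"Accept": "application/json"},
#                                        data={"media": "12345"})
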
def download(configfile, dbfile, device, title, subdir, old_links, password, full_path=None,
             autostart=False):
    try:
        if not device or not is_device(device):
            device = get_device(configfile)

        # De-duplicate the link list while preserving order
        if isinstance(old_links, list):
            links = []
            for link in old_links:
                if link not in links:
                    links.append(link)
        else:
            links = [old_links]
        links = str(links).replace(" ", "")

        crawljobs = CrawlerConfig('Crawljobs', configfile)
        usesubdir = crawljobs.get("subdir")
        priority = "DEFAULT"

        if full_path:
            path = full_path
        else:
            if usesubdir:
                path = subdir + "/<jd:packagename>"
            else:
                path = "<jd:packagename>"
        if "Remux" in path:
            priority = "LOWER"

        try:
            device.linkgrabber.add_links(params=[
                {
                    "autostart": autostart,
                    "links": links,
                    "packageName": title,
                    "extractPassword": password,
                    "priority": priority,
                    "downloadPassword": password,
                    "destinationFolder": path,
                    "comment": "FeedCrawler by rix1337",
                    "overwritePackagizerRules": False
                }])
        except feedcrawler.myjdapi.TokenExpiredException:
            # Session expired - reconnect once and retry the add_links call
            device = get_device(configfile)
            if not device or not is_device(device):
                return False
            device.linkgrabber.add_links(params=[
                {
                    "autostart": autostart,
                    "links": links,
                    "packageName": title,
                    "extractPassword": password,
                    "priority": priority,
                    "downloadPassword": password,
                    "destinationFolder": path,
                    "comment": "FeedCrawler by rix1337",
                    "overwritePackagizerRules": False
                }])

        db = FeedDb(dbfile, 'crawldog')
        if db.retrieve(title):
            db.delete(title)
            db.store(title, 'retried')
        else:
            db.store(title, 'added')
        return device
    except feedcrawler.myjdapi.MYJDException as e:
        print(u"Fehler bei der Verbindung mit MyJDownloader: " + str(e))
        return False
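
# Usage sketch (assumed values): "device" is a MyJDownloader device handle as
# returned by get_device(); title, subdir, links and password are placeholders.
#
#   device = download("FeedCrawler.ini", "FeedCrawler.db", device,
#                     title="Some.Release.2021.German.1080p.WEB.x264-GROUP",
#                     subdir="FeedCrawler",
#                     old_links=["https://example.com/mirror1",
#                                "https://example.com/mirror1"],
#                     password="example")
#   if not device:
#       pass  # connection to MyJDownloader failed
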