def post_url(url, configfile, dbfile, data, scraper=False):
    config = RssConfig('RSScrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()

    db = RssDb(dbfile, 'proxystatus')
    db_normal = RssDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    # Temporary fix for FX: switch to a plain requests session, carry the existing
    # headers/cookies over and disable certificate verification.
    if site and "FX" in site:
        headers = scraper.headers
        cookies = scraper.cookies
        scraper = requests.session()
        scraper.headers = headers
        scraper.cookies = cookies
        scraper.verify = False

    # Site keys tracked in the proxy/normal status tables
    sites = ["SJ", "DJ", "SF", "MB", "HW", "FX", "HS", "NK", "DD", "FC"]

    if proxy:
        try:
            # If the proxy is flagged as broken for this site, either fall back to a
            # direct request (when fallback is enabled and the direct route is not
            # flagged as well) or give up on this URL.
            for key in sites:
                if site and key in site:
                    if db.retrieve(key):
                        if config.get("fallback") and not db_normal.retrieve(key):
                            return scraper.post(url, data, timeout=30).content
                        return ""
                    break
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.post(url, data, proxies=proxies, timeout=30).content
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ""
    else:
        try:
            # Without a proxy, skip sites that are flagged as unreachable.
            for key in sites:
                if site and key in site and db_normal.retrieve(key):
                    return ""
            response = scraper.post(url, data, timeout=30).content
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ""
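
# Hedged usage sketch, not part of the original module: how post_url() is typically
# called. The endpoint, form payload and the helper name below are hypothetical
# examples, not values taken from the repository.
def _example_post_url(configfile, dbfile):
    payload = {'search': 'Example.Title.S01E01'}           # hypothetical form data
    content = post_url('https://example.org/api/search',   # hypothetical endpoint
                       configfile, dbfile, payload)
    # post_url() returns the raw response body, or "" when the target site is
    # flagged as unreachable or the request fails.
    return content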
class SF: def __init__(self, configfile, dbfile, device, logging, scraper, filename, internal_name): self._INTERNAL_NAME = internal_name self.configfile = configfile self.dbfile = dbfile self.device = device self.hostnames = RssConfig('Hostnames', self.configfile) self.sf = self.hostnames.get('sf') self.config = RssConfig(self._INTERNAL_NAME, self.configfile) self.rsscrawler = RssConfig("RSScrawler", self.configfile) self.hevc_retail = self.config.get("hevc_retail") self.retail_only = self.config.get("retail_only") self.hoster_fallback = self.config.get("hoster_fallback") self.hosters = RssConfig("Hosters", configfile).get_section() self.log_info = logging.info self.log_error = logging.error self.log_debug = logging.debug self.scraper = scraper self.filename = filename self.db = RssDb(self.dbfile, 'rsscrawler') self.quality = self.config.get("quality") self.cdc = RssDb(self.dbfile, 'cdc') self.last_set_sf = self.cdc.retrieve("SFSet-" + self.filename) self.last_sha_sf = self.cdc.retrieve("SF-" + self.filename) self.headers = { 'If-Modified-Since': str(self.cdc.retrieve("SFHeaders-" + self.filename)) } settings = [ "quality", "rejectlist", "regex", "hevc_retail", "retail_only", "hoster_fallback" ] self.settings = [] self.settings.append(self.rsscrawler.get("english")) self.settings.append(self.rsscrawler.get("surround")) self.settings.append(self.hosters) for s in settings: self.settings.append(self.config.get(s)) self.empty_list = False if self.filename == 'SJ_Staffeln_Regex': self.level = 3 elif self.filename == 'MB_Staffeln': self.seasonssource = self.config.get('seasonssource').lower() self.level = 2 elif self.filename == 'SJ_Serien_Regex': self.level = 1 else: self.level = 0 self.pattern = r'^(' + "|".join( self.get_series_list(self.filename, self.level)).lower() + ')' self.listtype = "" self.day = 0 def settings_hash(self, refresh): if refresh: settings = [ "quality", "rejectlist", "regex", "hevc_retail", "retail_only", "hoster_fallback" ] self.settings = [] self.settings.append(self.rsscrawler.get("english")) self.settings.append(self.rsscrawler.get("surround")) self.settings.append(self.hosters) for s in settings: self.settings.append(self.config.get(s)) self.pattern = r'^(' + "|".join( self.get_series_list(self.filename, self.level)).lower() + ')' set_sf = str(self.settings) + str(self.pattern) return hashlib.sha256(set_sf.encode('ascii', 'ignore')).hexdigest() def get_series_list(self, liste, series_type): if series_type == 1: self.listtype = " (RegEx)" elif series_type == 2: self.listtype = " (Staffeln)" elif series_type == 3: self.listtype = " (Staffeln/RegEx)" cont = ListDb(self.dbfile, liste).retrieve() titles = [] if cont: for title in cont: if title: title = title.replace(" ", ".") titles.append(title) if not titles: self.empty_list = True return titles def parse_download(self, series_url, title, language_id): if not check_valid_release(title, self.retail_only, self.hevc_retail, self.dbfile): self.log_debug( title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)" ) return if self.filename == 'MB_Staffeln': if not self.config.get("seasonpacks"): staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower()) if staffelpack: self.log_debug("%s - Release ignoriert (Staffelpaket)" % title) return if not re.search(self.seasonssource, title.lower()): self.log_debug(title + " - Release hat falsche Quelle") return try: if language_id == 2: lang = 'EN' else: lang = 'DE' epoch = str(datetime.datetime.now().timestamp()).replace('.', '')[:-3] api_url = series_url + 
'?lang=' + lang + '&_=' + epoch response = get_url(api_url, self.configfile, self.dbfile, self.scraper) info = json.loads(response) is_episode = re.findall(r'.*\.(s\d{1,3}e\d{1,3})\..*', title, re.IGNORECASE) if is_episode: episode_string = re.findall(r'.*S\d{1,3}(E\d{1,3}).*', is_episode[0])[0].lower() season_string = re.findall(r'.*(S\d{1,3})E\d{1,3}.*', is_episode[0])[0].lower() season_title = rreplace( title.lower().replace(episode_string, ''), "-", ".*", 1).lower() season_title = season_title.replace(".untouched", ".*").replace( ".dd+51", ".dd.51") episode = str(int(episode_string.replace("e", ""))) season = str(int(season_string.replace("s", ""))) episode_name = re.findall(r'.*\.s\d{1,3}(\..*).german', season_title, re.IGNORECASE) if episode_name: season_title = season_title.replace(episode_name[0], '') codec_tags = [".h264", ".x264"] for tag in codec_tags: season_title = season_title.replace(tag, ".*264") web_tags = [".web-rip", ".webrip", ".webdl", ".web-dl"] for tag in web_tags: season_title = season_title.replace(tag, ".web.*") else: season = False episode = False season_title = title multiple_episodes = re.findall(r'(e\d{1,3}-e*\d{1,3}\.)', season_title, re.IGNORECASE) if multiple_episodes: season_title = season_title.replace( multiple_episodes[0], '.*') content = BeautifulSoup(info['html'], 'lxml') releases = content.find( "small", text=re.compile(season_title, re.IGNORECASE)).parent.parent.parent links = releases.findAll("div", {'class': 'row'})[1].findAll('a') valid = False for link in links: download_link = link['href'] if check_hoster(link.text.replace('\n', ''), self.configfile): valid = True break if not valid and not self.hoster_fallback: storage = self.db.retrieve_all(title) if 'added' not in storage and 'notdl' not in storage: wrong_hoster = '[SF/Hoster fehlt] - ' + title if 'wrong_hoster' not in storage: self.log_info(wrong_hoster) self.db.store(title, 'wrong_hoster') notify([wrong_hoster], self.configfile) else: self.log_debug(wrong_hoster) else: return self.send_package(title, download_link, language_id, season, episode) except: print( u"SF hat die Serien-API angepasst. Breche Download-Prüfung ab!" 
) def send_package(self, title, download_link, language_id, season, episode): englisch = "" if language_id == 2: englisch = "/Englisch" if self.filename == 'SJ_Serien_Regex': link_placeholder = '[Episode/RegEx' + englisch + '] - ' elif self.filename == 'SJ_Serien': link_placeholder = '[Episode' + englisch + '] - ' elif self.filename == 'SJ_Staffeln_Regex]': link_placeholder = '[Staffel/RegEx' + englisch + '] - ' else: link_placeholder = '[Staffel' + englisch + '] - ' try: storage = self.db.retrieve_all(title) except Exception as e: self.log_debug("Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title)) return if 'added' in storage or 'notdl' in storage: self.log_debug(title + " - Release ignoriert (bereits gefunden)") else: download_link = 'https://' + self.sf + download_link if season and episode: download_link = download_link.replace( '&_=', '&season=' + str(season) + '&episode=' + str(episode) + '&_=') download = add_decrypt(title, download_link, self.sf, self.dbfile) if download: self.db.store(title, 'added') log_entry = link_placeholder + title + ' - [SF]' self.log_info(log_entry) notify(["[Click'n'Load notwendig] - " + log_entry], self.configfile) return log_entry def periodical_task(self): if not self.sf: return self.device if self.filename == 'SJ_Serien_Regex': if not self.config.get('regex'): self.log_debug("Suche für SF-Regex deaktiviert!") return self.device elif self.filename == 'SJ_Staffeln_Regex': if not self.config.get('regex'): self.log_debug("Suche für SF-Regex deaktiviert!") return self.device elif self.filename == 'MB_Staffeln': if not self.config.get('crawlseasons'): self.log_debug("Suche für SF-Staffeln deaktiviert!") return self.device if self.empty_list: self.log_debug("Liste ist leer. Stoppe Suche für Serien!" + self.listtype) return self.device try: reject = self.config.get("rejectlist").replace( ",", "|").lower() if len( self.config.get("rejectlist")) > 0 else r"^unmatchable$" except TypeError: reject = r"^unmatchable$" set_sf = self.settings_hash(False) header = False response = False while self.day < 8: if self.last_set_sf == set_sf: try: delta = ( datetime.datetime.now() - datetime.timedelta(days=self.day)).strftime("%Y-%m-%d") response = get_url_headers( 'https://' + self.sf + '/updates/' + delta, self.configfile, self.dbfile, self.headers, self.scraper) self.scraper = response[1] response = response[0] if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex": feed = sf_releases_to_feedparser_dict( response.text, "seasons", 'https://' + self.sf, True) else: feed = sf_releases_to_feedparser_dict( response.text, "episodes", 'https://' + self.sf, True) except: print(u"SF hat die Feed-API angepasst. Breche Suche ab!") feed = False if response: if response.status_code == 304: self.log_debug( "SF-Feed seit letztem Aufruf nicht aktualisiert - breche Suche ab!" ) return self.device header = True else: try: delta = ( datetime.datetime.now() - datetime.timedelta(days=self.day)).strftime("%Y-%m-%d") response = get_url( 'https://' + self.sf + '/updates/' + delta, self.configfile, self.dbfile, self.scraper) if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex": feed = sf_releases_to_feedparser_dict( response, "seasons", 'https://' + self.sf, True) else: feed = sf_releases_to_feedparser_dict( response, "episodes", 'https://' + self.sf, True) except: print(u"SF hat die Feed-API angepasst. 
Breche Suche ab!") feed = False self.day += 1 if feed and feed.entries: first_post_sf = feed.entries[0] concat_sf = first_post_sf.title + first_post_sf.published + str( self.settings) + str(self.pattern) sha_sf = hashlib.sha256(concat_sf.encode( 'ascii', 'ignore')).hexdigest() else: self.log_debug("Feed ist leer - breche Suche ab!") return False for post in feed.entries: concat = post.title + post.published + \ str(self.settings) + str(self.pattern) sha = hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest() if sha == self.last_sha_sf: self.log_debug("Feed ab hier bereits gecrawlt (" + post.title + ") - breche Suche ab!") break series_url = post.series_url title = post.title.replace("-", "-") if self.filename == 'SJ_Serien_Regex': if self.config.get("regex"): if '.german.' in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: m = re.search(self.pattern, title.lower()) if not m and "720p" not in title and "1080p" not in title and "2160p" not in title: m = re.search( self.pattern.replace("480p", "."), title.lower()) self.quality = "480p" if m: if "720p" in title.lower(): self.quality = "720p" if "1080p" in title.lower(): self.quality = "1080p" if "2160p" in title.lower(): self.quality = "2160p" m = re.search(reject, title.lower()) if m: self.log_debug( title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)" ) title = re.sub(r'\[.*\] ', '', post.title) self.parse_download(series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) else: continue elif self.filename == 'SJ_Staffeln_Regex': if self.config.get("regex"): if '.german.' in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: m = re.search(self.pattern, title.lower()) if not m and "720p" not in title and "1080p" not in title and "2160p" not in title: m = re.search( self.pattern.replace("480p", "."), title.lower()) self.quality = "480p" if m: if "720p" in title.lower(): self.quality = "720p" if "1080p" in title.lower(): self.quality = "1080p" if "2160p" in title.lower(): self.quality = "2160p" m = re.search(reject, title.lower()) if m: self.log_debug( title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)" ) title = re.sub(r'\[.*\] ', '', post.title) self.parse_download(series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) else: continue else: if self.config.get("quality") != '480p': m = re.search(self.pattern, title.lower()) if m: if '.german.' 
in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: mm = re.search(self.quality, title.lower()) if mm: mmm = re.search(reject, title.lower()) if mmm: self.log_debug( title + " - Release ignoriert (basierend auf rejectlist-Einstellung)" ) continue if self.rsscrawler.get("surround"): if not re.match( r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title): self.log_debug( title + " - Release ignoriert (kein Mehrkanalton)" ) continue try: storage = self.db.retrieve_all(title) except Exception as e: self.log_debug( "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title)) return self.device if 'added' in storage: self.log_debug( title + " - Release ignoriert (bereits gefunden)" ) continue self.parse_download( series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) else: m = re.search(self.pattern, title.lower()) if m: if '.german.' in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: if "720p" in title.lower( ) or "1080p" in title.lower( ) or "2160p" in title.lower(): continue mm = re.search(reject, title.lower()) if mm: self.log_debug( title + " Release ignoriert (basierend auf rejectlist-Einstellung)" ) continue if self.rsscrawler.get("surround"): if not re.match( r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title): self.log_debug( title + " - Release ignoriert (kein Mehrkanalton)" ) continue title = re.sub(r'\[.*\] ', '', post.title) try: storage = self.db.retrieve_all(title) except Exception as e: self.log_debug( "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title)) return self.device if 'added' in storage: self.log_debug( title + " - Release ignoriert (bereits gefunden)" ) continue self.parse_download( series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) if set_sf: new_set_sf = self.settings_hash(True) if set_sf == new_set_sf: self.cdc.delete("SFSet-" + self.filename) self.cdc.store("SFSet-" + self.filename, set_sf) self.cdc.delete("SF-" + self.filename) self.cdc.store("SF-" + self.filename, sha_sf) if header and response: self.cdc.delete("SFHeaders-" + self.filename) self.cdc.store("SFHeaders-" + self.filename, response.headers['date']) return self.device
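
# Hedged sketch, not part of the original module: wiring up an SF crawler and
# running one periodical pass. The logger setup, the 'SJ_Serien' list name and the
# 'SJ' internal_name are assumptions made for illustration; configfile, dbfile and
# device come from the surrounding application.
def _example_run_sf(configfile, dbfile, device):
    import logging
    logger = logging.getLogger('rsscrawler')
    crawler = SF(configfile, dbfile, device, logger,
                 scraper=False, filename='SJ_Serien', internal_name='SJ')
    # periodical_task() crawls up to 8 days of the SF updates feed and returns
    # the (possibly refreshed) MyJDownloader device handle.
    return crawler.periodical_task()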
def ombi(configfile, dbfile, device, log_debug): db = RssDb(dbfile, 'Ombi') config = RssConfig('Ombi', configfile) url = config.get('url') api = config.get('api') if not url or not api: return device english = RssConfig('RSScrawler', configfile).get('english') try: requested_movies = requests.get(url + '/api/v1/Request/movie', headers={'ApiKey': api}) requested_movies = json.loads(requested_movies.text) requested_shows = requests.get(url + '/api/v1/Request/tv', headers={'ApiKey': api}) requested_shows = json.loads(requested_shows.text) except: log_debug("Ombi ist nicht erreichbar!") return False scraper = False for r in requested_movies: if bool(r.get("approved")): if not bool(r.get("available")): imdb_id = r.get("imdbId") if not db.retrieve('movie_' + str(imdb_id)) == 'added': response = imdb_movie(imdb_id, configfile, dbfile, scraper) title = response[0] if title: scraper = response[1] best_result = search.best_result_bl( title, configfile, dbfile) print(u"Film: " + title + u" durch Ombi hinzugefügt.") if best_result: search.download_bl(best_result, device, configfile, dbfile) if english: title = r.get('title') best_result = search.best_result_bl( title, configfile, dbfile) print(u"Film: " + title + u"durch Ombi hinzugefügt.") if best_result: search.download_bl(best_result, device, configfile, dbfile) db.store('movie_' + str(imdb_id), 'added') else: log_debug("Titel für IMDB-ID nicht abrufbar: " + imdb_id) for r in requested_shows: imdb_id = r.get("imdbId") infos = None child_requests = r.get("childRequests") for cr in child_requests: if bool(cr.get("approved")): if not bool(cr.get("available")): details = cr.get("seasonRequests") for season in details: sn = season.get("seasonNumber") eps = [] episodes = season.get("episodes") for episode in episodes: if not bool(episode.get("available")): enr = episode.get("episodeNumber") s = str(sn) if len(s) == 1: s = "0" + s s = "S" + s e = str(enr) if len(e) == 1: e = "0" + e se = s + "E" + e if not db.retrieve('show_' + str(imdb_id) + '_' + se) == 'added': eps.append(enr) if eps: if not infos: infos = imdb_show(imdb_id, configfile, dbfile, scraper) if infos: title = infos[0] all_eps = infos[1] scraper = infos[2] check_sn = False if all_eps: check_sn = all_eps.get(sn) if check_sn: sn_length = len(eps) check_sn_length = len(check_sn) if check_sn_length > sn_length: for ep in eps: e = str(ep) if len(e) == 1: e = "0" + e se = s + "E" + e payload = search.best_result_sj( title, configfile, dbfile) if payload: payload = decode_base64( payload).split("|") payload = encode_base64( payload[0] + "|" + payload[1] + "|" + se) added_episode = search.download_sj( payload, configfile, dbfile) if not added_episode: payload = decode_base64( payload).split("|") payload = encode_base64( payload[0] + "|" + payload[1] + "|" + s) add_season = search.download_sj( payload, configfile, dbfile) for e in eps: e = str(e) if len(e) == 1: e = "0" + e se = s + "E" + e db.store( 'show_' + str(imdb_id) + '_' + se, 'added') if not add_season: log_debug( u"Konnte kein Release für " + title + " " + se + "finden.") break db.store( 'show_' + str(imdb_id) + '_' + se, 'added') else: payload = search.best_result_sj( title, configfile, dbfile) if payload: payload = decode_base64( payload).split("|") payload = encode_base64( payload[0] + "|" + payload[1] + "|" + s) search.download_sj( payload, configfile, dbfile) for ep in eps: e = str(ep) if len(e) == 1: e = "0" + e se = s + "E" + e db.store( 'show_' + str(imdb_id) + '_' + se, 'added') print(u"Serie/Staffel/Episode: " + title + u" durch 
Ombi hinzugefügt.") return device
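
# Hedged helper sketch, new and illustrative only: ombi() above builds "SxxEyy"
# tags by padding season and episode numbers by hand; this standalone equivalent
# shows the same formatting rule in one place.
def _season_episode_tag(season_number, episode_number):
    return "S{:02d}E{:02d}".format(int(season_number), int(episode_number))
# e.g. _season_episode_tag(1, 2) == "S01E02"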
def download(configfile, dbfile, device, title, subdir, old_links, password,
             full_path=None, autostart=False):
    try:
        if not device or not is_device(device):
            device = get_device(configfile)

        # Deduplicate links while preserving their order
        if isinstance(old_links, list):
            links = []
            for link in old_links:
                if link not in links:
                    links.append(link)
        else:
            links = [old_links]
        links = str(links).replace(" ", "")

        crawljobs = RssConfig('Crawljobs', configfile)
        usesubdir = crawljobs.get("subdir")

        priority = "DEFAULT"

        if full_path:
            path = full_path
        else:
            if usesubdir:
                path = subdir + "/<jd:packagename>"
            else:
                path = "<jd:packagename>"
        if "Remux" in path:
            priority = "LOWER"

        try:
            device.linkgrabber.add_links(params=[{
                "autostart": autostart,
                "links": links,
                "packageName": title,
                "extractPassword": password,
                "priority": priority,
                "downloadPassword": password,
                "destinationFolder": path,
                "comment": "RSScrawler by rix1337",
                "overwritePackagizerRules": False
            }])
        except rsscrawler.myjdapi.TokenExpiredException:
            # Refresh the MyJDownloader session once and retry
            device = get_device(configfile)
            if not device or not is_device(device):
                return False
            device.linkgrabber.add_links(params=[{
                "autostart": autostart,
                "links": links,
                "packageName": title,
                "extractPassword": password,
                "priority": priority,
                "downloadPassword": password,
                "destinationFolder": path,
                "comment": "RSScrawler by rix1337",
                "overwritePackagizerRules": False
            }])

        db = RssDb(dbfile, 'crawldog')
        if db.retrieve(title):
            db.delete(title)
            db.store(title, 'retried')
        else:
            db.store(title, 'added')
        return device
    except rsscrawler.myjdapi.MYJDException as e:
        print(u"Fehler bei der Verbindung mit MyJDownloader: " + str(e))
        return False
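
# Hedged usage sketch, not part of the original module: queue a single link as a
# JDownloader package via download(). Title, link and password below are
# hypothetical placeholders; device is the MyJDownloader handle used elsewhere.
def _example_download(configfile, dbfile, device):
    return download(configfile, dbfile, device,
                    title='Example.Movie.2020.German.1080p.WEB.x264-GROUP',
                    subdir='RSScrawler',
                    old_links='https://example.org/folder/abc123',
                    password='example.org',
                    autostart=False)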
class SJ: def __init__(self, configfile, dbfile, device, logging, scraper, filename, internal_name): self._INTERNAL_NAME = internal_name self.configfile = configfile self.dbfile = dbfile self.device = device self.hostnames = RssConfig('Hostnames', self.configfile) self.sj = self.hostnames.get('sj') self.config = RssConfig(self._INTERNAL_NAME, self.configfile) self.rsscrawler = RssConfig("RSScrawler", self.configfile) self.hevc_retail = self.config.get("hevc_retail") self.retail_only = self.config.get("retail_only") self.hoster_fallback = self.config.get("hoster_fallback") self.hosters = RssConfig("Hosters", configfile).get_section() self.log_info = logging.info self.log_error = logging.error self.log_debug = logging.debug self.scraper = scraper self.filename = filename self.db = RssDb(self.dbfile, 'rsscrawler') self.quality = self.config.get("quality") self.cdc = RssDb(self.dbfile, 'cdc') self.last_set_sj = self.cdc.retrieve("SJSet-" + self.filename) self.last_sha_sj = self.cdc.retrieve("SJ-" + self.filename) self.headers = { 'If-Modified-Since': str(self.cdc.retrieve("SJHeaders-" + self.filename)) } settings = [ "quality", "rejectlist", "regex", "hevc_retail", "retail_only", "hoster_fallback" ] self.settings = [] self.settings.append(self.rsscrawler.get("english")) self.settings.append(self.rsscrawler.get("surround")) self.settings.append(self.hosters) for s in settings: self.settings.append(self.config.get(s)) self.empty_list = False if self.filename == 'SJ_Staffeln_Regex': self.level = 3 elif self.filename == 'MB_Staffeln': self.seasonssource = self.config.get('seasonssource').lower() self.level = 2 elif self.filename == 'SJ_Serien_Regex': self.level = 1 else: self.level = 0 self.pattern = r'^(' + "|".join( self.get_series_list(self.filename, self.level)).lower() + ')' self.listtype = "" self.day = 0 def settings_hash(self, refresh): if refresh: settings = [ "quality", "rejectlist", "regex", "hevc_retail", "retail_only", "hoster_fallback" ] self.settings = [] self.settings.append(self.rsscrawler.get("english")) self.settings.append(self.rsscrawler.get("surround")) self.settings.append(self.hosters) for s in settings: self.settings.append(self.config.get(s)) self.pattern = r'^(' + "|".join( self.get_series_list(self.filename, self.level)).lower() + ')' set_sj = str(self.settings) + str(self.pattern) return hashlib.sha256(set_sj.encode('ascii', 'ignore')).hexdigest() def get_series_list(self, liste, series_type): if series_type == 1: self.listtype = " (RegEx)" elif series_type == 2: self.listtype = " (Staffeln)" elif series_type == 3: self.listtype = " (Staffeln/RegEx)" cont = ListDb(self.dbfile, liste).retrieve() titles = [] if cont: for title in cont: if title: title = title.replace(" ", ".") titles.append(title) if not titles: self.empty_list = True return titles def parse_download(self, series_url, title, language_id): if not check_valid_release(title, self.retail_only, self.hevc_retail, self.dbfile): self.log_debug( title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)" ) return if self.filename == 'MB_Staffeln': if not self.config.get("seasonpacks"): staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower()) if staffelpack: self.log_debug("%s - Release ignoriert (Staffelpaket)" % title) return if not re.search(self.seasonssource, title.lower()): self.log_debug(title + " - Release hat falsche Quelle") return try: series_info = get_url(series_url, self.configfile, self.dbfile) series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0] api_url = 
'https://' + self.sj + '/api/media/' + series_id + '/releases' response = get_url(api_url, self.configfile, self.dbfile, self.scraper) seasons = json.loads(response) for season in seasons: season = seasons[season] for item in season['items']: if item['name'] == title: valid = False for hoster in item['hoster']: if hoster: if check_hoster(hoster, self.configfile): valid = True if not valid and not self.hoster_fallback: storage = self.db.retrieve_all(title) if 'added' not in storage and 'notdl' not in storage: wrong_hoster = '[SJ/Hoster fehlt] - ' + title if 'wrong_hoster' not in storage: print(wrong_hoster) self.db.store(title, 'wrong_hoster') notify([wrong_hoster], self.configfile) else: self.log_debug(wrong_hoster) else: return self.send_package(title, series_url, language_id) except: print( u"SJ hat die Serien-API angepasst. Breche Download-Prüfung ab!" ) def send_package(self, title, series_url, language_id): englisch = "" if language_id == 2: englisch = "/Englisch" if self.filename == 'SJ_Serien_Regex': link_placeholder = '[Episode/RegEx' + englisch + '] - ' elif self.filename == 'SJ_Serien': link_placeholder = '[Episode' + englisch + '] - ' elif self.filename == 'SJ_Staffeln_Regex]': link_placeholder = '[Staffel/RegEx' + englisch + '] - ' else: link_placeholder = '[Staffel' + englisch + '] - ' try: storage = self.db.retrieve_all(title) except Exception as e: self.log_debug("Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title)) return if 'added' in storage or 'notdl' in storage: self.log_debug(title + " - Release ignoriert (bereits gefunden)") else: download = add_decrypt(title, series_url, self.sj, self.dbfile) if download: self.db.store(title, 'added') log_entry = link_placeholder + title + ' - [SJ]' self.log_info(log_entry) notify(["[Click'n'Load notwendig] - " + log_entry], self.configfile) return log_entry def periodical_task(self): if not self.sj: return self.device if self.filename == 'SJ_Serien_Regex': if not self.config.get('regex'): self.log_debug("Suche für SJ-Regex deaktiviert!") return self.device elif self.filename == 'SJ_Staffeln_Regex': if not self.config.get('regex'): self.log_debug("Suche für SJ-Regex deaktiviert!") return self.device elif self.filename == 'MB_Staffeln': if not self.config.get('crawlseasons'): self.log_debug("Suche für SJ-Staffeln deaktiviert!") return self.device if self.empty_list: self.log_debug("Liste ist leer. Stoppe Suche für Serien!" + self.listtype) return self.device try: reject = self.config.get("rejectlist").replace( ",", "|").lower() if len( self.config.get("rejectlist")) > 0 else r"^unmatchable$" except TypeError: reject = r"^unmatchable$" set_sj = self.settings_hash(False) header = False response = False while self.day < 8: if self.last_set_sj == set_sj: try: response = get_url_headers( 'https://' + self.sj + '/api/releases/latest/' + str(self.day), self.configfile, self.dbfile, self.headers, self.scraper) self.scraper = response[1] response = response[0] if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex": feed = j_releases_to_feedparser_dict( response.text, "seasons", 'https://' + self.sj, True) else: feed = j_releases_to_feedparser_dict( response.text, "episodes", 'https://' + self.sj, True) except: print(u"SJ hat die Feed-API angepasst. Breche Suche ab!") feed = False if response: if response.status_code == 304: self.log_debug( "SJ-Feed seit letztem Aufruf nicht aktualisiert - breche Suche ab!" 
) return self.device header = True else: try: response = get_url( 'https://' + self.sj + '/api/releases/latest/' + str(self.day), self.configfile, self.dbfile, self.scraper) if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex": feed = j_releases_to_feedparser_dict( response, "seasons", 'https://' + self.sj, True) else: feed = j_releases_to_feedparser_dict( response, "episodes", 'https://' + self.sj, True) except: print(u"SJ hat die Feed-API angepasst. Breche Suche ab!") feed = False self.day += 1 if feed and feed.entries: first_post_sj = feed.entries[0] concat_sj = first_post_sj.title + first_post_sj.published + str( self.settings) + str(self.pattern) sha_sj = hashlib.sha256(concat_sj.encode( 'ascii', 'ignore')).hexdigest() else: self.log_debug("Feed ist leer - breche Suche ab!") return False for post in feed.entries: concat = post.title + post.published + \ str(self.settings) + str(self.pattern) sha = hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest() if sha == self.last_sha_sj: self.log_debug("Feed ab hier bereits gecrawlt (" + post.title + ") - breche Suche ab!") break series_url = post.series_url title = post.title.replace("-", "-") if self.filename == 'SJ_Serien_Regex': if self.config.get("regex"): if '.german.' in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: m = re.search(self.pattern, title.lower()) if not m and "720p" not in title and "1080p" not in title and "2160p" not in title: m = re.search( self.pattern.replace("480p", "."), title.lower()) self.quality = "480p" if m: if "720p" in title.lower(): self.quality = "720p" if "1080p" in title.lower(): self.quality = "1080p" if "2160p" in title.lower(): self.quality = "2160p" m = re.search(reject, title.lower()) if m: self.log_debug( title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)" ) title = re.sub(r'\[.*\] ', '', post.title) self.parse_download(series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) else: continue elif self.filename == 'SJ_Staffeln_Regex': if self.config.get("regex"): if '.german.' in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: m = re.search(self.pattern, title.lower()) if not m and "720p" not in title and "1080p" not in title and "2160p" not in title: m = re.search( self.pattern.replace("480p", "."), title.lower()) self.quality = "480p" if m: if "720p" in title.lower(): self.quality = "720p" if "1080p" in title.lower(): self.quality = "1080p" if "2160p" in title.lower(): self.quality = "2160p" m = re.search(reject, title.lower()) if m: self.log_debug( title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)" ) title = re.sub(r'\[.*\] ', '', post.title) self.parse_download(series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) else: continue else: if self.config.get("quality") != '480p': m = re.search(self.pattern, title.lower()) if m: if '.german.' 
in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: mm = re.search(self.quality, title.lower()) if mm: mmm = re.search(reject, title.lower()) if mmm: self.log_debug( title + " - Release ignoriert (basierend auf rejectlist-Einstellung)" ) continue if self.rsscrawler.get("surround"): if not re.match( r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title): self.log_debug( title + " - Release ignoriert (kein Mehrkanalton)" ) continue try: storage = self.db.retrieve_all(title) except Exception as e: self.log_debug( "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title)) return self.device if 'added' in storage: self.log_debug( title + " - Release ignoriert (bereits gefunden)" ) continue self.parse_download( series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) else: m = re.search(self.pattern, title.lower()) if m: if '.german.' in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: if "720p" in title.lower( ) or "1080p" in title.lower( ) or "2160p" in title.lower(): continue mm = re.search(reject, title.lower()) if mm: self.log_debug( title + " Release ignoriert (basierend auf rejectlist-Einstellung)" ) continue if self.rsscrawler.get("surround"): if not re.match( r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title): self.log_debug( title + " - Release ignoriert (kein Mehrkanalton)" ) continue title = re.sub(r'\[.*\] ', '', post.title) try: storage = self.db.retrieve_all(title) except Exception as e: self.log_debug( "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title)) return self.device if 'added' in storage: self.log_debug( title + " - Release ignoriert (bereits gefunden)" ) continue self.parse_download( series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) if set_sj: new_set_sj = self.settings_hash(True) if set_sj == new_set_sj: self.cdc.delete("SJSet-" + self.filename) self.cdc.store("SJSet-" + self.filename, set_sj) self.cdc.delete("SJ-" + self.filename) self.cdc.store("SJ-" + self.filename, sha_sj) if header and response: self.cdc.delete("SJHeaders-" + self.filename) self.cdc.store("SJHeaders-" + self.filename, response.headers['date']) return self.device
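
# Hedged sketch, new and illustrative only: SJ.periodical_task and SF.periodical_task
# both stop crawling a feed once its newest entry hashes to the value stored in the
# 'cdc' table. This standalone helper mirrors that hashing scheme (title + published
# date + current settings + list pattern, SHA-256 over the ASCII-reduced string).
def _feed_marker(title, published, settings, pattern):
    import hashlib
    concat = title + published + str(settings) + str(pattern)
    return hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest()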
def get_url_headers(url, configfile, dbfile, headers, scraper=False):
    config = RssConfig('RSScrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()

    db = RssDb(dbfile, 'proxystatus')
    db_normal = RssDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    # Site keys tracked in the proxy/normal status tables
    sites = ["SJ", "DJ", "SF", "MB", "HW", "FX", "HS", "NK", "DD", "FC"]

    if proxy:
        try:
            # If the proxy is flagged as broken for this site, either fall back to a
            # direct request (when fallback is enabled and the direct route is not
            # flagged as well) or return an empty response for this URL.
            for key in sites:
                if site and key in site:
                    if db.retrieve(key):
                        if config.get("fallback") and not db_normal.retrieve(key):
                            return [scraper.get(url, headers=headers, timeout=30), scraper]
                        return ["", scraper]
                    break
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.get(url, headers=headers, proxies=proxies, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]
    else:
        try:
            # Without a proxy, skip sites that are flagged as unreachable.
            for key in sites:
                if site and key in site and db_normal.retrieve(key):
                    return ["", scraper]
            response = scraper.get(url, headers=headers, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]