def get_original_language(key, imdb_details, imdb_url, configfile, dbfile, scraper, log_debug):
    original_language = False
    if imdb_details and len(imdb_details) > 0:
        soup = BeautifulSoup(imdb_details, 'lxml')
        try:
            original_language = soup.find(
                'h4', text=re.compile(r'Language:')).parent.find("a").text
        except:
            pass
    elif imdb_url and len(imdb_url) > 0:
        imdb_details = get_url(imdb_url, configfile, dbfile, scraper)
        if imdb_details:
            soup = BeautifulSoup(imdb_details, 'lxml')
            try:
                original_language = soup.find(
                    'h4', text=re.compile(r'Language:')).parent.find("a").text
            except:
                pass
    if not original_language:
        if imdb_details and len(imdb_details) > 0:
            soup = BeautifulSoup(imdb_details, 'lxml')
            try:
                original_language = soup.find(
                    'h3', text=re.compile(r'Language')).next.next.next.text.strip().replace("\n", "").split(",")[0]
            except:
                pass
        elif imdb_url and len(imdb_url) > 0:
            imdb_details = get_url(imdb_url, configfile, dbfile, scraper)
            if imdb_details:
                soup = BeautifulSoup(imdb_details, 'lxml')
                try:
                    original_language = soup.find(
                        'h3', text=re.compile(r'Language')).next.next.next.text.strip().replace("\n", "").split(",")[0]
                except:
                    pass
    if not original_language:
        log_debug("%s - Originalsprache nicht ermittelbar" % key)
    if original_language and original_language == "German":
        log_debug(
            "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!" % key)
        return False
    else:
        return original_language
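
# Hedged usage sketch (not part of the original module): illustrates how
# get_original_language might be called. The config/db paths, the cloudscraper
# session and the use of print as log_debug callback are assumptions for
# illustration only.
def example_original_language_lookup(key, imdb_url, configfile, dbfile):
    import cloudscraper  # assumed available, as elsewhere in this project
    scraper = cloudscraper.create_scraper()
    # No pre-fetched IMDb page is passed (imdb_details=False), so the helper
    # fetches imdb_url itself; it returns False for German originals.
    return get_original_language(key, False, imdb_url, configfile, dbfile, scraper, print)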
def j_parse_download(self, series_url, title, language_id):
    if not check_valid_release(title, self.retail_only, self.hevc_retail, self.dbfile):
        self.log_debug(
            title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)")
        return False
    if self.filename == 'List_ContentAll_Seasons':
        if not self.config.get("seasonpacks"):
            staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
            if staffelpack:
                self.log_debug("%s - Release ignoriert (Staffelpaket)" % title)
                return False
        if not re.search(self.seasonssource, title.lower()):
            self.log_debug(title + " - Release hat falsche Quelle")
            return False
    try:
        series_info = get_url(series_url, self.configfile, self.dbfile)
        series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
        api_url = 'https://' + self.url + '/api/media/' + series_id + '/releases'
        response = get_url(api_url, self.configfile, self.dbfile, self.scraper)
        seasons = json.loads(response)
        for season in seasons:
            season = seasons[season]
            for item in season['items']:
                if item['name'] == title:
                    valid = False
                    for hoster in item['hoster']:
                        if hoster:
                            if check_hoster(hoster, self.configfile):
                                valid = True
                    if not valid and not self.hoster_fallback:
                        storage = self.db.retrieve_all(title)
                        if 'added' not in storage and 'notdl' not in storage:
                            wrong_hoster = '[SJ/Hoster fehlt] - ' + title
                            if 'wrong_hoster' not in storage:
                                print(wrong_hoster)
                                self.db.store(title, 'wrong_hoster')
                                notify([wrong_hoster], self.configfile)
                            else:
                                self.log_debug(wrong_hoster)
                        return False
                    else:
                        return [title, series_url, language_id, False, False]
    except:
        print(self._INTERNAL_NAME + u" hat die Serien-API angepasst. Breche Download-Prüfung ab!")
        return False
def ww_get_download_links(self, content, title):
    base_url = "https://" + CrawlerConfig('Hostnames', self.configfile).get('ww')
    content = content.replace("mkv|", "")
    download_links = []
    try:
        response = get_url(content, self.configfile, self.dbfile, self.scraper)
        if not response or "NinjaFirewall 429" in response:
            print(
                u"WW hat den Link-Abruf für " + title + " blockiert. Eine spätere Anfrage hat möglicherweise Erfolg!")
            return False
        links = BeautifulSoup(response, 'lxml').findAll("div", {"id": "download-links"})
        for link in links:
            hoster = link.text
            if 'Direct Download 100 MBit/s' not in hoster:
                url = base_url + link.find("a")["href"]
                download_links.append('href="' + url + '" ' + hoster + '<')
        download_links = "".join(download_links)
        download_links = get_download_links(self, download_links, title)
        return download_links
    except:
        return False
def fx_search_results(content, configfile, dbfile, scraper):
    articles = content.find("main").find_all("article")
    result_urls = []
    for article in articles:
        url = article.find("a")["href"]
        if url:
            result_urls.append(url)
    items = []
    if result_urls:
        results = []
        for url in result_urls:
            results.append(get_url(url, configfile, dbfile, scraper))
        for result in results:
            article = BeautifulSoup(str(result), 'lxml')
            titles = article.find_all("a", href=re.compile("filecrypt"))
            for title in titles:
                link = article.find("link", rel="canonical")["href"]
                title = title.text.encode("ascii", errors="ignore").decode().replace("/", "")
                if title:
                    if "download" in title.lower():
                        try:
                            title = str(
                                content.find("strong", text=re.compile(r".*Release.*")).nextSibling)
                        except:
                            continue
                    items.append([title, link + "|" + title])
    return items
def periodical_task(self):
    feeds = self.config.get("feeds")
    if feeds:
        added_items = []
        feeds = feeds.replace(" ", "").split(',')
        for feed in feeds:
            feed = feedparser.parse(
                get_url(feed, self.configfile, self.dbfile, self.scraper))
            for post in feed.entries:
                key = post.title.replace(" ", ".")

                epoch = datetime(1970, 1, 1)
                current_epoch = int(time())
                published_format = "%Y-%m-%d %H:%M:%S+00:00"
                published_timestamp = str(parser.parse(post.published))
                published_epoch = int((datetime.strptime(
                    published_timestamp, published_format) - epoch).total_seconds())
                if (current_epoch - 1800) > published_epoch:
                    link_pool = post.summary
                    unicode_links = re.findall(r'(http.*)', link_pool)
                    links = []
                    for link in unicode_links:
                        if check_hoster(link, self.configfile):
                            links.append(str(link))
                    if self.config.get("hoster_fallback") and not links:
                        for link in unicode_links:
                            links.append(str(link))
                    storage = self.db.retrieve_all(key)
                    if not links:
                        if 'added' not in storage and 'notdl' not in storage:
                            wrong_hoster = '[' + self._SITE + '/Hoster fehlt] - ' + key
                            if 'wrong_hoster' not in storage:
                                print(wrong_hoster)
                                self.db.store(key, 'wrong_hoster')
                                notify([wrong_hoster], self.configfile)
                            else:
                                self.log_debug(wrong_hoster)
                    elif 'added' in storage:
                        self.log_debug(
                            "%s - Release ignoriert (bereits gefunden)" % key)
                    else:
                        self.device = myjd_download(
                            self.configfile, self.dbfile, self.device, key, "FeedCrawler", links, "")
                        if self.device:
                            self.db.store(key, 'added')
                            log_entry = '[Englisch] - ' + key + ' - [' + self._SITE + ']'
                            self.log_info(log_entry)
                            notify([log_entry], self.configfile)
                            added_items.append(log_entry)
                else:
                    self.log_debug(
                        "%s - Releasezeitpunkt weniger als 30 Minuten in der Vergangenheit - wird ignoriert." % key)
    else:
        self.log_debug("Liste ist leer. Stoppe Suche für " + self._SITE + "!")
    return self.device
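
# Hedged sketch (not part of the original module): isolates the age check used
# in the feed task above. A feed entry is only processed once its publish time
# lies more than 1800 seconds (30 minutes) in the past; the timestamp string is
# assumed to be pre-normalized to the format used above.
def example_is_old_enough(published_timestamp):
    from datetime import datetime
    from time import time
    epoch = datetime(1970, 1, 1)
    published_format = "%Y-%m-%d %H:%M:%S+00:00"
    published_epoch = int((datetime.strptime(
        published_timestamp, published_format) - epoch).total_seconds())
    return (int(time()) - 1800) > published_epoch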
def nk_page_download_link(self, download_link, key):
    unused_get_feed_parameter(key)
    nk = self.hostnames.get('nk')
    download_link = get_url(download_link, self.configfile, self.dbfile)
    soup = BeautifulSoup(download_link, 'lxml')
    url_hosters = []
    hosters = soup.find_all("a", href=re.compile("/go/"))
    for hoster in hosters:
        url_hosters.append(['https://' + nk + hoster["href"], hoster.text])
    return check_download_links(self, url_hosters)
def dw_parse_download(self, release_url, title, language_id):
    if not check_valid_release(title, self.retail_only, self.hevc_retail, self.dbfile):
        self.log_debug(
            title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)")
        return False
    if self.filename == 'List_ContentAll_Seasons':
        if not self.config.get("seasonpacks"):
            staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
            if staffelpack:
                self.log_debug("%s - Release ignoriert (Staffelpaket)" % title)
                return False
        if not re.search(self.seasonssource, title.lower()):
            self.log_debug(title + " - Release hat falsche Quelle")
            return False
    try:
        release_info = get_url(release_url, self.configfile, self.dbfile)
        post_hosters = BeautifulSoup(release_info, 'lxml').find(
            "div", {"id": "download"}).findAll("img", src=re.compile(r"images/hosterimg"))
        hosters = []
        valid = False
        for hoster in post_hosters:
            hoster = hoster["title"].replace("Premium-Account bei ", "").replace("ddownload", "ddl")
            if hoster not in hosters:
                hosters.append(hoster)
        for hoster in hosters:
            if hoster:
                if check_hoster(hoster, self.configfile):
                    valid = True
        if not valid and not self.hoster_fallback:
            storage = self.db.retrieve_all(title)
            if 'added' not in storage and 'notdl' not in storage:
                wrong_hoster = '[SJ/Hoster fehlt] - ' + title
                if 'wrong_hoster' not in storage:
                    print(wrong_hoster)
                    self.db.store(title, 'wrong_hoster')
                    notify([wrong_hoster], self.configfile)
                else:
                    self.log_debug(wrong_hoster)
            return False
        else:
            return [title, release_url, language_id, False, False]
    except:
        print(self._INTERNAL_NAME + u" hat die Serien-API angepasst. Breche Download-Prüfung ab!")
        return False
def dw_mirror(self, title):
    hostnames = CrawlerConfig('Hostnames', self.configfile)
    dw = hostnames.get('dw')
    if dw:
        dw_search = 'https://' + dw + '/?search=' + title
        dw_results = get_url(dw_search, self.configfile, self.dbfile, self.scraper)
        dw_results = dw_search_results(dw_results, dw)
        for result in dw_results:
            release_url = result[1].split("|")[0]
            release_info = get_url(release_url, self.configfile, self.dbfile)
            post_hosters = BeautifulSoup(release_info, 'lxml').find(
                "div", {"id": "download"}).findAll("img", src=re.compile(r"images/hosterimg"))
            hosters = []
            valid = False
            for hoster in post_hosters:
                hoster = hoster["title"].replace("Premium-Account bei ", "").replace("ddownload", "ddl")
                if hoster not in hosters:
                    hosters.append(hoster)
            for hoster in hosters:
                if hoster:
                    if check_hoster(hoster, self.configfile):
                        valid = True
            if not valid and not self.hoster_fallback:
                return False
            else:
                return [release_url]
    return False
def get_imdb_id(key, content, filename, configfile, dbfile, scraper, log_debug):
    try:
        imdb_id = re.findall(
            r'.*?(?:href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?).*?(\d(?:\.|\,)\d)(?:.|.*?)<\/a>.*?',
            content)
    except:
        imdb_id = False
    if imdb_id:
        imdb_id = imdb_id[0][0]
    else:
        try:
            search_title = re.findall(
                r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)",
                key)[0].replace(".", "+")
            search_url = "http://www.imdb.com/find?q=" + search_title
            search_page = get_url(search_url, configfile, dbfile, scraper)
            search_results = re.findall(
                r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                search_page)
        except:
            return False
        total_results = len(search_results)
        if filename == 'List_ContentAll_Seasons':
            imdb_id = search_results[0][0]
        else:
            no_series = False
            while total_results > 0:
                attempt = 0
                for result in search_results:
                    if result[3] == "TV Series":
                        no_series = False
                        total_results -= 1
                        attempt += 1
                    else:
                        no_series = True
                        imdb_id = search_results[attempt][0]
                        total_results = 0
                        break
            if no_series is False:
                log_debug("%s - Keine passende Film-IMDB-Seite gefunden" % key)
    if not imdb_id:
        return False
    return imdb_id
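
# Hedged usage sketch (not part of the original module): shows one way
# get_imdb_id could be called. The release name is a placeholder; passing an
# empty content string forces the IMDb title-search fallback. Paths and the
# scraper session are assumptions for illustration only.
def example_imdb_lookup(configfile, dbfile):
    import cloudscraper  # assumed available, as elsewhere in this project
    scraper = cloudscraper.create_scraper()
    key = "Some.Movie.2021.German.1080p.BluRay.x264-GROUP"  # hypothetical release name
    return get_imdb_id(key, "", "List_ContentShows_Shows", configfile, dbfile, scraper, print)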
def by_page_download_link(self, download_link, key):
    unused_get_feed_parameter(key)
    by = self.hostnames.get('by')
    download_link = get_url(download_link, self.configfile, self.dbfile)
    soup = BeautifulSoup(download_link, 'lxml')
    links = soup.find_all("iframe")
    async_link_results = []
    for link in links:
        link = link["src"]
        if 'https://' + by in link:
            async_link_results.append(link)
    async_link_results = get_urls_async(async_link_results, self.configfile, self.dbfile)
    links = async_link_results[0]
    url_hosters = []
    for link in links:
        if link:
            link = BeautifulSoup(link, 'lxml').find("a", href=re.compile(r"/go\.php\?"))
            if link:
                url_hosters.append([link["href"], link.text.replace(" ", "")])
    return check_download_links(self, url_hosters)
def get(title, configfile, dbfile, bl_only=False, sj_only=False):
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    sj = hostnames.get('sj')

    specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    else:
        special = None

    bl_final = {}
    sj_final = {}
    scraper = cloudscraper.create_scraper()

    if not sj_only:
        mb_query = sanitize(title).replace(" ", "+")
        if special:
            bl_query = mb_query + "+" + special
        else:
            bl_query = mb_query

        unrated = []

        config = CrawlerConfig('ContentAll', configfile)
        quality = config.get('quality')
        ignore = config.get('ignore')

        if "480p" not in quality:
            search_quality = "+" + quality
        else:
            search_quality = ""

        if by:
            by_search = 'https://' + by + '/?q=' + bl_query + search_quality
        else:
            by_search = None
        if dw:
            dw_search = 'https://' + dw + '/?kategorie=Movies&search=' + bl_query + search_quality
        else:
            dw_search = None
        if fx:
            fx_search = 'https://' + fx + '/?s=' + bl_query
        else:
            fx_search = None

        async_results = get_urls_async([by_search, dw_search, fx_search], configfile, dbfile, scraper)
        scraper = async_results[1]
        async_results = async_results[0]

        by_results = []
        dw_results = []
        fx_results = []

        for res in async_results:
            if check_is_site(res, configfile) == 'BY':
                by_results = by_search_results(res, by)
            elif check_is_site(res, configfile) == 'DW':
                dw_results = dw_search_results(res, dw)
            elif check_is_site(res, configfile) == 'FX':
                fx_results = fx_search_results(fx_content_to_soup(res), configfile, dbfile, scraper)

        if nk:
            nk_search = post_url(
                'https://' + nk + "/search",
                configfile, dbfile,
                data={'search': bl_query.replace("+", " ") + " " + quality})
            nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
        else:
            nk_results = []

        password = by
        for result in by_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            if "xxx" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (BY)"
                ])

        password = dw
        for result in dw_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (DW)"
            ])

        password = fx.split('.')[0]
        for result in fx_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            if "-low" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (FX)"
                ])

        password = nk.split('.')[0].capitalize()
        for result in nk_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (NK)"
            ])

        rated = sorted(unrated, reverse=True)

        results = {}
        i = 0
        for result in rated:
            res = {"payload": result[1], "title": result[2]}
            results["result" + str(i + 1000)] = res
            i += 1
        bl_final = results

    if not bl_only:
        if sj:
            sj_query = sanitize(title).replace(" ", "+")
            sj_search = get_url(
                'https://' + sj + '/serie/search?q=' + sj_query, configfile, dbfile, scraper)
            try:
                sj_results = BeautifulSoup(sj_search, 'lxml').findAll(
                    "a", href=re.compile("/serie"))
            except:
                sj_results = []
        else:
            sj_results = []

        if special:
            append = " (" + special + ")"
        else:
            append = ""
        i = 0
        results = {}
        for result in sj_results:
            r_title = result.text
            r_rating = fuzz.ratio(title.lower(), r_title)
            if r_rating > 40:
                res = {
                    "payload": encode_base64(result['href'] + "|" + r_title + "|" + str(special)),
                    "title": r_title + append
                }
                results["result" + str(i + 1000)] = res
                i += 1
        sj_final = results

    return bl_final, sj_final
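
# Hedged usage sketch (not part of the original module): illustrates how the
# web-search entry point above could be called and how its result dictionaries
# are shaped. The config/db paths and the sample title are placeholders; each
# payload is a base64 string ("link|password" for movie results, respectively
# "href|title|special" for series results) meant for the download() functions
# further below.
def example_web_search(configfile, dbfile):
    bl_results, sj_results = get("Some Show,S01", configfile, dbfile)
    for res in list(bl_results.values()) + list(sj_results.values()):
        # each entry: {"payload": <base64 string>, "title": <display title>}
        print(res["title"], res["payload"])
    return bl_results, sj_results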
def download(payload, configfile, dbfile):
    hostnames = CrawlerConfig('Hostnames', configfile)
    sj = hostnames.get('sj')

    payload = decode_base64(payload).split("|")
    href = payload[0]
    title = payload[1]
    special = payload[2].strip().replace("None", "")

    series_url = 'https://' + sj + href
    series_info = get_url(series_url, configfile, dbfile)
    series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]

    api_url = 'https://' + sj + '/api/media/' + series_id + '/releases'
    releases = get_url(api_url, configfile, dbfile)
    unsorted_seasons = json.loads(releases)

    listen = ["List_ContentShows_Shows", "List_ContentAll_Seasons"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if list_title not in cont:
            ListDb(dbfile, liste).store(list_title)

    config = CrawlerConfig('ContentShows', configfile)
    english_ok = CrawlerConfig('FeedCrawler', configfile).get("english")
    quality = config.get('quality')
    ignore = config.get('rejectlist')

    result_seasons = {}
    result_episodes = {}

    seasons = {}
    for season in unsorted_seasons:
        if "sp" in season.lower():
            seasons[season] = unsorted_seasons[season]
    for season in unsorted_seasons:
        if "sp" not in season.lower():
            seasons[season] = unsorted_seasons[season]

    for season in seasons:
        releases = seasons[season]

        for release in releases['items']:
            name = release['name'].encode('ascii', errors='ignore').decode('utf-8')
            try:
                season = re.findall(r'.*\.(s\d{1,3}).*', name, re.IGNORECASE)[0]
            except:
                pass
            hosters = release['hoster']
            try:
                valid = bool(release['resolution'] == quality)
            except:
                valid = re.match(re.compile(r'.*' + quality + r'.*'), name)
            if valid and special:
                valid = bool("." + special.lower() + "." in name.lower())
            if valid and not english_ok:
                valid = bool(".german." in name.lower())
            if valid:
                valid = False
                for hoster in hosters:
                    if hoster and check_hoster(hoster, configfile) or config.get("hoster_fallback"):
                        valid = True
            if valid:
                try:
                    ep = release['episode']
                    if ep:
                        existing = result_episodes.get(season)
                        if existing:
                            valid = False
                            for e in existing:
                                if e == ep:
                                    if rate(name, ignore) > rate(existing[e], ignore):
                                        valid = True
                                else:
                                    valid = True
                            if valid:
                                existing.update({ep: name})
                        else:
                            existing = {ep: name}
                        result_episodes.update({season: existing})
                        continue
                except:
                    pass

                existing = result_seasons.get(season)
                dont = False
                if existing:
                    if rate(name, ignore) < rate(existing, ignore):
                        dont = True
                if not dont:
                    result_seasons.update({season: name})

        try:
            if result_seasons[season] and result_episodes[season]:
                del result_episodes[season]
        except:
            pass

        success = False
        try:
            if result_seasons[season]:
                success = True
        except:
            try:
                if result_episodes[season]:
                    success = True
            except:
                pass

        if success:
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)
        else:
            for release in releases['items']:
                name = release['name'].encode('ascii', errors='ignore').decode('utf-8')
                hosters = release['hoster']
                valid = True
                if valid and special:
                    valid = bool("." + special.lower() + "." in name.lower())
                if valid and not english_ok:
                    valid = bool(".german." in name.lower())
                if valid:
                    valid = False
                    for hoster in hosters:
                        if hoster and check_hoster(hoster, configfile) or config.get("hoster_fallback"):
                            valid = True
                if valid:
                    try:
                        ep = release['episode']
                        if ep:
                            existing = result_episodes.get(season)
                            if existing:
                                for e in existing:
                                    if e == ep:
                                        if rate(name, ignore) > rate(existing[e], ignore):
                                            existing.update({ep: name})
                            else:
                                existing = {ep: name}
                            result_episodes.update({season: existing})
                            continue
                    except:
                        pass

                    existing = result_seasons.get(season)
                    dont = False
                    if existing:
                        if rate(name, ignore) < rate(existing, ignore):
                            dont = True
                    if not dont:
                        result_seasons.update({season: name})

            try:
                if result_seasons[season] and result_episodes[season]:
                    del result_episodes[season]
            except:
                pass

            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)

    matches = []

    for season in result_seasons:
        matches.append(result_seasons[season])
    for season in result_episodes:
        for episode in result_episodes[season]:
            matches.append(result_episodes[season][episode])

    notify_array = []
    for title in matches:
        db = FeedDb(dbfile, 'FeedCrawler')
        if add_decrypt(title, series_url, sj, dbfile):
            db.store(title, 'added')
            log_entry = u'[Suche/Serie] - ' + title + ' - [SJ]'
            logger.info(log_entry)
            notify_array.append(log_entry)

    notify(notify_array, configfile)

    if not matches:
        return False
    return matches
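
# Hedged sketch (not part of the original module): shows the round-trip of the
# "href|title|special" payload consumed by the series download() above. The
# href and title are placeholders; note that this section lists two functions
# named download(), and this sketch refers to the series variant directly above.
def example_series_payload():
    payload = encode_base64("/serie/some-show|Some.Show|S01")
    href, title, special = decode_base64(payload).split("|")
    return href, title, special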
def download(payload, device, configfile, dbfile):
    config = CrawlerConfig('ContentAll', configfile)
    db = FeedDb(dbfile, 'FeedCrawler')
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    nk = hostnames.get('nk')

    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]

    site = check_is_site(link, configfile)
    if not site:
        return False
    elif "DW" in site:
        download_method = add_decrypt_instead_of_download
        download_links = [link]
        key = payload[1]
        password = payload[2]
    else:
        url = get_url(link, configfile, dbfile)
        if not url or "NinjaFirewall 429" in url:
            return False
        download_method = myjd_download
        soup = BeautifulSoup(url, 'lxml')

        if "BY" in site:
            key = soup.find("small").text
            links = soup.find_all("iframe")
            async_link_results = []
            for link in links:
                link = link["src"]
                if 'https://' + by in link:
                    async_link_results.append(link)
            async_link_results = get_urls_async(async_link_results, configfile, dbfile)
            links = async_link_results[0]
            url_hosters = []
            for link in links:
                if link:
                    link = BeautifulSoup(link, 'lxml').find(
                        "a", href=re.compile(r"/go\.php\?"))
                    if link:
                        url_hosters.append(
                            [link["href"], link.text.replace(" ", "")])
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(
                    ['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            key = payload[1]
            password = payload[2]
        else:
            return False

        links = {}
        if "FX" in site:
            class FX:
                configfile = ""
            FX.configfile = configfile
            download_links = fx_get_download_links(FX, url, key)
        else:
            for url_hoster in reversed(url_hosters):
                try:
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-").replace("ddownload", "ddl")
                    if check_hoster(link_hoster, configfile):
                        link = url_hoster[0]
                        if by in link:
                            demasked_link = get_redirected_url(link, configfile, dbfile, False)
                            if demasked_link:
                                link = demasked_link
                        links[link_hoster] = link
                except:
                    pass
            if config.get("hoster_fallback") and not links:
                for url_hoster in reversed(url_hosters):
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-").replace("ddownload", "ddl")
                    link = url_hoster[0]
                    if by in link:
                        demasked_link = get_redirected_url(link, configfile, dbfile, False)
                        if demasked_link:
                            link = demasked_link
                    links[link_hoster] = link
            download_links = list(links.values())

    englisch = False
    if "*englisch" in key.lower() or "*english" in key.lower():
        key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace(
            "*ENGLISH", "").replace("*English", "").replace("*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    if download_links:
        if staffel:
            if download_method(configfile, dbfile, device, key, "FeedCrawler", download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Staffel] - ' + key.replace(
                    ".COMPLETE", "").replace(".Complete", "") + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                if is_retail(key, dbfile):
                    retail = True
            if download_method(configfile, dbfile, device, key, "FeedCrawler", download_links, password):
                db.store(
                    key,
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film' + (
                    '/Englisch' if englisch and not retail else '') + (
                    '/Englisch/Retail' if englisch and retail else '') + (
                    '/Retail' if not englisch and retail else '') + '] - ' + key + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return [key]
    else:
        return False
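
# Hedged usage sketch (not part of the original module): illustrates the payload
# consumed by the movie download() above. For BY/NK/FX results the web search
# encodes "link|password"; the DW branch additionally expects a key, i.e.
# "link|key|password". The link, password and device object are placeholders.
def example_movie_download(device, configfile, dbfile):
    payload = encode_base64("https://example.org/some-release|password")
    return download(payload, device, configfile, dbfile)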
def sf_parse_download(self, series_url, title, language_id):
    if not check_valid_release(title, self.retail_only, self.hevc_retail, self.dbfile):
        self.log_debug(
            title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)")
        return False
    if self.filename == 'List_ContentAll_Seasons':
        if not self.config.get("seasonpacks"):
            staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
            if staffelpack:
                self.log_debug("%s - Release ignoriert (Staffelpaket)" % title)
                return False
        if not re.search(self.seasonssource, title.lower()):
            self.log_debug(title + " - Release hat falsche Quelle")
            return False
    try:
        if language_id == 2:
            lang = 'EN'
        else:
            lang = 'DE'
        epoch = str(datetime.datetime.now().timestamp()).replace('.', '')[:-3]
        api_url = series_url + '?lang=' + lang + '&_=' + epoch
        response = get_url(api_url, self.configfile, self.dbfile, self.scraper)
        info = json.loads(response)

        is_episode = re.findall(r'.*\.(s\d{1,3}e\d{1,3})\..*', title, re.IGNORECASE)
        if is_episode:
            episode_string = re.findall(r'.*S\d{1,3}(E\d{1,3}).*', is_episode[0])[0].lower()
            season_string = re.findall(r'.*(S\d{1,3})E\d{1,3}.*', is_episode[0])[0].lower()
            season_title = rreplace(
                title.lower().replace(episode_string, ''), "-", ".*", 1).lower().replace(".repack", "")
            season_title = season_title.replace(".untouched", ".*").replace(".dd+51", ".dd.51")
            episode = str(int(episode_string.replace("e", "")))
            season = str(int(season_string.replace("s", "")))
            episode_name = re.findall(r'.*\.s\d{1,3}(\..*).german', season_title, re.IGNORECASE)
            if episode_name:
                season_title = season_title.replace(episode_name[0], '')
            codec_tags = [".h264", ".x264"]
            for tag in codec_tags:
                season_title = season_title.replace(tag, ".*264")
            web_tags = [".web-rip", ".webrip", ".webdl", ".web-dl"]
            for tag in web_tags:
                season_title = season_title.replace(tag, ".web.*")
        else:
            season = False
            episode = False
            season_title = title
            multiple_episodes = re.findall(r'(e\d{1,3}-e*\d{1,3}\.)', season_title, re.IGNORECASE)
            if multiple_episodes:
                season_title = season_title.replace(multiple_episodes[0], '.*')

        content = BeautifulSoup(info['html'], 'lxml')
        releases = content.find(
            "small", text=re.compile(season_title, re.IGNORECASE)).parent.parent.parent
        links = releases.findAll("div", {'class': 'row'})[1].findAll('a')
        download_link = False
        for link in links:
            if check_hoster(link.text.replace('\n', ''), self.configfile):
                download_link = get_redirected_url(
                    "https://" + self.url + link['href'], self.configfile, self.dbfile, self.scraper)
                break
        if not download_link and not self.hoster_fallback:
            storage = self.db.retrieve_all(title)
            if 'added' not in storage and 'notdl' not in storage:
                wrong_hoster = '[SF/Hoster fehlt] - ' + title
                if 'wrong_hoster' not in storage:
                    print(wrong_hoster)
                    self.db.store(title, 'wrong_hoster')
                    notify([wrong_hoster], self.configfile)
                else:
                    self.log_debug(wrong_hoster)
            return False
        else:
            return [title, download_link, language_id, season, episode]
    except:
        print(u"SF hat die Serien-API angepasst. Breche Download-Prüfung ab!")
        return False
def periodical_task(self):
    if not self.url:
        return self.device

    if self.filename == 'List_ContentShows_Shows_Regex':
        if not self.config.get('regex'):
            self.log_debug("Suche für " + self._SITE + "-Regex deaktiviert!")
            return self.device
    elif self.filename == 'List_ContentShows_Seasons_Regex':
        if not self.config.get('regex'):
            self.log_debug("Suche für " + self._SITE + "-Regex deaktiviert!")
            return self.device
    elif self.filename == 'List_ContentAll_Seasons':
        if not self.config.get('crawlseasons'):
            self.log_debug("Suche für " + self._SITE + "-Staffeln deaktiviert!")
            return self.device

    if self.empty_list:
        self.log_debug("Liste ist leer. Stoppe Suche für Serien!" + self.listtype)
        return self.device

    try:
        reject = self.config.get("rejectlist").replace(",", "|").lower() if len(
            self.config.get("rejectlist")) > 0 else r"^unmatchable$"
    except TypeError:
        reject = r"^unmatchable$"

    current_set = settings_hash(self, False)

    sha = False
    header = False
    response = False

    while self.day < 8:
        if self.last_set == current_set:
            try:
                url = feed_url(self)
                if url:
                    response = get_url_headers(url, self.configfile, self.dbfile, self.headers, self.scraper)
                    self.scraper = response[1]
                    response = response[0]
                    if self.filename == "List_ContentAll_Seasons" or self.filename == "List_ContentShows_Seasons_Regex":
                        feed = self.get_feed_method(response.text, "seasons", 'https://' + self.url, True)
                    else:
                        feed = self.get_feed_method(response.text, "episodes", 'https://' + self.url, True)
                else:
                    feed = False
            except:
                print(self._SITE + u" hat die Feed-API angepasst. Breche Suche ab!")
                feed = False

            if response:
                if response.status_code == 304:
                    self.log_debug(
                        self._SITE + "-Feed seit letztem Aufruf nicht aktualisiert - breche Suche ab!")
                    return self.device
                header = True
        else:
            try:
                url = feed_url(self)
                if url:
                    response = get_url(url, self.configfile, self.dbfile, self.scraper)
                    if self.filename == "List_ContentAll_Seasons" or self.filename == "List_ContentShows_Seasons_Regex":
                        feed = self.get_feed_method(response, "seasons", 'https://' + self.url, True)
                    else:
                        feed = self.get_feed_method(response, "episodes", 'https://' + self.url, True)
                else:
                    feed = False
            except:
                print(self._SITE + u" hat die Feed-API angepasst. Breche Suche ab!")
                feed = False

        self.day += 1

        if feed and feed.entries:
            first_post = feed.entries[0]
            concat = first_post.title + first_post.published + str(self.settings) + str(self.pattern)
            sha = hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest()
        else:
            self.log_debug("Feed ist leer - breche Suche ab!")
            return False

        for post in feed.entries:
            concat = post.title + post.published + \
                str(self.settings) + str(self.pattern)
            sha = hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest()
            if sha == self.last_sha:
                self.log_debug("Feed ab hier bereits gecrawlt (" + post.title + ") - breche Suche ab!")
                break

            series_url = post.series_url
            title = post.title.replace("-", "-")

            if self.filename == 'List_ContentShows_Shows_Regex':
                if self.config.get("regex"):
                    if '.german.' in title.lower():
                        language_id = 1
                    elif self.feedcrawler.get('english'):
                        language_id = 2
                    else:
                        language_id = 0
                    if language_id:
                        m = re.search(self.pattern, title.lower())
                        if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                            m = re.search(self.pattern.replace("480p", "."), title.lower())
                            self.quality = "480p"
                        if m:
                            if "720p" in title.lower():
                                self.quality = "720p"
                            if "1080p" in title.lower():
                                self.quality = "1080p"
                            if "2160p" in title.lower():
                                self.quality = "2160p"
                            m = re.search(reject, title.lower())
                            if m:
                                self.log_debug(
                                    title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)")
                            title = re.sub(r'\[.*\] ', '', post.title)
                            package = self.parse_download_method(self, series_url, title, language_id)
                            if package:
                                title = package[0]
                                site = self._SITE
                                download_link = False
                                if self.prefer_dw_mirror and "DW" not in site:
                                    download_links = dw_mirror(self, title)
                                    if download_links:
                                        download_link = download_links[0]
                                        site = "DW/" + site
                                if not download_link:
                                    download_link = package[1]
                                language_id = package[2]
                                season = package[3]
                                episode = package[4]
                                send_package(self, title, download_link, language_id, season, episode, site)
                    else:
                        self.log_debug("%s - Englische Releases deaktiviert" % title)
                else:
                    continue
            elif self.filename == 'List_ContentShows_Seasons_Regex':
                if self.config.get("regex"):
                    if '.german.' in title.lower():
                        language_id = 1
                    elif self.feedcrawler.get('english'):
                        language_id = 2
                    else:
                        language_id = 0
                    if language_id:
                        m = re.search(self.pattern, title.lower())
                        if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                            m = re.search(self.pattern.replace("480p", "."), title.lower())
                            self.quality = "480p"
                        if m:
                            if "720p" in title.lower():
                                self.quality = "720p"
                            if "1080p" in title.lower():
                                self.quality = "1080p"
                            if "2160p" in title.lower():
                                self.quality = "2160p"
                            m = re.search(reject, title.lower())
                            if m:
                                self.log_debug(
                                    title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)")
                            title = re.sub(r'\[.*\] ', '', post.title)
                            package = self.parse_download_method(self, series_url, title, language_id)
                            if package:
                                title = package[0]
                                site = self._SITE
                                download_link = False
                                if self.prefer_dw_mirror and "DW" not in site:
                                    download_links = dw_mirror(self, title)
                                    if download_links:
                                        download_link = download_links[0]
                                        site = "DW/" + site
                                if not download_link:
                                    download_link = package[1]
                                language_id = package[2]
                                season = package[3]
                                episode = package[4]
                                send_package(self, title, download_link, language_id, season, episode, site)
                    else:
                        self.log_debug("%s - Englische Releases deaktiviert" % title)
                else:
                    continue
            else:
                if self.config.get("quality") != '480p':
                    m = re.search(self.pattern, title.lower())
                    if m:
                        if '.german.' in title.lower():
                            language_id = 1
                        elif self.feedcrawler.get('english'):
                            language_id = 2
                        else:
                            language_id = 0
                        if language_id:
                            mm = re.search(self.quality, title.lower())
                            if mm:
                                mmm = re.search(reject, title.lower())
                                if mmm:
                                    self.log_debug(
                                        title + " - Release ignoriert (basierend auf rejectlist-Einstellung)")
                                    continue
                                if self.feedcrawler.get("surround"):
                                    if not re.match(r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title):
                                        self.log_debug(title + " - Release ignoriert (kein Mehrkanalton)")
                                        continue
                                try:
                                    storage = self.db.retrieve_all(title)
                                except Exception as e:
                                    self.log_debug(
                                        "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title))
                                    return self.device
                                if 'added' in storage:
                                    self.log_debug(title + " - Release ignoriert (bereits gefunden)")
                                    continue
                                package = self.parse_download_method(self, series_url, title, language_id)
                                if package:
                                    title = package[0]
                                    site = self._SITE
                                    download_link = False
                                    if self.prefer_dw_mirror and "DW" not in site:
                                        download_links = dw_mirror(self, title)
                                        if download_links:
                                            download_link = download_links[0]
                                            site = "DW/" + site
                                    if not download_link:
                                        download_link = package[1]
                                    language_id = package[2]
                                    season = package[3]
                                    episode = package[4]
                                    send_package(self, title, download_link, language_id, season, episode, site)
                        else:
                            self.log_debug("%s - Englische Releases deaktiviert" % title)
                else:
                    m = re.search(self.pattern, title.lower())
                    if m:
                        if '.german.' in title.lower():
                            language_id = 1
                        elif self.feedcrawler.get('english'):
                            language_id = 2
                        else:
                            language_id = 0
                        if language_id:
                            if "720p" in title.lower() or "1080p" in title.lower() or "2160p" in title.lower():
                                continue
                            mm = re.search(reject, title.lower())
                            if mm:
                                self.log_debug(
                                    title + " Release ignoriert (basierend auf rejectlist-Einstellung)")
                                continue
                            if self.feedcrawler.get("surround"):
                                if not re.match(r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title):
                                    self.log_debug(title + " - Release ignoriert (kein Mehrkanalton)")
                                    continue
                            title = re.sub(r'\[.*\] ', '', post.title)
                            try:
                                storage = self.db.retrieve_all(title)
                            except Exception as e:
                                self.log_debug(
                                    "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title))
                                return self.device
                            if 'added' in storage:
                                self.log_debug(title + " - Release ignoriert (bereits gefunden)")
                                continue
                            package = self.parse_download_method(self, series_url, title, language_id)
                            if package:
                                title = package[0]
                                site = self._SITE
                                download_link = False
                                if self.prefer_dw_mirror and "DW" not in site:
                                    download_links = dw_mirror(self, title)
                                    if download_links:
                                        download_link = download_links[0]
                                        site = "DW/" + site
                                if not download_link:
                                    download_link = package[1]
                                language_id = package[2]
                                season = package[3]
                                episode = package[4]
                                send_package(self, title, download_link, language_id, season, episode, site)
                        else:
                            self.log_debug("%s - Englische Releases deaktiviert" % title)

    if current_set and sha:
        new_set = settings_hash(self, True)
        if current_set == new_set:
            self.cdc.delete(self._INTERNAL_NAME + "Set-" + self.filename)
            self.cdc.store(self._INTERNAL_NAME + "Set-" + self.filename, current_set)
            self.cdc.delete(self._INTERNAL_NAME + "-" + self.filename)
            self.cdc.store(self._INTERNAL_NAME + "-" + self.filename, sha)
            if header and response:
                self.cdc.delete(self._INTERNAL_NAME + "Headers-" + self.filename)
                self.cdc.store(self._INTERNAL_NAME + "Headers-" + self.filename, response.headers['date'])

    return self.device