def parse_download(self, series_url, title, language_id):
    """Check a DJ release's hosters and hand it to send_package if usable.

    Fetches the series page to resolve the media id, queries the DJ
    releases API, finds the release matching *title* and verifies that at
    least one of its hosters is allowed by the configuration.

    :param series_url: URL of the series page on DJ.
    :param title: exact release name to look for in the API response.
    :param language_id: language marker passed through to send_package.
    :return: result of send_package, or None if the release was skipped.
    """
    # Skip if an equal or better source was already grabbed.
    if not check_valid_release(title, False, False, self.dbfile):
        self.log_debug(title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)")
        return
    try:
        series_info = get_url(series_url, self.configfile, self.dbfile)
        series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
        api_url = 'https://' + self.dj + '/api/media/' + series_id + '/releases'
        response = get_url(api_url, self.configfile, self.dbfile, self.scraper)
        seasons = json.loads(response)
        for season in seasons:
            season = seasons[season]
            for item in season['items']:
                if item['name'] == title:
                    valid = False
                    for hoster in item['hoster']:
                        # Guard against empty hoster entries (consistent with
                        # the SJ variant of this method) and stop at the
                        # first allowed hoster.
                        if hoster:
                            if check_hoster(hoster, self.configfile):
                                valid = True
                                break
                    if not valid and not self.hoster_fallback:
                        storage = self.db.retrieve_all(title)
                        if 'added' not in storage and 'notdl' not in storage:
                            wrong_hoster = '[DJ/Hoster fehlt] - ' + title
                            if 'wrong_hoster' not in storage:
                                print(wrong_hoster)
                                self.db.store(title, 'wrong_hoster')
                                notify([wrong_hoster], self.configfile)
                            else:
                                self.log_debug(wrong_hoster)
                    else:
                        return self.send_package(title, series_url, language_id)
    except Exception:
        # Best-effort: any parsing/HTTP failure means the site changed its API.
        # Narrowed from a bare "except:" so SystemExit/KeyboardInterrupt pass.
        print(u"DJ hat die Doku-API angepasst. Breche Download-Prüfung ab!")
def parse_download(self, series_url, title, language_id):
    """Validate an SJ release and hand it to send_package if a hoster matches.

    Checks release quality/duplicates, applies season-pack rules for the
    'MB_Staffeln' list, then queries the SJ releases API for *title* and
    verifies at least one allowed hoster exists.
    """
    # Skip if an equal or better source was already grabbed.
    if not check_valid_release(title, self.retail_only, self.hevc_retail, self.dbfile):
        self.log_debug(
            title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
        )
        return
    if self.filename == 'MB_Staffeln':
        # Season-pack releases (e.g. "S01-S03") are skipped unless enabled.
        if not self.config.get("seasonpacks"):
            staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
            if staffelpack:
                self.log_debug("%s - Release ignoriert (Staffelpaket)" % title)
                return
        # Season releases must come from the configured source tag.
        if not re.search(self.seasonssource, title.lower()):
            self.log_debug(title + " - Release hat falsche Quelle")
            return
    try:
        # Resolve the numeric media id from the series page, then query the API.
        series_info = get_url(series_url, self.configfile, self.dbfile)
        series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
        api_url = 'https://' + self.sj + '/api/media/' + series_id + '/releases'
        response = get_url(api_url, self.configfile, self.dbfile, self.scraper)
        seasons = json.loads(response)
        for season in seasons:
            season = seasons[season]
            for item in season['items']:
                if item['name'] == title:
                    valid = False
                    for hoster in item['hoster']:
                        if hoster:
                            if check_hoster(hoster, self.configfile):
                                valid = True
                    if not valid and not self.hoster_fallback:
                        # Remember the miss so the user is notified only once.
                        storage = self.db.retrieve_all(title)
                        if 'added' not in storage and 'notdl' not in storage:
                            wrong_hoster = '[SJ/Hoster fehlt] - ' + title
                            if 'wrong_hoster' not in storage:
                                print(wrong_hoster)
                                self.db.store(title, 'wrong_hoster')
                                notify([wrong_hoster], self.configfile)
                            else:
                                self.log_debug(wrong_hoster)
                    else:
                        return self.send_package(title, series_url, language_id)
    except:
        # Best-effort: any failure is treated as an API change on the site.
        print(
            u"SJ hat die Serien-API angepasst. Breche Download-Prüfung ab!"
        )
def parse_download(self, series_url, search_title, englisch, genre):
    """Find download links for *search_title* on a series page.

    Searches the page text for the title (with brackets/plus wildcarded),
    falling back to the season-pack name when the episode-specific name is
    not found, then collects links whose hoster matches self.hoster.
    """
    req_page = get_url(series_url, self.configfile, self.dbfile)
    soup = BeautifulSoup(req_page, 'lxml')
    # Brackets and '+' in the title would be regex metacharacters; wildcard them.
    escape_brackets = search_title.replace("(", ".*").replace(")", ".*").replace("+", ".*")
    title = soup.find(text=re.compile(escape_brackets))
    if not title:
        try:
            # Fallback: strip the episode part (EXX...) and search for the
            # season pack instead. pop() raises IndexError when no episode
            # tag is present, which lands in the except below.
            episode = re.findall(r'\.S\d{1,3}(E\d{1,3}.*)\.German', escape_brackets).pop()
            escape_brackets_pack = escape_brackets.replace(episode, "")
            title = soup.find(text=re.compile(escape_brackets_pack))
        except:
            title = False
            self.log_debug(search_title + " - Kein Link gefunden")
    if title:
        # Extract (href, hoster-name) pairs from the surrounding markup.
        url_hosters = re.findall(
            r'<a href="([^"\'>]*)".+?\| (.+?)<', str(title.parent.parent))
        links = []
        for url_hoster in url_hosters:
            if re.match(self.hoster, url_hoster[1]):
                links.append(url_hoster[0])
        if not links:
            self.log_debug(
                "%s - Release ignoriert (kein passender Link gefunden)" % search_title)
        else:
            return self.send_package(search_title, links, englisch, genre)
def periodical_task(self):
    """Crawl the configured DD RSS feeds and queue matching releases.

    Only entries published more than 30 minutes ago are considered (gives
    hosters time to finish uploading). Returns the JDownloader device.
    """
    feeds = self.config.get("feeds")
    if feeds:
        added_items = []
        feeds = feeds.replace(" ", "").split(',')
        for feed in feeds:
            feed = feedparser.parse(
                get_url(feed, self.configfile, self.dbfile, self.scraper))
            for post in feed.entries:
                key = post.title.replace(" ", ".")
                # Convert the feed's publish date to a unix timestamp.
                epoch = datetime(1970, 1, 1)
                current_epoch = int(time())
                published_format = "%Y-%m-%d %H:%M:%S+00:00"
                published_timestamp = str(parser.parse(post.published))
                published_epoch = int((datetime.strptime(
                    published_timestamp, published_format) - epoch).total_seconds())
                if (current_epoch - 1800) > published_epoch:
                    # Collect links from the summary; keep only allowed hosters,
                    # falling back to all links when hoster_fallback is set.
                    link_pool = post.summary
                    unicode_links = re.findall(r'(http.*)', link_pool)
                    links = []
                    for link in unicode_links:
                        if check_hoster(link, self.configfile):
                            links.append(str(link))
                    if self.config.get("hoster_fallback") and not links:
                        for link in unicode_links:
                            links.append(str(link))
                    storage = self.db.retrieve_all(key)
                    if not links:
                        # Notify about a missing hoster only once per release.
                        if 'added' not in storage and 'notdl' not in storage:
                            wrong_hoster = '[DD/Hoster fehlt] - ' + key
                            if 'wrong_hoster' not in storage:
                                self.log_info(wrong_hoster)
                                self.db.store(key, 'wrong_hoster')
                                notify([wrong_hoster], self.configfile)
                            else:
                                self.log_debug(wrong_hoster)
                    elif 'added' in storage:
                        self.log_debug(
                            "%s - Release ignoriert (bereits gefunden)" % key)
                    else:
                        self.device = myjd_download(
                            self.configfile, self.dbfile, self.device, key,
                            "RSScrawler", links, "")
                        if self.device:
                            self.db.store(key, 'added')
                            log_entry = '[Englisch] - ' + key + ' - [DD]'
                            self.log_info(log_entry)
                            notify([log_entry], self.configfile)
                            added_items.append(log_entry)
                else:
                    self.log_debug(
                        "%s - Releasezeitpunkt weniger als 30 Minuten in der Vergangenheit - wird ignoriert." % key)
    else:
        self.log_debug("Liste ist leer. Stoppe Suche für DD!")
    return self.device
def ha_search_to_soup(url, configfile, dbfile):
    """Fetch an HA search result page and resolve each hit's detail page.

    :param url: search URL on HA.
    :param configfile: path to the crawler config (passed to get_url).
    :param dbfile: path to the crawler database (passed to get_url).
    :return: feedparser-style dict built from (title, detail-soup) pairs.
    """
    content = []
    search = BeautifulSoup(get_url(url, configfile, dbfile), 'lxml')
    if search:
        results = search.find("div", {"id": "content"})
        if results:
            results = results.find_all("a")
            for r in results:
                try:
                    title = r["title"]
                    details = BeautifulSoup(
                        get_url(r["href"], configfile, dbfile), 'lxml')
                    content.append({
                        "key": title,
                        "value": details
                    })
                except Exception:
                    # Anchors without a "title" attribute (e.g. pagination
                    # links) are skipped. Previously the except body stored
                    # r["href"] into an unused variable, which could itself
                    # raise KeyError and abort the whole search.
                    continue
    return ha_search_to_feedparser_dict(content)
def ha_search_results(url, configfile, dbfile):
    """Return (title, href) tuples for all result anchors on an HA search page.

    :param url: search URL on HA.
    :param configfile: path to the crawler config (passed to get_url).
    :param dbfile: path to the crawler database (passed to get_url).
    :return: list of (title, href) tuples; empty on missing content.
    """
    content = []
    search = BeautifulSoup(get_url(url, configfile, dbfile), 'lxml')
    if search:
        results = search.find("div", {"id": "content"})
        if results:
            results = results.find_all("a")
            for r in results:
                try:
                    content.append((r["title"], r["href"]))
                except KeyError:
                    # First anchor without title/href marks the end of the
                    # result list (e.g. pagination). Narrowed from a bare
                    # "except:" so real errors are no longer swallowed.
                    break
    return content
def range_parse(self, series_url, search_title, englisch, fallback_title, genre):
    """Collect downloads for every release on a series page that matches
    the configured quality.

    Searches the page text for *search_title* (or *fallback_title* when
    nothing matches) and delegates each quality-matching hit to
    parse_download. Returns the list of parse_download results, or None
    when the title pattern is not a valid regular expression.
    """
    page = get_url(series_url, self.configfile, self.dbfile)
    soup = BeautifulSoup(page, 'lxml')
    try:
        candidates = soup.findAll(text=re.compile(search_title))
        collected = []
        if not candidates:
            # Nothing matched the exact title; retry with the fallback.
            candidates = soup.findAll(text=re.compile(fallback_title))
        hd_tags = ('.720p.', '.1080p.', '.2160p.')
        for candidate in candidates:
            # HD/UHD lists: title must carry the configured quality tag.
            if self.quality != '480p' and self.quality in candidate:
                collected.append(
                    self.parse_download(series_url, candidate, englisch, genre))
            # SD list: accept only titles without any HD/UHD tag.
            if self.quality == '480p' and not any(tag in candidate for tag in hd_tags):
                collected.append(
                    self.parse_download(series_url, candidate, englisch, genre))
        return collected
    except re.error as e:
        # The title came from user configuration and was not a valid regex.
        self.log_error('Konstantenfehler: %s' % e)
def periodical_task(self):
    """Crawl the configured DD RSS feeds and queue matching releases.

    Older variant of the DD task: filters links via a compiled hoster
    regex instead of check_hoster. Returns the JDownloader device.
    """
    feeds = self.config.get("feeds")
    if feeds:
        added_items = []
        feeds = feeds.replace(" ", "").split(',')
        # Compile once; reused for every link in every feed entry.
        hoster = re.compile(self.config.get("hoster"))
        for feed in feeds:
            feed = feedparser.parse(get_url(feed, self.configfile, self.dbfile))
            for post in feed.entries:
                key = post.title.replace(" ", ".")
                # Convert the feed's publish date to a unix timestamp.
                epoch = datetime(1970, 1, 1)
                current_epoch = int(time())
                published_format = "%Y-%m-%d %H:%M:%S+00:00"
                published_timestamp = str(parser.parse(post.published))
                published_epoch = int((datetime.strptime(
                    published_timestamp, published_format) - epoch).total_seconds())
                # Only entries older than 30 minutes are considered.
                if (current_epoch - 1800) > published_epoch:
                    link_pool = post.summary
                    unicode_links = re.findall(r'(http.*)', link_pool)
                    links = []
                    for link in unicode_links:
                        if re.match(hoster, link):
                            links.append(str(link))
                    if not links:
                        self.log_debug(
                            "%s - Release ignoriert (kein passender Link gefunden)" % key)
                    elif self.db.retrieve(key) == 'added':
                        self.log_debug(
                            "%s - Release ignoriert (bereits gefunden)" % key)
                    else:
                        # NOTE(review): this call passes no dbfile argument,
                        # unlike the other periodical_task variant in this
                        # file — confirm which myjd_download signature this
                        # code base actually uses.
                        self.device = myjd_download(self.configfile, self.device,
                                                    key, "RSScrawler", links, "")
                        if self.device:
                            self.db.store(
                                key,
                                'added'
                            )
                            log_entry = '[DD] - Englisch - ' + key
                            self.log_info(log_entry)
                            notify([log_entry], self.configfile)
                            added_items.append(log_entry)
                else:
                    self.log_debug(
                        "%s - Releasezeitpunkt weniger als 30 Minuten in der Vergangenheit - wird ignoriert." % key)
    else:
        self.log_debug("Liste ist leer. Stoppe Suche für DD!")
    return self.device
def get_imdb_id(key, content, filename, configfile, dbfile, scraper, log_debug):
    """Resolve an IMDb title id (ttXXXXXXX) for a release.

    First looks for an IMDb link (with a rating next to it) inside
    *content*; when none is found, derives a search term from the release
    name *key* and queries the IMDb "find" page.

    :param key: release name, dot-separated.
    :param content: HTML snippet possibly containing an IMDb link.
    :param filename: list name; 'MB_Staffeln' takes the first search hit.
    :param configfile/dbfile/scraper: passed through to get_url.
    :param log_debug: callable used for debug logging.
    :return: the IMDb id string, or False when nothing could be resolved.
    """
    try:
        imdb_id = re.findall(
            r'.*?(?:href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?).*?(\d(?:\.|\,)\d)(?:.|.*?)<\/a>.*?',
            content)
    except Exception:
        imdb_id = False
    if imdb_id:
        # Direct hit: first tuple element is the captured tt-id.
        imdb_id = imdb_id[0][0]
    else:
        try:
            # Build a search term from the part of the release name before
            # the year / language / resolution / season tag.
            search_title = re.findall(
                r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)",
                key)[0].replace(".", "+")
            search_url = "http://www.imdb.com/find?q=" + search_title
            search_page = get_url(search_url, configfile, dbfile, scraper)
            search_results = re.findall(
                r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                search_page)
        except Exception:
            return False
        if not search_results:
            # FIX: previously fell through to search_results[0][0] and
            # crashed with IndexError when IMDb returned no results.
            return False
        total_results = len(search_results)
        if filename == 'MB_Staffeln':
            # Season lists accept the first hit (series pages are wanted).
            imdb_id = search_results[0][0]
        else:
            # Movie lists: skip over "TV Series" hits and take the first
            # non-series result.
            no_series = False
            while total_results > 0:
                attempt = 0
                for result in search_results:
                    if result[3] == "TV Series":
                        no_series = False
                        total_results -= 1
                        attempt += 1
                    else:
                        no_series = True
                        imdb_id = search_results[attempt][0]
                        total_results = 0
                        break
            if no_series is False:
                log_debug(
                    "%s - Keine passende Film-IMDB-Seite gefunden" % key)
    if not imdb_id:
        return False
    return imdb_id
def get_original_language(key, imdb_details, imdb_url, configfile, dbfile, scraper, log_debug):
    """Determine a title's original language from its IMDb detail page.

    Uses *imdb_details* (pre-fetched HTML) if provided, otherwise fetches
    *imdb_url*. Parses the "Language:" section of the detail page.

    :return: the language name, or False when it is German or cannot be
             determined (German originals need no dual-language release).
    """
    original_language = False
    # Fetch the detail page only when it was not supplied by the caller.
    if not (imdb_details and len(imdb_details) > 0):
        if imdb_url and len(imdb_url) > 0:
            imdb_details = get_url(imdb_url, configfile, dbfile, scraper)
    if imdb_details and len(imdb_details) > 0:
        soup = BeautifulSoup(imdb_details, 'lxml')
        # FIX: soup.find() returns None when the page has no "Language:"
        # section; previously this crashed with AttributeError on .parent.
        language_heading = soup.find('h4', text=re.compile(r'Language:'))
        if language_heading:
            language_link = language_heading.parent.find("a")
            if language_link:
                original_language = language_link.text
    if not original_language:
        log_debug("%s - Originalsprache nicht ermittelbar" % key)
    if original_language and original_language == "German":
        log_debug(
            "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!" % key)
        return False
    else:
        return original_language
def periodical_task(self):
    """Crawl the SF update feed (last 7 days) and queue matching releases.

    Walks one day at a time (self.day), using conditional requests via
    stored headers when the settings hash is unchanged, then matches each
    feed entry against the list pattern, quality, reject list and surround
    settings before delegating to parse_download. Returns the JDownloader
    device (or False when a day's feed is empty).
    """
    if not self.sf:
        return self.device
    # Bail out early when the relevant feature is disabled for this list.
    if self.filename == 'SJ_Serien_Regex':
        if not self.config.get('regex'):
            self.log_debug("Suche für SF-Regex deaktiviert!")
            return self.device
    elif self.filename == 'SJ_Staffeln_Regex':
        if not self.config.get('regex'):
            self.log_debug("Suche für SF-Regex deaktiviert!")
            return self.device
    elif self.filename == 'MB_Staffeln':
        if not self.config.get('crawlseasons'):
            self.log_debug("Suche für SF-Staffeln deaktiviert!")
            return self.device
    if self.empty_list:
        self.log_debug("Liste ist leer. Stoppe Suche für Serien!" + self.listtype)
        return self.device
    # Build the reject regex from the configured comma-separated list;
    # "^unmatchable$" never matches anything (i.e. reject nothing).
    try:
        reject = self.config.get("rejectlist").replace(
            ",", "|").lower() if len(
            self.config.get("rejectlist")) > 0 else r"^unmatchable$"
    except TypeError:
        reject = r"^unmatchable$"
    set_sf = self.settings_hash(False)
    header = False
    response = False
    while self.day < 8:
        if self.last_set_sf == set_sf:
            # Settings unchanged since last run: use a conditional request
            # (If-Modified-Since headers) so 304 can abort the crawl early.
            try:
                delta = (
                    datetime.datetime.now() -
                    datetime.timedelta(days=self.day)).strftime("%Y-%m-%d")
                response = get_url_headers(
                    'https://' + self.sf + '/updates/' + delta,
                    self.configfile, self.dbfile, self.headers, self.scraper)
                self.scraper = response[1]
                response = response[0]
                if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex":
                    feed = sf_releases_to_feedparser_dict(
                        response.text, "seasons", 'https://' + self.sf, True)
                else:
                    feed = sf_releases_to_feedparser_dict(
                        response.text, "episodes", 'https://' + self.sf, True)
            except:
                print(u"SF hat die Feed-API angepasst. Breche Suche ab!")
                feed = False
            if response:
                if response.status_code == 304:
                    self.log_debug(
                        "SF-Feed seit letztem Aufruf nicht aktualisiert - breche Suche ab!"
                    )
                    return self.device
                header = True
        else:
            # Settings changed: fetch unconditionally.
            try:
                delta = (
                    datetime.datetime.now() -
                    datetime.timedelta(days=self.day)).strftime("%Y-%m-%d")
                response = get_url(
                    'https://' + self.sf + '/updates/' + delta,
                    self.configfile, self.dbfile, self.scraper)
                if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex":
                    feed = sf_releases_to_feedparser_dict(
                        response, "seasons", 'https://' + self.sf, True)
                else:
                    feed = sf_releases_to_feedparser_dict(
                        response, "episodes", 'https://' + self.sf, True)
            except:
                print(u"SF hat die Feed-API angepasst. Breche Suche ab!")
                feed = False
        self.day += 1
        if feed and feed.entries:
            # Hash of the newest entry + current settings; stored at the end
            # so the next run can detect "already crawled up to here".
            first_post_sf = feed.entries[0]
            concat_sf = first_post_sf.title + first_post_sf.published + str(
                self.settings) + str(self.pattern)
            sha_sf = hashlib.sha256(concat_sf.encode(
                'ascii', 'ignore')).hexdigest()
        else:
            self.log_debug("Feed ist leer - breche Suche ab!")
            # NOTE(review): returns False here while all other exits return
            # self.device — confirm callers tolerate both.
            return False
        for post in feed.entries:
            concat = post.title + post.published + \
                str(self.settings) + str(self.pattern)
            sha = hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest()
            if sha == self.last_sha_sf:
                self.log_debug("Feed ab hier bereits gecrawlt (" + post.title + ") - breche Suche ab!")
                break
            series_url = post.series_url
            # NOTE(review): this replace looks like a no-op (or a mojibake'd
            # dash normalization) — confirm against the original source.
            title = post.title.replace("-", "-")
            if self.filename == 'SJ_Serien_Regex':
                if self.config.get("regex"):
                    # language_id: 1 = German, 2 = English (if enabled), 0 = skip.
                    if '.german.' in title.lower():
                        language_id = 1
                    elif self.rsscrawler.get('english'):
                        language_id = 2
                    else:
                        language_id = 0
                    if language_id:
                        m = re.search(self.pattern, title.lower())
                        if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                            # Retry with the 480p part of the pattern wildcarded.
                            m = re.search(
                                self.pattern.replace("480p", "."), title.lower())
                            self.quality = "480p"
                        if m:
                            if "720p" in title.lower():
                                self.quality = "720p"
                            if "1080p" in title.lower():
                                self.quality = "1080p"
                            if "2160p" in title.lower():
                                self.quality = "2160p"
                            # Regex lists download even rejected titles, but log it.
                            m = re.search(reject, title.lower())
                            if m:
                                self.log_debug(
                                    title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
                                )
                            title = re.sub(r'\[.*\] ', '', post.title)
                            self.parse_download(series_url, title, language_id)
                    else:
                        self.log_debug(
                            "%s - Englische Releases deaktiviert" % title)
                else:
                    continue
            elif self.filename == 'SJ_Staffeln_Regex':
                if self.config.get("regex"):
                    if '.german.' in title.lower():
                        language_id = 1
                    elif self.rsscrawler.get('english'):
                        language_id = 2
                    else:
                        language_id = 0
                    if language_id:
                        m = re.search(self.pattern, title.lower())
                        if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                            m = re.search(
                                self.pattern.replace("480p", "."), title.lower())
                            self.quality = "480p"
                        if m:
                            if "720p" in title.lower():
                                self.quality = "720p"
                            if "1080p" in title.lower():
                                self.quality = "1080p"
                            if "2160p" in title.lower():
                                self.quality = "2160p"
                            m = re.search(reject, title.lower())
                            if m:
                                self.log_debug(
                                    title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
                                )
                            title = re.sub(r'\[.*\] ', '', post.title)
                            self.parse_download(series_url, title, language_id)
                    else:
                        self.log_debug(
                            "%s - Englische Releases deaktiviert" % title)
                else:
                    continue
            else:
                if self.config.get("quality") != '480p':
                    # HD list: pattern, quality, reject list and surround
                    # sound must all pass before downloading.
                    m = re.search(self.pattern, title.lower())
                    if m:
                        if '.german.' in title.lower():
                            language_id = 1
                        elif self.rsscrawler.get('english'):
                            language_id = 2
                        else:
                            language_id = 0
                        if language_id:
                            mm = re.search(self.quality, title.lower())
                            if mm:
                                mmm = re.search(reject, title.lower())
                                if mmm:
                                    self.log_debug(
                                        title + " - Release ignoriert (basierend auf rejectlist-Einstellung)"
                                    )
                                    continue
                                if self.rsscrawler.get("surround"):
                                    if not re.match(
                                            r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                            title):
                                        self.log_debug(
                                            title + " - Release ignoriert (kein Mehrkanalton)"
                                        )
                                        continue
                                try:
                                    storage = self.db.retrieve_all(title)
                                except Exception as e:
                                    self.log_debug(
                                        "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title))
                                    return self.device
                                if 'added' in storage:
                                    self.log_debug(
                                        title + " - Release ignoriert (bereits gefunden)"
                                    )
                                    continue
                                self.parse_download(
                                    series_url, title, language_id)
                        else:
                            self.log_debug(
                                "%s - Englische Releases deaktiviert" % title)
                else:
                    # SD (480p) list: titles carrying any HD tag are skipped.
                    m = re.search(self.pattern, title.lower())
                    if m:
                        if '.german.' in title.lower():
                            language_id = 1
                        elif self.rsscrawler.get('english'):
                            language_id = 2
                        else:
                            language_id = 0
                        if language_id:
                            if "720p" in title.lower(
                            ) or "1080p" in title.lower(
                            ) or "2160p" in title.lower():
                                continue
                            mm = re.search(reject, title.lower())
                            if mm:
                                self.log_debug(
                                    title + " Release ignoriert (basierend auf rejectlist-Einstellung)"
                                )
                                continue
                            if self.rsscrawler.get("surround"):
                                if not re.match(
                                        r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                        title):
                                    self.log_debug(
                                        title + " - Release ignoriert (kein Mehrkanalton)"
                                    )
                                    continue
                            title = re.sub(r'\[.*\] ', '', post.title)
                            try:
                                storage = self.db.retrieve_all(title)
                            except Exception as e:
                                self.log_debug(
                                    "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title))
                                return self.device
                            if 'added' in storage:
                                self.log_debug(
                                    title + " - Release ignoriert (bereits gefunden)"
                                )
                                continue
                            self.parse_download(
                                series_url, title, language_id)
                        else:
                            self.log_debug(
                                "%s - Englische Releases deaktiviert" % title)
    # Persist settings hash / feed hash / response headers for the next run,
    # but only if the settings did not change while we were crawling.
    if set_sf:
        new_set_sf = self.settings_hash(True)
        if set_sf == new_set_sf:
            self.cdc.delete("SFSet-" + self.filename)
            self.cdc.store("SFSet-" + self.filename, set_sf)
            self.cdc.delete("SF-" + self.filename)
            self.cdc.store("SF-" + self.filename, sha_sf)
            if header and response:
                self.cdc.delete("SFHeaders-" + self.filename)
                self.cdc.store("SFHeaders-" + self.filename, response.headers['date'])
    return self.device
def parse_download(self, series_url, title, language_id):
    """Resolve an SF release to a concrete download link and queue it.

    Validates quality/duplicates and season-pack rules, queries the SF
    release API for the series, normalizes the title into a regex that
    matches the release row in the returned HTML, and checks that the row
    offers at least one allowed hoster before calling send_package.
    """
    # Skip if an equal or better source was already grabbed.
    if not check_valid_release(title, self.retail_only, self.hevc_retail, self.dbfile):
        self.log_debug(
            title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
        )
        return
    if self.filename == 'MB_Staffeln':
        # Season-pack releases are skipped unless explicitly enabled.
        if not self.config.get("seasonpacks"):
            staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
            if staffelpack:
                self.log_debug("%s - Release ignoriert (Staffelpaket)" % title)
                return
        if not re.search(self.seasonssource, title.lower()):
            self.log_debug(title + " - Release hat falsche Quelle")
            return
    try:
        if language_id == 2:
            lang = 'EN'
        else:
            lang = 'DE'
        # Millisecond timestamp used as cache-buster query parameter.
        epoch = str(datetime.datetime.now().timestamp()).replace('.', '')[:-3]
        api_url = series_url + '?lang=' + lang + '&_=' + epoch
        response = get_url(api_url, self.configfile, self.dbfile, self.scraper)
        info = json.loads(response)
        is_episode = re.findall(r'.*\.(s\d{1,3}e\d{1,3})\..*', title, re.IGNORECASE)
        if is_episode:
            # Derive season/episode numbers and build a wildcarded regex
            # ("season_title") that matches the season row in the API HTML
            # regardless of codec / web-source tag variations.
            episode_string = re.findall(r'.*S\d{1,3}(E\d{1,3}).*', is_episode[0])[0].lower()
            season_string = re.findall(r'.*(S\d{1,3})E\d{1,3}.*', is_episode[0])[0].lower()
            season_title = rreplace(
                title.lower().replace(episode_string, ''), "-", ".*", 1).lower()
            season_title = season_title.replace(".untouched", ".*").replace(
                ".dd+51", ".dd.51")
            episode = str(int(episode_string.replace("e", "")))
            season = str(int(season_string.replace("s", "")))
            episode_name = re.findall(r'.*\.s\d{1,3}(\..*).german', season_title,
                                      re.IGNORECASE)
            if episode_name:
                season_title = season_title.replace(episode_name[0], '')
            codec_tags = [".h264", ".x264"]
            for tag in codec_tags:
                season_title = season_title.replace(tag, ".*264")
            web_tags = [".web-rip", ".webrip", ".webdl", ".web-dl"]
            for tag in web_tags:
                season_title = season_title.replace(tag, ".web.*")
        else:
            # Season pack: no single episode to extract.
            season = False
            episode = False
            season_title = title
            multiple_episodes = re.findall(r'(e\d{1,3}-e*\d{1,3}\.)', season_title,
                                           re.IGNORECASE)
            if multiple_episodes:
                season_title = season_title.replace(multiple_episodes[0], '.*')
        content = BeautifulSoup(info['html'], 'lxml')
        releases = content.find(
            "small", text=re.compile(season_title, re.IGNORECASE)).parent.parent.parent
        links = releases.findAll("div", {'class': 'row'})[1].findAll('a')
        valid = False
        for link in links:
            download_link = link['href']
            if check_hoster(link.text.replace('\n', ''), self.configfile):
                valid = True
                break
        # NOTE(review): if "links" is empty, download_link is unbound in the
        # else-branch below — the resulting NameError is swallowed by the
        # outer except. Confirm whether that path can occur in practice.
        if not valid and not self.hoster_fallback:
            storage = self.db.retrieve_all(title)
            if 'added' not in storage and 'notdl' not in storage:
                wrong_hoster = '[SF/Hoster fehlt] - ' + title
                if 'wrong_hoster' not in storage:
                    self.log_info(wrong_hoster)
                    self.db.store(title, 'wrong_hoster')
                    notify([wrong_hoster], self.configfile)
                else:
                    self.log_debug(wrong_hoster)
        else:
            return self.send_package(title, download_link, language_id, season, episode)
    except:
        # Best-effort: any failure is treated as an API change on the site.
        print(
            u"SF hat die Serien-API angepasst. Breche Download-Prüfung ab!"
        )
def download_sj(sj_id, special, device, configfile, dbfile):
    """Download the best-rated release of a series from SJ by category id.

    Scrapes the series page for season links, stores the series title in
    the watch lists, then for each season picks the highest-rated release
    matching the configured quality (with lower-quality fallbacks) and
    queues it via JDownloader.

    :param sj_id: SJ category id of the series.
    :param special: optional "SXX" / "SXXEYY" filter from the search query.
    :param device: JDownloader device handle.
    :return: True if anything was queued, False otherwise.
    """
    url = get_url(decode_base64("aHR0cDovL3Nlcmllbmp1bmtpZXMub3JnLz9jYXQ9") + str(sj_id),
                  configfile, dbfile)
    season_pool = re.findall(r'<h2>Staffeln:(.*?)<h2>Feeds', url).pop()
    # (href, "Staffel"/"Season", number or number-range) per season link.
    season_links = re.findall(
        r'href="(.{1,125})">.{1,90}(Staffel|Season).*?(\d{1,2}-?\d{1,2}|\d{1,2})',
        season_pool)
    title = html_to_str(re.findall(r'>(.{1,85}?) &#', season_pool).pop())
    rsscrawler = RssConfig('RSScrawler', configfile)
    # Add the series to both watch lists if not already present.
    listen = ["SJ_Serien", "MB_Staffeln"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if not list_title in cont:
            ListDb(dbfile, liste).store(list_title)
    # German "Staffel" links take priority; English "Season" links are only
    # added (below) when english downloads are enabled and the season number
    # is not already covered by a German link.
    staffeln = []
    staffel_nr = []
    seasons = []
    for s in season_links:
        if "staffel" in s[1].lower():
            staffeln.append([s[2], s[0]])
            if "-" in s[2]:
                # Expand ranges like "1-3" into individual season numbers.
                split = s[2].split("-")
                split = range(int(split[0]), int(split[1]) + 1)
                for nr in split:
                    staffel_nr.append(str(nr))
            else:
                staffel_nr.append(s[2])
        else:
            seasons.append([s[2], s[0]])
    if rsscrawler.get("english"):
        for se in seasons:
            if not se[0] in staffel_nr:
                staffeln.append(se)
    # Expand remaining ranges into (season number, link) pairs.
    to_dl = []
    for s in staffeln:
        if "-" in s[0]:
            split = s[0].split("-")
            split = range(int(split[0]), int(split[1]) + 1)
            for i in split:
                to_dl.append([str(i), s[1]])
        else:
            to_dl.append([s[0], s[1]])
    # Keep the first link per zero-padded season tag ("S01", "S12", ...).
    found_seasons = {}
    for dl in to_dl:
        # NOTE(review): "is 1" compares identity and only works due to
        # CPython's small-int cache; should be "== 1" — TODO confirm/fix.
        if len(dl[0]) is 1:
            sxx = "S0" + str(dl[0])
        else:
            sxx = "S" + str(dl[0])
        link = dl[1]
        if sxx not in found_seasons:
            found_seasons[sxx] = link
    something_found = False
    for sxx, link in found_seasons.items():
        config = RssConfig('SJ', configfile)
        quality = config.get('quality')
        url = get_url(link, configfile, dbfile)
        # Four candidate pools, in preference order: season packs and single
        # episodes at the configured quality, then the same without quality
        # filter ("lq") as fallback. Each match captures (title, link1,
        # hoster1, link2, hoster2).
        pakete = re.findall(re.compile(
            r'<p><strong>(.*?\.' + sxx + r'\..*?' + quality +
            r'.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'), url)
        folgen = re.findall(re.compile(
            r'<p><strong>(.*?\.' + sxx + r'E\d{1,3}.*?' + quality +
            r'.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'), url)
        lq_pakete = re.findall(re.compile(
            r'<p><strong>(.*?\.' + sxx +
            r'\..*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'), url)
        lq_folgen = re.findall(re.compile(
            r'<p><strong>(.*?\.' + sxx +
            r'E\d{1,3}.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'), url)
        if not pakete and not folgen and not lq_pakete and not lq_folgen:
            # Retry without zero padding ("S1" instead of "S01").
            sxx = sxx.replace("S0", "S")
            pakete = re.findall(re.compile(
                r'<p><strong>(.*?\.' + sxx + r'\..*?' + quality +
                r'.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'), url)
            folgen = re.findall(re.compile(
                r'<p><strong>(.*?\.' + sxx + r'E\d{1,3}.*?' + quality +
                r'.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'), url)
            lq_pakete = re.findall(re.compile(
                r'<p><strong>(.*?\.' + sxx +
                r'\..*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'), url)
            lq_folgen = re.findall(re.compile(
                r'<p><strong>(.*?\.' + sxx +
                r'E\d{1,3}.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'), url)
        if special and "e" in special.lower():
            # A specific episode was requested: ignore season packs.
            pakete = []
            lq_pakete = []
        best_matching_links = []
        if pakete:
            links = []
            for x in pakete:
                title = x[0]
                score = rate(title, configfile)
                hoster = [[x[2], x[1]], [x[4], x[3]]]
                if special:
                    if special.lower() in title.lower():
                        links.append([score, title, hoster])
                else:
                    links.append([score, title, hoster])
            if links:
                highest_score = sorted(links, reverse=True)[0][0]
                for l in links:
                    if l[0] == highest_score:
                        for hoster in l[2]:
                            best_matching_links.append(
                                [l[1], hoster[0], hoster[1]])
        elif folgen:
            links = []
            for x in folgen:
                title = x[0]
                score = rate(title, configfile)
                hoster = [[x[2], x[1]], [x[4], x[3]]]
                if special:
                    if special.lower() in title.lower():
                        links.append([score, title, hoster])
                else:
                    links.append([score, title, hoster])
            if links:
                highest_score = sorted(links, reverse=True)[0][0]
                for l in links:
                    if l[0] == highest_score:
                        for hoster in l[2]:
                            best_matching_links.append(
                                [l[1], hoster[0], hoster[1]])
        elif lq_pakete:
            links = []
            for x in lq_pakete:
                title = x[0]
                score = rate(title, configfile)
                hoster = [[x[2], x[1]], [x[4], x[3]]]
                if special:
                    if special.lower() in title.lower():
                        links.append([score, title, hoster])
                else:
                    links.append([score, title, hoster])
            if links:
                highest_score = sorted(links, reverse=True)[0][0]
                for l in links:
                    if l[0] == highest_score:
                        for hoster in l[2]:
                            best_matching_links.append(
                                [l[1], hoster[0], hoster[1]])
        elif lq_folgen:
            links = []
            for x in lq_folgen:
                title = x[0]
                score = rate(title, configfile)
                hoster = [[x[2], x[1]], [x[4], x[3]]]
                if special:
                    if special.lower() in title.lower():
                        links.append([score, title, hoster])
                else:
                    links.append([score, title, hoster])
            if links:
                highest_score = sorted(links, reverse=True)[0][0]
                for l in links:
                    if l[0] == highest_score:
                        for hoster in l[2]:
                            best_matching_links.append(
                                [l[1], hoster[0], hoster[1]])
        # Queue every best-scored link whose hoster matches the config.
        notify_array = []
        for best_link in best_matching_links:
            dl_title = best_link[0].replace(
                "Staffelpack ", "").replace("Staffelpack.", "")
            dl_hoster = best_link[1]
            dl_link = best_link[2]
            config = RssConfig('SJ', configfile)
            hoster = re.compile(config.get('hoster'))
            db = RssDb(dbfile, 'rsscrawler')
            if re.match(hoster, dl_hoster.lower()):
                if myjd_download(configfile, device, dl_title, "RSScrawler",
                                 dl_link, decode_base64("c2VyaWVuanVua2llcy5vcmc=")):
                    db.store(dl_title, 'added')
                    log_entry = '[Suche/Serie] - ' + dl_title
                    logging.info(log_entry)
                    notify_array.append(log_entry)
                else:
                    # JDownloader rejected the package: abort the whole run.
                    return False
        if len(best_matching_links) > 0:
            something_found = True
            notify(notify_array, configfile)
    if not something_found:
        return False
    return True
def download_bl(payload, device, configfile, dbfile):
    """Download a movie/season release from a BL site payload.

    *payload* is base64("link;password"). Scrapes the release page for the
    release name (key) and hoster links, optionally resolves the IMDb id to
    enforce dual-language releases, then queues the package in JDownloader.

    :return: True when queued, False when no links were found/queued.
    """
    payload = decode_base64(payload).split(";")
    link = payload[0]
    password = payload[1]
    url = get_url(link, configfile, dbfile)
    config = RssConfig('MB', configfile)
    hoster = re.compile(config.get('hoster'))
    db = RssDb(dbfile, 'rsscrawler')
    soup = BeautifulSoup(url, 'lxml')
    download = soup.find("div", {"id": "content"})
    try:
        # Primary page layout: release name in a "Permanent Link" attribute.
        key = re.findall(r'Permanent Link: (.*?)"', str(download)).pop()
        url_hosters = re.findall(
            r'href="([^"\'>]*)".+?(.+?)<', str(download))
    except:
        # Fallback page layout: headline box plus a download section.
        items_head = soup.find("div", {"class": "topbox"})
        key = items_head.contents[1].a["title"]
        items_download = soup.find("div", {"class": "download"})
        url_hosters = []
        download = items_download.find_all(
            "span", {"style": "display:inline;"}, text=True)
        for link in download:
            link = link.a
            text = link.text.strip()
            if text:
                url_hosters.append([str(link["href"]), str(text)])
    # Keep one link per hoster name, skipping the site's own/shortener links.
    links = {}
    for url_hoster in reversed(url_hosters):
        if not decode_base64("bW92aWUtYmxvZy50by8=") in url_hoster[0] and "https://goo.gl/" not in url_hoster[0]:
            link_hoster = url_hoster[1].lower().replace(
                'target="_blank">', '').replace(" ", "-")
            if re.match(hoster, link_hoster):
                links[link_hoster] = url_hoster[0]
    download_links = links.values() if six.PY2 else list(links.values())
    englisch = False
    if "*englisch*" in key.lower():
        key = key.replace('*ENGLISCH*', '').replace("*Englisch*", "")
        englisch = True
    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())
    # Dual-language enforcement: try to resolve the IMDb id and, for
    # non-German originals, trigger a search for a dual-language release.
    if config.get('enforcedl') and '.dl.' not in key.lower():
        fail = False
        get_imdb_url = url
        key_regex = r'<title>' + \
            re.escape(key) + \
            r'.*?<\/title>\n.*?<link>(?:(?:.*?\n){1,25}).*?[mM][kK][vV].*?(?:|href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?)[iI][mM][dD][bB].*?(?!\d(?:\.|\,)\d)(?:.|.*?)<\/a>'
        imdb_id = re.findall(key_regex, get_imdb_url)
        if len(imdb_id) > 0:
            if not imdb_id[0]:
                fail = True
            else:
                imdb_id = imdb_id[0]
        else:
            fail = True
        if fail:
            # No IMDb link on the page: search IMDb by the release name.
            search_title = re.findall(
                r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)",
                key)[0].replace(".", "+")
            search_url = "http://www.imdb.com/find?q=" + search_title
            search_page = get_url(search_url, configfile, dbfile)
            search_results = re.findall(
                r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                search_page)
            total_results = len(search_results)
            if staffel:
                imdb_id = search_results[0][0]
            else:
                # Skip "TV Series" hits; take the first movie result.
                no_series = False
                while total_results > 0:
                    attempt = 0
                    for result in search_results:
                        if result[3] == "TV Series":
                            no_series = False
                            total_results -= 1
                            attempt += 1
                        else:
                            no_series = True
                            imdb_id = search_results[attempt][0]
                            total_results = 0
                            break
                if no_series is False:
                    logging.debug(
                        "%s - Keine passende Film-IMDB-Seite gefunden" % key)
        if staffel:
            filename = 'MB_Staffeln'
        else:
            filename = 'MB_Filme'
        bl = BL(configfile, dbfile, device, logging, filename=filename)
        if not imdb_id:
            if not bl.dual_download(key, password):
                logging.debug(
                    "%s - Kein zweisprachiges Release gefunden." % key)
        else:
            if isinstance(imdb_id, list):
                imdb_id = imdb_id.pop()
            imdb_url = "http://www.imdb.com/title/" + imdb_id
            details = get_url(imdb_url, configfile, dbfile)
            if not details:
                logging.debug("%s - Originalsprache nicht ermittelbar" % key)
            original_language = re.findall(
                r"Language:<\/h4>\n.*?\n.*?url'>(.*?)<\/a>", details)
            if original_language:
                original_language = original_language[0]
            if original_language == "German":
                logging.debug(
                    "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!" % key)
            else:
                if not bl.dual_download(key, password) and not englisch:
                    logging.debug(
                        "%s - Kein zweisprachiges Release gefunden! Breche ab." % key)
    if download_links:
        if staffel:
            if myjd_download(configfile, device, key, "RSScrawler",
                             download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get(
                        'enforcedl') and '.dl.' not in key.lower() else 'added'
                )
                log_entry = '[Staffel] - ' + key.replace(".COMPLETE", "").replace(".Complete", "")
                logging.info(log_entry)
                notify([log_entry], configfile)
                return True
        elif '.3d.' in key.lower():
            retail = False
            # NOTE(review): "'.COMPLETE.' not in key.lower()" is always True
            # (the needle is upper-case, the haystack lower-cased) — confirm
            # whether '.complete.' was intended. Same below.
            if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                if config.get('enforcedl'):
                    if cutoff(key, '2', dbfile):
                        retail = True
            if myjd_download(configfile, device, key, "RSScrawler/3Dcrawler",
                             download_links, password):
                db.store(
                    key,
                    'notdl' if config.get(
                        'enforcedl') and '.dl.' not in key.lower() else 'added'
                )
                log_entry = '[Suche/Film] - ' + (
                    'Retail/' if retail else "") + '3D - ' + key
                logging.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                if config.get('enforcedl'):
                    if cutoff(key, '1', dbfile):
                        retail = True
                else:
                    if cutoff(key, '0', dbfile):
                        retail = True
            if myjd_download(configfile, device, key, "RSScrawler",
                             download_links, password):
                db.store(
                    key,
                    'notdl' if config.get(
                        'enforcedl') and '.dl.' not in key.lower() else 'added'
                )
                log_entry = '[Suche/Film] - ' + ('Englisch - ' if englisch and not retail else "") + (
                    'Englisch/Retail - ' if englisch and retail else "") + (
                    'Retail - ' if not englisch and retail else "") + key
                logging.info(log_entry)
                notify([log_entry], configfile)
                return True
    else:
        return False
def get(title, configfile, dbfile):
    """Run a manual web search for *title* on the movie blogs and SJ.

    A trailing ``;sXX`` or ``;sXXeXX`` on *title* restricts the search to a
    specific season/episode. Returns a tuple ``(mb_final, sj_final)`` of
    result dicts keyed ``"result0"``, ``"result1"``, ... where movie-blog
    entries carry a base64 ``link`` payload (``url;password``) and SJ
    entries carry the SJ media ``id``.

    Fix: in the 3D branch the MB results were encoded with the password
    still left over from the preceding HD-Area section; the password is now
    reset to the movie-blog value before those results are appended.
    """
    specific_season = re.match(r'^(.*);(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*);(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(";")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(";")
        title = split[0]
        special = split[1].upper()
    else:
        special = None
    query = title.replace(".", " ").replace(" ", "+")
    if special:
        bl_query = query + "+" + special
    else:
        bl_query = query
    unrated = []
    config = RssConfig('MB', configfile)
    quality = config.get('quality')
    # search_quality already carries its own leading "+" when set.
    if "480p" not in quality:
        search_quality = "+" + quality
    else:
        search_quality = ""
    # NOTE(review): the extra "+" before search_quality duplicates the one
    # inside it; the newer get() omits it. Kept as-is (harmless in URLs).
    mb_search = get_url(
        decode_base64('aHR0cDovL21vdmllLWJsb2cudG8=') + '/search/' +
        bl_query + "+" + search_quality + '/feed/rss2/', configfile, dbfile)
    mb_results = re.findall(
        r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', mb_search)
    password = decode_base64("bW92aWUtYmxvZy5vcmc=")
    for result in mb_results:
        if "480p" in quality:
            # In 480p mode skip every HD/complete-disc release.
            if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() or "2160p" in \
                    result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                continue
        # Releases tagged -MB/.MB are the site's own repacks - skip them.
        if not result[0].endswith("-MB") and not result[0].endswith(".MB"):
            unrated.append(
                [rate(result[0], configfile),
                 encode_base64(result[1] + ";" + password),
                 result[0] + " (MB)"])
    hw_search = get_url(
        decode_base64('aHR0cDovL2hkLXdvcmxkLm9yZw==') + '/search/' +
        bl_query + "+" + search_quality + '/feed/rss2/', configfile, dbfile)
    hw_results = re.findall(
        r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', hw_search)
    password = decode_base64("aGQtd29ybGQub3Jn")
    for result in hw_results:
        if "480p" in quality:
            if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() or "2160p" in \
                    result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                continue
        unrated.append(
            [rate(result[0], configfile),
             encode_base64(result[1] + ";" + password),
             result[0] + " (HW)"])
    ha_search = decode_base64(
        'aHR0cDovL3d3dy5oZC1hcmVhLm9yZy8/cz1zZWFyY2gmcT0=') + bl_query + "&c=" + search_quality
    ha_results = ha_search_results(ha_search, configfile, dbfile)
    password = decode_base64("aGQtYXJlYS5vcmc=")
    for result in ha_results:
        if "480p" in quality:
            if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() or "2160p" in \
                    result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                continue
        unrated.append(
            [rate(result[0], configfile),
             encode_base64(result[1] + ";" + password),
             result[0] + " (HA)"])
    if config.get("crawl3d"):
        mb_search = get_url(
            decode_base64('aHR0cDovL21vdmllLWJsb2cudG8=') + '/search/' +
            bl_query + "+3D+1080p" + '/feed/rss2/', configfile, dbfile)
        mb_results = re.findall(
            r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', mb_search)
        # BUGFIX: previously the HD-Area password from the section above was
        # still in effect here, so 3D-MB payloads got the wrong password.
        password = decode_base64("bW92aWUtYmxvZy5vcmc=")
        for result in mb_results:
            if not result[1].endswith("-MB") and not result[1].endswith(".MB"):
                unrated.append(
                    [rate(result[0], configfile),
                     encode_base64(result[1] + ";" + password),
                     result[0] + " (3D-MB)"])
        hw_search = get_url(
            decode_base64('aHR0cDovL2hkLXdvcmxkLm9yZw==') + '/search/' +
            bl_query + "+3D+1080p" + '/feed/rss2/', configfile, dbfile)
        hw_results = re.findall(
            r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', hw_search)
        password = decode_base64("aGQtd29ybGQub3Jn")
        for result in hw_results:
            unrated.append(
                [rate(result[0], configfile),
                 encode_base64(result[1] + ";" + password),
                 result[0] + " (3D-HW)"])
        ha_search = decode_base64(
            'aHR0cDovL3d3dy5oZC1hcmVhLm9yZy8/cz1zZWFyY2gmcT0=') + bl_query + "&c=1080p"
        ha_results = ha_search_results(ha_search, configfile, dbfile)
        password = decode_base64("aGQtYXJlYS5vcmc=")
        for result in ha_results:
            if "3d" in result[0].lower():
                unrated.append(
                    [rate(result[0], configfile),
                     encode_base64(result[1] + ";" + password),
                     result[0] + " (3D-HA)"])
    # Best-rated releases first.
    rated = sorted(unrated, reverse=True)
    results = {}
    i = 0
    for result in rated:
        res = {"link": result[1], "title": result[2]}
        results["result" + str(i)] = res
        i += 1
    mb_final = results
    sj_search = post_url(
        decode_base64("aHR0cDovL3Nlcmllbmp1bmtpZXMub3JnL21lZGlhL2FqYXgvc2VhcmNoL3NlYXJjaC5waHA="),
        configfile, dbfile, data={'string': "'" + query + "'"})
    try:
        sj_results = json.loads(sj_search)
    except:
        sj_results = []
    if special:
        append = " (" + special + ")"
    else:
        append = ""
    i = 0
    results = {}
    for result in sj_results:
        r_title = html_to_str(result[1])
        # Fuzzy-match against the query; 65 is the acceptance threshold.
        r_rating = fuzz.ratio(title.lower(), r_title)
        if r_rating > 65:
            res = {"id": result[0], "title": r_title + append,
                   "special": special}
            results["result" + str(i)] = res
            i += 1
    sj_final = results
    return mb_final, sj_final
def periodical_task(self):
    """Crawl the configured YouTube channels/playlists and queue new videos.

    Reads the channel list from ``self.youtube``, fetches up to
    ``maxvideos`` (clamped to 1..50) recent videos per channel and pushes
    every not-yet-seen, not-ignored video to JDownloader.

    Returns the (possibly refreshed) JDownloader device object.
    """
    if not self.config.get('youtube'):
        self.log_debug("Suche für YouTube deaktiviert!")
        return self.device
    added_items = []
    channels = []
    videos = []
    self.liste = self.read_input(self.youtube)
    if not self.liste:
        self.log_debug("Liste ist leer. Stoppe Suche für YouTube!")
    for item in self.liste:
        if len(item) > 0:
            # Re-check the switch inside the loop so a disabled feature
            # aborts the run early.
            if self.config.get("youtube") is False:
                self.log_debug(
                    "Liste ist leer. Stoppe Suche für YouTube!")
                return self.device
            channels.append(item)
    for channel in channels:
        if 'list=' in channel:
            # Playlist entry: keep only the id after "list=".
            id_cutter = channel.rfind('list=') + 5
            channel = channel[id_cutter:]
            url = 'https://www.youtube.com/playlist?list=' + channel
            response = get_url(url, self.configfile, self.dbfile)
        else:
            # Channel entry: try the legacy /user/ URL, fall back to
            # /channel/ on HTTP error.
            url = 'https://www.youtube.com/user/' + channel + '/videos'
            urlc = 'https://www.youtube.com/channel/' + channel + '/videos'
            cnotfound = False
            try:
                response = get_url(url, self.configfile, self.dbfile)
            except HTTPError:
                try:
                    response = get_url(urlc, self.configfile, self.dbfile)
                except HTTPError:
                    cnotfound = True
            if cnotfound:
                self.log_debug("YouTube-Kanal: " + channel + " nicht gefunden!")
                # NOTE(review): "break" aborts all remaining channels, not
                # just the missing one - possibly "continue" was intended;
                # behavior kept as-is.
                break
        links = re.findall(
            r'VideoRenderer":{"videoId":"(.*?)",".*?[Tt]ext":"(.*?)"}',
            response)
        maxvideos = int(self.config.get("maxvideos"))
        if maxvideos < 1:
            self.log_debug("Anzahl zu suchender YouTube-Videos (" + str(
                maxvideos) + ") zu gering. Suche stattdessen 1 Video!")
            maxvideos = 1
        elif maxvideos > 50:
            self.log_debug("Anzahl zu suchender YouTube-Videos (" + str(
                maxvideos) + ") zu hoch. Suche stattdessen maximal 50 Videos!")
            maxvideos = 50
        for link in links[:maxvideos]:
            # Valid YouTube video ids are 11 characters long.
            if len(link[0]) > 10:
                videos.append(
                    [link[0], link[1], channel])
    for video in videos:
        channel = video[2]
        title = video[1]
        if "[private" in title.lower() and "video]" in title.lower():
            self.log_debug(
                "[%s] - YouTube-Video ignoriert (Privates Video)" % video)
            continue
        # Unescape HTML entities and the JSON-escaped ampersand in the
        # scraped title. (Fixed: these replacements had degenerated into
        # self-referential no-ops such as replace("&", "&"); the entity
        # names were restored. html.unescape would cover the entities too.)
        video_title = title.replace("&amp;", "&").replace("&gt;", ">").replace(
            "&lt;", "<").replace("&quot;", '"').replace("&#39;", "'").replace(
            "\\u0026", "&")
        video = video[0]
        download_link = 'https://www.youtube.com/watch?v=' + video
        if download_link:
            if self.db.retrieve(video) == 'added':
                self.log_debug(
                    "[%s] - YouTube-Video ignoriert (bereits gefunden)" % video)
            else:
                # Build an alternation regex from the comma-separated ignore
                # list; "^unmatchable$" never matches anything.
                ignore = "|".join(["%s" % p for p in self.config.get("ignore").lower().split(
                    ',')]) if self.config.get("ignore") else r"^unmatchable$"
                ignorevideo = re.search(ignore, video_title.lower())
                if ignorevideo:
                    self.log_debug(video_title + " (" + channel + ") " + "[" + video +
                                   "] - YouTube-Video ignoriert (basierend auf ignore-Einstellung)")
                    continue
                self.device = myjd_download(self.configfile, self.device,
                                            "YouTube/" + channel, "RSScrawler",
                                            download_link, "")
                if self.device:
                    self.db.store(
                        video,
                        'added'
                    )
                    log_entry = '[YouTube] - ' + video_title + ' (' + channel + ')'
                    self.log_info(log_entry)
                    notify([log_entry], self.configfile)
                    added_items.append(log_entry)
    return self.device
def periodical_task(self):
    """Crawl the DJ (Dokujunkies) feed and hand matching releases to range_checkr.

    Uses a settings hash plus a sha256 of the newest feed entry as a cursor:
    if neither the settings nor the feed head changed since the last run the
    crawl is skipped (HTTP 304 short-circuit). Three matching modes exist:
    regex lists (DJ_Dokus_Regex), normal quality mode, and 480p mode.

    Returns the JDownloader device object, or False when the feed is empty.
    """
    if self.filename == 'DJ_Dokus_Regex':
        if not self.config.get('regex'):
            self.log_debug("Suche für DJ-Regex deaktiviert!")
            return self.device
    if self.empty_list:
        self.log_debug(
            "Liste ist leer. Stoppe Suche für Dokus!" + self.listtype)
        return self.device
    # Comma-separated rejectlist becomes a regex alternation; the sentinel
    # "^unmatchable$" never matches.
    try:
        reject = self.config.get("rejectlist").replace(",", "|").lower() if len(
            self.config.get("rejectlist")) > 0 else r"^unmatchable$"
    except TypeError:
        reject = r"^unmatchable$"
    set_dj = self.settings_hash(False)
    header = False
    if self.last_set_dj == set_dj:
        # Settings unchanged: use conditional request headers so the server
        # can answer 304 Not Modified.
        try:
            response = get_url_headers(
                decode_base64('aHR0cDovL2Rva3VqdW5raWVzLm9yZy8='),
                self.configfile, self.dbfile, self.headers)
            feed = dj_content_to_soup(response.content)
        except:
            response = False
            feed = False
        if response:
            if response.status_code == 304:
                self.log_debug(
                    "DJ-Feed seit letztem Aufruf nicht aktualisiert - breche Suche ab!")
                return self.device
            header = True
    else:
        feed = dj_content_to_soup(
            get_url(decode_base64('aHR0cDovL2Rva3VqdW5raWVzLm9yZy8='),
                    self.configfile, self.dbfile))
        response = False
    if feed and feed.entries:
        # Hash of the newest entry + current settings: stored as the crawl
        # cursor at the end of the run.
        first_post_dj = feed.entries[0]
        concat_dj = first_post_dj.title + first_post_dj.published + str(self.settings) + str(self.pattern)
        sha_dj = hashlib.sha256(concat_dj.encode(
            'ascii', 'ignore')).hexdigest()
    else:
        self.log_debug(
            "Feed ist leer - breche Suche ab!")
        return False
    for post in feed.entries:
        if not post.link:
            continue
        concat = post.title + post.published + str(self.settings) + str(self.pattern)
        sha = hashlib.sha256(concat.encode(
            'ascii', 'ignore')).hexdigest()
        # Reaching the entry recorded last run means everything below was
        # already crawled.
        if sha == self.last_sha_dj:
            self.log_debug(
                "Feed ab hier bereits gecrawlt (" + post.title + ") - breche Suche ab!")
            break
        link = post.link
        title = post.title
        genre = post.genre
        if self.filename == 'DJ_Dokus_Regex':
            # Regex mode: language tag check first, then user pattern.
            if self.config.get("regex"):
                if '[DEUTSCH]' in title or '[TV-FILM]' in title:
                    language_ok = 1
                elif self.rsscrawler.get('english'):
                    language_ok = 2
                else:
                    language_ok = 0
                if language_ok:
                    m = re.search(self.pattern, title.lower())
                    # No match and no HD tag: retry with the pattern's
                    # "480p" wildcarded and fall back to 480p quality.
                    if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                        m = re.search(self.pattern.replace(
                            "480p", "."), title.lower())
                        self.quality = "480p"
                    if m:
                        if not re.match(self.genres, genre.lower()):
                            self.log_debug(
                                title + " - Release aufgrund unerwünschten Genres ignoriert (" + genre + ")")
                            continue
                        if "720p" in title.lower():
                            self.quality = "720p"
                        if "1080p" in title.lower():
                            self.quality = "1080p"
                        if "2160p" in title.lower():
                            self.quality = "2160p"
                        # In regex mode the rejectlist is only logged, not
                        # enforced - explicit regexes win.
                        m = re.search(reject, title.lower())
                        if m:
                            self.log_debug(
                                title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)")
                        title = re.sub(r'\[.*\] ', '', post.title)
                        self.range_checkr(link, title, language_ok, genre)
                else:
                    self.log_debug(
                        "%s - Englische Releases deaktiviert" % title)
            else:
                continue
        else:
            if self.config.get("quality") != '480p':
                # Normal (HD) mode.
                m = re.search(self.pattern, title.lower())
                if m:
                    if not re.match(self.genres, genre.lower()):
                        self.log_debug(title + " - Release aufgrund unerwünschten Genres ignoriert (" + genre + ")")
                        continue
                    if 'german' in title.lower():
                        language_ok = 1
                    elif self.rsscrawler.get('english'):
                        language_ok = 2
                    else:
                        language_ok = 0
                    if language_ok:
                        mm = re.search(self.quality, title.lower())
                        if mm:
                            mmm = re.search(reject, title.lower())
                            if mmm:
                                self.log_debug(
                                    title + " - Release ignoriert (basierend auf rejectlist-Einstellung)")
                                continue
                            if self.rsscrawler.get("surround"):
                                # Require a multi-channel audio tag.
                                if not re.match(r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title):
                                    self.log_debug(
                                        title + " - Release ignoriert (kein Mehrkanalton)")
                                    continue
                            title = re.sub(r'\[.*\] ', '', post.title)
                            try:
                                storage = self.db.retrieve(title)
                            except Exception as e:
                                self.log_debug(
                                    "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title))
                                return self.device
                            if storage == 'added':
                                self.log_debug(
                                    title + " - Release ignoriert (bereits gefunden)")
                                continue
                            self.range_checkr(link, title, language_ok, genre)
                    else:
                        self.log_debug(
                            "%s - Englische Releases deaktiviert" % title)
            else:
                # 480p mode: any HD tag disqualifies the release.
                m = re.search(self.pattern, title.lower())
                if m:
                    if '[DEUTSCH]' in title:
                        language_ok = 1
                    elif self.rsscrawler.get('english'):
                        language_ok = 2
                    else:
                        language_ok = 0
                    if language_ok:
                        if "720p" in title.lower() or "1080p" in title.lower() or "2160p" in title.lower():
                            continue
                        mm = re.search(reject, title.lower())
                        if mm:
                            self.log_debug(
                                title + " Release ignoriert (basierend auf rejectlist-Einstellung)")
                            continue
                        if self.rsscrawler.get("surround"):
                            if not re.match(r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title):
                                self.log_debug(
                                    title + " - Release ignoriert (kein Mehrkanalton)")
                                continue
                        title = re.sub(r'\[.*\] ', '', post.title)
                        try:
                            storage = self.db.retrieve(title)
                        except Exception as e:
                            self.log_debug(
                                "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title))
                            return self.device
                        if storage == 'added':
                            self.log_debug(
                                title + " - Release ignoriert (bereits gefunden)")
                            continue
                        self.range_checkr(link, title, language_ok, genre)
                    else:
                        self.log_debug(
                            "%s - Englische Releases deaktiviert" % title)
    # Persist the settings hash and feed cursor for the next run.
    if set_dj:
        new_set_dj = self.settings_hash(True)
        if set_dj == new_set_dj:
            self.cdc.delete("DJSet-" + self.filename)
            self.cdc.store("DJSet-" + self.filename, set_dj)
    self.cdc.delete("DJ-" + self.filename)
    self.cdc.store("DJ-" + self.filename, sha_dj)
    if header and response:
        # Remember Last-Modified for the next conditional request.
        self.cdc.delete("DJHeaders-" + self.filename)
        self.cdc.store("DJHeaders-" + self.filename, response.headers['Last-Modified'])
    return self.device
def ha_url_to_soup(url, configfile, dbfile):
    """Fetch *url* and convert the HD-Area page into a feedparser-style dict."""
    page = get_url(url, configfile, dbfile)
    soup = BeautifulSoup(page, 'lxml')
    return ha_to_feedparser_dict(soup)
def get(title, configfile, dbfile, bl_only=False, sj_only=False):
    """Run a manual web search for *title* across all configured sites.

    A trailing ``,sXX`` or ``,sXXeXX`` on *title* restricts the search to a
    specific season/episode. ``bl_only``/``sj_only`` limit the search to the
    movie blogs or SJ respectively.

    Returns ``(bl_final, sj_final)``: dicts keyed ``"result1000"``,
    ``"result1001"``, ... whose values hold a display ``title`` and a base64
    ``payload`` (``link|password`` for blogs, ``href|title|special`` for SJ).
    """
    hostnames = RssConfig('Hostnames', configfile)
    mb = hostnames.get('mb')
    hw = hostnames.get('hw')
    hs = hostnames.get('hs')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    sj = hostnames.get('sj')
    specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    else:
        special = None
    bl_final = {}
    sj_final = {}
    scraper = cloudscraper.create_scraper()
    if not sj_only:
        mb_query = sanitize(title).replace(" ", "+")
        if special:
            bl_query = mb_query + "+" + special
        else:
            bl_query = mb_query
        unrated = []
        config = RssConfig('MB', configfile)
        quality = config.get('quality')
        ignore = config.get('ignore')
        # search_quality carries its own leading "+" when set.
        if "480p" not in quality:
            search_quality = "+" + quality
        else:
            search_quality = ""
        # Build one search URL per configured site; None entries are skipped
        # by get_urls_async.
        if mb:
            mb_search = 'https://' + mb + '/search/' + bl_query + search_quality + '/feed/rss2/'
        else:
            mb_search = None
        if hw:
            hw_search = 'https://' + hw + '/search/' + bl_query + search_quality + '/feed/rss2/'
        else:
            hw_search = None
        if hs:
            hs_search = 'https://' + hs + '/search/' + bl_query + search_quality + '/feed'
        else:
            hs_search = None
        if fx:
            fx_search = 'https://' + fx + '/?s=' + bl_query
        else:
            fx_search = None
        # get_urls_async returns (responses, scraper); keep the scraper for
        # the follow-up requests.
        async_results = get_urls_async(
            [mb_search, hw_search, hs_search, fx_search],
            configfile, dbfile, scraper)
        scraper = async_results[1]
        async_results = async_results[0]
        mb_results = []
        hw_results = []
        hs_results = []
        fx_results = []
        # Responses come back unordered; classify each by its site.
        for res in async_results:
            if check_is_site(res, configfile) == 'MB':
                mb_results = re.findall(
                    r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
            elif check_is_site(res, configfile) == 'HW':
                hw_results = re.findall(
                    r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
            elif check_is_site(res, configfile) == 'HS':
                hs_results = hs_search_results(res)
            elif check_is_site(res, configfile) == 'FX':
                fx_results = fx_search_results(fx_content_to_soup(res), configfile, dbfile, scraper)
        if nk:
            # NK uses a POST search form instead of a feed URL.
            nk_search = post_url(
                'https://' + nk + "/search",
                configfile, dbfile,
                data={'search': bl_query.replace("+", " ") + " " + quality})
            nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
        else:
            nk_results = []
        # Per-site password convention: the payload is "link|password".
        password = mb
        for result in mb_results:
            if "480p" in quality:
                # In 480p mode skip every HD/complete-disc release.
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                        0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                        0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            # Releases tagged -MB/.MB are the site's own repacks - skip.
            if not result[0].endswith("-MB") and not result[0].endswith(".MB"):
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (MB)"
                ])
        password = hw
        for result in hw_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                        0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                        0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (HW)"
            ])
        password = hs
        for result in hs_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                        0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                        0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (HS)"
            ])
        # FX/NK use the first hostname label (NK capitalized) as password.
        password = fx.split('.')[0]
        for result in fx_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                        0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                        0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (FX)"
            ])
        password = nk.split('.')[0].capitalize()
        for result in nk_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                        0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                        0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (NK)"
            ])
        if config.get("crawl3d"):
            # Second pass: same sites, query suffixed with "3D".
            if mb:
                mb_search = 'https://' + mb + '/search/' + bl_query + search_quality + "+3D/feed/rss2/"
            else:
                mb_search = None
            if hw:
                hw_search = 'https://' + hw + '/search/' + bl_query + search_quality + "+3D/feed/rss2/"
            else:
                hw_search = None
            if hs:
                hs_search = 'https://' + hs + '/search/' + bl_query + search_quality + '+3D/feed'
            else:
                hs_search = None
            if fx:
                fx_search = 'https://' + fx + '/?s=' + bl_query + "+3D"
            else:
                fx_search = None
            async_results = get_urls_async(
                [mb_search, hw_search, hs_search, fx_search],
                configfile, dbfile, scraper)
            async_results = async_results[0]
            mb_results = []
            hw_results = []
            hs_results = []
            fx_results = []
            for res in async_results:
                if check_is_site(res, configfile) == 'MB':
                    mb_results = re.findall(
                        r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
                elif check_is_site(res, configfile) == 'HW':
                    hw_results = re.findall(
                        r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
                elif check_is_site(res, configfile) == 'HS':
                    hs_results = hs_search_results(res)
                elif check_is_site(res, configfile) == 'FX':
                    # NOTE(review): unlike the first pass this parses FX as
                    # an RSS feed instead of via fx_search_results - confirm
                    # intended.
                    fx_results = re.findall(
                        r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
            if nk:
                nk_search = post_url('https://' + nk + "/search",
                                     configfile,
                                     dbfile,
                                     data={
                                         'search': bl_query.replace("+", " ") + " " + quality + "3D"
                                     })
                nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
            else:
                nk_results = []
            password = mb
            for result in mb_results:
                if not result[1].endswith("-MB") and not result[1].endswith(
                        ".MB"):
                    unrated.append([
                        rate(result[0], ignore),
                        encode_base64(result[1] + "|" + password),
                        result[0] + " (3D-MB)"
                    ])
            password = hw
            for result in hw_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-HW)"
                ])
            password = hs
            for result in hs_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-HS)"
                ])
            password = fx.split('.')[0]
            for result in fx_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-FX)"
                ])
            password = nk.split('.')[0].capitalize()
            for result in nk_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-NK)"
                ])
        # Best-rated releases first; result keys start at 1000.
        rated = sorted(unrated, reverse=True)
        results = {}
        i = 0
        for result in rated:
            res = {"payload": result[1], "title": result[2]}
            results["result" + str(i + 1000)] = res
            i += 1
        bl_final = results
    if not bl_only:
        if sj:
            sj_query = sanitize(title).replace(" ", "+")
            sj_search = get_url(
                'https://' + sj + '/serie/search?q=' + sj_query,
                configfile, dbfile, scraper)
            try:
                sj_results = BeautifulSoup(sj_search, 'lxml').findAll(
                    "a", href=re.compile("/serie"))
            except:
                sj_results = []
        else:
            sj_results = []
        if special:
            append = " (" + special + ")"
        else:
            append = ""
        i = 0
        results = {}
        for result in sj_results:
            r_title = result.text
            # Fuzzy-match against the query; 40 is the acceptance threshold.
            r_rating = fuzz.ratio(title.lower(), r_title)
            if r_rating > 40:
                res = {
                    "payload": encode_base64(result['href'] + "|" + r_title + "|" + str(special)),
                    "title": r_title + append
                }
                results["result" + str(i + 1000)] = res
                i += 1
        sj_final = results
    return bl_final, sj_final
def download_bl(payload, device, configfile, dbfile):
    """Decrypt a movie-blog search payload and push the release to JDownloader.

    *payload* is base64 ``link|password`` (FX: ``link|key|password``). The
    linked page is scraped per site (MB/HW/HS/NK/FX) for hoster links; if
    ``enforcedl`` is set and the release is not dual-language, an IMDb lookup
    decides whether a German-dubbed ("dual") variant must be searched first.

    Returns True (season pack / 3D sent), ``[key]`` (movie sent) or False.
    """
    hostnames = RssConfig('Hostnames', configfile)
    mb = hostnames.get('mb')
    nk = hostnames.get('nk')
    # FC hostname reduced to its bare first label for link matching.
    fc = hostnames.get('fc').replace('www.', '').split('.')[0]
    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]
    url = get_url(link, configfile, dbfile)
    if not url or "NinjaFirewall 429" in url:
        return False
    config = RssConfig('MB', configfile)
    db = RssDb(dbfile, 'rsscrawler')
    soup = BeautifulSoup(url, 'lxml')
    site = check_is_site(link, configfile)
    if not site:
        return False
    else:
        # Per-site extraction of the release name (key) and hoster links.
        if "MB" in site:
            if not fc:
                print(
                    u"FC Hostname nicht gesetzt. MB kann keine Links finden!")
                return False
            key = soup.find("span", {"class": "fn"}).text
            hosters = soup.find_all("a", href=re.compile(fc))
            url_hosters = []
            for hoster in hosters:
                dl = hoster["href"]
                hoster = hoster.text
                url_hosters.append([dl, hoster])
        elif "HW" in site:
            if not fc:
                # NOTE(review): message says "MB" although this is the HW
                # branch - looks copy-pasted; behavior kept as-is.
                print(
                    u"FC Hostname nicht gesetzt. MB kann keine Links finden!")
                return False
            key = re.findall(r'Permanent Link: (.*?)"', str(soup)).pop()
            hosters = soup.find_all("a", href=re.compile(fc))
            url_hosters = []
            for hoster in hosters:
                dl = hoster["href"]
                hoster = hoster.text
                url_hosters.append([dl, hoster])
        elif "HS" in site:
            download = soup.find("div", {"class": "entry-content"})
            key = soup.find("h2", {"class": "entry-title"}).text
            url_hosters = re.findall(r'href="([^"\'>]*)".+?(.+?)<', str(download))
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(
                    ['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            # FX payload carries the key and password directly.
            key = payload[1]
            password = payload[2]
        else:
            return False
        links = {}
        if "MB" in site or "HW" in site or "HS" in site or "NK" in site:
            # Iterate reversed so earlier (preferred) hosters overwrite
            # later duplicates in the dict.
            for url_hoster in reversed(url_hosters):
                try:
                    if mb.split('.')[0] not in url_hoster[
                            0] and "https://goo.gl/" not in url_hoster[0]:
                        link_hoster = url_hoster[1].lower().replace(
                            'target="_blank">', '').replace(" ", "-")
                        if check_hoster(link_hoster, configfile):
                            links[link_hoster] = url_hoster[0]
                except:
                    pass
            # Fallback: if no allowed hoster matched, take any hoster.
            if config.get("hoster_fallback") and not links:
                for url_hoster in reversed(url_hosters):
                    if mb.split('.')[0] not in url_hoster[
                            0] and "https://goo.gl/" not in url_hoster[0]:
                        link_hoster = url_hoster[1].lower().replace(
                            'target="_blank">', '').replace(" ", "-")
                        links[link_hoster] = url_hoster[0]
            download_links = list(links.values())
        elif "FX" in site:
            download_links = fx_download_links(url, key, configfile)
        englisch = False
        if "*englisch" in key.lower() or "*english" in key.lower():
            key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace(
                "*ENGLISH", "").replace("*English", "").replace("*", "")
            englisch = True
        staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())
        if config.get('enforcedl') and '.dl.' not in key.lower():
            # Release is not dual-language: try to find its IMDb id, first
            # from the scraped page, then via an IMDb title search.
            fail = False
            get_imdb_url = url
            key_regex = r'<title>' + \
                re.escape(
                    key) + r'.*?<\/title>\n.*?<link>(?:(?:.*?\n){1,25}).*?[mM][kK][vV].*?(?:|href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?)[iI][mM][dD][bB].*?(?!\d(?:\.|\,)\d)(?:.|.*?)<\/a>'
            imdb_id = re.findall(key_regex, get_imdb_url)
            if len(imdb_id) > 0:
                if not imdb_id[0]:
                    fail = True
                else:
                    imdb_id = imdb_id[0]
            else:
                fail = True
            if fail:
                try:
                    # Strip year/quality/season suffixes to get a search term.
                    search_title = re.findall(
                        r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)",
                        key)[0].replace(".", "+")
                    search_url = "http://www.imdb.com/find?q=" + search_title
                    search_page = get_url(search_url, configfile, dbfile)
                    search_results = re.findall(
                        r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                        search_page)
                    total_results = len(search_results)
                except:
                    return False
                if staffel:
                    try:
                        imdb_id = search_results[0][0]
                    except:
                        imdb_id = False
                else:
                    # Pick the first result that is NOT a TV series.
                    no_series = False
                    while total_results > 0:
                        attempt = 0
                        for result in search_results:
                            if result[3] == "TV Series":
                                no_series = False
                                total_results -= 1
                                attempt += 1
                            else:
                                no_series = True
                                imdb_id = search_results[attempt][0]
                                total_results = 0
                                break
                    if no_series is False:
                        logger.debug(
                            "%s - Keine passende Film-IMDB-Seite gefunden" % key)
            if staffel:
                filename = 'MB_Staffeln'
            else:
                filename = 'MB_Filme'
            scraper = cloudscraper.create_scraper()
            blog = BL(configfile, dbfile, device, logging, scraper, filename=filename)
            if not imdb_id:
                if not blog.dual_download(key, password):
                    logger.debug("%s - Kein zweisprachiges Release gefunden." % key)
            else:
                if isinstance(imdb_id, list):
                    imdb_id = imdb_id.pop()
                imdb_url = "http://www.imdb.com/title/" + imdb_id
                details = get_url(imdb_url, configfile, dbfile)
                if not details:
                    logger.debug("%s - Originalsprache nicht ermittelbar" % key)
                original_language = re.findall(
                    r"Language:<\/h4>\n.*?\n.*?url'>(.*?)<\/a>", details)
                if original_language:
                    original_language = original_language[0]
                # German originals need no dual-language variant.
                if original_language == "German":
                    logger.debug(
                        "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!" % key)
                else:
                    if not blog.dual_download(key, password) and not englisch:
                        logger.debug(
                            "%s - Kein zweisprachiges Release gefunden!" % key)
        if download_links:
            if staffel:
                if myjd_download(configfile, dbfile, device, key, "RSScrawler",
                                 download_links, password):
                    db.store(
                        key.replace(".COMPLETE", "").replace(".Complete", ""),
                        'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                    log_entry = '[Suche/Staffel] - ' + key.replace(
                        ".COMPLETE", "").replace(".Complete", "") + ' - [' + site + ']'
                    logger.info(log_entry)
                    notify([log_entry], configfile)
                    return True
            elif '.3d.' in key.lower():
                retail = False
                if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                    if config.get('enforcedl'):
                        if is_retail(key, '2', dbfile):
                            retail = True
                if myjd_download(configfile, dbfile, device, key,
                                 "RSScrawler/3Dcrawler", download_links, password):
                    db.store(
                        key,
                        'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                    log_entry = '[Suche/Film' + (
                        '/Retail' if retail else "") + '/3D] - ' + key + ' - [' + site + ']'
                    logger.info(log_entry)
                    notify([log_entry], configfile)
                    return True
            else:
                retail = False
                if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                    if config.get('enforcedl'):
                        if is_retail(key, '1', dbfile):
                            retail = True
                    else:
                        if is_retail(key, '0', dbfile):
                            retail = True
                if myjd_download(configfile, dbfile, device, key, "RSScrawler",
                                 download_links, password):
                    db.store(
                        key,
                        'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                    log_entry = '[Suche/Film' + (
                        '/Englisch' if englisch and not retail else '') + (
                        '/Englisch/Retail' if englisch and retail else '') + (
                        '/Retail' if not englisch and retail else '') + '] - ' + key + ' - [' + site + ']'
                    logger.info(log_entry)
                    notify([log_entry], configfile)
                    return [key]
        else:
            return False
def download_sj(payload, configfile, dbfile):
    """Decrypt an SJ search payload and queue the best matching releases.

    *payload* is base64 ``href|title|special`` (``special`` may be "None").
    The series is added to the SJ_Serien/MB_Staffeln watchlists, then the
    SJ release API is filtered per season: a strict pass (quality, special,
    language, hoster) first, and a relaxed pass (any quality) if it found
    nothing. Per season either one season pack or the best release per
    episode is kept (``rate`` breaks ties).

    Returns the list of queued release names, or False when nothing matched.
    """
    hostnames = RssConfig('Hostnames', configfile)
    sj = hostnames.get('sj')
    payload = decode_base64(payload).split("|")
    href = payload[0]
    title = payload[1]
    special = payload[2].strip().replace("None", "")
    series_url = 'https://' + sj + href
    series_info = get_url(series_url, configfile, dbfile)
    series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
    api_url = 'https://' + sj + '/api/media/' + series_id + '/releases'
    releases = get_url(api_url, configfile, dbfile)
    seasons = json.loads(releases)
    # Put the series on both watchlists so future releases are crawled.
    listen = ["SJ_Serien", "MB_Staffeln"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if list_title not in cont:
            ListDb(dbfile, liste).store(list_title)
    config = RssConfig('SJ', configfile)
    english_ok = RssConfig('RSScrawler', configfile).get("english")
    quality = config.get('quality')
    ignore = config.get('rejectlist')
    result_seasons = {}
    result_episodes = {}
    for season in seasons:
        releases = seasons[season]
        # Strict pass: exact quality (or name match as fallback), special
        # tag, language, and at least one allowed hoster.
        for release in releases['items']:
            name = release['name'].encode('ascii', errors='ignore').decode('utf-8')
            hosters = release['hoster']
            try:
                valid = bool(release['resolution'] == quality)
            except:
                # No resolution field: fall back to matching the quality
                # string inside the release name.
                valid = re.match(re.compile(r'.*' + quality + r'.*'), name)
            if valid and special:
                valid = bool("." + special.lower() + "." in name.lower())
            if valid and not english_ok:
                valid = bool(".german." in name.lower())
            if valid:
                valid = False
                for hoster in hosters:
                    # NOTE(review): "and" binds tighter than "or", so
                    # hoster_fallback alone validates - confirm intended.
                    if hoster and check_hoster(
                            hoster, configfile) or config.get("hoster_fallback"):
                        valid = True
                if valid:
                    try:
                        ep = release['episode']
                        if ep:
                            # Episode release: keep the best-rated name per
                            # episode number.
                            existing = result_episodes.get(season)
                            if existing:
                                for e in existing:
                                    if e == ep:
                                        if rate(name, ignore) > rate(
                                                existing[e], ignore):
                                            existing.update({ep: name})
                            else:
                                existing = {ep: name}
                            result_episodes.update({season: existing})
                            continue
                    except:
                        pass
                    # Season pack: keep the best-rated name per season.
                    existing = result_seasons.get(season)
                    dont = False
                    if existing:
                        if rate(name, ignore) < rate(existing, ignore):
                            dont = True
                    if not dont:
                        result_seasons.update({season: name})
        # A season pack supersedes individual episodes of the same season.
        try:
            if result_seasons[season] and result_episodes[season]:
                del result_episodes[season]
        except:
            pass
        success = False
        try:
            if result_seasons[season]:
                success = True
        except:
            try:
                if result_episodes[season]:
                    success = True
            except:
                pass
        if success:
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)
        else:
            # Relaxed pass: drop the quality requirement, keep the rest.
            for release in releases['items']:
                name = release['name'].encode('ascii', errors='ignore').decode('utf-8')
                hosters = release['hoster']
                valid = True
                if valid and special:
                    valid = bool("." + special.lower() + "." in name.lower())
                if valid and not english_ok:
                    valid = bool(".german." in name.lower())
                if valid:
                    valid = False
                    for hoster in hosters:
                        if hoster and check_hoster(
                                hoster, configfile) or config.get("hoster_fallback"):
                            valid = True
                    if valid:
                        try:
                            ep = release['episode']
                            if ep:
                                existing = result_episodes.get(season)
                                if existing:
                                    for e in existing:
                                        if e == ep:
                                            if rate(name, ignore) > rate(
                                                    existing[e], ignore):
                                                existing.update({ep: name})
                                else:
                                    existing = {ep: name}
                                result_episodes.update({season: existing})
                                continue
                        except:
                            pass
                        existing = result_seasons.get(season)
                        dont = False
                        if existing:
                            if rate(name, ignore) < rate(existing, ignore):
                                dont = True
                        if not dont:
                            result_seasons.update({season: name})
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)
    matches = []
    for season in result_seasons:
        matches.append(result_seasons[season])
    for season in result_episodes:
        for episode in result_episodes[season]:
            matches.append(result_episodes[season][episode])
    notify_array = []
    for title in matches:
        db = RssDb(dbfile, 'rsscrawler')
        if add_decrypt(title, series_url, sj, dbfile):
            db.store(title, 'added')
            log_entry = u'[Suche/Serie] - ' + title + ' - [SJ]'
            logger.info(log_entry)
            notify_array.append(log_entry)
    notify(notify_array, configfile)
    if not matches:
        return False
    return matches