def get_url_headers(url, configfile, dbfile, headers):
    config = RssConfig('RSScrawler', configfile)
    proxy = config.get('proxy')
    scraper = cfscrape.create_scraper(delay=10)
    agent = fake_user_agent()
    headers.update({'User-Agent': agent})
    if proxy:
        sj = decode_base64("c2VyaWVuanVua2llcy5vcmc=")
        mb = decode_base64("bW92aWUtYmxvZy50bw==")
        db = RssDb(dbfile, 'proxystatus')
        if sj in url:
            if db.retrieve("SJ") and config.get("fallback"):
                return scraper.get(url, headers=headers, timeout=30)
        elif mb in url:
            if db.retrieve("MB") and config.get("fallback"):
                return scraper.get(url, headers=headers, timeout=30)
        proxies = {'http': proxy, 'https': proxy}
        try:
            response = scraper.get(url, headers=headers, proxies=proxies, timeout=30)
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ""
    else:
        try:
            response = scraper.get(url, headers=headers, timeout=30)
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ""
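# Hedged usage sketch (not part of the original module): get_url_headers is presumably
# called with conditional-request headers so that an unchanged feed can be answered with
# HTTP 304, which periodical_task below checks for. The header name and the call shape
# are assumptions for illustration only.
def _example_conditional_fetch(url, configfile, dbfile, last_modified):
    headers = {'If-Modified-Since': last_modified}
    response = get_url_headers(url, configfile, dbfile, headers)
    if response and response.status_code == 304:
        return None  # feed unchanged since the last crawl
    return response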
def send_package(self, title, links, englisch_info, genre):
    if genre == "Doku":
        genre = ""
    else:
        genre = "/" + genre
    englisch = ""
    if englisch_info:
        englisch = "Englisch - "
    if self.filename == 'DJ_Dokus_Regex':
        link_placeholder = '[Doku' + genre + '/RegEx] - ' + englisch
    else:
        link_placeholder = '[Doku' + genre + '] - ' + englisch
    try:
        storage = self.db.retrieve(title)
    except Exception as e:
        self.log_debug(
            "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title))
        return
    if storage == 'added':
        self.log_debug(title + " - Release ignoriert (bereits gefunden)")
    else:
        self.device = myjd_download(self.configfile, self.device, title, "RSScrawler", links,
                                    decode_base64("ZG9rdWp1bmtpZXMub3Jn"))
        if self.device:
            self.db.store(title, 'added')
            log_entry = link_placeholder + title
            self.log_info(log_entry)
            notify([log_entry], self.configfile)
            return log_entry
def check_url(configfile, dbfile):
    sj_url = decode_base64("aHR0cDovL3Nlcmllbmp1bmtpZXMub3Jn")
    mb_url = decode_base64("aHR0cDovL21vdmllLWJsb2cudG8v")
    proxy = RssConfig('RSScrawler', configfile).get('proxy')
    scraper = cfscrape.create_scraper(delay=10)
    agent = fake_user_agent()
    sj_blocked_proxy = False
    mb_blocked_proxy = False
    if proxy:
        db = RssDb(dbfile, 'proxystatus')
        proxies = {'http': proxy, 'https': proxy}
        if "block." in str(
                scraper.get(sj_url, headers={'User-Agent': agent}, proxies=proxies, timeout=30,
                            allow_redirects=False).headers.get("location")):
            print(u"Der Zugriff auf SJ ist mit der aktuellen Proxy-IP nicht möglich!")
            if RssConfig('RSScrawler', configfile).get("fallback"):
                db.store("SJ", "Blocked")
                sj_blocked_proxy = True
        else:
            db.delete("SJ")
        if "<Response [403]>" in str(
                scraper.get(mb_url, headers={'User-Agent': agent}, proxies=proxies, timeout=30,
                            allow_redirects=False)):
            print(u"Der Zugriff auf MB ist mit der aktuellen Proxy-IP nicht möglich!")
            if RssConfig('RSScrawler', configfile).get("fallback"):
                db.store("MB", "Blocked")
                mb_blocked_proxy = True
        else:
            db.delete("MB")

    # TODO check if HA is working!
    if not proxy or sj_blocked_proxy or mb_blocked_proxy:
        if "block." in str(
                scraper.get(sj_url, headers={'User-Agent': agent}, timeout=30,
                            allow_redirects=False).headers.get("location")):
            print(u"Der Zugriff auf SJ ist mit der aktuellen IP nicht möglich!")
        if "<Response [403]>" in str(
                scraper.get(mb_url, headers={'User-Agent': agent}, timeout=30, allow_redirects=False)):
            print(u"Der Zugriff auf MB ist mit der aktuellen IP nicht möglich!")
    return
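# Hedged sketch (assumption, not original code): how the "proxystatus" flags written by
# check_url are consumed. get_url_headers above only bypasses the proxy for a blocked
# site when such a flag is set AND the "fallback" option is enabled.
def _example_sj_proxy_blocked(dbfile):
    db = RssDb(dbfile, 'proxystatus')
    # check_url stores "Blocked" under "SJ"/"MB" when the proxy IP is rejected
    return bool(db.retrieve("SJ"))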
def download_sj(sj_id, special, device, configfile, dbfile):
    url = get_url(decode_base64("aHR0cDovL3Nlcmllbmp1bmtpZXMub3JnLz9jYXQ9") + str(sj_id), configfile, dbfile)
    season_pool = re.findall(r'<h2>Staffeln:(.*?)<h2>Feeds', url).pop()
    season_links = re.findall(
        r'href="(.{1,125})">.{1,90}(Staffel|Season).*?(\d{1,2}-?\d{1,2}|\d{1,2})', season_pool)
    title = html_to_str(re.findall(r'>(.{1,85}?) &#', season_pool).pop())
    rsscrawler = RssConfig('RSScrawler', configfile)

    # Add the title to both lists if it is not tracked yet.
    listen = ["SJ_Serien", "MB_Staffeln"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if list_title not in cont:
            ListDb(dbfile, liste).store(list_title)

    staffeln = []
    staffel_nr = []
    seasons = []

    for s in season_links:
        if "staffel" in s[1].lower():
            staffeln.append([s[2], s[0]])
            if "-" in s[2]:
                split = s[2].split("-")
                split = range(int(split[0]), int(split[1]) + 1)
                for nr in split:
                    staffel_nr.append(str(nr))
            else:
                staffel_nr.append(s[2])
        else:
            seasons.append([s[2], s[0]])

    if rsscrawler.get("english"):
        for se in seasons:
            if se[0] not in staffel_nr:
                staffeln.append(se)

    to_dl = []
    for s in staffeln:
        if "-" in s[0]:
            split = s[0].split("-")
            split = range(int(split[0]), int(split[1]) + 1)
            for i in split:
                to_dl.append([str(i), s[1]])
        else:
            to_dl.append([s[0], s[1]])

    found_seasons = {}
    for dl in to_dl:
        if len(dl[0]) == 1:
            sxx = "S0" + str(dl[0])
        else:
            sxx = "S" + str(dl[0])
        link = dl[1]
        if sxx not in found_seasons:
            found_seasons[sxx] = link

    something_found = False
    for sxx, link in found_seasons.items():
        config = RssConfig('SJ', configfile)
        quality = config.get('quality')
        url = get_url(link, configfile, dbfile)

        def find_releases(tag):
            # Collects season packs and single episodes, each in the configured and in any
            # quality (identical to the four separate regexes of the original code).
            tail = r'<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'
            return (
                re.findall(re.compile(r'<p><strong>(.*?\.' + tag + r'\..*?' + quality + r'.*?)' + tail), url),
                re.findall(re.compile(r'<p><strong>(.*?\.' + tag + r'E\d{1,3}.*?' + quality + r'.*?)' + tail), url),
                re.findall(re.compile(r'<p><strong>(.*?\.' + tag + r'\..*?)' + tail), url),
                re.findall(re.compile(r'<p><strong>(.*?\.' + tag + r'E\d{1,3}.*?)' + tail), url),
            )

        pakete, folgen, lq_pakete, lq_folgen = find_releases(sxx)
        if not pakete and not folgen and not lq_pakete and not lq_folgen:
            sxx = sxx.replace("S0", "S")
            pakete, folgen, lq_pakete, lq_folgen = find_releases(sxx)

        if special and "e" in special.lower():
            pakete = []
            lq_pakete = []

        best_matching_links = []
        # As in the original if/elif chain, only the first non-empty group is evaluated.
        for group in (pakete, folgen, lq_pakete, lq_folgen):
            if not group:
                continue
            links = []
            for x in group:
                release_title = x[0]
                score = rate(release_title, configfile)
                hoster = [[x[2], x[1]], [x[4], x[3]]]
                if special:
                    if special.lower() in release_title.lower():
                        links.append([score, release_title, hoster])
                else:
                    links.append([score, release_title, hoster])
            if links:
                highest_score = sorted(links, reverse=True)[0][0]
                for l in links:
                    if l[0] == highest_score:
                        for hoster in l[2]:
                            best_matching_links.append(
                                [l[1], hoster[0], hoster[1]])
            break

        notify_array = []
        for best_link in best_matching_links:
            dl_title = best_link[0].replace("Staffelpack ", "").replace("Staffelpack.", "")
            dl_hoster = best_link[1]
            dl_link = best_link[2]
            config = RssConfig('SJ', configfile)
            hoster = re.compile(config.get('hoster'))
            db = RssDb(dbfile, 'rsscrawler')
            if re.match(hoster, dl_hoster.lower()):
                if myjd_download(configfile, device, dl_title, "RSScrawler", dl_link,
                                 decode_base64("c2VyaWVuanVua2llcy5vcmc=")):
                    db.store(dl_title, 'added')
                    log_entry = '[Suche/Serie] - ' + dl_title
                    logging.info(log_entry)
                    notify_array.append(log_entry)
                else:
                    return False

        if len(best_matching_links) > 0:
            something_found = True
        notify(notify_array, configfile)

    if not something_found:
        return False
    return True
def download_bl(payload, device, configfile, dbfile):
    payload = decode_base64(payload).split(";")
    link = payload[0]
    password = payload[1]
    url = get_url(link, configfile, dbfile)
    config = RssConfig('MB', configfile)
    hoster = re.compile(config.get('hoster'))
    db = RssDb(dbfile, 'rsscrawler')

    soup = BeautifulSoup(url, 'lxml')
    download = soup.find("div", {"id": "content"})
    try:
        key = re.findall(r'Permanent Link: (.*?)"', str(download)).pop()
        url_hosters = re.findall(r'href="([^"\'>]*)".+?(.+?)<', str(download))
    except:
        items_head = soup.find("div", {"class": "topbox"})
        key = items_head.contents[1].a["title"]
        items_download = soup.find("div", {"class": "download"})
        url_hosters = []
        download = items_download.find_all("span", {"style": "display:inline;"}, text=True)
        for link in download:
            link = link.a
            text = link.text.strip()
            if text:
                url_hosters.append([str(link["href"]), str(text)])

    links = {}
    for url_hoster in reversed(url_hosters):
        if not decode_base64("bW92aWUtYmxvZy50by8=") in url_hoster[0] and "https://goo.gl/" not in url_hoster[0]:
            link_hoster = url_hoster[1].lower().replace('target="_blank">', '').replace(" ", "-")
            if re.match(hoster, link_hoster):
                links[link_hoster] = url_hoster[0]
    download_links = links.values() if six.PY2 else list(links.values())

    englisch = False
    if "*englisch*" in key.lower():
        key = key.replace('*ENGLISCH*', '').replace("*Englisch*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    if config.get('enforcedl') and '.dl.' not in key.lower():
        fail = False
        get_imdb_url = url
        key_regex = (r'<title>' + re.escape(key) +
                     r'.*?<\/title>\n.*?<link>(?:(?:.*?\n){1,25}).*?[mM][kK][vV].*?(?:|href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?)[iI][mM][dD][bB].*?(?!\d(?:\.|\,)\d)(?:.|.*?)<\/a>')
        imdb_id = re.findall(key_regex, get_imdb_url)
        if len(imdb_id) > 0:
            if not imdb_id[0]:
                fail = True
            else:
                imdb_id = imdb_id[0]
        else:
            fail = True
        if fail:
            search_title = re.findall(
                r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)", key)[0].replace(".", "+")
            search_url = "http://www.imdb.com/find?q=" + search_title
            search_page = get_url(search_url, configfile, dbfile)
            search_results = re.findall(
                r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                search_page)
            total_results = len(search_results)
            if staffel:
                imdb_id = search_results[0][0]
            else:
                no_series = False
                while total_results > 0:
                    attempt = 0
                    for result in search_results:
                        if result[3] == "TV Series":
                            no_series = False
                            total_results -= 1
                            attempt += 1
                        else:
                            no_series = True
                            imdb_id = search_results[attempt][0]
                            total_results = 0
                            break
                if no_series is False:
                    logging.debug(
                        "%s - Keine passende Film-IMDB-Seite gefunden" % key)

        if staffel:
            filename = 'MB_Staffeln'
        else:
            filename = 'MB_Filme'
        bl = BL(configfile, dbfile, device, logging, filename=filename)

        if not imdb_id:
            if not bl.dual_download(key, password):
                logging.debug(
                    "%s - Kein zweisprachiges Release gefunden." % key)
        else:
            if isinstance(imdb_id, list):
                imdb_id = imdb_id.pop()
            imdb_url = "http://www.imdb.com/title/" + imdb_id
            details = get_url(imdb_url, configfile, dbfile)
            if not details:
                logging.debug("%s - Originalsprache nicht ermittelbar" % key)
            original_language = re.findall(
                r"Language:<\/h4>\n.*?\n.*?url'>(.*?)<\/a>", details)
            if original_language:
                original_language = original_language[0]
            if original_language == "German":
                logging.debug(
                    "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!" % key)
            else:
                if not bl.dual_download(key, password) and not englisch:
                    logging.debug(
                        "%s - Kein zweisprachiges Release gefunden! Breche ab." % key)

    if download_links:
        if staffel:
            if myjd_download(configfile, device, key, "RSScrawler", download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Staffel] - ' + key.replace(".COMPLETE", "").replace(".Complete", "")
                logging.info(log_entry)
                notify([log_entry], configfile)
                return True
        elif '.3d.' in key.lower():
            retail = False
            if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                if config.get('enforcedl'):
                    if cutoff(key, '2', dbfile):
                        retail = True
            if myjd_download(configfile, device, key, "RSScrawler/3Dcrawler", download_links, password):
                db.store(
                    key,
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film] - ' + ('Retail/' if retail else "") + '3D - ' + key
                logging.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                if config.get('enforcedl'):
                    if cutoff(key, '1', dbfile):
                        retail = True
                else:
                    if cutoff(key, '0', dbfile):
                        retail = True
            if myjd_download(configfile, device, key, "RSScrawler", download_links, password):
                db.store(
                    key,
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film] - ' + ('Englisch - ' if englisch and not retail else "") + \
                            ('Englisch/Retail - ' if englisch and retail else "") + \
                            ('Retail - ' if not englisch and retail else "") + key
                logging.info(log_entry)
                notify([log_entry], configfile)
                return True
    else:
        return False
def get(title, configfile, dbfile):
    specific_season = re.match(r'^(.*);(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*);(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(";")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(";")
        title = split[0]
        special = split[1].upper()
    else:
        special = None

    query = title.replace(".", " ").replace(" ", "+")
    if special:
        bl_query = query + "+" + special
    else:
        bl_query = query

    unrated = []

    config = RssConfig('MB', configfile)
    quality = config.get('quality')
    if "480p" not in quality:
        search_quality = "+" + quality
    else:
        search_quality = ""

    def above_480p(release_title):
        # Helper replacing the repeated inline check: HD releases and complete
        # Blu-ray discs are skipped when the configured quality is 480p.
        lowered = release_title.lower()
        return any(tag in lowered for tag in (
            "720p", "1080p", "1080i", "2160p",
            "complete.bluray", "complete.mbluray", "complete.uhd.bluray"))

    mb_search = get_url(
        decode_base64('aHR0cDovL21vdmllLWJsb2cudG8=') + '/search/' + bl_query + "+" + search_quality + '/feed/rss2/',
        configfile, dbfile)
    mb_results = re.findall(r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', mb_search)
    password = decode_base64("bW92aWUtYmxvZy5vcmc=")
    for result in mb_results:
        if "480p" in quality and above_480p(result[0]):
            continue
        if not result[0].endswith("-MB") and not result[0].endswith(".MB"):
            unrated.append(
                [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (MB)"])

    hw_search = get_url(
        decode_base64('aHR0cDovL2hkLXdvcmxkLm9yZw==') + '/search/' + bl_query + "+" + search_quality + '/feed/rss2/',
        configfile, dbfile)
    hw_results = re.findall(r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', hw_search)
    password = decode_base64("aGQtd29ybGQub3Jn")
    for result in hw_results:
        if "480p" in quality and above_480p(result[0]):
            continue
        unrated.append(
            [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (HW)"])

    ha_search = decode_base64('aHR0cDovL3d3dy5oZC1hcmVhLm9yZy8/cz1zZWFyY2gmcT0=') + bl_query + "&c=" + search_quality
    ha_results = ha_search_results(ha_search, configfile, dbfile)
    password = decode_base64("aGQtYXJlYS5vcmc=")
    for result in ha_results:
        if "480p" in quality and above_480p(result[0]):
            continue
        unrated.append(
            [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (HA)"])

    if config.get("crawl3d"):
        mb_search = get_url(
            decode_base64('aHR0cDovL21vdmllLWJsb2cudG8=') + '/search/' + bl_query + "+3D+1080p" + '/feed/rss2/',
            configfile, dbfile)
        mb_results = re.findall(r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', mb_search)
        for result in mb_results:
            if not result[1].endswith("-MB") and not result[1].endswith(".MB"):
                unrated.append(
                    [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (3D-MB)"])

        hw_search = get_url(
            decode_base64('aHR0cDovL2hkLXdvcmxkLm9yZw==') + '/search/' + bl_query + "+3D+1080p" + '/feed/rss2/',
            configfile, dbfile)
        hw_results = re.findall(r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', hw_search)
        password = decode_base64("aGQtd29ybGQub3Jn")
        for result in hw_results:
            unrated.append(
                [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (3D-HW)"])

        ha_search = decode_base64('aHR0cDovL3d3dy5oZC1hcmVhLm9yZy8/cz1zZWFyY2gmcT0=') + bl_query + "&c=1080p"
        ha_results = ha_search_results(ha_search, configfile, dbfile)
        password = decode_base64("aGQtYXJlYS5vcmc=")
        for result in ha_results:
            if "3d" in result[0].lower():
                unrated.append(
                    [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (3D-HA)"])

    rated = sorted(unrated, reverse=True)

    results = {}
    i = 0
    for result in rated:
        res = {"link": result[1], "title": result[2]}
        results["result" + str(i)] = res
        i += 1
    mb_final = results

    sj_search = post_url(decode_base64("aHR0cDovL3Nlcmllbmp1bmtpZXMub3JnL21lZGlhL2FqYXgvc2VhcmNoL3NlYXJjaC5waHA="),
                         configfile, dbfile, data={'string': "'" + query + "'"})
    try:
        sj_results = json.loads(sj_search)
    except:
        sj_results = []

    if special:
        append = " (" + special + ")"
    else:
        append = ""
    i = 0
    results = {}
    for result in sj_results:
        r_title = html_to_str(result[1])
        r_rating = fuzz.ratio(title.lower(), r_title)
        if r_rating > 65:
            res = {"id": result[0], "title": r_title + append, "special": special}
            results["result" + str(i)] = res
            i += 1
    sj_final = results

    return mb_final, sj_final
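# Hedged usage sketch: get() returns two dicts keyed "result0", "result1", ... where the
# movie entries carry a base64 payload ("link;password") for download_bl above and the
# serienjunkies entries carry the media id for download_sj. The search string is illustrative.
def _example_print_search_results(configfile, dbfile):
    mb_final, sj_final = get("Some Title;S01", configfile, dbfile)
    for res in mb_final.values():
        print(res["title"], res["link"])
    for res in sj_final.values():
        print(res["id"], res["title"], res["special"])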
def periodical_task(self):
    if self.filename == 'DJ_Dokus_Regex':
        if not self.config.get('regex'):
            self.log_debug("Suche für DJ-Regex deaktiviert!")
            return self.device
    if self.empty_list:
        self.log_debug(
            "Liste ist leer. Stoppe Suche für Dokus!" + self.listtype)
        return self.device
    try:
        reject = self.config.get("rejectlist").replace(",", "|").lower() if len(
            self.config.get("rejectlist")) > 0 else r"^unmatchable$"
    except TypeError:
        reject = r"^unmatchable$"

    set_dj = self.settings_hash(False)

    header = False

    if self.last_set_dj == set_dj:
        try:
            response = get_url_headers(
                decode_base64('aHR0cDovL2Rva3VqdW5raWVzLm9yZy8='), self.configfile, self.dbfile, self.headers)
            feed = dj_content_to_soup(response.content)
        except:
            response = False
            feed = False
        if response:
            if response.status_code == 304:
                self.log_debug(
                    "DJ-Feed seit letztem Aufruf nicht aktualisiert - breche Suche ab!")
                return self.device
            header = True
    else:
        feed = dj_content_to_soup(
            get_url(decode_base64('aHR0cDovL2Rva3VqdW5raWVzLm9yZy8='), self.configfile, self.dbfile))
        response = False

    if feed and feed.entries:
        first_post_dj = feed.entries[0]
        concat_dj = first_post_dj.title + first_post_dj.published + str(self.settings) + str(self.pattern)
        sha_dj = hashlib.sha256(concat_dj.encode('ascii', 'ignore')).hexdigest()
    else:
        self.log_debug(
            "Feed ist leer - breche Suche ab!")
        return False

    for post in feed.entries:
        if not post.link:
            continue
        concat = post.title + post.published + str(self.settings) + str(self.pattern)
        sha = hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest()
        if sha == self.last_sha_dj:
            self.log_debug(
                "Feed ab hier bereits gecrawlt (" + post.title + ") - breche Suche ab!")
            break

        link = post.link
        title = post.title
        genre = post.genre

        if self.filename == 'DJ_Dokus_Regex':
            if self.config.get("regex"):
                if '[DEUTSCH]' in title or '[TV-FILM]' in title:
                    language_ok = 1
                elif self.rsscrawler.get('english'):
                    language_ok = 2
                else:
                    language_ok = 0
                if language_ok:
                    m = re.search(self.pattern, title.lower())
                    if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                        m = re.search(self.pattern.replace("480p", "."), title.lower())
                        self.quality = "480p"
                    if m:
                        if not re.match(self.genres, genre.lower()):
                            self.log_debug(
                                title + " - Release aufgrund unerwünschten Genres ignoriert (" + genre + ")")
                            continue
                        if "720p" in title.lower():
                            self.quality = "720p"
                        if "1080p" in title.lower():
                            self.quality = "1080p"
                        if "2160p" in title.lower():
                            self.quality = "2160p"
                        m = re.search(reject, title.lower())
                        if m:
                            self.log_debug(
                                title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)")
                        title = re.sub(r'\[.*\] ', '', post.title)
                        self.range_checkr(link, title, language_ok, genre)
                else:
                    self.log_debug(
                        "%s - Englische Releases deaktiviert" % title)
            else:
                continue
        else:
            if self.config.get("quality") != '480p':
                m = re.search(self.pattern, title.lower())
                if m:
                    if not re.match(self.genres, genre.lower()):
                        self.log_debug(
                            title + " - Release aufgrund unerwünschten Genres ignoriert (" + genre + ")")
                        continue
                    if 'german' in title.lower():
                        language_ok = 1
                    elif self.rsscrawler.get('english'):
                        language_ok = 2
                    else:
                        language_ok = 0
                    if language_ok:
                        mm = re.search(self.quality, title.lower())
                        if mm:
                            mmm = re.search(reject, title.lower())
                            if mmm:
                                self.log_debug(
                                    title + " - Release ignoriert (basierend auf rejectlist-Einstellung)")
                                continue
                            if self.rsscrawler.get("surround"):
                                if not re.match(r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title):
                                    self.log_debug(
                                        title + " - Release ignoriert (kein Mehrkanalton)")
                                    continue
                            title = re.sub(r'\[.*\] ', '', post.title)
                            try:
                                storage = self.db.retrieve(title)
                            except Exception as e:
                                self.log_debug(
                                    "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title))
                                return self.device
                            if storage == 'added':
                                self.log_debug(
                                    title + " - Release ignoriert (bereits gefunden)")
                                continue
                            self.range_checkr(link, title, language_ok, genre)
                    else:
                        self.log_debug(
                            "%s - Englische Releases deaktiviert" % title)
            else:
                m = re.search(self.pattern, title.lower())
                if m:
                    if '[DEUTSCH]' in title:
                        language_ok = 1
                    elif self.rsscrawler.get('english'):
                        language_ok = 2
                    else:
                        language_ok = 0
                    if language_ok:
                        if "720p" in title.lower() or "1080p" in title.lower() or "2160p" in title.lower():
                            continue
                        mm = re.search(reject, title.lower())
                        if mm:
                            self.log_debug(
                                title + " Release ignoriert (basierend auf rejectlist-Einstellung)")
                            continue
                        if self.rsscrawler.get("surround"):
                            if not re.match(r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title):
                                self.log_debug(
                                    title + " - Release ignoriert (kein Mehrkanalton)")
                                continue
                        title = re.sub(r'\[.*\] ', '', post.title)
                        try:
                            storage = self.db.retrieve(title)
                        except Exception as e:
                            self.log_debug(
                                "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title))
                            return self.device
                        if storage == 'added':
                            self.log_debug(
                                title + " - Release ignoriert (bereits gefunden)")
                            continue
                        self.range_checkr(link, title, language_ok, genre)
                    else:
                        self.log_debug(
                            "%s - Englische Releases deaktiviert" % title)

    if set_dj:
        new_set_dj = self.settings_hash(True)
        if set_dj == new_set_dj:
            self.cdc.delete("DJSet-" + self.filename)
            self.cdc.store("DJSet-" + self.filename, set_dj)
            self.cdc.delete("DJ-" + self.filename)
            self.cdc.store("DJ-" + self.filename, sha_dj)
            if header and response:
                self.cdc.delete("DJHeaders-" + self.filename)
                self.cdc.store("DJHeaders-" + self.filename, response.headers['Last-Modified'])
    return self.device
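# Hedged sketch: periodical_task skips already-crawled feed entries by hashing
# title + published date + current settings + search pattern and comparing against
# the hash stored after the previous run (self.last_sha_dj / "DJ-<filename>" in the CDC).
def _example_entry_hash(post, settings, pattern):
    concat = post.title + post.published + str(settings) + str(pattern)
    return hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest()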
def ombi(configfile, dbfile, device, log_debug):
    db = RssDb(dbfile, 'Ombi')
    config = RssConfig('Ombi', configfile)
    url = config.get('url')
    api = config.get('api')

    if not url or not api:
        return device

    english = RssConfig('RSScrawler', configfile).get('english')

    try:
        requested_movies = requests.get(url + '/api/v1/Request/movie', headers={'ApiKey': api})
        requested_movies = json.loads(requested_movies.text)
        requested_shows = requests.get(url + '/api/v1/Request/tv', headers={'ApiKey': api})
        requested_shows = json.loads(requested_shows.text)
    except:
        log_debug("Ombi ist nicht erreichbar!")
        return False

    scraper = False

    # Approved but not yet available movie requests
    for r in requested_movies:
        if bool(r.get("approved")):
            if not bool(r.get("available")):
                imdb_id = r.get("imdbId")
                if not db.retrieve('movie_' + str(imdb_id)) == 'added':
                    response = imdb_movie(imdb_id, configfile, dbfile, scraper)
                    title = response[0]
                    if title:
                        scraper = response[1]
                        best_result = search.best_result_bl(title, configfile, dbfile)
                        print(u"Film: " + title + u" durch Ombi hinzugefügt.")
                        if best_result:
                            search.download_bl(best_result, device, configfile, dbfile)
                        if english:
                            title = r.get('title')
                            best_result = search.best_result_bl(title, configfile, dbfile)
                            print(u"Film: " + title + u" durch Ombi hinzugefügt.")
                            if best_result:
                                search.download_bl(best_result, device, configfile, dbfile)
                        db.store('movie_' + str(imdb_id), 'added')
                    else:
                        log_debug("Titel für IMDB-ID nicht abrufbar: " + imdb_id)

    # Approved but not yet available show/season/episode requests
    for r in requested_shows:
        imdb_id = r.get("imdbId")
        infos = None
        child_requests = r.get("childRequests")
        for cr in child_requests:
            if bool(cr.get("approved")):
                if not bool(cr.get("available")):
                    details = cr.get("seasonRequests")
                    for season in details:
                        sn = season.get("seasonNumber")
                        eps = []
                        episodes = season.get("episodes")
                        for episode in episodes:
                            if not bool(episode.get("available")):
                                enr = episode.get("episodeNumber")
                                s = str(sn)
                                if len(s) == 1:
                                    s = "0" + s
                                s = "S" + s
                                e = str(enr)
                                if len(e) == 1:
                                    e = "0" + e
                                se = s + "E" + e
                                if not db.retrieve('show_' + str(imdb_id) + '_' + se) == 'added':
                                    eps.append(enr)
                        if eps:
                            if not infos:
                                infos = imdb_show(imdb_id, configfile, dbfile, scraper)
                            if infos:
                                title = infos[0]
                                all_eps = infos[1]
                                scraper = infos[2]
                                check_sn = False
                                if all_eps:
                                    check_sn = all_eps.get(sn)
                                if check_sn:
                                    sn_length = len(eps)
                                    check_sn_length = len(check_sn)
                                    if check_sn_length > sn_length:
                                        # Only single episodes are missing: try per-episode, fall back to the season
                                        for ep in eps:
                                            e = str(ep)
                                            if len(e) == 1:
                                                e = "0" + e
                                            se = s + "E" + e
                                            payload = search.best_result_sj(title, configfile, dbfile)
                                            if payload:
                                                payload = decode_base64(payload).split("|")
                                                payload = encode_base64(payload[0] + "|" + payload[1] + "|" + se)
                                                added_episode = search.download_sj(payload, configfile, dbfile)
                                                if not added_episode:
                                                    payload = decode_base64(payload).split("|")
                                                    payload = encode_base64(payload[0] + "|" + payload[1] + "|" + s)
                                                    add_season = search.download_sj(payload, configfile, dbfile)
                                                    for e in eps:
                                                        e = str(e)
                                                        if len(e) == 1:
                                                            e = "0" + e
                                                        se = s + "E" + e
                                                        db.store('show_' + str(imdb_id) + '_' + se, 'added')
                                                    if not add_season:
                                                        log_debug(
                                                            u"Konnte kein Release für " + title + " " + se + " finden.")
                                                    break
                                            db.store('show_' + str(imdb_id) + '_' + se, 'added')
                                    else:
                                        # The whole season is missing: request the season pack
                                        payload = search.best_result_sj(title, configfile, dbfile)
                                        if payload:
                                            payload = decode_base64(payload).split("|")
                                            payload = encode_base64(payload[0] + "|" + payload[1] + "|" + s)
                                            search.download_sj(payload, configfile, dbfile)
                                        for ep in eps:
                                            e = str(ep)
                                            if len(e) == 1:
                                                e = "0" + e
                                            se = s + "E" + e
                                            db.store('show_' + str(imdb_id) + '_' + se, 'added')
                                    print(u"Serie/Staffel/Episode: " + title + u" durch Ombi hinzugefügt.")
    return device
def download_sj(payload, configfile, dbfile):
    hostnames = RssConfig('Hostnames', configfile)
    sj = hostnames.get('sj')

    payload = decode_base64(payload).split("|")
    href = payload[0]
    title = payload[1]
    special = payload[2].strip().replace("None", "")

    series_url = 'https://' + sj + href
    series_info = get_url(series_url, configfile, dbfile)
    series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]

    api_url = 'https://' + sj + '/api/media/' + series_id + '/releases'
    releases = get_url(api_url, configfile, dbfile)
    seasons = json.loads(releases)

    listen = ["SJ_Serien", "MB_Staffeln"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if list_title not in cont:
            ListDb(dbfile, liste).store(list_title)

    config = RssConfig('SJ', configfile)
    english_ok = RssConfig('RSScrawler', configfile).get("english")
    quality = config.get('quality')
    ignore = config.get('rejectlist')

    result_seasons = {}
    result_episodes = {}

    for season in seasons:
        releases = seasons[season]
        # First pass: only releases matching the configured quality
        for release in releases['items']:
            name = release['name'].encode('ascii', errors='ignore').decode('utf-8')
            hosters = release['hoster']
            try:
                valid = bool(release['resolution'] == quality)
            except:
                valid = re.match(re.compile(r'.*' + quality + r'.*'), name)
            if valid and special:
                valid = bool("." + special.lower() + "." in name.lower())
            if valid and not english_ok:
                valid = bool(".german." in name.lower())
            if valid:
                valid = False
                for hoster in hosters:
                    if (hoster and check_hoster(hoster, configfile)) or config.get("hoster_fallback"):
                        valid = True
            if valid:
                try:
                    ep = release['episode']
                    if ep:
                        existing = result_episodes.get(season)
                        if existing:
                            for e in existing:
                                if e == ep:
                                    if rate(name, ignore) > rate(existing[e], ignore):
                                        existing.update({ep: name})
                        else:
                            existing = {ep: name}
                        result_episodes.update({season: existing})
                        continue
                except:
                    pass
                existing = result_seasons.get(season)
                dont = False
                if existing:
                    if rate(name, ignore) < rate(existing, ignore):
                        dont = True
                if not dont:
                    result_seasons.update({season: name})

        try:
            if result_seasons[season] and result_episodes[season]:
                del result_episodes[season]
        except:
            pass

        success = False
        try:
            if result_seasons[season]:
                success = True
        except:
            try:
                if result_episodes[season]:
                    success = True
            except:
                pass
        if success:
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)
        else:
            # Fallback pass: accept any quality for this season
            for release in releases['items']:
                name = release['name'].encode('ascii', errors='ignore').decode('utf-8')
                hosters = release['hoster']
                valid = True
                if valid and special:
                    valid = bool("." + special.lower() + "." in name.lower())
                if valid and not english_ok:
                    valid = bool(".german." in name.lower())
                if valid:
                    valid = False
                    for hoster in hosters:
                        if (hoster and check_hoster(hoster, configfile)) or config.get("hoster_fallback"):
                            valid = True
                if valid:
                    try:
                        ep = release['episode']
                        if ep:
                            existing = result_episodes.get(season)
                            if existing:
                                for e in existing:
                                    if e == ep:
                                        if rate(name, ignore) > rate(existing[e], ignore):
                                            existing.update({ep: name})
                            else:
                                existing = {ep: name}
                            result_episodes.update({season: existing})
                            continue
                    except:
                        pass
                    existing = result_seasons.get(season)
                    dont = False
                    if existing:
                        if rate(name, ignore) < rate(existing, ignore):
                            dont = True
                    if not dont:
                        result_seasons.update({season: name})

            try:
                if result_seasons[season] and result_episodes[season]:
                    del result_episodes[season]
            except:
                pass
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)

    matches = []
    for season in result_seasons:
        matches.append(result_seasons[season])
    for season in result_episodes:
        for episode in result_episodes[season]:
            matches.append(result_episodes[season][episode])

    notify_array = []
    for title in matches:
        db = RssDb(dbfile, 'rsscrawler')
        if add_decrypt(title, series_url, sj, dbfile):
            db.store(title, 'added')
            log_entry = u'[Suche/Serie] - ' + title + ' - [SJ]'
            logger.info(log_entry)
            notify_array.append(log_entry)

    notify(notify_array, configfile)

    if not matches:
        return False
    return matches
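# Hedged sketch: download_sj expects a base64 payload of the form "href|title|special"
# (special may literally be "None"); the Ombi code above builds exactly this shape before
# calling it. Helper name and defaults are assumptions for illustration.
def _example_sj_payload(href, title, special=None):
    return encode_base64(href + "|" + title + "|" + str(special))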
def download_bl(payload, device, configfile, dbfile):
    hostnames = RssConfig('Hostnames', configfile)
    mb = hostnames.get('mb')
    nk = hostnames.get('nk')
    fc = hostnames.get('fc').replace('www.', '').split('.')[0]

    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]

    url = get_url(link, configfile, dbfile)
    if not url or "NinjaFirewall 429" in url:
        return False

    config = RssConfig('MB', configfile)
    db = RssDb(dbfile, 'rsscrawler')
    soup = BeautifulSoup(url, 'lxml')

    site = check_is_site(link, configfile)
    if not site:
        return False
    else:
        if "MB" in site:
            if not fc:
                print(u"FC Hostname nicht gesetzt. MB kann keine Links finden!")
                return False
            key = soup.find("span", {"class": "fn"}).text
            hosters = soup.find_all("a", href=re.compile(fc))
            url_hosters = []
            for hoster in hosters:
                dl = hoster["href"]
                hoster = hoster.text
                url_hosters.append([dl, hoster])
        elif "HW" in site:
            if not fc:
                print(u"FC Hostname nicht gesetzt. MB kann keine Links finden!")
                return False
            key = re.findall(r'Permanent Link: (.*?)"', str(soup)).pop()
            hosters = soup.find_all("a", href=re.compile(fc))
            url_hosters = []
            for hoster in hosters:
                dl = hoster["href"]
                hoster = hoster.text
                url_hosters.append([dl, hoster])
        elif "HS" in site:
            download = soup.find("div", {"class": "entry-content"})
            key = soup.find("h2", {"class": "entry-title"}).text
            url_hosters = re.findall(r'href="([^"\'>]*)".+?(.+?)<', str(download))
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            key = payload[1]
            password = payload[2]
        else:
            return False

    links = {}
    if "MB" in site or "HW" in site or "HS" in site or "NK" in site:
        for url_hoster in reversed(url_hosters):
            try:
                if mb.split('.')[0] not in url_hoster[0] and "https://goo.gl/" not in url_hoster[0]:
                    link_hoster = url_hoster[1].lower().replace('target="_blank">', '').replace(" ", "-")
                    if check_hoster(link_hoster, configfile):
                        links[link_hoster] = url_hoster[0]
            except:
                pass
        if config.get("hoster_fallback") and not links:
            for url_hoster in reversed(url_hosters):
                if mb.split('.')[0] not in url_hoster[0] and "https://goo.gl/" not in url_hoster[0]:
                    link_hoster = url_hoster[1].lower().replace('target="_blank">', '').replace(" ", "-")
                    links[link_hoster] = url_hoster[0]
        download_links = list(links.values())
    elif "FX" in site:
        download_links = fx_download_links(url, key, configfile)

    englisch = False
    if "*englisch" in key.lower() or "*english" in key.lower():
        key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace(
            "*ENGLISH", "").replace("*English", "").replace("*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    if config.get('enforcedl') and '.dl.' not in key.lower():
        fail = False
        get_imdb_url = url
        key_regex = (r'<title>' + re.escape(key) +
                     r'.*?<\/title>\n.*?<link>(?:(?:.*?\n){1,25}).*?[mM][kK][vV].*?(?:|href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?)[iI][mM][dD][bB].*?(?!\d(?:\.|\,)\d)(?:.|.*?)<\/a>')
        imdb_id = re.findall(key_regex, get_imdb_url)
        if len(imdb_id) > 0:
            if not imdb_id[0]:
                fail = True
            else:
                imdb_id = imdb_id[0]
        else:
            fail = True
        if fail:
            try:
                search_title = re.findall(
                    r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)", key)[0].replace(".", "+")
                search_url = "http://www.imdb.com/find?q=" + search_title
                search_page = get_url(search_url, configfile, dbfile)
                search_results = re.findall(
                    r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                    search_page)
                total_results = len(search_results)
            except:
                return False
            if staffel:
                try:
                    imdb_id = search_results[0][0]
                except:
                    imdb_id = False
            else:
                no_series = False
                while total_results > 0:
                    attempt = 0
                    for result in search_results:
                        if result[3] == "TV Series":
                            no_series = False
                            total_results -= 1
                            attempt += 1
                        else:
                            no_series = True
                            imdb_id = search_results[attempt][0]
                            total_results = 0
                            break
                if no_series is False:
                    logger.debug(
                        "%s - Keine passende Film-IMDB-Seite gefunden" % key)

        if staffel:
            filename = 'MB_Staffeln'
        else:
            filename = 'MB_Filme'
        scraper = cloudscraper.create_scraper()
        blog = BL(configfile, dbfile, device, logging, scraper, filename=filename)

        if not imdb_id:
            if not blog.dual_download(key, password):
                logger.debug("%s - Kein zweisprachiges Release gefunden." % key)
        else:
            if isinstance(imdb_id, list):
                imdb_id = imdb_id.pop()
            imdb_url = "http://www.imdb.com/title/" + imdb_id
            details = get_url(imdb_url, configfile, dbfile)
            if not details:
                logger.debug("%s - Originalsprache nicht ermittelbar" % key)
            original_language = re.findall(
                r"Language:<\/h4>\n.*?\n.*?url'>(.*?)<\/a>", details)
            if original_language:
                original_language = original_language[0]
            if original_language == "German":
                logger.debug(
                    "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!" % key)
            else:
                if not blog.dual_download(key, password) and not englisch:
                    logger.debug(
                        "%s - Kein zweisprachiges Release gefunden!" % key)

    if download_links:
        if staffel:
            if myjd_download(configfile, dbfile, device, key, "RSScrawler", download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Staffel] - ' + key.replace(
                    ".COMPLETE", "").replace(".Complete", "") + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return True
        elif '.3d.' in key.lower():
            retail = False
            if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                if config.get('enforcedl'):
                    if is_retail(key, '2', dbfile):
                        retail = True
            if myjd_download(configfile, dbfile, device, key, "RSScrawler/3Dcrawler", download_links, password):
                db.store(
                    key,
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film' + ('/Retail' if retail else "") + '/3D] - ' + key + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                if config.get('enforcedl'):
                    if is_retail(key, '1', dbfile):
                        retail = True
                else:
                    if is_retail(key, '0', dbfile):
                        retail = True
            if myjd_download(configfile, dbfile, device, key, "RSScrawler", download_links, password):
                db.store(
                    key,
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film' + \
                            ('/Englisch' if englisch and not retail else '') + \
                            ('/Englisch/Retail' if englisch and retail else '') + \
                            ('/Retail' if not englisch and retail else '') + \
                            '] - ' + key + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return [key]
    else:
        return False
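# Hedged sketch: download_bl expects a base64 payload "link|password"; for FX results the
# calling search code apparently appends a third field that is used as the password, with
# the second field taking the role of the release key. Helper name is an assumption.
def _example_bl_payload(link, password):
    return encode_base64(link + "|" + password)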