def post_url(url, configfile, dbfile, data, scraper=False):
    """POST 'data' to 'url', honouring the per-site proxy/fallback flags
    stored in the proxystatus/normalstatus tables."""
    config = RssConfig('RSScrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()

    db = RssDb(dbfile, 'proxystatus')
    db_normal = RssDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    # Temporary fix for FX: use a plain requests session, carry over the
    # scraper's headers/cookies and skip certificate verification.
    if site and "FX" in site:
        fx_session = requests.session()
        fx_session.headers = scraper.headers
        fx_session.cookies = scraper.cookies
        fx_session.verify = False
        scraper = fx_session

    sites = ["SJ", "DJ", "SF", "MB", "HW", "FX", "HS", "NK", "DD", "FC"]

    if proxy:
        try:
            if site:
                for shorthand in sites:
                    if shorthand in site:
                        if db.retrieve(shorthand):
                            # The proxy is flagged as blocked for this site.
                            if config.get("fallback") and not db_normal.retrieve(shorthand):
                                # Fall back to a direct request without the proxy.
                                return scraper.post(url, data, timeout=30).content
                            return ""
                        break
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.post(url, data, proxies=proxies, timeout=30).content
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ""
    else:
        try:
            if site:
                for shorthand in sites:
                    if shorthand in site:
                        if db_normal.retrieve(shorthand):
                            # Direct access is flagged as blocked for this site.
                            return ""
                        break
            response = scraper.post(url, data, timeout=30).content
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ""
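
# Usage sketch (illustrative, not part of RSScrawler): how post_url() would be
# called for a form POST against a configured site. The config/db file names
# and the search string below are assumptions for the example only.
def _example_post_url(configfile="RSScrawler.ini", dbfile="RSScrawler.db"):
    nk = RssConfig('Hostnames', configfile).get('nk')
    if not nk:
        return ""
    # Returns the raw response body, or "" if the site is flagged as blocked.
    return post_url('https://' + nk + '/search', configfile, dbfile,
                    data={'search': 'Some.Movie.Title 1080p'})
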
def download_bl(payload, device, configfile, dbfile):
    """Resolve a search payload (release page link + password) to download
    links and push them to JDownloader via myjd_download."""
    hostnames = RssConfig('Hostnames', configfile)
    mb = hostnames.get('mb')
    nk = hostnames.get('nk')
    fc = hostnames.get('fc').replace('www.', '').split('.')[0]

    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]

    url = get_url(link, configfile, dbfile)
    if not url or "NinjaFirewall 429" in url:
        return False

    config = RssConfig('MB', configfile)
    db = RssDb(dbfile, 'rsscrawler')
    soup = BeautifulSoup(url, 'lxml')

    site = check_is_site(link, configfile)
    if not site:
        return False
    else:
        if "MB" in site:
            if not fc:
                print(u"FC Hostname nicht gesetzt. MB kann keine Links finden!")
                return False
            key = soup.find("span", {"class": "fn"}).text
            hosters = soup.find_all("a", href=re.compile(fc))
            url_hosters = []
            for hoster in hosters:
                dl = hoster["href"]
                hoster = hoster.text
                url_hosters.append([dl, hoster])
        elif "HW" in site:
            if not fc:
                print(u"FC Hostname nicht gesetzt. HW kann keine Links finden!")
                return False
            key = re.findall(r'Permanent Link: (.*?)"', str(soup)).pop()
            hosters = soup.find_all("a", href=re.compile(fc))
            url_hosters = []
            for hoster in hosters:
                dl = hoster["href"]
                hoster = hoster.text
                url_hosters.append([dl, hoster])
        elif "HS" in site:
            download = soup.find("div", {"class": "entry-content"})
            key = soup.find("h2", {"class": "entry-title"}).text
            url_hosters = re.findall(r'href="([^"\'>]*)".+?(.+?)<',
                                     str(download))
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(
                    ['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            key = payload[1]
            password = payload[2]
        else:
            return False

    links = {}
    if "MB" in site or "HW" in site or "HS" in site or "NK" in site:
        for url_hoster in reversed(url_hosters):
            try:
                if (mb.split('.')[0] not in url_hoster[0]
                        and "https://goo.gl/" not in url_hoster[0]):
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-")
                    if check_hoster(link_hoster, configfile):
                        links[link_hoster] = url_hoster[0]
            except:
                pass
        if config.get("hoster_fallback") and not links:
            # No allowed hoster found - accept any hoster as a fallback.
            for url_hoster in reversed(url_hosters):
                if (mb.split('.')[0] not in url_hoster[0]
                        and "https://goo.gl/" not in url_hoster[0]):
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-")
                    links[link_hoster] = url_hoster[0]
        download_links = list(links.values())
    elif "FX" in site:
        download_links = fx_download_links(url, key, configfile)

    englisch = False
    if "*englisch" in key.lower() or "*english" in key.lower():
        key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace(
            "*ENGLISH", "").replace("*English", "").replace("*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    if config.get('enforcedl') and '.dl.' not in key.lower():
        fail = False
        get_imdb_url = url
        key_regex = r'<title>' + \
            re.escape(key) + \
            r'.*?<\/title>\n.*?<link>(?:(?:.*?\n){1,25}).*?[mM][kK][vV].*?(?:|href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?)[iI][mM][dD][bB].*?(?!\d(?:\.|\,)\d)(?:.|.*?)<\/a>'
        imdb_id = re.findall(key_regex, get_imdb_url)
        if len(imdb_id) > 0:
            if not imdb_id[0]:
                fail = True
            else:
                imdb_id = imdb_id[0]
        else:
            fail = True
        if fail:
            # No IMDb link on the release page - search IMDb by title instead.
            try:
                search_title = re.findall(
                    r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)",
                    key)[0].replace(".", "+")
                search_url = "http://www.imdb.com/find?q=" + search_title
                search_page = get_url(search_url, configfile, dbfile)
                search_results = re.findall(
                    r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                    search_page)
                total_results = len(search_results)
            except:
                return False
            if staffel:
                try:
                    imdb_id = search_results[0][0]
                except:
                    imdb_id = False
            else:
                no_series = False
                while total_results > 0:
                    attempt = 0
                    for result in search_results:
                        if result[3] == "TV Series":
                            no_series = False
                            total_results -= 1
                            attempt += 1
                        else:
                            no_series = True
                            imdb_id = search_results[attempt][0]
                            total_results = 0
                            break
                if no_series is False:
                    logger.debug(
                        "%s - Keine passende Film-IMDB-Seite gefunden" % key)

        if staffel:
            filename = 'MB_Staffeln'
        else:
            filename = 'MB_Filme'
        scraper = cloudscraper.create_scraper()
        blog = BL(configfile, dbfile, device, logging, scraper,
                  filename=filename)

        if not imdb_id:
            if not blog.dual_download(key, password):
                logger.debug(
                    "%s - Kein zweisprachiges Release gefunden." % key)
        else:
            if isinstance(imdb_id, list):
                imdb_id = imdb_id.pop()
            imdb_url = "http://www.imdb.com/title/" + imdb_id
            details = get_url(imdb_url, configfile, dbfile)
            if not details:
                logger.debug("%s - Originalsprache nicht ermittelbar" % key)
            original_language = re.findall(
                r"Language:<\/h4>\n.*?\n.*?url'>(.*?)<\/a>", details)
            if original_language:
                original_language = original_language[0]
            if original_language == "German":
                logger.debug(
                    "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!"
                    % key)
            else:
                if not blog.dual_download(key, password) and not englisch:
                    logger.debug(
                        "%s - Kein zweisprachiges Release gefunden!" % key)

    if download_links:
        if staffel:
            if myjd_download(configfile, dbfile, device, key, "RSScrawler",
                             download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get('enforcedl')
                    and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Staffel] - ' + key.replace(
                    ".COMPLETE", "").replace(".Complete",
                                             "") + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return True
        elif '.3d.' in key.lower():
            retail = False
            # Only look for a retail cutoff if this is not a COMPLETE disc.
            if config.get('cutoff') and '.complete.' not in key.lower():
                if config.get('enforcedl'):
                    if is_retail(key, '2', dbfile):
                        retail = True
            if myjd_download(configfile, dbfile, device, key,
                             "RSScrawler/3Dcrawler", download_links,
                             password):
                db.store(
                    key, 'notdl' if config.get('enforcedl')
                    and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film' + (
                    '/Retail' if retail else
                    "") + '/3D] - ' + key + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            # Only look for a retail cutoff if this is not a COMPLETE disc.
            if config.get('cutoff') and '.complete.' not in key.lower():
                if config.get('enforcedl'):
                    if is_retail(key, '1', dbfile):
                        retail = True
                else:
                    if is_retail(key, '0', dbfile):
                        retail = True
            if myjd_download(configfile, dbfile, device, key, "RSScrawler",
                             download_links, password):
                db.store(
                    key, 'notdl' if config.get('enforcedl')
                    and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film' + (
                    '/Englisch' if englisch and not retail else '') + (
                        '/Englisch/Retail' if englisch and retail else '') + (
                            '/Retail' if not englisch and retail else
                            '') + '] - ' + key + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return [key]
    else:
        return False
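
# Usage sketch (illustrative only): download_bl() expects a base64 payload of
# "<release page link>|<password>" (FX payloads carry the release key as a
# third field) plus a connected JDownloader device. The link and password
# below are placeholders, not real values.
def _example_download_bl(device, configfile="RSScrawler.ini",
                         dbfile="RSScrawler.db"):
    payload = encode_base64(
        "https://example-blog.invalid/some-release/|example-password")
    # Returns True or [key] after a successful push to JDownloader,
    # False otherwise.
    return download_bl(payload, device, configfile, dbfile)
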
def get_url_headers(url, configfile, dbfile, headers, scraper=False):
    """GET 'url' with custom headers, honouring the per-site proxy/fallback
    flags. Returns [response, scraper] so the session can be reused."""
    config = RssConfig('RSScrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()

    db = RssDb(dbfile, 'proxystatus')
    db_normal = RssDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    sites = ["SJ", "DJ", "SF", "MB", "HW", "FX", "HS", "NK", "DD", "FC"]

    if proxy:
        try:
            if site:
                for shorthand in sites:
                    if shorthand in site:
                        if db.retrieve(shorthand):
                            # The proxy is flagged as blocked for this site.
                            if config.get("fallback") and not db_normal.retrieve(shorthand):
                                # Fall back to a direct request without the proxy.
                                return [
                                    scraper.get(url, headers=headers, timeout=30),
                                    scraper
                                ]
                            return ["", scraper]
                        break
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.get(url, headers=headers, proxies=proxies,
                                   timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]
    else:
        try:
            if site:
                for shorthand in sites:
                    if shorthand in site:
                        if db_normal.retrieve(shorthand):
                            # Direct access is flagged as blocked for this site.
                            return ["", scraper]
                        break
            response = scraper.get(url, headers=headers, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]
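
# Usage sketch (illustrative only): get_url_headers() returns a
# [response, scraper] pair so the (possibly newly created) cloudscraper
# session can be reused for follow-up requests. The hostname lookup, URL path
# and header values here are assumptions for the example.
def _example_get_url_headers(configfile="RSScrawler.ini",
                             dbfile="RSScrawler.db"):
    sj = RssConfig('Hostnames', configfile).get('sj')
    headers = {'Accept': 'application/json'}
    response, scraper = get_url_headers(
        'https://' + sj + '/api/releases/latest', configfile, dbfile, headers)
    # "response" is "" when the site is flagged as blocked, otherwise a
    # requests Response object.
    return response, scraper
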
def get(title, configfile, dbfile, bl_only=False, sj_only=False):
    """Manual search: query the configured blog hosters (and SJ) for 'title'
    and return the rated results as (bl_final, sj_final) dicts."""
    hostnames = RssConfig('Hostnames', configfile)
    mb = hostnames.get('mb')
    hw = hostnames.get('hw')
    hs = hostnames.get('hs')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    sj = hostnames.get('sj')

    specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season or specific_episode:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    else:
        special = None

    bl_final = {}
    sj_final = {}
    scraper = cloudscraper.create_scraper()

    if not sj_only:
        mb_query = sanitize(title).replace(" ", "+")
        if special:
            bl_query = mb_query + "+" + special
        else:
            bl_query = mb_query

        unrated = []

        config = RssConfig('MB', configfile)
        quality = config.get('quality')
        ignore = config.get('ignore')

        def skip_for_480p(release_title):
            # With 480p configured, skip HD/UHD releases and complete BluRays.
            lowered = release_title.lower()
            return "480p" in quality and any(
                tag in lowered
                for tag in ("720p", "1080p", "1080i", "2160p",
                            "complete.bluray", "complete.mbluray",
                            "complete.uhd.bluray"))

        if "480p" not in quality:
            search_quality = "+" + quality
        else:
            search_quality = ""

        if mb:
            mb_search = 'https://' + mb + '/search/' + bl_query + search_quality + '/feed/rss2/'
        else:
            mb_search = None
        if hw:
            hw_search = 'https://' + hw + '/search/' + bl_query + search_quality + '/feed/rss2/'
        else:
            hw_search = None
        if hs:
            hs_search = 'https://' + hs + '/search/' + bl_query + search_quality + '/feed'
        else:
            hs_search = None
        if fx:
            fx_search = 'https://' + fx + '/?s=' + bl_query
        else:
            fx_search = None

        async_results = get_urls_async(
            [mb_search, hw_search, hs_search, fx_search], configfile, dbfile,
            scraper)
        scraper = async_results[1]
        async_results = async_results[0]

        mb_results = []
        hw_results = []
        hs_results = []
        fx_results = []

        for res in async_results:
            if check_is_site(res, configfile) == 'MB':
                mb_results = re.findall(
                    r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
            elif check_is_site(res, configfile) == 'HW':
                hw_results = re.findall(
                    r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
            elif check_is_site(res, configfile) == 'HS':
                hs_results = hs_search_results(res)
            elif check_is_site(res, configfile) == 'FX':
                fx_results = fx_search_results(fx_content_to_soup(res),
                                               configfile, dbfile, scraper)

        if nk:
            nk_search = post_url(
                'https://' + nk + "/search",
                configfile,
                dbfile,
                data={'search': bl_query.replace("+", " ") + " " + quality})
            nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
        else:
            nk_results = []

        password = mb
        for result in mb_results:
            if skip_for_480p(result[0]):
                continue
            if not result[0].endswith("-MB") and not result[0].endswith(".MB"):
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (MB)"
                ])

        password = hw
        for result in hw_results:
            if skip_for_480p(result[0]):
                continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (HW)"
            ])

        password = hs
        for result in hs_results:
            if skip_for_480p(result[0]):
                continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (HS)"
            ])

        password = fx.split('.')[0]
        for result in fx_results:
            if skip_for_480p(result[0]):
                continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (FX)"
            ])

        password = nk.split('.')[0].capitalize()
        for result in nk_results:
            if skip_for_480p(result[0]):
                continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (NK)"
            ])

        if config.get("crawl3d"):
            if mb:
                mb_search = 'https://' + mb + '/search/' + bl_query + search_quality + "+3D/feed/rss2/"
            else:
                mb_search = None
            if hw:
                hw_search = 'https://' + hw + '/search/' + bl_query + search_quality + "+3D/feed/rss2/"
            else:
                hw_search = None
            if hs:
                hs_search = 'https://' + hs + '/search/' + bl_query + search_quality + '+3D/feed'
            else:
                hs_search = None
            if fx:
                fx_search = 'https://' + fx + '/?s=' + bl_query + "+3D"
            else:
                fx_search = None

            async_results = get_urls_async(
                [mb_search, hw_search, hs_search, fx_search], configfile,
                dbfile, scraper)
            async_results = async_results[0]

            mb_results = []
            hw_results = []
            hs_results = []
            fx_results = []

            for res in async_results:
                if check_is_site(res, configfile) == 'MB':
                    mb_results = re.findall(
                        r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
                elif check_is_site(res, configfile) == 'HW':
                    hw_results = re.findall(
                        r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
                elif check_is_site(res, configfile) == 'HS':
                    hs_results = hs_search_results(res)
                elif check_is_site(res, configfile) == 'FX':
                    fx_results = re.findall(
                        r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)

            if nk:
                nk_search = post_url(
                    'https://' + nk + "/search",
                    configfile,
                    dbfile,
                    data={
                        'search':
                        bl_query.replace("+", " ") + " " + quality + "3D"
                    })
                nk_results = nk_search_results(nk_search,
                                               'https://' + nk + '/')
            else:
                nk_results = []

            password = mb
            for result in mb_results:
                if not result[1].endswith("-MB") and not result[1].endswith(
                        ".MB"):
                    unrated.append([
                        rate(result[0], ignore),
                        encode_base64(result[1] + "|" + password),
                        result[0] + " (3D-MB)"
                    ])

            password = hw
            for result in hw_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-HW)"
                ])

            password = hs
            for result in hs_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-HS)"
                ])

            password = fx.split('.')[0]
            for result in fx_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-FX)"
                ])

            password = nk.split('.')[0].capitalize()
            for result in nk_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-NK)"
                ])

        rated = sorted(unrated, reverse=True)

        results = {}
        i = 0
        for result in rated:
            res = {"payload": result[1], "title": result[2]}
            results["result" + str(i + 1000)] = res
            i += 1
        bl_final = results

    if not bl_only:
        if sj:
            sj_query = sanitize(title).replace(" ", "+")
            sj_search = get_url(
                'https://' + sj + '/serie/search?q=' + sj_query, configfile,
                dbfile, scraper)
            try:
                sj_results = BeautifulSoup(sj_search, 'lxml').findAll(
                    "a", href=re.compile("/serie"))
            except:
                sj_results = []
        else:
            sj_results = []

        if special:
            append = " (" + special + ")"
        else:
            append = ""
        i = 0
        results = {}
        for result in sj_results:
            r_title = result.text
            r_rating = fuzz.ratio(title.lower(), r_title)
            if r_rating > 40:
                res = {
                    "payload": encode_base64(
                        result['href'] + "|" + r_title + "|" + str(special)),
                    "title": r_title + append
                }
                results["result" + str(i + 1000)] = res
                i += 1
        sj_final = results

    return bl_final, sj_final
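
# Usage sketch (illustrative only): get() performs the manual search and
# returns two dicts (blog results, SJ results), keyed "result1000",
# "result1001", ..., whose base64 payloads can be handed to download_bl().
# The search title and config/db file names are assumptions for the example.
def _example_get(configfile="RSScrawler.ini", dbfile="RSScrawler.db"):
    bl_results, sj_results = get("Some Movie Title", configfile, dbfile)
    for res in bl_results.values():
        print(res["title"], res["payload"])
    return bl_results, sj_results
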