Example No. 1
def get_original_language(key, imdb_details, imdb_url, configfile, dbfile,
                          scraper, log_debug):
    original_language = False
    # Fetch the detail page first if only the IMDb URL was passed in
    if not imdb_details and imdb_url:
        imdb_details = get_url(imdb_url, configfile, dbfile, scraper)
    if imdb_details:
        soup = BeautifulSoup(imdb_details, 'lxml')
        # First markup variant: <h4>Language:</h4> followed by an <a> tag
        try:
            original_language = soup.find(
                'h4', text=re.compile(r'Language:')).parent.find("a").text
        except AttributeError:
            pass
        if not original_language:
            # Alternative markup: <h3>Language</h3> with the value nested a
            # few nodes below
            try:
                original_language = soup.find(
                    'h3', text=re.compile(r'Language')
                ).next.next.next.text.strip().replace("\n", "").split(",")[0]
            except AttributeError:
                pass

    if not original_language:
        log_debug("%s - Originalsprache nicht ermittelbar" % key)

    if original_language and original_language == "German":
        log_debug(
            "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!"
            % key)
        return False
    else:
        return original_language
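
A minimal, self-contained sketch of the first parsing path above, run against invented markup (real IMDb pages differ):

# Sketch of the <h4>Language:</h4> path; the markup is invented.
import re
from bs4 import BeautifulSoup

html = '<div><h4>Language:</h4> <a href="#">French</a></div>'
soup = BeautifulSoup(html, 'lxml')
print(soup.find('h4', text=re.compile(r'Language:')).parent.find('a').text)  # French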
Example No. 2
def j_parse_download(self, series_url, title, language_id):
    if not check_valid_release(title, self.retail_only, self.hevc_retail,
                               self.dbfile):
        self.log_debug(
            title +
            u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
        )
        return False
    if self.filename == 'List_ContentAll_Seasons':
        if not self.config.get("seasonpacks"):
            staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
            if staffelpack:
                self.log_debug("%s - Release ignoriert (Staffelpaket)" % title)
                return False
        if not re.search(self.seasonssource, title.lower()):
            self.log_debug(title + " - Release hat falsche Quelle")
            return False
    try:
        series_info = get_url(series_url, self.configfile, self.dbfile)
        series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
        api_url = 'https://' + self.url + '/api/media/' + series_id + '/releases'

        response = get_url(api_url, self.configfile, self.dbfile, self.scraper)
        seasons = json.loads(response)
        for season in seasons:
            season = seasons[season]
            for item in season['items']:
                if item['name'] == title:
                    valid = False
                    for hoster in item['hoster']:
                        if hoster:
                            if check_hoster(hoster, self.configfile):
                                valid = True
                    if not valid and not self.hoster_fallback:
                        storage = self.db.retrieve_all(title)
                        if 'added' not in storage and 'notdl' not in storage:
                            wrong_hoster = '[SJ/Hoster fehlt] - ' + title
                            if 'wrong_hoster' not in storage:
                                print(wrong_hoster)
                                self.db.store(title, 'wrong_hoster')
                                notify([wrong_hoster], self.configfile)
                            else:
                                self.log_debug(wrong_hoster)
                            return False
                    else:
                        return [title, series_url, language_id, False, False]
    except:
        print(self._INTERNAL_NAME +
              u" hat die Serien-API angepasst. Breche Download-Prüfung ab!")
        return False
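
The hoster gate in the middle of j_parse_download reduces to a small pattern: a release passes if any of its hosters is allowed, or unconditionally when the fallback flag is set. A hedged sketch, with a plain set lookup standing in for check_hoster:

# Sketch only: the real check_hoster consults the config file; a set
# lookup stands in for it here.
def release_is_wanted(hosters, allowed, hoster_fallback=False):
    valid = any(h in allowed for h in hosters if h)
    return valid or hoster_fallback

print(release_is_wanted(['rapidgator', 'ddl'], {'ddl'}))  # True
print(release_is_wanted(['rapidgator'], {'ddl'}, True))   # True (fallback)
print(release_is_wanted(['rapidgator'], {'ddl'}))         # False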
Example No. 3
def ww_get_download_links(self, content, title):
    base_url = "https://" + CrawlerConfig('Hostnames',
                                          self.configfile).get('ww')
    content = content.replace("mkv|", "")
    download_links = []
    try:
        response = get_url(content, self.configfile, self.dbfile, self.scraper)
        if not response or "NinjaFirewall 429" in response:
            print(
                u"WW hat den Link-Abruf für " + title +
                " blockiert. Eine spätere Anfrage hat möglicherweise Erfolg!")
            return False
        links = BeautifulSoup(response,
                              'lxml').findAll("div", {"id": "download-links"})
        for link in links:
            hoster = link.text
            if 'Direct Download 100 MBit/s' not in hoster:
                url = base_url + link.find("a")["href"]
                download_links.append('href="' + url + '" ' + hoster + '<')
        download_links = "".join(download_links)

        download_links = get_download_links(self, download_links, title)
        return download_links
    except:
        return False
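
The loop above emits a single string of pseudo-anchor fragments ('href="…" Hoster<') that get_download_links parses afterwards. A self-contained sketch on invented markup:

# Sketch of the link harvest; markup and hostname are invented.
from bs4 import BeautifulSoup

html = ('<div id="download-links"><a href="/dl/1">Hoster A</a></div>'
        '<div id="download-links"><a href="/dl/2">Direct Download 100 MBit/s</a></div>')
base_url = 'https://ww.example'
download_links = []
for link in BeautifulSoup(html, 'lxml').findAll('div', {'id': 'download-links'}):
    hoster = link.text
    if 'Direct Download 100 MBit/s' not in hoster:
        download_links.append('href="' + base_url + link.find('a')['href'] + '" ' + hoster + '<')
print(''.join(download_links))  # href="https://ww.example/dl/1" Hoster A<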
Example No. 4
def fx_search_results(content, configfile, dbfile, scraper):
    articles = content.find("main").find_all("article")
    result_urls = []
    for article in articles:
        url = article.find("a")["href"]
        if url:
            result_urls.append(url)

    items = []

    if result_urls:
        results = []
        for url in result_urls:
            results.append(get_url(url, configfile, dbfile, scraper))

        for result in results:
            article = BeautifulSoup(str(result), 'lxml')
            titles = article.find_all("a", href=re.compile("filecrypt"))
            for title in titles:
                link = article.find("link", rel="canonical")["href"]
                title = title.text.encode("ascii",
                                          errors="ignore").decode().replace(
                                              "/", "")
                if title:
                    if "download" in title.lower():
                        try:
                            title = str(
                                content.find("strong",
                                             text=re.compile(
                                                 r".*Release.*")).nextSibling)
                        except:
                            continue
                    items.append([title, link + "|" + title])
    return items
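
fx_search_results pairs each filecrypt anchor with the page's canonical URL. A self-contained sketch on invented markup:

# Sketch of the title/link pairing; markup is invented.
import re
from bs4 import BeautifulSoup

html = ('<html><head><link rel="canonical" href="https://fx.example/release"/></head>'
        '<body><a href="https://filecrypt.example/abc">Some.Movie.1080p/</a></body></html>')
article = BeautifulSoup(html, 'lxml')
link = article.find('link', rel='canonical')['href']
for a in article.find_all('a', href=re.compile('filecrypt')):
    title = a.text.encode('ascii', errors='ignore').decode().replace('/', '')
    print([title, link + '|' + title])
# ['Some.Movie.1080p', 'https://fx.example/release|Some.Movie.1080p']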
Example No. 5
    def periodical_task(self):
        feeds = self.config.get("feeds")
        if feeds:
            added_items = []
            feeds = feeds.replace(" ", "").split(',')
            for feed in feeds:
                feed = feedparser.parse(
                    get_url(feed, self.configfile, self.dbfile, self.scraper))
                for post in feed.entries:
                    key = post.title.replace(" ", ".")

                    epoch = datetime(1970, 1, 1)
                    current_epoch = int(time())
                    published_format = "%Y-%m-%d %H:%M:%S+00:00"
                    published_timestamp = str(parser.parse(post.published))
                    published_epoch = int((datetime.strptime(
                        published_timestamp, published_format) -
                                           epoch).total_seconds())
                    if (current_epoch - 1800) > published_epoch:
                        link_pool = post.summary
                        unicode_links = re.findall(r'(http.*)', link_pool)
                        links = []
                        for link in unicode_links:
                            if check_hoster(link, self.configfile):
                                links.append(str(link))
                        if self.config.get("hoster_fallback") and not links:
                            for link in unicode_links:
                                links.append(str(link))
                        storage = self.db.retrieve_all(key)
                        if not links:
                            if 'added' not in storage and 'notdl' not in storage:
                                wrong_hoster = '[' + self._SITE + '/Hoster fehlt] - ' + key
                                if 'wrong_hoster' not in storage:
                                    print(wrong_hoster)
                                    self.db.store(key, 'wrong_hoster')
                                    notify([wrong_hoster], self.configfile)
                                else:
                                    self.log_debug(wrong_hoster)
                        elif 'added' in storage:
                            self.log_debug(
                                "%s - Release ignoriert (bereits gefunden)" %
                                key)
                        else:
                            self.device = myjd_download(
                                self.configfile, self.dbfile, self.device, key,
                                "FeedCrawler", links, "")
                            if self.device:
                                self.db.store(key, 'added')
                                log_entry = '[Englisch] - ' + key + ' - [' + self._SITE + ']'
                                self.log_info(log_entry)
                                notify([log_entry], self.configfile)
                                added_items.append(log_entry)
                    else:
                        self.log_debug(
                            "%s - Releasezeitpunkt weniger als 30 Minuten in der Vergangenheit - wird ignoriert."
                            % key)
        else:
            self.log_debug("Liste ist leer. Stoppe Suche für " + self._SITE +
                           "!")
        return self.device
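
The timestamp arithmetic above only lets posts through that were published more than 30 minutes ago. Isolated, with an invented date:

# Sketch of the 30-minute age gate; the date is invented.
from datetime import datetime
from time import time
from dateutil import parser

epoch = datetime(1970, 1, 1)
published_timestamp = str(parser.parse('2024-01-01T12:00:00+00:00'))
published_epoch = int((datetime.strptime(
    published_timestamp, '%Y-%m-%d %H:%M:%S+00:00') - epoch).total_seconds())
if (int(time()) - 1800) > published_epoch:
    print('older than 30 minutes - processed')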
Example No. 6
def nk_page_download_link(self, download_link, key):
    unused_get_feed_parameter(key)
    nk = self.hostnames.get('nk')
    download_link = get_url(download_link, self.configfile, self.dbfile)
    soup = BeautifulSoup(download_link, 'lxml')
    url_hosters = []
    hosters = soup.find_all("a", href=re.compile("/go/"))
    for hoster in hosters:
        url_hosters.append(['https://' + nk + hoster["href"], hoster.text])
    return check_download_links(self, url_hosters)
Example No. 7
def dw_parse_download(self, release_url, title, language_id):
    if not check_valid_release(title, self.retail_only, self.hevc_retail,
                               self.dbfile):
        self.log_debug(
            title +
            u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
        )
        return False
    if self.filename == 'List_ContentAll_Seasons':
        if not self.config.get("seasonpacks"):
            staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
            if staffelpack:
                self.log_debug("%s - Release ignoriert (Staffelpaket)" % title)
                return False
        if not re.search(self.seasonssource, title.lower()):
            self.log_debug(title + " - Release hat falsche Quelle")
            return False
    try:
        release_info = get_url(release_url, self.configfile, self.dbfile)
        post_hosters = BeautifulSoup(release_info, 'lxml').find(
            "div", {
                "id": "download"
            }).findAll("img", src=re.compile(r"images/hosterimg"))
        hosters = []
        valid = False
        for hoster in post_hosters:
            hoster = hoster["title"].replace("Premium-Account bei ",
                                             "").replace("ddownload", "ddl")
            if hoster not in hosters:
                hosters.append(hoster)

        for hoster in hosters:
            if hoster:
                if check_hoster(hoster, self.configfile):
                    valid = True
        if not valid and not self.hoster_fallback:
            storage = self.db.retrieve_all(title)
            if 'added' not in storage and 'notdl' not in storage:
            wrong_hoster = '[DW/Hoster fehlt] - ' + title
                if 'wrong_hoster' not in storage:
                    print(wrong_hoster)
                    self.db.store(title, 'wrong_hoster')
                    notify([wrong_hoster], self.configfile)
                else:
                    self.log_debug(wrong_hoster)
                return False
        else:
            return [title, release_url, language_id, False, False]
    except:
        print(self._INTERNAL_NAME +
              u" hat die Serien-API angepasst. Breche Download-Prüfung ab!")
        return False
Example No. 8
def dw_mirror(self, title):
    hostnames = CrawlerConfig('Hostnames', self.configfile)
    dw = hostnames.get('dw')

    if dw:
        dw_search = 'https://' + dw + '/?search=' + title

        dw_results = get_url(dw_search, self.configfile, self.dbfile,
                             self.scraper)
        dw_results = dw_search_results(dw_results, dw)

        for result in dw_results:
            release_url = result[1].split("|")[0]
            release_info = get_url(release_url, self.configfile, self.dbfile)
            post_hosters = BeautifulSoup(release_info, 'lxml').find(
                "div", {
                    "id": "download"
                }).findAll("img", src=re.compile(r"images/hosterimg"))
            hosters = []
            valid = False
            for hoster in post_hosters:
                hoster = hoster["title"].replace("Premium-Account bei ",
                                                 "").replace(
                                                     "ddownload", "ddl")
                if hoster not in hosters:
                    hosters.append(hoster)

            for hoster in hosters:
                if hoster:
                    if check_hoster(hoster, self.configfile):
                        valid = True
            if not valid and not self.hoster_fallback:
                return False
            else:
                return [release_url]

    return False
Example No. 9
def get_imdb_id(key, content, filename, configfile, dbfile, scraper,
                log_debug):
    try:
        imdb_id = re.findall(
            r'.*?(?:href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?).*?(\d(?:\.|\,)\d)(?:.|.*?)<\/a>.*?',
            content)
    except:
        imdb_id = False

    if imdb_id:
        imdb_id = imdb_id[0][0]
    else:
        try:
            search_title = re.findall(
                r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)",
                key)[0].replace(".", "+")
            search_url = "http://www.imdb.com/find?q=" + search_title
            search_page = get_url(search_url, configfile, dbfile, scraper)
            search_results = re.findall(
                r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                search_page)
        except:
            return False
        total_results = len(search_results)
        if not total_results:
            return False
        if filename == 'List_ContentAll_Seasons':
            imdb_id = search_results[0][0]
        else:
            no_series = False
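            # Walk the results until the first hit that is not a "TV Series";
            # its IMDb id wins (movie searches take the first result above)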
            while total_results > 0:
                attempt = 0
                for result in search_results:
                    if result[3] == "TV Series":
                        no_series = False
                        total_results -= 1
                        attempt += 1
                    else:
                        no_series = True
                        imdb_id = search_results[attempt][0]
                        total_results = 0
                        break
            if no_series is False:
                log_debug("%s - Keine passende Film-IMDB-Seite gefunden" % key)
        if not imdb_id:
            return False

    return imdb_id
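
The search-title regex above cuts the release name at the year, "German", the resolution, or a season tag, whichever comes first, and rewrites dots as '+':

# Sketch of the IMDb query builder; the release name is invented.
import re

key = 'Some.Movie.2021.German.1080p.WEB.x264-GRP'
search_title = re.findall(
    r'(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)',
    key)[0].replace('.', '+')
print(search_title)  # Some+Movie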
Example No. 10
def by_page_download_link(self, download_link, key):
    unused_get_feed_parameter(key)
    by = self.hostnames.get('by')
    download_link = get_url(download_link, self.configfile, self.dbfile)
    soup = BeautifulSoup(download_link, 'lxml')
    links = soup.find_all("iframe")
    async_link_results = []
    for link in links:
        link = link["src"]
        if 'https://' + by in link:
            async_link_results.append(link)
    async_link_results = get_urls_async(async_link_results, self.configfile,
                                        self.dbfile)
    links = async_link_results[0]
    url_hosters = []
    for link in links:
        if link:
            link = BeautifulSoup(link, 'lxml').find(
                "a", href=re.compile(r"/go\.php\?"))
            if link:
                url_hosters.append([link["href"], link.text.replace(" ", "")])
    return check_download_links(self, url_hosters)
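
by_page_download_link keeps only iframes that point back at the BY host before resolving them asynchronously. The filter in isolation:

# Sketch of the iframe filter; markup and hostname are invented.
from bs4 import BeautifulSoup

by = 'by.example'
html = ('<iframe src="https://by.example/frame/1"></iframe>'
        '<iframe src="https://other.example/ad"></iframe>')
frames = [f['src'] for f in BeautifulSoup(html, 'lxml').find_all('iframe')
          if 'https://' + by in f['src']]
print(frames)  # ['https://by.example/frame/1']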
Example No. 11
def get(title, configfile, dbfile, bl_only=False, sj_only=False):
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    sj = hostnames.get('sj')

    specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    else:
        special = None

    bl_final = {}
    sj_final = {}
    scraper = cloudscraper.create_scraper()

    if not sj_only:
        mb_query = sanitize(title).replace(" ", "+")
        if special:
            bl_query = mb_query + "+" + special
        else:
            bl_query = mb_query

        unrated = []

        config = CrawlerConfig('ContentAll', configfile)
        quality = config.get('quality')
        ignore = config.get('ignore')

        if "480p" not in quality:
            search_quality = "+" + quality
        else:
            search_quality = ""

        if by:
            by_search = 'https://' + by + '/?q=' + bl_query + search_quality
        else:
            by_search = None
        if dw:
            dw_search = 'https://' + dw + '/?kategorie=Movies&search=' + bl_query + search_quality
        else:
            dw_search = None
        if fx:
            fx_search = 'https://' + fx + '/?s=' + bl_query
        else:
            fx_search = None

        async_results = get_urls_async([by_search, dw_search, fx_search],
                                       configfile, dbfile, scraper)
        scraper = async_results[1]
        async_results = async_results[0]

        by_results = []
        dw_results = []
        fx_results = []

        for res in async_results:
            if check_is_site(res, configfile) == 'BY':
                by_results = by_search_results(res, by)
            elif check_is_site(res, configfile) == 'DW':
                dw_results = dw_search_results(res, dw)
            elif check_is_site(res, configfile) == 'FX':
                fx_results = fx_search_results(fx_content_to_soup(res),
                                               configfile, dbfile, scraper)

        if nk:
            nk_search = post_url(
                'https://' + nk + "/search",
                configfile,
                dbfile,
                data={'search': bl_query.replace("+", " ") + " " + quality})
            nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
        else:
            nk_results = []

        password = by
        for result in by_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            if "xxx" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (BY)"
                ])

        password = dw
        for result in dw_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password), result[0] + " (DW)"
            ])

        password = fx.split('.')[0] if fx else ""
        for result in fx_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            if "-low" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (FX)"
                ])

        password = nk.split('.')[0].capitalize() if nk else ""
        for result in nk_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password), result[0] + " (NK)"
            ])

        rated = sorted(unrated, reverse=True)

        results = {}
        i = 0

        for result in rated:
            res = {"payload": result[1], "title": result[2]}
            results["result" + str(i + 1000)] = res
            i += 1
        bl_final = results

    if not bl_only:
        if sj:
            sj_query = sanitize(title).replace(" ", "+")
            sj_search = get_url(
                'https://' + sj + '/serie/search?q=' + sj_query, configfile,
                dbfile, scraper)
            try:
                sj_results = BeautifulSoup(sj_search, 'lxml').findAll(
                    "a", href=re.compile("/serie"))
            except:
                sj_results = []
        else:
            sj_results = []

        if special:
            append = " (" + special + ")"
        else:
            append = ""
        i = 0
        results = {}
        for result in sj_results:
            r_title = result.text
            r_rating = fuzz.ratio(title.lower(), r_title)
            if r_rating > 40:
                res = {
                    "payload":
                    encode_base64(result['href'] + "|" + r_title + "|" +
                                  str(special)),
                    "title":
                    r_title + append
                }
                results["result" + str(i + 1000)] = res
                i += 1
        sj_final = results

    return bl_final, sj_final
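
The ",s01"/",s01e02" suffix handling at the top of get(), in isolation:

# Sketch of the season/episode suffix split; the title is invented.
import re

title = 'Some Show,s01e02'
specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower())
specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower())
if specific_season or specific_episode:
    split = title.split(',')
    title, special = split[0], split[1].upper()
else:
    special = None
print(title, special)  # Some Show S01E02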
Example No. 12
def download(payload, configfile, dbfile):
    hostnames = CrawlerConfig('Hostnames', configfile)
    sj = hostnames.get('sj')

    payload = decode_base64(payload).split("|")
    href = payload[0]
    title = payload[1]
    special = payload[2].strip().replace("None", "")

    series_url = 'https://' + sj + href
    series_info = get_url(series_url, configfile, dbfile)
    series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]

    api_url = 'https://' + sj + '/api/media/' + series_id + '/releases'
    releases = get_url(api_url, configfile, dbfile)

    unsorted_seasons = json.loads(releases)

    listen = ["List_ContentShows_Shows", "List_ContentAll_Seasons"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if list_title not in cont:
            ListDb(dbfile, liste).store(list_title)

    config = CrawlerConfig('ContentShows', configfile)
    english_ok = CrawlerConfig('FeedCrawler', configfile).get("english")
    quality = config.get('quality')
    ignore = config.get('rejectlist')

    result_seasons = {}
    result_episodes = {}

    seasons = {}
    for season in unsorted_seasons:
        if "sp" in season.lower():
            seasons[season] = unsorted_seasons[season]
    for season in unsorted_seasons:
        if "sp" not in season.lower():
            seasons[season] = unsorted_seasons[season]

    for season in seasons:
        releases = seasons[season]
        for release in releases['items']:
            name = release['name'].encode('ascii',
                                          errors='ignore').decode('utf-8')
            try:
                season = re.findall(r'.*\.(s\d{1,3}).*', name,
                                    re.IGNORECASE)[0]
            except:
                pass
            hosters = release['hoster']
            try:
                valid = bool(release['resolution'] == quality)
            except:
                valid = re.match(re.compile(r'.*' + quality + r'.*'), name)
            if valid and special:
                valid = bool("." + special.lower() + "." in name.lower())
            if valid and not english_ok:
                valid = bool(".german." in name.lower())
            if valid:
                valid = False
                for hoster in hosters:
                    if hoster and check_hoster(
                            hoster,
                            configfile) or config.get("hoster_fallback"):
                        valid = True
            if valid:
                try:
                    ep = release['episode']
                    if ep:
                        existing = result_episodes.get(season)
                        if existing:
                            valid = False
                            for e in existing:
                                if e == ep:
                                    if rate(name, ignore) > rate(
                                            existing[e], ignore):
                                        valid = True
                                else:
                                    valid = True
                            if valid:
                                existing.update({ep: name})
                        else:
                            existing = {ep: name}
                        result_episodes.update({season: existing})
                        continue
                except:
                    pass

                existing = result_seasons.get(season)
                dont = False
                if existing:
                    if rate(name, ignore) < rate(existing, ignore):
                        dont = True
                if not dont:
                    result_seasons.update({season: name})

        try:
            if result_seasons[season] and result_episodes[season]:
                del result_episodes[season]
        except:
            pass

        success = False
        try:
            if result_seasons[season]:
                success = True
        except:
            try:
                if result_episodes[season]:
                    success = True
            except:
                pass

        if success:
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)
        else:
            for release in releases['items']:
                name = release['name'].encode('ascii',
                                              errors='ignore').decode('utf-8')
                hosters = release['hoster']
                valid = True
                if valid and special:
                    valid = bool("." + special.lower() + "." in name.lower())
                if valid and not english_ok:
                    valid = bool(".german." in name.lower())
                if valid:
                    valid = False
                    for hoster in hosters:
                        if hoster and check_hoster(
                                hoster,
                                configfile) or config.get("hoster_fallback"):
                            valid = True
                if valid:
                    try:
                        ep = release['episode']
                        if ep:
                            existing = result_episodes.get(season)
                            if existing:
                                for e in existing:
                                    if e == ep:
                                        if rate(name, ignore) > rate(
                                                existing[e], ignore):
                                            existing.update({ep: name})
                            else:
                                existing = {ep: name}
                            result_episodes.update({season: existing})
                            continue
                    except:
                        pass

                    existing = result_seasons.get(season)
                    dont = False
                    if existing:
                        if rate(name, ignore) < rate(existing, ignore):
                            dont = True
                    if not dont:
                        result_seasons.update({season: name})

            try:
                if result_seasons[season] and result_episodes[season]:
                    del result_episodes[season]
            except:
                pass
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)

    matches = []

    for season in result_seasons:
        matches.append(result_seasons[season])
    for season in result_episodes:
        for episode in result_episodes[season]:
            matches.append(result_episodes[season][episode])

    notify_array = []
    for title in matches:
        db = FeedDb(dbfile, 'FeedCrawler')
        if add_decrypt(title, series_url, sj, dbfile):
            db.store(title, 'added')
            log_entry = u'[Suche/Serie] - ' + title + ' - [SJ]'
            logger.info(log_entry)
            notify_array.append(log_entry)

    notify(notify_array, configfile)

    if not matches:
        return False
    return matches
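
The per-release validity chain above checks resolution, then the optional season/episode tag, then the German-only gate. Isolated:

# Sketch of the validity chain; the release name is invented.
import re

name = 'some.show.s01e01.german.1080p.web.x264-grp'
quality, special, english_ok = '1080p', None, False
valid = bool(re.match(re.compile(r'.*' + quality + r'.*'), name))
if valid and special:
    valid = '.' + special.lower() + '.' in name.lower()
if valid and not english_ok:
    valid = '.german.' in name.lower()
print(valid)  # True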
Example No. 13
def download(payload, device, configfile, dbfile):
    config = CrawlerConfig('ContentAll', configfile)
    db = FeedDb(dbfile, 'FeedCrawler')
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    nk = hostnames.get('nk')

    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]

    site = check_is_site(link, configfile)
    if not site:
        return False
    elif "DW" in site:
        download_method = add_decrypt_instead_of_download
        download_links = [link]
        key = payload[1]
        password = payload[2]
    else:
        url = get_url(link, configfile, dbfile)
        if not url or "NinjaFirewall 429" in url:
            return False
        download_method = myjd_download
        soup = BeautifulSoup(url, 'lxml')

        if "BY" in site:
            key = soup.find("small").text
            links = soup.find_all("iframe")
            async_link_results = []
            for link in links:
                link = link["src"]
                if 'https://' + by in link:
                    async_link_results.append(link)
            async_link_results = get_urls_async(async_link_results, configfile,
                                                dbfile)
            links = async_link_results[0]
            url_hosters = []
            for link in links:
                if link:
                    link = BeautifulSoup(link, 'lxml').find(
                        "a", href=re.compile("/go\.php\?"))
                    if link:
                        url_hosters.append(
                            [link["href"],
                             link.text.replace(" ", "")])
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(
                    ['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            key = payload[1]
            password = payload[2]
        else:
            return False

        links = {}
        if "FX" in site:

            class FX:
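                # fx_get_download_links only needs an object carrying a
                # .configfile attribute; this minimal class stands in for
                # the crawler instance it normally receives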
                configfile = ""

            FX.configfile = configfile
            download_links = fx_get_download_links(FX, url, key)
        else:
            for url_hoster in reversed(url_hosters):
                try:
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">',
                        '').replace(" ", "-").replace("ddownload", "ddl")
                    if check_hoster(link_hoster, configfile):
                        link = url_hoster[0]
                        if by in link:
                            demasked_link = get_redirected_url(
                                link, configfile, dbfile, False)
                            if demasked_link:
                                link = demasked_link
                        links[link_hoster] = link
                except:
                    pass
            if config.get("hoster_fallback") and not links:
                for url_hoster in reversed(url_hosters):
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">',
                        '').replace(" ", "-").replace("ddownload", "ddl")
                    link = url_hoster[0]
                    if by in link:
                        demasked_link = get_redirected_url(
                            link, configfile, dbfile, False)
                        if demasked_link:
                            link = demasked_link
                    links[link_hoster] = link
            download_links = list(links.values())

    englisch = False
    if "*englisch" in key.lower() or "*english" in key.lower():
        key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace(
            "*ENGLISH", "").replace("*English", "").replace("*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    if download_links:
        if staffel:
            if download_method(configfile, dbfile, device, key, "FeedCrawler",
                               download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get('enforcedl')
                    and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Staffel] - ' + key.replace(
                    ".COMPLETE", "").replace(".Complete",
                                             "") + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            if config.get('cutoff') and '.complete.' not in key.lower():
                if is_retail(key, dbfile):
                    retail = True
            if download_method(configfile, dbfile, device, key, "FeedCrawler",
                               download_links, password):
                db.store(
                    key, 'notdl' if config.get('enforcedl')
                    and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film' + (
                    '/Englisch' if englisch and not retail else
                    '') + ('/Englisch/Retail' if englisch and retail else
                           '') + ('/Retail' if not englisch and retail else
                                  '') + '] - ' + key + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return [key]
    else:
        return False
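
The language-marker cleanup near the end strips every '*ENGLISCH'/'*English' variant and remembers that the release is English. Isolated:

# Sketch of the marker cleanup; the key is invented.
key = 'Some.Movie.2021.1080p.WEB.x264-GRP *English*'
englisch = False
if '*englisch' in key.lower() or '*english' in key.lower():
    key = key.replace('*ENGLISCH', '').replace('*Englisch', '').replace(
        '*ENGLISH', '').replace('*English', '').replace('*', '').strip()
    englisch = True
print(key, englisch)  # Some.Movie.2021.1080p.WEB.x264-GRP True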
Example No. 14
def sf_parse_download(self, series_url, title, language_id):
    if not check_valid_release(title, self.retail_only, self.hevc_retail,
                               self.dbfile):
        self.log_debug(
            title +
            u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
        )
        return False
    if self.filename == 'List_ContentAll_Seasons':
        if not self.config.get("seasonpacks"):
            staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
            if staffelpack:
                self.log_debug("%s - Release ignoriert (Staffelpaket)" % title)
                return False
        if not re.search(self.seasonssource, title.lower()):
            self.log_debug(title + " - Release hat falsche Quelle")
            return False
    try:
        if language_id == 2:
            lang = 'EN'
        else:
            lang = 'DE'
        epoch = str(datetime.datetime.now().timestamp()).replace('.', '')[:-3]
        api_url = series_url + '?lang=' + lang + '&_=' + epoch
        response = get_url(api_url, self.configfile, self.dbfile, self.scraper)
        info = json.loads(response)

        is_episode = re.findall(r'.*\.(s\d{1,3}e\d{1,3})\..*', title,
                                re.IGNORECASE)
        if is_episode:
            episode_string = re.findall(r'.*S\d{1,3}(E\d{1,3}).*',
                                        is_episode[0])[0].lower()
            season_string = re.findall(r'.*(S\d{1,3})E\d{1,3}.*',
                                       is_episode[0])[0].lower()
            season_title = rreplace(title.lower().replace(episode_string,
                                                          ''), "-", ".*",
                                    1).lower().replace(".repack", "")
            season_title = season_title.replace(".untouched", ".*").replace(
                ".dd+51", ".dd.51")
            episode = str(int(episode_string.replace("e", "")))
            season = str(int(season_string.replace("s", "")))
            episode_name = re.findall(r'.*\.s\d{1,3}(\..*).german',
                                      season_title, re.IGNORECASE)
            if episode_name:
                season_title = season_title.replace(episode_name[0], '')
            codec_tags = [".h264", ".x264"]
            for tag in codec_tags:
                season_title = season_title.replace(tag, ".*264")
            web_tags = [".web-rip", ".webrip", ".webdl", ".web-dl"]
            for tag in web_tags:
                season_title = season_title.replace(tag, ".web.*")
        else:
            season = False
            episode = False
            season_title = title
            multiple_episodes = re.findall(r'(e\d{1,3}-e*\d{1,3}\.)',
                                           season_title, re.IGNORECASE)
            if multiple_episodes:
                season_title = season_title.replace(multiple_episodes[0], '.*')

        content = BeautifulSoup(info['html'], 'lxml')
        releases = content.find(
            "small", text=re.compile(season_title,
                                     re.IGNORECASE)).parent.parent.parent
        links = releases.findAll("div", {'class': 'row'})[1].findAll('a')
        download_link = False
        for link in links:
            if check_hoster(link.text.replace('\n', ''), self.configfile):
                download_link = get_redirected_url(
                    "https://" + self.url + link['href'], self.configfile,
                    self.dbfile, self.scraper)
                break
        if not download_link and not self.hoster_fallback:
            storage = self.db.retrieve_all(title)
            if 'added' not in storage and 'notdl' not in storage:
                wrong_hoster = '[SF/Hoster fehlt] - ' + title
                if 'wrong_hoster' not in storage:
                    print(wrong_hoster)
                    self.db.store(title, 'wrong_hoster')
                    notify([wrong_hoster], self.configfile)
                else:
                    self.log_debug(wrong_hoster)
                return False
        else:
            return [title, download_link, language_id, season, episode]
    except:
        print(u"SF hat die Serien-API angepasst. Breche Download-Prüfung ab!")
        return False
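
The episode/season number extraction at the top of the try block, in isolation:

# Sketch of the episode/season extraction; the title is invented.
import re

title = 'Some.Show.S01E02.German.1080p.WEB.x264-GRP'
is_episode = re.findall(r'.*\.(s\d{1,3}e\d{1,3})\..*', title, re.IGNORECASE)
episode_string = re.findall(r'.*S\d{1,3}(E\d{1,3}).*', is_episode[0])[0].lower()
season_string = re.findall(r'.*(S\d{1,3})E\d{1,3}.*', is_episode[0])[0].lower()
print(int(season_string.replace('s', '')), int(episode_string.replace('e', '')))  # 1 2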
Example No. 15
def periodical_task(self):
    if not self.url:
        return self.device

    if self.filename == 'List_ContentShows_Shows_Regex':
        if not self.config.get('regex'):
            self.log_debug("Suche für " + self._SITE + "-Regex deaktiviert!")
            return self.device
    elif self.filename == 'List_ContentShows_Seasons_Regex':
        if not self.config.get('regex'):
            self.log_debug("Suche für " + self._SITE + "-Regex deaktiviert!")
            return self.device
    elif self.filename == 'List_ContentAll_Seasons':
        if not self.config.get('crawlseasons'):
            self.log_debug("Suche für " + self._SITE +
                           "-Staffeln deaktiviert!")
            return self.device

    if self.empty_list:
        self.log_debug("Liste ist leer. Stoppe Suche für Serien!" +
                       self.listtype)
        return self.device
    try:
        reject = self.config.get("rejectlist").replace(
            ",", "|").lower() if len(
                self.config.get("rejectlist")) > 0 else r"^unmatchable$"
    except TypeError:
        reject = r"^unmatchable$"

    current_set = settings_hash(self, False)
    sha = False

    header = False
    response = False

    while self.day < 8:
        if self.last_set == current_set:
            try:
                url = feed_url(self)
                if url:
                    response = get_url_headers(url, self.configfile,
                                               self.dbfile, self.headers,
                                               self.scraper)
                    self.scraper = response[1]
                    response = response[0]
                    if self.filename == "List_ContentAll_Seasons" or self.filename == "List_ContentShows_Seasons_Regex":
                        feed = self.get_feed_method(response.text, "seasons",
                                                    'https://' + self.url,
                                                    True)
                    else:
                        feed = self.get_feed_method(response.text, "episodes",
                                                    'https://' + self.url,
                                                    True)
                else:
                    feed = False
            except:
                print(self._SITE +
                      u" hat die Feed-API angepasst. Breche Suche ab!")
                feed = False

            if response:
                if response.status_code == 304:
                    self.log_debug(
                        self._SITE +
                        "-Feed seit letztem Aufruf nicht aktualisiert - breche  Suche ab!"
                    )
                    return self.device
                header = True
        else:
            try:
                url = feed_url(self)
                if url:
                    response = get_url(url, self.configfile, self.dbfile,
                                       self.scraper)
                    if self.filename == "List_ContentAll_Seasons" or self.filename == "List_ContentShows_Seasons_Regex":
                        feed = self.get_feed_method(response, "seasons",
                                                    'https://' + self.url,
                                                    True)
                    else:
                        feed = self.get_feed_method(response, "episodes",
                                                    'https://' + self.url,
                                                    True)
                else:
                    feed = False
            except:
                print(self._SITE +
                      u" hat die Feed-API angepasst. Breche Suche ab!")
                feed = False

        self.day += 1

        if feed and feed.entries:
            first_post = feed.entries[0]
            concat = first_post.title + first_post.published + str(
                self.settings) + str(self.pattern)
            sha = hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest()
        else:
            self.log_debug("Feed ist leer - breche  Suche ab!")
            return False

        for post in feed.entries:
            concat = post.title + post.published + \
                     str(self.settings) + str(self.pattern)
            sha = hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest()
            if sha == self.last_sha:
                self.log_debug("Feed ab hier bereits gecrawlt (" + post.title +
                               ") - breche  Suche ab!")
                break

            series_url = post.series_url
            title = post.title.replace("‑", "-")  # assumption: normalizes a non-breaking hyphen (U+2011) that was flattened to a plain "-" in extraction

            if self.filename == 'List_ContentShows_Shows_Regex':
                if self.config.get("regex"):
                    if '.german.' in title.lower():
                        language_id = 1
                    elif self.feedcrawler.get('english'):
                        language_id = 2
                    else:
                        language_id = 0
                    if language_id:
                        m = re.search(self.pattern, title.lower())
                        if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                            m = re.search(self.pattern.replace("480p", "."),
                                          title.lower())
                            self.quality = "480p"
                        if m:
                            if "720p" in title.lower():
                                self.quality = "720p"
                            if "1080p" in title.lower():
                                self.quality = "1080p"
                            if "2160p" in title.lower():
                                self.quality = "2160p"
                            m = re.search(reject, title.lower())
                            if m:
                                self.log_debug(
                                    title +
                                    " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
                                )
                            title = re.sub(r'\[.*\] ', '', post.title)
                            package = self.parse_download_method(
                                self, series_url, title, language_id)
                            if package:
                                title = package[0]
                                site = self._SITE
                                download_link = False
                                if self.prefer_dw_mirror and "DW" not in site:
                                    download_links = dw_mirror(self, title)
                                    if download_links:
                                        download_link = download_links[0]
                                        site = "DW/" + site
                                if not download_link:
                                    download_link = package[1]
                                language_id = package[2]
                                season = package[3]
                                episode = package[4]
                                send_package(self, title, download_link,
                                             language_id, season, episode,
                                             site)
                    else:
                        self.log_debug("%s - Englische Releases deaktiviert" %
                                       title)

                else:
                    continue
            elif self.filename == 'List_ContentShows_Seasons_Regex':
                if self.config.get("regex"):
                    if '.german.' in title.lower():
                        language_id = 1
                    elif self.feedcrawler.get('english'):
                        language_id = 2
                    else:
                        language_id = 0
                    if language_id:
                        m = re.search(self.pattern, title.lower())
                        if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                            m = re.search(self.pattern.replace("480p", "."),
                                          title.lower())
                            self.quality = "480p"
                        if m:
                            if "720p" in title.lower():
                                self.quality = "720p"
                            if "1080p" in title.lower():
                                self.quality = "1080p"
                            if "2160p" in title.lower():
                                self.quality = "2160p"
                            m = re.search(reject, title.lower())
                            if m:
                                self.log_debug(
                                    title +
                                    " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
                                )
                            title = re.sub(r'\[.*\] ', '', post.title)
                            package = self.parse_download_method(
                                self, series_url, title, language_id)
                            if package:
                                title = package[0]
                                site = self._SITE
                                download_link = False
                                if self.prefer_dw_mirror and "DW" not in site:
                                    download_links = dw_mirror(self, title)
                                    if download_links:
                                        download_link = download_links[0]
                                        site = "DW/" + site
                                if not download_link:
                                    download_link = package[1]
                                language_id = package[2]
                                season = package[3]
                                episode = package[4]
                                send_package(self, title, download_link,
                                             language_id, season, episode,
                                             site)
                    else:
                        self.log_debug("%s - Englische Releases deaktiviert" %
                                       title)

                else:
                    continue
            else:
                if self.config.get("quality") != '480p':
                    m = re.search(self.pattern, title.lower())
                    if m:
                        if '.german.' in title.lower():
                            language_id = 1
                        elif self.feedcrawler.get('english'):
                            language_id = 2
                        else:
                            language_id = 0
                        if language_id:
                            mm = re.search(self.quality, title.lower())
                            if mm:
                                mmm = re.search(reject, title.lower())
                                if mmm:
                                    self.log_debug(
                                        title +
                                        " - Release ignoriert (basierend auf rejectlist-Einstellung)"
                                    )
                                    continue
                                if self.feedcrawler.get("surround"):
                                    if not re.match(
                                            r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                            title):
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (kein Mehrkanalton)"
                                        )
                                        continue
                                try:
                                    storage = self.db.retrieve_all(title)
                                except Exception as e:
                                    self.log_debug(
                                        "Fehler bei Datenbankzugriff: %s, Grund: %s"
                                        % (e, title))
                                    return self.device
                                if 'added' in storage:
                                    self.log_debug(
                                        title +
                                        " - Release ignoriert (bereits gefunden)"
                                    )
                                    continue
                                package = self.parse_download_method(
                                    self, series_url, title, language_id)
                                if package:
                                    title = package[0]
                                    site = self._SITE
                                    download_link = False
                                    if self.prefer_dw_mirror and "DW" not in site:
                                        download_links = dw_mirror(self, title)
                                        if download_links:
                                            download_link = download_links[0]
                                            site = "DW/" + site
                                    if not download_link:
                                        download_link = package[1]
                                    language_id = package[2]
                                    season = package[3]
                                    episode = package[4]
                                    send_package(self, title, download_link,
                                                 language_id, season, episode,
                                                 site)
                        else:
                            self.log_debug(
                                "%s - Englische Releases deaktiviert" % title)

                    else:
                        m = re.search(self.pattern, title.lower())
                        if m:
                            if '.german.' in title.lower():
                                language_id = 1
                            elif self.feedcrawler.get('english'):
                                language_id = 2
                            else:
                                language_id = 0
                            if language_id:
                                if "720p" in title.lower(
                                ) or "1080p" in title.lower(
                                ) or "2160p" in title.lower():
                                    continue
                                mm = re.search(reject, title.lower())
                                if mm:
                                    self.log_debug(
                                        title +
                                        " Release ignoriert (basierend auf rejectlist-Einstellung)"
                                    )
                                    continue
                                if self.feedcrawler.get("surround"):
                                    if not re.match(
                                            r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                            title):
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (kein Mehrkanalton)"
                                        )
                                        continue
                                title = re.sub(r'\[.*\] ', '', post.title)
                                try:
                                    storage = self.db.retrieve_all(title)
                                except Exception as e:
                                    self.log_debug(
                                        "Fehler bei Datenbankzugriff: %s, Grund: %s"
                                        % (e, title))
                                    return self.device
                                if 'added' in storage:
                                    self.log_debug(
                                        title +
                                        " - Release ignoriert (bereits gefunden)"
                                    )
                                    continue
                                package = self.parse_download_method(
                                    self, series_url, title, language_id)
                                if package:
                                    title = package[0]
                                    site = self._SITE
                                    download_link = False
                                    if self.prefer_dw_mirror and "DW" not in site:
                                        download_links = dw_mirror(self, title)
                                        if download_links:
                                            download_link = download_links[0]
                                            site = "DW/" + site
                                    if not download_link:
                                        download_link = package[1]
                                    language_id = package[2]
                                    season = package[3]
                                    episode = package[4]
                                    send_package(self, title, download_link,
                                                 language_id, season, episode,
                                                 site)
                            else:
                                self.log_debug(
                                    "%s - Englische Releases deaktiviert" %
                                    title)

    if current_set and sha:
        new_set = settings_hash(self, True)
        if current_set == new_set:
            self.cdc.delete(self._INTERNAL_NAME + "Set-" + self.filename)
            self.cdc.store(self._INTERNAL_NAME + "Set-" + self.filename,
                           current_set)
            self.cdc.delete(self._INTERNAL_NAME + "-" + self.filename)
            self.cdc.store(self._INTERNAL_NAME + "-" + self.filename, sha)

    if header and response:
        self.cdc.delete(self._INTERNAL_NAME + "Headers-" + self.filename)
        self.cdc.store(self._INTERNAL_NAME + "Headers-" + self.filename,
                       response.headers['date'])

    return self.device
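
The crawl cursor above hashes title, publish date and the current settings of each post; when the hash of the newest post from the previous run reappears, everything from that point on has already been crawled. Isolated:

# Sketch of the crawl cursor; all values are invented.
import hashlib

last_sha = None  # what self.last_sha holds between runs (assumption)
settings, pattern = {'quality': '1080p'}, None
for post_title, published in [('New.Release.S01E01', 'Mon, 01 Jan 2024'),
                              ('Old.Release.S01E01', 'Sun, 31 Dec 2023')]:
    concat = post_title + published + str(settings) + str(pattern)
    sha = hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest()
    if sha == last_sha:
        break  # everything from here on was handled by the previous run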