Example no. 1
def get_device(configfile):
    conf = CrawlerConfig('FeedCrawler', configfile)
    myjd_user = str(conf.get('myjd_user'))
    myjd_pass = str(conf.get('myjd_pass'))
    myjd_device = str(conf.get('myjd_device'))

    jd = feedcrawler.myjdapi.Myjdapi()
    jd.set_app_key('FeedCrawler')

    if myjd_user and myjd_pass:
        if not myjd_device:
            # No device name stored: auto-detect it if the account has exactly one device.
            myjd_device = get_if_one_device(myjd_user, myjd_pass)
        try:
            jd.connect(myjd_user, myjd_pass)
            jd.update_devices()
            device = jd.get_device(myjd_device)
        except feedcrawler.myjdapi.MYJDException as e:
            print(u"Fehler bei der Verbindung mit MyJDownloader: " + str(e))
            return False
        if not device or not is_device(device):
            return False
        return device
    else:
        return False
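
A minimal usage sketch (the config file name below is a placeholder, not taken from the example):

# get_device() reads myjd_user/myjd_pass/myjd_device from the given config file
# and returns a connected My JDownloader device, or False on failure.
device = get_device("FeedCrawler.ini")
if device:
    print("Connected to My JDownloader device.")
else:
    print("Connection failed - check the stored credentials.")
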
Example no. 2
    def __init__(self, configfile, dbfile, device, logging, scraper):
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device
        self.config = CrawlerConfig("CustomDD", self.configfile)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.db = FeedDb(self.dbfile, 'FeedCrawler')
Example no. 3
def notify(items, configfile):
    notifications = CrawlerConfig('Notifications', configfile)
    homeassistant_settings = notifications.get("homeassistant").split(',')
    pushbullet_token = notifications.get("pushbullet")
    telegram_settings = notifications.get("telegram").split(',')
    pushover_settings = notifications.get("pushover").split(',')
    if len(items) > 0:
        cut_items = list(api_request_cutter(items, 5))
        if len(notifications.get("homeassistant")) > 0:
            for cut_item in cut_items:
                homeassistant_url = homeassistant_settings[0]
                homeassistant_password = homeassistant_settings[1]
                home_assistant(cut_item, homeassistant_url, homeassistant_password)
        if len(notifications.get("pushbullet")) > 0:
            pushbullet(items, pushbullet_token)
        if len(notifications.get("telegram")) > 0:
            for cut_item in cut_items:
                telegram_token = telegram_settings[0]
                telegram_chatid = telegram_settings[1]
                telegram(cut_item, telegram_token, telegram_chatid)
        if len(notifications.get('pushover')) > 0:
            for cut_item in cut_items:
                pushover_user = pushover_settings[0]
                pushover_token = pushover_settings[1]
                pushover(cut_item, pushover_user, pushover_token)
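
Usage sketch; the message text and config path are placeholders:

# notify() fans the given messages out to every notification service that is
# configured in the 'Notifications' section of the config file.
new_releases = ["[Suche/Film] - Some.Release.German.1080p - [BY]"]
notify(new_releases, "FeedCrawler.ini")
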
Example no. 4
    def __init__(self, configfile, dbfile, device, logging, scraper, filename):
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device

        self.hostnames = CrawlerConfig('Hostnames', self.configfile)
        self.url = self.hostnames.get('sj')

        self.filename = filename
        if "List_ContentAll_Seasons" in self.filename:
            self.config = CrawlerConfig("ContentAll", self.configfile)
        else:
            self.config = CrawlerConfig("ContentShows", self.configfile)
        self.feedcrawler = CrawlerConfig("FeedCrawler", self.configfile)
        self.hevc_retail = self.config.get("hevc_retail")
        self.retail_only = self.config.get("retail_only")
        self.hoster_fallback = self.config.get("hoster_fallback")
        self.hosters = CrawlerConfig("Hosters", configfile).get_section()
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.db = FeedDb(self.dbfile, 'FeedCrawler')
        self.quality = self.config.get("quality")
        self.prefer_dw_mirror = self.feedcrawler.get("prefer_dw_mirror")
        self.cdc = FeedDb(self.dbfile, 'cdc')
        self.last_set = self.cdc.retrieve(self._INTERNAL_NAME + "Set-" + self.filename)
        self.last_sha = self.cdc.retrieve(self._INTERNAL_NAME + "-" + self.filename)
        self.headers = {'If-Modified-Since': str(self.cdc.retrieve(self._INTERNAL_NAME + "Headers-" + self.filename))}
        self.settings_array = ["quality", "rejectlist", "regex", "hevc_retail", "retail_only", "hoster_fallback"]
        self.settings = []
        self.settings.append(self.feedcrawler.get("english"))
        self.settings.append(self.feedcrawler.get("surround"))
        self.settings.append(self.feedcrawler.get("prefer_dw_mirror"))
        self.settings.append(self.hosters)
        for s in self.settings_array:
            self.settings.append(self.config.get(s))

        self.mediatype = "Serien"
        self.listtype = ""

        self.empty_list = False
        if self.filename == 'List_ContentShows_Seasons_Regex':
            self.listtype = " (Staffeln/RegEx)"
        elif self.filename == 'List_ContentAll_Seasons':
            self.seasonssource = self.config.get('seasonssource').lower()
            self.listtype = " (Staffeln)"
        elif self.filename == 'List_ContentShows_Shows_Regex':
            self.listtype = " (RegEx)"
        list_content = shared_shows.get_series_list(self)
        if list_content:
            self.pattern = r'^(' + "|".join(list_content).lower() + ')'
        else:
            self.empty_list = True

        self.day = 0

        self.get_feed_method = j_releases_to_feedparser_dict
        self.parse_download_method = j_parse_download
Example no. 5
def ww_get_download_links(self, content, title):
    base_url = "https://" + CrawlerConfig('Hostnames',
                                          self.configfile).get('ww')
    content = content.replace("mkv|", "")
    download_links = []
    try:
        response = get_url(content, self.configfile, self.dbfile, self.scraper)
        if not response or "NinjaFirewall 429" in response:
            print(
                u"WW hat den Link-Abruf für " + title +
                " blockiert. Eine spätere Anfrage hat möglicherweise Erfolg!")
            return False
        links = BeautifulSoup(response,
                              'lxml').findAll("div", {"id": "download-links"})
        for link in links:
            hoster = link.text
            if 'Direct Download 100 MBit/s' not in hoster:
                url = base_url + link.find("a")["href"]
                download_links.append('href="' + url + '" ' + hoster + '<')
        download_links = "".join(download_links)

        download_links = get_download_links(self, download_links, title)
        return download_links
    except:
        return False
Example no. 6
def check_hoster(to_check, configfile):
    hosters = CrawlerConfig("Hosters", configfile).get_section()
    for hoster in hosters:
        if hosters[hoster] == "True":
            if hoster in to_check.lower() or to_check.lower() in hoster:
                return True
    return False
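
Usage sketch; the hoster name is an arbitrary example and the result depends on the 'Hosters' section of the config:

if check_hoster("rapidgator", "FeedCrawler.ini"):
    print("Hoster is enabled in the configuration.")
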
Example no. 7
def by_feed_enricher(self, content):
    base_url = "https://" + CrawlerConfig('Hostnames',
                                          self.configfile).get('by')
    content = BeautifulSoup(content, 'lxml')
    posts = content.findAll("a",
                            href=re.compile("/category/"),
                            text=re.compile("Download"))
    async_results = []
    for post in posts:
        try:
            async_results.append(base_url + post['href'])
        except:
            pass
    async_results = get_urls_async(async_results, self.configfile, self.dbfile,
                                   self.scraper)
    results = async_results[0]

    entries = []
    if results:
        for result in results:
            try:
                content = []
                details = BeautifulSoup(result, 'lxml').findAll(
                    "td", {
                        "valign": "TOP",
                        "align": "CENTER"
                    })[1]
                title = details.find("small").text
                published = details.find("th", {"align": "RIGHT"}).text
                try:
                    imdb = details.find("a", href=re.compile("imdb.com"))
                    imdb_link = imdb["href"].replace("https://anonym.to/?", "")
                    imdb_score = imdb.text.replace(" ", "").replace("/10", "")
                    if "0.0" in imdb_score:
                        imdb_score = "9.9"
                    content.append('<a href="' + imdb_link + '"' + imdb_score +
                                   '</a>')
                except:
                    pass
                links = details.find_all("iframe")
                for link in links:
                    content.append('href="' + link["src"] + '"')

                content = "".join(content)

                entries.append(FakeFeedParserDict({
                    "title": title,
                    "published": published,
                    "content": [FakeFeedParserDict({"value": content + " mkv"})]
                }))
            except:
                pass

    feed = {"entries": entries}
    feed = FakeFeedParserDict(feed)
    return feed
Example no. 8
def ww_feed_enricher(self, content):
    base_url = "https://" + CrawlerConfig('Hostnames',
                                          self.configfile).get('ww')
    content = BeautifulSoup(content, 'lxml')
    posts = content.findAll("li")
    entries = []
    if posts:
        for post in posts:
            try:
                link = post.findAll("a", href=re.compile("/download"))[1]
                title = link.nextSibling.nextSibling.strip()
                published = post.find("span", {
                    "class": "main-date"
                }).text.replace("\n", "")
                content = "mkv|" + base_url + link["href"]

                entries.append(FakeFeedParserDict({
                    "title": title,
                    "published": published,
                    "content": [FakeFeedParserDict({"value": content})]
                }))
            except:
                pass

    feed = {"entries": entries}
    feed = FakeFeedParserDict(feed)
    return feed
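
A sketch of consuming the enriched feed, assuming FakeFeedParserDict mirrors feedparser's FeedParserDict (entries readable as items) and that the function is called on a crawler object providing configfile, dbfile and scraper:

feed = ww_feed_enricher(crawler, list_page_html)  # list_page_html: raw HTML of the WW list page
for entry in feed["entries"]:
    print(entry["title"], entry["published"])
    print(entry["content"][0]["value"])  # "mkv|<base_url>/download..." hint for the parser
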
Example no. 9
def check_is_site(string, configfile):
    hostnames = CrawlerConfig('Hostnames', configfile)
    sj = hostnames.get('sj')
    dj = hostnames.get('dj')
    sf = hostnames.get('sf')
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    ww = hostnames.get('ww')
    dd = hostnames.get('dd')

    if sj and sj.split('.')[0] in string:
        return "SJ"
    elif dj and dj.split('.')[0] in string:
        return "DJ"
    elif sf and sf.split('.')[0] in string:
        return "SF"
    elif by and by.split('.')[0] in string:
        return "BY"
    elif dw and dw.split('.')[0] in string:
        return "DW"
    elif fx and fx.split('.')[0] in string:
        return "FX"
    elif nk and nk.split('.')[0] in string:
        return "NK"
    elif ww and ww.split('.')[0] in string:
        return "WW"
    elif dd and dd.split('.')[0] in string:
        return "DD"
    else:
        return False
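
Usage sketch; the URL is a placeholder and the return value depends on the hostnames stored in the config:

site = check_is_site("https://sj.example.tld/serie/some-show", "FeedCrawler.ini")
if site == "SJ":
    print("The link belongs to the configured SJ host.")
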
Example no. 10
def nk_feed_enricher(self, content):
    base_url = "https://" + CrawlerConfig('Hostnames',
                                          self.configfile).get('nk')
    content = BeautifulSoup(content, 'lxml')
    posts = content.findAll("a", {"class": "btn"},
                            href=re.compile("/release/"))
    async_results = []
    for post in posts:
        try:
            async_results.append(base_url + post['href'])
        except:
            pass
    async_results = get_urls_async(async_results, self.configfile, self.dbfile,
                                   self.scraper)[0]

    entries = []
    if async_results:
        for result in async_results:
            try:
                content = []
                details = BeautifulSoup(result, 'lxml').find("div", {"class": "article"})
                title = details.find("span", {"class": "subtitle"}).text
                published = details.find("p", {"class": "meta"}).text
                content.append("mkv ")
                try:
                    imdb = details.find("a",
                                        href=re.compile("imdb.com"))["href"]
                    content.append('<a href="' + imdb + '" 9,9</a>')
                except:
                    pass
                links = details.find_all("a", href=re.compile("/go/"))
                for link in links:
                    content.append('href="' + base_url + link["href"] + '">' +
                                   link.text + '<')
                content = "".join(content)

                entries.append(FakeFeedParserDict({
                    "title": title,
                    "published": published,
                    "content": [FakeFeedParserDict({"value": content})]
                }))
            except:
                pass

    feed = {"entries": entries}
    feed = FakeFeedParserDict(feed)
    return feed
Example no. 11
def dw_mirror(self, title):
    hostnames = CrawlerConfig('Hostnames', self.configfile)
    dw = hostnames.get('dw')

    if dw:
        dw_search = 'https://' + dw + '/?search=' + title

        dw_results = get_url(dw_search, self.configfile, self.dbfile,
                             self.scraper)
        dw_results = dw_search_results(dw_results, dw)

        for result in dw_results:
            release_url = result[1].split("|")[0]
            release_info = get_url(release_url, self.configfile, self.dbfile)
            post_hosters = BeautifulSoup(release_info, 'lxml').find(
                "div", {
                    "id": "download"
                }).findAll("img", src=re.compile(r"images/hosterimg"))
            hosters = []
            valid = False
            for hoster in post_hosters:
                hoster = hoster["title"].replace("Premium-Account bei ",
                                                 "").replace(
                                                     "ddownload", "ddl")
                if hoster not in hosters:
                    hosters.append(hoster)

            for hoster in hosters:
                if hoster:
                    if check_hoster(hoster, self.configfile):
                        valid = True
            if not valid and not self.hoster_fallback:
                return False
            else:
                return [release_url]

    return False
Example no. 12
def myjd_input(configfile, port, user, password, device):
    if user and password and device:
        print(u"Zugangsdaten aus den Parametern übernommen.")
    elif user and password and not device:
        device = get_if_one_device(user, password)
        if device:
            print(u"Gerätename " + device + " automatisch ermittelt.")
    else:
        print(u"Bitte die Zugangsdaten für My JDownloader angeben:")
        user = input("Nutzername/Email:")
        password = input("Passwort:")
        device = get_if_one_device(user, password)
        if device:
            print(u"Gerätename " + device + " automatisch ermittelt.")
        else:
            device = input(u"Gerätename:")
    if not port:
        port = '9090'

    sections = [
        'FeedCrawler', 'Hostnames', 'Crawljobs', 'Notifications', 'Hosters',
        'Ombi', 'ContentAll', 'ContentShows', 'CustomDJ', 'CustomDD'
    ]
    for section in sections:
        CrawlerConfig(section, configfile)
    if port:
        CrawlerConfig('FeedCrawler', configfile).save("port", port)

    CrawlerConfig('FeedCrawler', configfile).save("myjd_user", user)
    CrawlerConfig('FeedCrawler', configfile).save("myjd_pass", password)
    CrawlerConfig('FeedCrawler', configfile).save("myjd_device", device)
    device = get_device(configfile)
    if device:
        return device
    else:
        return False
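
A sketch of the first-run setup; passing None for the credentials triggers the interactive prompts (arguments are placeholders):

# Stores port and My JDownloader credentials in the config file, then returns the
# connected device via get_device(), or False.
device = myjd_input("FeedCrawler.ini", None, None, None, None)
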
Example no. 13
def download(payload, configfile, dbfile):
    hostnames = CrawlerConfig('Hostnames', configfile)
    sj = hostnames.get('sj')

    payload = decode_base64(payload).split("|")
    href = payload[0]
    title = payload[1]
    special = payload[2].strip().replace("None", "")

    series_url = 'https://' + sj + href
    series_info = get_url(series_url, configfile, dbfile)
    series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]

    api_url = 'https://' + sj + '/api/media/' + series_id + '/releases'
    releases = get_url(api_url, configfile, dbfile)

    unsorted_seasons = json.loads(releases)

    listen = ["List_ContentShows_Shows", "List_ContentAll_Seasons"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if list_title not in cont:
            ListDb(dbfile, liste).store(list_title)

    config = CrawlerConfig('ContentShows', configfile)
    english_ok = CrawlerConfig('FeedCrawler', configfile).get("english")
    quality = config.get('quality')
    ignore = config.get('rejectlist')

    result_seasons = {}
    result_episodes = {}

    seasons = {}
    for season in unsorted_seasons:
        if "sp" in season.lower():
            seasons[season] = unsorted_seasons[season]
    for season in unsorted_seasons:
        if "sp" not in season.lower():
            seasons[season] = unsorted_seasons[season]

    for season in seasons:
        releases = seasons[season]
        for release in releases['items']:
            name = release['name'].encode('ascii', errors='ignore').decode('utf-8')
            try:
                season = re.findall(r'.*\.(s\d{1,3}).*', name, re.IGNORECASE)[0]
            except:
                pass
            hosters = release['hoster']
            try:
                valid = bool(release['resolution'] == quality)
            except:
                valid = re.match(re.compile(r'.*' + quality + r'.*'), name)
            if valid and special:
                valid = bool("." + special.lower() + "." in name.lower())
            if valid and not english_ok:
                valid = bool(".german." in name.lower())
            if valid:
                valid = False
                for hoster in hosters:
                    if ((hoster and check_hoster(hoster, configfile))
                            or config.get("hoster_fallback")):
                        valid = True
            if valid:
                try:
                    ep = release['episode']
                    if ep:
                        existing = result_episodes.get(season)
                        if existing:
                            valid = False
                            for e in existing:
                                if e == ep:
                                    if rate(name, ignore) > rate(
                                            existing[e], ignore):
                                        valid = True
                                else:
                                    valid = True
                            if valid:
                                existing.update({ep: name})
                        else:
                            existing = {ep: name}
                        result_episodes.update({season: existing})
                        continue
                except:
                    pass

                existing = result_seasons.get(season)
                dont = False
                if existing:
                    if rate(name, ignore) < rate(existing, ignore):
                        dont = True
                if not dont:
                    result_seasons.update({season: name})

        try:
            if result_seasons[season] and result_episodes[season]:
                del result_episodes[season]
        except:
            pass

        success = False
        try:
            if result_seasons[season]:
                success = True
        except:
            try:
                if result_episodes[season]:
                    success = True
            except:
                pass

        if success:
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)
        else:
            for release in releases['items']:
                name = release['name'].encode('ascii', errors='ignore').decode('utf-8')
                hosters = release['hoster']
                valid = True
                if valid and special:
                    valid = bool("." + special.lower() + "." in name.lower())
                if valid and not english_ok:
                    valid = bool(".german." in name.lower())
                if valid:
                    valid = False
                    for hoster in hosters:
                        if ((hoster and check_hoster(hoster, configfile))
                                or config.get("hoster_fallback")):
                            valid = True
                if valid:
                    try:
                        ep = release['episode']
                        if ep:
                            existing = result_episodes.get(season)
                            if existing:
                                for e in existing:
                                    if e == ep:
                                        if rate(name, ignore) > rate(
                                                existing[e], ignore):
                                            existing.update({ep: name})
                            else:
                                existing = {ep: name}
                            result_episodes.update({season: existing})
                            continue
                    except:
                        pass

                    existing = result_seasons.get(season)
                    dont = False
                    if existing:
                        if rate(name, ignore) < rate(existing, ignore):
                            dont = True
                    if not dont:
                        result_seasons.update({season: name})

            try:
                if result_seasons[season] and result_episodes[season]:
                    del result_episodes[season]
            except:
                pass
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)

    matches = []

    for season in result_seasons:
        matches.append(result_seasons[season])
    for season in result_episodes:
        for episode in result_episodes[season]:
            matches.append(result_episodes[season][episode])

    notify_array = []
    for title in matches:
        db = FeedDb(dbfile, 'FeedCrawler')
        if add_decrypt(title, series_url, sj, dbfile):
            db.store(title, 'added')
            log_entry = u'[Suche/Serie] - ' + title + ' - [SJ]'
            logger.info(log_entry)
            notify_array.append(log_entry)

    notify(notify_array, configfile)

    if not matches:
        return False
    return matches
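
The payload is the Base64 form of "href|title|special"; a sketch of building one, assuming encode_base64 (used in Example no. 17) is the inverse of decode_base64:

# Hypothetical values; "special" may be a season/episode tag such as "S01" or empty.
payload = encode_base64("/serie/some-show|Some.Show|S01")
matches = download(payload, "FeedCrawler.ini", "FeedCrawler.db")
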
Example no. 14
def download(payload, device, configfile, dbfile):
    config = CrawlerConfig('ContentAll', configfile)
    db = FeedDb(dbfile, 'FeedCrawler')
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    nk = hostnames.get('nk')

    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]

    site = check_is_site(link, configfile)
    if not site:
        return False
    elif "DW" in site:
        download_method = add_decrypt_instead_of_download
        download_links = [link]
        key = payload[1]
        password = payload[2]
    else:
        url = get_url(link, configfile, dbfile)
        if not url or "NinjaFirewall 429" in url:
            return False
        download_method = myjd_download
        soup = BeautifulSoup(url, 'lxml')

        if "BY" in site:
            key = soup.find("small").text
            links = soup.find_all("iframe")
            async_link_results = []
            for link in links:
                link = link["src"]
                if 'https://' + by in link:
                    async_link_results.append(link)
            async_link_results = get_urls_async(async_link_results, configfile,
                                                dbfile)
            links = async_link_results[0]
            url_hosters = []
            for link in links:
                if link:
                    link = BeautifulSoup(link, 'lxml').find(
                        "a", href=re.compile("/go\.php\?"))
                    if link:
                        url_hosters.append(
                            [link["href"],
                             link.text.replace(" ", "")])
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(
                    ['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            key = payload[1]
            password = payload[2]
        else:
            return False

        links = {}
        if "FX" in site:

            class FX:
                configfile = ""

            FX.configfile = configfile
            download_links = fx_get_download_links(FX, url, key)
        else:
            for url_hoster in reversed(url_hosters):
                try:
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">',
                        '').replace(" ", "-").replace("ddownload", "ddl")
                    if check_hoster(link_hoster, configfile):
                        link = url_hoster[0]
                        if by in link:
                            demasked_link = get_redirected_url(
                                link, configfile, dbfile, False)
                            if demasked_link:
                                link = demasked_link
                        links[link_hoster] = link
                except:
                    pass
            if config.get("hoster_fallback") and not links:
                for url_hoster in reversed(url_hosters):
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">',
                        '').replace(" ", "-").replace("ddownload", "ddl")
                    link = url_hoster[0]
                    if by in link:
                        demasked_link = get_redirected_url(
                            link, configfile, dbfile, False)
                        if demasked_link:
                            link = demasked_link
                    links[link_hoster] = link
            download_links = list(links.values())

    englisch = False
    if "*englisch" in key.lower() or "*english" in key.lower():
        key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace(
            "*ENGLISH", "").replace("*English", "").replace("*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    if download_links:
        if staffel:
            if download_method(configfile, dbfile, device, key, "FeedCrawler",
                               download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get('enforcedl')
                    and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Staffel] - ' + key.replace(
                    ".COMPLETE", "").replace(".Complete",
                                             "") + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            if config.get('cutoff') and '.complete.' not in key.lower():
                if is_retail(key, dbfile):
                    retail = True
            if download_method(configfile, dbfile, device, key, "FeedCrawler",
                               download_links, password):
                db.store(
                    key, 'notdl' if config.get('enforcedl')
                    and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film' + (
                    '/Englisch' if englisch and not retail else
                    '') + ('/Englisch/Retail' if englisch and retail else
                           '') + ('/Retail' if not englisch and retail else
                                  '') + '] - ' + key + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return [key]
    else:
        return False
Example no. 15
def post_url_headers(url, configfile, dbfile, headers, data, scraper=False):
    config = CrawlerConfig('FeedCrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()

    db = FeedDb(dbfile, 'proxystatus')
    db_normal = FeedDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    # All nine supported sites share the same proxy/fallback handling, so check them in a loop.
    sites = ["SJ", "DJ", "SF", "BY", "DW", "FX", "NK", "WW", "DD"]

    if proxy:
        try:
            for identifier in sites:
                if site and identifier in site and db.retrieve(identifier):
                    # The proxy is flagged as blocked for this site: retry without the
                    # proxy if fallback is allowed, otherwise skip the request.
                    if config.get("fallback") and not db_normal.retrieve(identifier):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    return ["", scraper]
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.post(url, data, headers=headers, proxies=proxies, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]
    else:
        try:
            for identifier in sites:
                if site and identifier in site and db_normal.retrieve(identifier):
                    return ["", scraper]
            response = scraper.post(url, data, headers=headers, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]
Example no. 16
class DD:
    _SITE = 'DD'

    def __init__(self, configfile, dbfile, device, logging, scraper):
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device
        self.config = CrawlerConfig("CustomDD", self.configfile)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.db = FeedDb(self.dbfile, 'FeedCrawler')

    def periodical_task(self):
        feeds = self.config.get("feeds")
        if feeds:
            added_items = []
            feeds = feeds.replace(" ", "").split(',')
            for feed in feeds:
                feed = feedparser.parse(
                    get_url(feed, self.configfile, self.dbfile, self.scraper))
                for post in feed.entries:
                    key = post.title.replace(" ", ".")

                    epoch = datetime(1970, 1, 1)
                    current_epoch = int(time())
                    published_format = "%Y-%m-%d %H:%M:%S+00:00"
                    published_timestamp = str(parser.parse(post.published))
                    published_epoch = int((datetime.strptime(
                        published_timestamp, published_format) - epoch).total_seconds())
                    if (current_epoch - 1800) > published_epoch:
                        link_pool = post.summary
                        unicode_links = re.findall(r'(http.*)', link_pool)
                        links = []
                        for link in unicode_links:
                            if check_hoster(link, self.configfile):
                                links.append(str(link))
                        if self.config.get("hoster_fallback") and not links:
                            for link in unicode_links:
                                links.append(str(link))
                        storage = self.db.retrieve_all(key)
                        if not links:
                            if 'added' not in storage and 'notdl' not in storage:
                                wrong_hoster = '[' + self._SITE + '/Hoster fehlt] - ' + key
                                if 'wrong_hoster' not in storage:
                                    print(wrong_hoster)
                                    self.db.store(key, 'wrong_hoster')
                                    notify([wrong_hoster], self.configfile)
                                else:
                                    self.log_debug(wrong_hoster)
                        elif 'added' in storage:
                            self.log_debug(
                                "%s - Release ignoriert (bereits gefunden)" %
                                key)
                        else:
                            self.device = myjd_download(
                                self.configfile, self.dbfile, self.device, key,
                                "FeedCrawler", links, "")
                            if self.device:
                                self.db.store(key, 'added')
                                log_entry = '[Englisch] - ' + key + ' - [' + self._SITE + ']'
                                self.log_info(log_entry)
                                notify([log_entry], self.configfile)
                                added_items.append(log_entry)
                    else:
                        self.log_debug(
                            "%s - Releasezeitpunkt weniger als 30 Minuten in der Vergangenheit - wird ignoriert."
                            % key)
        else:
            self.log_debug("Liste ist leer. Stoppe Suche für " + self._SITE +
                           "!")
        return self.device
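
A minimal instantiation sketch; the device would normally come from get_device() (Example no. 1), and passing False as the scraper is an assumption here (the HTTP helpers shown in Example no. 15 create their own cloudscraper session in that case):

import logging

device = get_device("FeedCrawler.ini")
dd = DD("FeedCrawler.ini", "FeedCrawler.db", device, logging, False)
device = dd.periodical_task()  # returns the (possibly refreshed) device handle
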
Example no. 17
def get(title, configfile, dbfile, bl_only=False, sj_only=False):
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    sj = hostnames.get('sj')

    specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    else:
        special = None

    bl_final = {}
    sj_final = {}
    scraper = cloudscraper.create_scraper()

    if not sj_only:
        mb_query = sanitize(title).replace(" ", "+")
        if special:
            bl_query = mb_query + "+" + special
        else:
            bl_query = mb_query

        unrated = []

        config = CrawlerConfig('ContentAll', configfile)
        quality = config.get('quality')
        ignore = config.get('ignore')

        if "480p" not in quality:
            search_quality = "+" + quality
        else:
            search_quality = ""

        if by:
            by_search = 'https://' + by + '/?q=' + bl_query + search_quality
        else:
            by_search = None
        if dw:
            dw_search = 'https://' + dw + '/?kategorie=Movies&search=' + bl_query + search_quality
        else:
            dw_search = None
        if fx:
            fx_search = 'https://' + fx + '/?s=' + bl_query
        else:
            fx_search = None

        async_results = get_urls_async([by_search, dw_search, fx_search],
                                       configfile, dbfile, scraper)
        scraper = async_results[1]
        async_results = async_results[0]

        by_results = []
        dw_results = []
        fx_results = []

        for res in async_results:
            if check_is_site(res, configfile) == 'BY':
                by_results = by_search_results(res, by)
            elif check_is_site(res, configfile) == 'DW':
                dw_results = dw_search_results(res, dw)
            elif check_is_site(res, configfile) == 'FX':
                fx_results = fx_search_results(fx_content_to_soup(res),
                                               configfile, dbfile, scraper)

        if nk:
            nk_search = post_url(
                'https://' + nk + "/search",
                configfile,
                dbfile,
                data={'search': bl_query.replace("+", " ") + " " + quality})
            nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
        else:
            nk_results = []

        password = by
        for result in by_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            if "xxx" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (BY)"
                ])

        password = dw
        for result in dw_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password), result[0] + " (DW)"
            ])

        password = fx.split('.')[0]
        for result in fx_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            if "-low" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (FX)"
                ])

        password = nk.split('.')[0].capitalize()
        for result in nk_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password), result[0] + " (NK)"
            ])

        rated = sorted(unrated, reverse=True)

        results = {}
        i = 0

        for result in rated:
            res = {"payload": result[1], "title": result[2]}
            results["result" + str(i + 1000)] = res
            i += 1
        bl_final = results

    if not bl_only:
        if sj:
            sj_query = sanitize(title).replace(" ", "+")
            sj_search = get_url(
                'https://' + sj + '/serie/search?q=' + sj_query, configfile,
                dbfile, scraper)
            try:
                sj_results = BeautifulSoup(sj_search, 'lxml').findAll(
                    "a", href=re.compile("/serie"))
            except:
                sj_results = []
        else:
            sj_results = []

        if special:
            append = " (" + special + ")"
        else:
            append = ""
        i = 0
        results = {}
        for result in sj_results:
            r_title = result.text
            r_rating = fuzz.ratio(title.lower(), r_title)
            if r_rating > 40:
                res = {
                    "payload": encode_base64(
                        result['href'] + "|" + r_title + "|" + str(special)),
                    "title": r_title + append
                }
                results["result" + str(i + 1000)] = res
                i += 1
        sj_final = results

    return bl_final, sj_final
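
A sketch of consuming the search results; both returned dicts map result keys to {"payload", "title"} entries:

movie_results, series_results = get("Some Title", "FeedCrawler.ini", "FeedCrawler.db")
for key, hit in movie_results.items():
    print(hit["title"])  # display name, e.g. "Some.Title...1080p... (BY)"
    # hit["payload"] is the Base64 payload expected by the download functions
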
Example no. 18
def crawldog(configfile, dbfile):
    disable_request_warnings(InsecureRequestWarning)
    crawljobs = CrawlerConfig('Crawljobs', configfile)
    autostart = crawljobs.get("autostart")
    db = FeedDb(dbfile, 'crawldog')

    grabber_was_collecting = False
    grabber_collecting = False
    device = False

    while True:
        try:
            if not device or not is_device(device):
                device = get_device(configfile)

            myjd_packages = get_info(configfile, device)
            if myjd_packages:
                grabber_collecting = myjd_packages[2]

                if grabber_was_collecting or grabber_collecting:
                    grabber_was_collecting = grabber_collecting
                    time.sleep(5)
                else:
                    packages_in_downloader_decrypted = myjd_packages[4][0]
                    packages_in_linkgrabber_decrypted = myjd_packages[4][1]
                    offline_packages = myjd_packages[4][2]
                    encrypted_packages = myjd_packages[4][3]

                    try:
                        watched_titles = db.retrieve_all_titles()
                    except:
                        watched_titles = False

                    notify_list = []

                    if packages_in_downloader_decrypted or packages_in_linkgrabber_decrypted or offline_packages or encrypted_packages:

                        if watched_titles:
                            for title in watched_titles:
                                if packages_in_downloader_decrypted:
                                    for package in packages_in_downloader_decrypted:
                                        if (title[0] in package['name'] or
                                                title[0].replace(".", " ") in package['name']):
                                            check = hoster_check(
                                                configfile, device, [package],
                                                title[0], [0])
                                            device = check[0]
                                            if device:
                                                db.delete(title[0])

                                if packages_in_linkgrabber_decrypted:
                                    for package in packages_in_linkgrabber_decrypted:
                                        if (title[0] in package['name'] or
                                                title[0].replace(".", " ") in package['name']):
                                            check = hoster_check(
                                                configfile, device, [package],
                                                title[0], [0])
                                            device = check[0]
                                            episode = FeedDb(
                                                dbfile,
                                                'episode_remover').retrieve(
                                                    title[0])
                                            if episode:
                                                filenames = package[
                                                    'filenames']
                                                if len(filenames) > 1:
                                                    fname_episodes = []
                                                    for fname in filenames:
                                                        try:
                                                            if re.match(
                                                                    r'.*S\d{1,3}E\d{1,3}.*',
                                                                    fname,
                                                                    flags=re.
                                                                    IGNORECASE
                                                            ):
                                                                fname = re.findall(
                                                                    r'S\d{1,3}E(\d{1,3})',
                                                                    fname,
                                                                    flags=re.
                                                                    IGNORECASE
                                                                ).pop()
                                                            else:
                                                                fname = fname.replace(
                                                                    "hddl8", ""
                                                                ).replace(
                                                                    "dd51", ""
                                                                ).replace(
                                                                    "264", ""
                                                                ).replace(
                                                                    "265", "")
                                                        except:
                                                            fname = fname.replace(
                                                                "hddl8",
                                                                "").replace(
                                                                    "dd51", ""
                                                                ).replace(
                                                                    "264", ""
                                                                ).replace(
                                                                    "265", "")
                                                        fname_episode = "".join(
                                                            re.findall(
                                                                r'\d+',
                                                                fname.split(
                                                                    ".part")
                                                                [0]))
                                                        try:
                                                            fname_episodes.append(
                                                                str(
                                                                    int(fname_episode
                                                                        )))
                                                        except:
                                                            pass
                                                    replacer = longest_substr(
                                                        fname_episodes)

                                                    new_fname_episodes = []
                                                    for new_ep_fname in fname_episodes:
                                                        try:
                                                            new_fname_episodes.append(
                                                                str(
                                                                    int(
                                                                        new_ep_fname
                                                                        .
                                                                        replace(
                                                                            replacer,
                                                                            ""
                                                                        ))))
                                                        except:
                                                            pass
                                                    replacer = longest_substr(
                                                        new_fname_episodes)

                                                    newer_fname_episodes = []
                                                    for new_ep_fname in new_fname_episodes:
                                                        try:
                                                            newer_fname_episodes.append(
                                                                str(
                                                                    int(
                                                                        re.sub(
                                                                            replacer,
                                                                            "",
                                                                            new_ep_fname,
                                                                            1))
                                                                ))
                                                        except:
                                                            pass

                                                    replacer = longest_substr(
                                                        newer_fname_episodes)

                                                    even_newer_fname_episodes = []
                                                    for newer_ep_fname in newer_fname_episodes:
                                                        try:
                                                            even_newer_fname_episodes.append(
                                                                str(
                                                                    int(
                                                                        re.sub(
                                                                            replacer,
                                                                            "",
                                                                            newer_ep_fname,
                                                                            1))
                                                                ))
                                                        except:
                                                            pass

                                                    if even_newer_fname_episodes:
                                                        fname_episodes = even_newer_fname_episodes
                                                    elif newer_fname_episodes:
                                                        fname_episodes = newer_fname_episodes
                                                    elif new_fname_episodes:
                                                        fname_episodes = new_fname_episodes

                                                    delete_linkids = []
                                                    pos = 0
                                                    for delete_id in package[
                                                            'linkids']:
                                                        if str(episode) != str(
                                                                fname_episodes[
                                                                    pos]):
                                                            delete_linkids.append(
                                                                delete_id)
                                                        pos += 1
                                                    if delete_linkids:
                                                        delete_uuids = [
                                                            package['uuid']
                                                        ]
                                                        FeedDb(
                                                            dbfile,
                                                            'episode_remover'
                                                        ).delete(title[0])
                                                        device = remove_from_linkgrabber(
                                                            configfile, device,
                                                            delete_linkids,
                                                            delete_uuids)
                                            if autostart:
                                                device = move_to_downloads(
                                                    configfile, device,
                                                    package['linkids'],
                                                    [package['uuid']])
                                            if device:
                                                db.delete(title[0])

                                if offline_packages:
                                    for package in offline_packages:
                                        if title[0] in package[
                                                'name'] or title[0].replace(
                                                    ".",
                                                    " ") in package['name']:
                                            notify_list.append("[Offline] - " +
                                                               title[0])
                                            print((u"[Offline] - " + title[0]))
                                            db.delete(title[0])

                                if encrypted_packages:
                                    for package in encrypted_packages:
                                        if title[0] in package[
                                                'name'] or title[0].replace(
                                                    ".",
                                                    " ") in package['name']:
                                            if title[1] == 'added':
                                                if retry_decrypt(
                                                        configfile, dbfile,
                                                        device,
                                                        package['linkids'],
                                                    [package['uuid']],
                                                        package['urls']):
                                                    db.delete(title[0])
                                                    db.store(
                                                        title[0], 'retried')
                                            else:
                                                add_decrypt(
                                                    package['name'],
                                                    package['url'], "", dbfile)
                                                device = remove_from_linkgrabber(
                                                    configfile, device,
                                                    package['linkids'],
                                                    [package['uuid']])
                                                notify_list.append(
                                                    "[Click'n'Load notwendig] - "
                                                    + title[0])
                                                print(
                                                    u"[Click'n'Load notwendig] - "
                                                    + title[0])
                                                db.delete(title[0])
                    else:
                        if not grabber_collecting:
                            db.reset()

                    if notify_list:
                        notify(notify_list, configfile)

                time.sleep(30)
            else:
                print(
                    u"Scheinbar ist der JDownloader nicht erreichbar - bitte prüfen und neustarten!"
                )
        except Exception:
            traceback.print_exc()
            time.sleep(30)
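
A note on the episode handling above: the repeated longest_substr passes strip whatever all filenames of a package share (show name, resolution, release group) until only the episode number differs, so links whose number does not match the wanted episode can be removed. A minimal, self-contained sketch of that idea follows; longest_common_substr and extract_episode_numbers are illustrative stand-ins, not the project's longest_substr helper.

def longest_common_substr(data):
    # Naive longest common substring over a list of strings (O(n^3) sketch).
    if not data:
        return ""
    substr = ""
    base = data[0]
    for i in range(len(base)):
        for j in range(i + 1, len(base) + 1):
            candidate = base[i:j]
            if len(candidate) > len(substr) and all(candidate in item for item in data):
                substr = candidate
    return substr


def extract_episode_numbers(filenames):
    # Repeatedly strip the longest shared substring, mirroring the
    # multi-pass replacer logic above, until only digits remain.
    remaining = list(filenames)
    for _ in range(4):  # the code above performs up to four passes
        if all(item.isdigit() for item in remaining if item):
            break
        common = longest_common_substr(remaining)
        if not common:
            break
        remaining = [item.replace(common, "", 1) for item in remaining]
    return [int(item) if item.isdigit() else None for item in remaining]


print(extract_episode_numbers([
    "Show.S01E01.German.720p.WEB.x264-GRP",
    "Show.S01E02.German.720p.WEB.x264-GRP",
]))  # -> [1, 2]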
Exemplo n.º 19
0
def get_search_results(self, bl_query):
    hostnames = CrawlerConfig('Hostnames', self.configfile)
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')

    search_results = []

    config = CrawlerConfig('ContentAll', self.configfile)
    quality = config.get('quality')

    if "480p" not in quality:
        search_quality = "+" + quality
    else:
        search_quality = ""

    if by:
        by_search = 'https://' + by + '/?q=' + bl_query + search_quality
    else:
        by_search = None
    if dw:
        dw_search = 'https://' + dw + '/?kategorie=Movies&search=' + bl_query + search_quality
    else:
        dw_search = None
    if fx:
        fx_search = 'https://' + fx + '/?s=' + bl_query
    else:
        fx_search = None

    async_results = get_urls_async([by_search, dw_search, fx_search],
                                   self.configfile, self.dbfile, self.scraper)
    scraper = async_results[1]
    async_results = async_results[0]

    by_results = []
    dw_results = []
    fx_results = []

    for res in async_results:
        if check_is_site(res, self.configfile) == 'BY':
            by_results = by_search_results(res, by)
        elif check_is_site(res, self.configfile) == 'DW':
            dw_results = dw_search_results(res, dw)
        elif check_is_site(res, self.configfile) == 'FX':
            fx_results = fx_search_results(fx_content_to_soup(res),
                                           self.configfile, self.dbfile,
                                           scraper)

    if nk:
        nk_search = post_url(
            'https://' + nk + "/search",
            self.configfile,
            self.dbfile,
            data={'search': bl_query.replace("+", " ") + " " + quality})
        nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
    else:
        nk_results = []

    password = by
    for result in by_results:
        if "480p" in quality:
            lowered = result[0].lower()
            if ("720p" in lowered or "1080p" in lowered or "1080i" in lowered
                    or "2160p" in lowered or "complete.bluray" in lowered
                    or "complete.mbluray" in lowered
                    or "complete.uhd.bluray" in lowered):
                continue
        if "xxx" not in result[0].lower():
            search_results.append([result[0], result[1] + "|" + password])

    password = dw
    for result in dw_results:
        if "480p" in quality:
            lowered = result[0].lower()
            if ("720p" in lowered or "1080p" in lowered or "1080i" in lowered
                    or "2160p" in lowered or "complete.bluray" in lowered
                    or "complete.mbluray" in lowered
                    or "complete.uhd.bluray" in lowered):
                continue
        search_results.append([result[0], result[1] + "|" + password])

    for result in fx_results:
        if "480p" in quality:
            lowered = result[0].lower()
            if ("720p" in lowered or "1080p" in lowered or "1080i" in lowered
                    or "2160p" in lowered or "complete.bluray" in lowered
                    or "complete.mbluray" in lowered
                    or "complete.uhd.bluray" in lowered):
                continue
        if "-low" not in result[0].lower():
            search_results.append([result[0], result[1]])

    password = nk.split('.')[0].capitalize()
    for result in nk_results:
        if "480p" in quality:
            lowered = result[0].lower()
            if ("720p" in lowered or "1080p" in lowered or "1080i" in lowered
                    or "2160p" in lowered or "complete.bluray" in lowered
                    or "complete.mbluray" in lowered
                    or "complete.uhd.bluray" in lowered):
                continue
        search_results.append([result[0], result[1] + "|" + password])

    return search_results
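
The 480p exclusion check above is repeated verbatim for every source. A hypothetical helper (is_higher_quality_release is not part of FeedCrawler) could express it once; the result rows and URLs below are placeholders.

HIGHER_QUALITY_MARKERS = (
    "720p", "1080p", "1080i", "2160p",
    "complete.bluray", "complete.mbluray", "complete.uhd.bluray",
)


def is_higher_quality_release(title):
    # True if the release title carries a resolution or Blu-ray tag that a
    # 480p-only configuration should skip.
    lowered = title.lower()
    return any(marker in lowered for marker in HIGHER_QUALITY_MARKERS)


results = [
    ["Some.Movie.2021.German.480p.WEB.x264-GRP", "https://example.invalid/1"],
    ["Some.Movie.2021.German.1080p.BluRay.x264-GRP", "https://example.invalid/2"],
]
kept = [r for r in results if not is_higher_quality_release(r[0])]
print([r[0] for r in kept])  # only the 480p release remains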
Exemplo n.º 20
0
def dw_feed_enricher(self, content):
    base_url = "https://" + CrawlerConfig('Hostnames',
                                          self.configfile).get('dw')
    content = BeautifulSoup(content, 'lxml')
    posts = content.findAll("a", href=re.compile("download/"))
    href_by_id = {}
    async_results = []
    for post in posts:
        try:
            post_id = post['href'].replace("download/", "").split("/")[0]
            post_link = base_url + "/" + post['href']
            post_hosters = post.parent.findAll(
                "img", src=re.compile(r"images/icon_hoster"))
            hosters = []
            for hoster in post_hosters:
                hosters.append(hoster["title"].replace("Download bei ", ""))
            hosters = "|".join(hosters)
            href_by_id[post_id] = {"hosters": hosters, "link": post_link}
            async_results.append(post_link)
        except:
            pass
    async_results = get_urls_async(async_results, self.configfile, self.dbfile,
                                   self.scraper)
    results = async_results[0]

    entries = []
    if results:
        for result in results:
            try:
                content = []
                details = BeautifulSoup(result, 'lxml')
                title = details.title.text.split(' //')[0].replace(
                    "*mirror*", "").strip()
                post_id = details.find(
                    "a",
                    {"data-warezkorb": re.compile(r"\d*")})["data-warezkorb"]
                details = details.findAll("div", {"class": "row"})[3]
                published = details.findAll("td")[1].text.replace("Datum", "")
                try:
                    imdb = details.findAll("td")[6].find("a")
                    imdb_link = imdb["href"]
                    imdb_score = imdb.find("b").text.replace(" ", "").replace(
                        "/10", "")
                    if "0.0" in imdb_score:
                        imdb_score = "9.9"
                    content.append('<a href="' + imdb_link + '">' + imdb_score + '</a>')
                except:
                    pass

                content.append('DOWNLOADLINK="' + href_by_id[post_id]["link"] +
                               '"')
                content.append('HOSTERS="' + href_by_id[post_id]["hosters"] +
                               '"')

                content = "".join(content)

                entries.append(
                    FakeFeedParserDict({
                        "title": title,
                        "published": published,
                        "content": [FakeFeedParserDict({"value": content + " mkv"})]
                    }))
            except:
                pass

    feed = {"entries": entries}
    feed = FakeFeedParserDict(feed)
    return feed
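
dw_feed_enricher returns a feedparser-style object built from FakeFeedParserDict. The stand-in class below only illustrates the attribute-style access such a result supports; it is not the project's implementation, and the entry data is made up.

class AttrDict(dict):
    # Minimal dict with attribute access, playing the role FakeFeedParserDict
    # plays in FeedCrawler (illustrative stand-in only).
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)


feed = AttrDict({
    "entries": [
        AttrDict({
            "title": "Some.Movie.2021.German.1080p.WEB.x264-GRP",
            "published": "01.01.2021",
            "content": [AttrDict({
                "value": 'DOWNLOADLINK="https://example.invalid/download/1/" mkv'
            })],
        })
    ]
})

for entry in feed.entries:
    print(entry.title, entry.content[0].value)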
Exemplo n.º 21
0
def check_packages_types(links, packages, configfile, device):
    decrypted = []
    failed = []
    offline = []
    for package in packages:
        name = package.get('name')
        total_links = package.get('childCount')
        enabled = package.get('enabled')
        size = package.get('bytesTotal')
        done = package.get('bytesLoaded')
        if done and size:
            completed = 100 * done // size
        else:
            completed = 0
        size = readable_size(size)
        done = readable_size(done)
        if not done:
            done = "0"
        speed = package.get('speed')
        if speed:
            speed = readable_size(speed) + "/s"
        hosts = package.get('hosts')
        save_to = package.get('saveTo')
        eta = package.get('eta')
        if eta:
            eta = readable_time(eta)
        uuid = package.get('uuid')
        url = False
        urls = []
        filenames = []
        linkids = []
        package_failed = False
        package_offline = False
        package_online = False
        if links:
            delete_linkids = []
            for link in links:
                if uuid == link.get('packageUUID'):
                    linkid = link.get('uuid')
                    linkids.append(linkid)
                    if link.get('availability') == 'OFFLINE' or link.get(
                            'status') == 'Datei nicht gefunden' or link.get('status') == 'File not found':
                        delete_linkids.append(linkid)
                        package_offline = True
                    elif 'Falscher Captcha Code!' in link.get('name') or 'Wrong Captcha!' in link.get('name') or (
                            link.get('comment') and 'BLOCK_HOSTER' in link.get('comment')):
                        delete_linkids.append(linkid)
                        package_failed = True
                    else:
                        package_online = True
                    url = link.get('url')
                    if url:
                        url = str(url)
                        if url not in urls:
                            urls.append(url)
                        filename = str(link.get('name'))
                        if filename not in filenames:
                            filenames.append(filename)
            if CrawlerConfig("FeedCrawler", configfile).get('one_mirror_policy'):
                if delete_linkids:
                    if package_online:
                        device = remove_from_linkgrabber(configfile, device, delete_linkids, [])
        for h in hosts:
            if h == 'linkcrawlerretry':
                package_failed = True
        status = package.get('status')
        eta_ext = False
        if status:
            if 'fehler' in status.lower() or 'error' in status.lower():
                package_failed = True
            elif "entpacken" in status.lower() or "extracting" in status.lower():
                eta_ext = re.findall(r'eta: (.*?)\)', status, re.IGNORECASE)
                if eta_ext:
                    eta_ext = eta_ext[0].replace("d", "").replace("m", "").replace("s", "")
                if len(eta_ext) == 5:
                    eta_ext = "00:" + eta_ext
                elif len(eta_ext) == 2:
                    eta_ext = "00:00:" + eta_ext
        if package_failed:
            package_offline = False
        if package_failed and not package_offline and len(urls) == 1:
            url = urls[0]
        elif urls:
            urls = "\n".join(urls)
        if package_failed and not package_offline:
            failed.append({"name": name,
                           "path": save_to,
                           "urls": urls,
                           "url": url,
                           "linkids": linkids,
                           "uuid": uuid})
        elif package_offline:
            offline.append({"name": name,
                            "path": save_to,
                            "urls": urls,
                            "linkids": linkids,
                            "uuid": uuid})
        else:
            decrypted.append({"name": name,
                              "links": total_links,
                              "enabled": enabled,
                              "hosts": hosts,
                              "path": save_to,
                              "size": size,
                              "done": done,
                              "percentage": completed,
                              "speed": speed,
                              "eta": eta,
                              "eta_ext": eta_ext,
                              "urls": urls,
                              "filenames": filenames,
                              "linkids": linkids,
                              "uuid": uuid})
    if not failed:
        failed = False
    if not offline:
        offline = False
    if not decrypted:
        decrypted = False
    return [failed, offline, decrypted, device]
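
check_packages_types returns [failed, offline, decrypted, device], where each of the first three entries is either False or a list of package dictionaries. A minimal consumer sketch; the values are hard-coded stand-ins for what the function would produce.

failed, offline, decrypted, device = [
    False,
    [{"name": "Some.Movie.2021", "path": "/downloads", "urls": "",
      "linkids": [1], "uuid": 42}],
    False,
    None,
]

for label, packages in (("Failed", failed), ("Offline", offline),
                        ("Decrypted", decrypted)):
    if packages:
        for package in packages:
            print("[" + label + "] - " + package["name"])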
Exemplo n.º 22
0
def main():
    arguments = docopt(__doc__, version='FeedCrawler')

    print(u"┌──────────────────────────────────────────────┐")
    print(u"  FeedCrawler " + version + " von RiX")
    print(u"  https://github.com/rix1337/FeedCrawler")
    print(u"└──────────────────────────────────────────────┘")

    if arguments['--docker']:
        configpath = "/config"
    else:
        configpath = files.config(arguments['--config'])
    configfile = os.path.join(configpath, "FeedCrawler.ini")
    dbfile = os.path.join(configpath, "FeedCrawler.db")

    # ToDo Remove this migration from RSScrawler to Feedcrawler in next major version
    if os.path.exists("RSScrawler.conf"):
        os.remove("RSScrawler.conf")

    # ToDo Remove this migration from RSScrawler to Feedcrawler in next major version
    if os.path.exists(os.path.join(configpath, "RSScrawler.log")):
        os.rename(os.path.join(configpath, "RSScrawler.log"),
                  os.path.join(configpath, "FeedCrawler.log"))
        print(u"Migration des RSScrawler-Logs erfolgreich!")

    # ToDo Remove this migration from RSScrawler to Feedcrawler in next major version
    if os.path.exists(os.path.join(configpath, "RSScrawler.ini")):
        with open(os.path.join(configpath, "RSScrawler.ini"), 'r') as file:
            filedata = file.read()

        filedata = filedata.replace("[RSScrawler]", "[FeedCrawler]")
        filedata = filedata.replace("[MB]", "[ContentAll]")
        filedata = filedata.replace("[SJ]", "[ContentShows]")
        filedata = filedata.replace("[DJ]", "[CustomDJ]")
        filedata = filedata.replace("[DD]", "[CustomDD]")

        with open(os.path.join(configpath, "FeedCrawler.ini"), 'w') as file:
            file.write(filedata)

        os.remove(os.path.join(configpath, "RSScrawler.ini"))
        print(u"Migration der RSScrawler-Einstellungen erfolgreich!")

    # ToDo Remove this migration from RSScrawler to Feedcrawler in next major version
    if os.path.exists(os.path.join(configpath, "RSScrawler.db")):
        os.rename(os.path.join(configpath, "RSScrawler.db"),
                  os.path.join(configpath, "FeedCrawler.db"))
        FeedDb(dbfile, 'rsscrawler').rename_table('FeedCrawler')
        FeedDb(dbfile, 'MB_Filme').rename_table('List_ContentAll_Movies')
        FeedDb(dbfile, 'MB_Regex').rename_table('List_ContentAll_Movies_Regex')
        FeedDb(dbfile, 'MB_Staffeln').rename_table('List_ContentAll_Seasons')
        FeedDb(dbfile, 'SJ_Serien').rename_table('List_ContentShows_Shows')
        FeedDb(dbfile,
               'SJ_Serien_Regex').rename_table('List_ContentShows_Shows_Regex')
        FeedDb(dbfile, 'SJ_Staffeln_Regex').rename_table(
            'List_ContentShows_Seasons_Regex')
        FeedDb(dbfile, 'DJ_Dokus').rename_table('List_CustomDJ_Documentaries')
        FeedDb(
            dbfile,
            'DJ_Dokus_Regex').rename_table('List_CustomDJ_Documentaries_Regex')
        print(u"Migration der RSScrawler-Datenbank erfolgreich!")

    print(u"Nutze das Verzeichnis " + configpath + u" für Einstellungen/Logs")

    log_level = logging.__dict__[arguments['--log-level']] if arguments[
        '--log-level'] in logging.__dict__ else logging.INFO
    log_file = os.path.join(configpath, 'FeedCrawler.log')
    log_format = '%(asctime)s - %(message)s'

    hostnames = CrawlerConfig('Hostnames', configfile)

    def clean_up_hostname(host, string):
        if '/' in string:
            string = string.replace('https://', '').replace('http://', '')
            string = re.findall(r'([a-z-.]*\.[a-z]*)', string)[0]
            hostnames.save(host, string)
        if re.match(r'.*[A-Z].*', string):
            hostnames.save(host, string.lower())
        if string:
            print(u'Hostname für ' + host.upper() + ": " + string)
        else:
            print(u'Hostname für ' + host.upper() + ': Nicht gesetzt!')
        return string

    set_hostnames = {}
    list_names = ['dw', 'fx', 'sj', 'dj', 'sf', 'ww', 'nk', 'by', 'dd']
    for name in list_names:
        hostname = clean_up_hostname(name, hostnames.get(name))
        if hostname:
            set_hostnames[name] = hostname

    if not arguments['--testlauf'] and not set_hostnames:
        print(
            u'Keine Hostnamen in der FeedCrawler.ini gefunden! Beende FeedCrawler!'
        )
        time.sleep(10)
        sys.exit(1)

    disable_request_warnings(InsecureRequestWarning)

    if arguments['--testlauf']:
        device = False
    else:
        if not os.path.exists(configfile):
            if arguments['--docker']:
                if arguments['--jd-user'] and arguments['--jd-pass']:
                    device = files.myjd_input(configfile, arguments['--port'],
                                              arguments['--jd-user'],
                                              arguments['--jd-pass'],
                                              arguments['--jd-device'])
                else:
                    device = False
            else:
                device = files.myjd_input(configfile, arguments['--port'],
                                          arguments['--jd-user'],
                                          arguments['--jd-pass'],
                                          arguments['--jd-device'])
        else:
            feedcrawler = CrawlerConfig('FeedCrawler', configfile)
            user = feedcrawler.get('myjd_user')
            password = feedcrawler.get('myjd_pass')
            if user and password:
                device = get_device(configfile)
                if not device:
                    device = get_if_one_device(user, password)
                    if device:
                        print(u"Gerätename " + device +
                              " automatisch ermittelt.")
                        feedcrawler.save('myjd_device', device)
                        device = get_device(configfile)
            else:
                device = files.myjd_input(configfile, arguments['--port'],
                                          arguments['--jd-user'],
                                          arguments['--jd-pass'],
                                          arguments['--jd-device'])

        if not device and not arguments['--testlauf']:
            print(
                u'My JDownloader Zugangsdaten fehlerhaft! Beende FeedCrawler!')
            time.sleep(10)
            sys.exit(1)
        else:
            print(u"Erfolgreich mit My JDownloader verbunden. Gerätename: " +
                  device.name)

    feedcrawler = CrawlerConfig('FeedCrawler', configfile)

    port = int(feedcrawler.get("port"))
    docker = False
    if arguments['--docker']:
        port = 9090
        docker = True
    elif arguments['--port']:
        port = int(arguments['--port'])

    if feedcrawler.get("prefix"):
        prefix = '/' + feedcrawler.get("prefix")
    else:
        prefix = ''
    local_address = 'http://' + common.check_ip() + ':' + str(port) + prefix
    if not arguments['--docker']:
        print(u'Der Webserver ist erreichbar unter ' + local_address)

    if arguments['--keep-cdc']:
        print(u"CDC-Tabelle nicht geleert!")
    else:
        FeedDb(dbfile, 'cdc').reset()

    p = multiprocessing.Process(target=web_server,
                                args=(port, local_address, docker, configfile,
                                      dbfile, log_level, log_file, log_format,
                                      device))
    p.start()

    if not arguments['--testlauf']:
        c = multiprocessing.Process(target=crawler,
                                    args=(configfile, dbfile, device,
                                          feedcrawler, log_level, log_file,
                                          log_format))
        c.start()

        w = multiprocessing.Process(target=crawldog, args=(configfile, dbfile))
        w.start()

        print(u'Drücke [Strg] + [C] zum Beenden')

        def signal_handler(signum, frame):
            print(u'Beende FeedCrawler...')
            p.terminate()
            c.terminate()
            w.terminate()
            sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)

        try:
            while True:
                signal.pause()
        except AttributeError:
            while True:
                time.sleep(1)
    else:
        crawler(configfile, dbfile, device, feedcrawler, log_level, log_file,
                log_format)
        p.terminate()
        sys.exit(0)
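
The shutdown handling at the end of main() follows a common multiprocessing pattern: a SIGINT handler terminates the worker processes, and signal.pause() is replaced by a sleep loop on platforms that lack it (e.g. Windows). A stripped-down sketch of that pattern, independent of FeedCrawler:

import multiprocessing
import signal
import sys
import time


def worker():
    while True:
        time.sleep(1)


if __name__ == '__main__':
    p = multiprocessing.Process(target=worker)
    p.start()

    def signal_handler(signum, frame):
        # Stop the child process before exiting the parent.
        p.terminate()
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    try:
        while True:
            signal.pause()  # raises AttributeError on Windows
    except AttributeError:
        while True:
            time.sleep(1)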
Exemplo n.º 23
0
def check_url(configfile, dbfile, scraper=False):
    hostnames = CrawlerConfig('Hostnames', configfile)
    sj = hostnames.get('sj')
    dj = hostnames.get('dj')
    sf = hostnames.get('sf')
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    ww = hostnames.get('ww')
    dd = hostnames.get('dd')

    if not scraper:
        scraper = cloudscraper.create_scraper()

    sj_url = 'https://' + sj
    dj_url = 'https://' + dj
    sf_url = 'https://' + sf
    by_url = 'https://' + by
    dw_url = 'https://' + dw
    fx_url = 'https://' + fx
    nk_url = 'https://' + nk
    ww_url = 'https://' + ww
    dd_url = 'https://' + dd

    sj_blocked_proxy = False
    dj_blocked_proxy = False
    sf_blocked_proxy = False
    by_blocked_proxy = False
    dw_blocked_proxy = False
    fx_blocked_proxy = False
    nk_blocked_proxy = False
    ww_blocked_proxy = False
    dd_blocked_proxy = False
    sj_blocked = False
    dj_blocked = False
    sf_blocked = False
    by_blocked = False
    dw_blocked = False
    fx_blocked = False
    nk_blocked = False
    ww_blocked = False
    dd_blocked = False

    db = FeedDb(dbfile, 'proxystatus')
    db.delete("SJ")
    db.delete("DJ")
    db.delete("SF")
    db.delete("BY")
    db.delete("DW")
    db.delete("FX")
    db.delete("NK")
    db.delete("WW")
    db.delete("DD")
    db_normal = FeedDb(dbfile, 'normalstatus')
    db_normal.delete("SJ")
    db_normal.delete("DJ")
    db_normal.delete("SF")
    db_normal.delete("BY")
    db_normal.delete("DW")
    db_normal.delete("FX")
    db_normal.delete("NK")
    db_normal.delete("WW")
    db_normal.delete("DD")

    proxy = CrawlerConfig('FeedCrawler', configfile).get('proxy')
    fallback = CrawlerConfig('FeedCrawler', configfile).get('fallback')

    if proxy:
        proxies = {'http': proxy, 'https': proxy}

        if not sj:
            db.store("SJ", "Blocked")
        else:
            try:
                if "block." in str(
                        scraper.get(
                            sj_url,
                            proxies=proxies,
                            timeout=30,
                            allow_redirects=False).headers.get("location")):
                    sj_blocked_proxy = True
                else:
                    db.delete("SJ")
            except:
                sj_blocked_proxy = True
            if sj_blocked_proxy:
                print(
                    u"Der Zugriff auf SJ ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("SJ", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not dj:
            db.store("DJ", "Blocked")
        else:
            try:
                if "block." in str(
                        scraper.get(
                            dj_url,
                            proxies=proxies,
                            timeout=30,
                            allow_redirects=False).headers.get("location")):
                    dj_blocked_proxy = True
                else:
                    db.delete("DJ")
            except:
                dj_blocked_proxy = True
            if dj_blocked_proxy:
                print(
                    u"Der Zugriff auf DJ ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("DJ", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not sf:
            db.store("SF", "Blocked")
        else:
            try:
                delta = datetime.datetime.now().strftime("%Y-%m-%d")
                sf_test = scraper.get(sf_url + '/updates/' + delta,
                                      proxies=proxies,
                                      timeout=30,
                                      allow_redirects=False)
                if (not sf_test.text or sf_test.status_code not in (200, 304)
                        or '<h3><a href="/' not in sf_test.text):
                    sf_blocked_proxy = True
                else:
                    db.delete("SF")
            except:
                sf_blocked_proxy = True
            if sf_blocked_proxy:
                print(
                    u"Der Zugriff auf SF ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("SF", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not by:
            db.store("BY", "Blocked")
        else:
            try:
                if scraper.get(by_url,
                               proxies=proxies,
                               timeout=30,
                               allow_redirects=False).status_code == 403:
                    by_blocked_proxy = True
                else:
                    db.delete("BY")
            except:
                by_blocked_proxy = True
            if by_blocked_proxy:
                print(
                    u"Der Zugriff auf BY ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("BY", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not dw:
            db.store("DW", "Blocked")
        else:
            try:
                dw_test = scraper.get(dw_url +
                                      "/downloads/hauptkategorie/movies/",
                                      proxies=proxies,
                                      timeout=30,
                                      allow_redirects=False)
                if (not dw_test.text or dw_test.status_code not in (200, 304)
                        or '<a id="first_element" href=' not in dw_test.text):
                    dw_blocked_proxy = True
                else:
                    db.delete("DW")
            except:
                dw_blocked_proxy = True
            if dw_blocked_proxy:
                print(
                    u"Der Zugriff auf DW ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("DW", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not fx:
            db.store("FX", "Blocked")
        else:
            try:
                if scraper.get(fx_url,
                               proxies=proxies,
                               timeout=30,
                               allow_redirects=False).status_code == 403:
                    fx_blocked_proxy = True
                else:
                    db.delete("FX")
            except:
                fx_blocked_proxy = True
            if fx_blocked_proxy:
                print(
                    u"Der Zugriff auf FX ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("FX", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not nk:
            db.store("NK", "Blocked")
        else:
            try:
                if scraper.get(nk_url,
                               proxies=proxies,
                               timeout=30,
                               allow_redirects=False).status_code == 403:
                    nk_blocked_proxy = True
                else:
                    db.delete("NK")
            except:
                nk_blocked_proxy = True
            if nk_blocked_proxy:
                print(
                    u"Der Zugriff auf NK ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("NK", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not ww:
            db.store("WW", "Blocked")
        else:
            try:
                ww_test = scraper.post(ww_url + "/ajax",
                                       data="p=1&t=l&q=1",
                                       proxies=proxies,
                                       timeout=30,
                                       allow_redirects=False)
                if (not ww_test.text or ww_test.status_code not in (200, 304)
                        or '<span class="main-rls">' not in ww_test.text):
                    ww_blocked_proxy = True
                else:
                    db.delete("WW")
            except:
                ww_blocked_proxy = True
            if ww_blocked_proxy:
                print(
                    u"Der Zugriff auf WW ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("WW", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not dd:
            db.store("DD", "Blocked")
        else:
            try:
                if scraper.get(dd_url,
                               proxies=proxies,
                               timeout=30,
                               allow_redirects=False).status_code == 403:
                    dd_blocked_proxy = True
                else:
                    db.delete("DD")
            except:
                dd_blocked_proxy = True
            if dd_blocked_proxy:
                print(
                    u"Der Zugriff auf DD ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("DD", "Blocked")
                scraper = cloudscraper.create_scraper()

    if not proxy or (proxy and sj_blocked_proxy and fallback):
        if not sj:
            db.store("SJ", "Blocked")
        else:
            try:
                if "block." in str(
                        scraper.get(
                            sj_url, timeout=30,
                            allow_redirects=False).headers.get("location")):
                    sj_blocked = True
            except:
                sj_blocked = True
            if sj_blocked:
                db_normal.store("SJ", "Blocked")
                print(
                    u"Der Zugriff auf SJ ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and dj_blocked_proxy and fallback):
        if not dj:
            db.store("DJ", "Blocked")
        else:
            try:
                if "block." in str(
                        scraper.get(
                            dj_url, timeout=30,
                            allow_redirects=False).headers.get("location")):
                    dj_blocked = True
            except:
                dj_blocked = True
            if dj_blocked:
                db_normal.store("DJ", "Blocked")
                print(
                    u"Der Zugriff auf DJ ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and sf_blocked_proxy and fallback):
        if not sf:
            db.store("SF", "Blocked")
        else:
            try:
                delta = datetime.datetime.now().strftime("%Y-%m-%d")
                sf_test = scraper.get(sf_url + '/updates/' + delta,
                                      timeout=30,
                                      allow_redirects=False)
                if (not sf_test.text or sf_test.status_code not in (200, 304)
                        or '<h3><a href="/' not in sf_test.text):
                    sf_blocked = True
            except:
                sf_blocked = True
            if sf_blocked:
                db_normal.store("SF", "Blocked")
                print(
                    u"Der Zugriff auf SF ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and by_blocked_proxy and fallback):
        if not by:
            db.store("BY", "Blocked")
        else:
            try:
                if scraper.get(by_url, timeout=30,
                               allow_redirects=False).status_code == 403:
                    by_blocked = True
            except:
                by_blocked = True
            if by_blocked:
                db_normal.store("BY", "Blocked")
                print(
                    u"Der Zugriff auf BY ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and dw_blocked_proxy and fallback):
        if not dw:
            db.store("DW", "Blocked")
        else:
            try:
                dw_test = scraper.get(dw_url +
                                      "/downloads/hauptkategorie/movies/",
                                      timeout=30,
                                      allow_redirects=False)
                if (not dw_test.text or dw_test.status_code not in (200, 304)
                        or '<a id="first_element" href=' not in dw_test.text):
                    dw_blocked = True
            except:
                dw_blocked = True
            if dw_blocked:
                db_normal.store("DW", "Blocked")
                print(
                    u"Der Zugriff auf DW ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and fx_blocked_proxy and fallback):
        if not fx:
            db.store("FX", "Blocked")
        else:
            try:
                if scraper.get(fx_url, timeout=30,
                               allow_redirects=False).status_code == 403:
                    fx_blocked = True
            except:
                fx_blocked = True
            if fx_blocked:
                db_normal.store("FX", "Blocked")
                print(
                    u"Der Zugriff auf FX ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and nk_blocked_proxy and fallback):
        if not nk:
            db.store("NK", "Blocked")
        else:
            try:
                if scraper.get(nk_url, timeout=30,
                               allow_redirects=False).status_code == 403:
                    nk_blocked = True
            except:
                nk_blocked = True
            if nk_blocked:
                db_normal.store("NK", "Blocked")
                print(
                    u"Der Zugriff auf NK ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and ww_blocked_proxy and fallback):
        if not ww:
            db.store("WW", "Blocked")
        else:
            try:
                ww_test = scraper.post(ww_url + "/ajax",
                                       data="p=1&t=l&q=1",
                                       timeout=30,
                                       allow_redirects=False)
                if (not ww_test.text or ww_test.status_code not in (200, 304)
                        or '<span class="main-rls">' not in ww_test.text):
                    ww_blocked = True
            except:
                ww_blocked = True
            if ww_blocked:
                db_normal.store("WW", "Blocked")
                print(
                    u"Der Zugriff auf WW ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and dd_blocked_proxy and fallback):
        if not dd:
            db.store("DD", "Blocked")
        else:
            try:
                if scraper.get(dd_url, timeout=30,
                               allow_redirects=False).status_code == 403:
                    dd_blocked = True
            except:
                dd_blocked = True
            if dd_blocked:
                db_normal.store("DD", "Blocked")
                print(
                    u"Der Zugriff auf DD ist mit der aktuellen IP nicht möglich!"
                )

    return scraper
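
check_url runs the same probe once per hostname. The sketch below shows the table-driven shape of such a check using plain requests instead of the cloudscraper session, with simplified blocked handling; the hostnames, paths and markers are placeholders.

import requests


def probe_sites(sites, timeout=30):
    # sites maps a short name to (url, marker expected in the response body).
    # Returns the names that look blocked or unreachable.
    blocked = set()
    for name, (url, marker) in sites.items():
        try:
            response = requests.get(url, timeout=timeout, allow_redirects=False)
            if response.status_code not in (200, 304) or marker not in response.text:
                blocked.add(name)
        except requests.RequestException:
            blocked.add(name)
    return blocked


print(probe_sites({
    "SF": ("https://example.invalid/updates/2021-01-01", '<h3><a href="/'),
    "DW": ("https://example.invalid/downloads/hauptkategorie/movies/",
           '<a id="first_element" href='),
}))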
Exemplo n.º 24
0
class BL:
    _SITE = 'FX'
    SUBSTITUTE = r"[&#\s/]"

    def __init__(self, configfile, dbfile, device, logging, scraper, filename):
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device

        self.hostnames = CrawlerConfig('Hostnames', self.configfile)
        self.url = self.hostnames.get('fx')
        self.password = self.url.split('.')[0]

        self.URL = 'https://' + self.url
        self.FEED_URLS = [self.URL]

        self.config = CrawlerConfig("ContentAll", self.configfile)
        self.feedcrawler = CrawlerConfig("FeedCrawler", self.configfile)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.filename = filename
        self.pattern = False
        self.db = FeedDb(self.dbfile, 'FeedCrawler')
        self.hevc_retail = self.config.get("hevc_retail")
        self.retail_only = self.config.get("retail_only")
        self.hosters = CrawlerConfig("Hosters", configfile).get_section()
        self.hoster_fallback = self.config.get("hoster_fallback")
        self.prefer_dw_mirror = self.feedcrawler.get("prefer_dw_mirror")

        search = int(
            CrawlerConfig("ContentAll", self.configfile).get("search"))
        i = 2
        while i <= search:
            page_url = self.URL + "/page/" + str(i)
            if page_url not in self.FEED_URLS:
                self.FEED_URLS.append(page_url)
            i += 1
        self.cdc = FeedDb(self.dbfile, 'cdc')

        self.last_set_all = self.cdc.retrieve("ALLSet-" + self.filename)
        self.headers = {
            'If-Modified-Since':
            str(self.cdc.retrieve(self._SITE + "Headers-" + self.filename))
        }

        self.last_sha = self.cdc.retrieve(self._SITE + "-" + self.filename)
        settings = [
            "quality", "search", "ignore", "regex", "cutoff", "enforcedl",
            "crawlseasons", "seasonsquality", "seasonpacks", "seasonssource",
            "imdbyear", "imdb", "hevc_retail", "retail_only", "hoster_fallback"
        ]
        self.settings = []
        self.settings.append(self.feedcrawler.get("english"))
        self.settings.append(self.feedcrawler.get("surround"))
        self.settings.append(self.feedcrawler.get("prefer_dw_mirror"))
        self.settings.append(self.hosters)
        for s in settings:
            self.settings.append(self.config.get(s))
        self.search_imdb_done = False
        self.search_regular_done = False
        self.dl_unsatisfied = False

        self.get_feed_method = fx_feed_enricher
        self.get_url_method = get_url
        self.get_url_headers_method = get_url_headers
        self.get_download_links_method = fx_get_download_links
        self.download_method = myjd_download

        try:
            self.imdb = float(self.config.get('imdb'))
        except:
            self.imdb = 0.0

    def periodical_task(self):
        self.device = shared_blogs.periodical_task(self)
        return self.device
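
The __init__ above (and the DW variant later in this listing) extends FEED_URLS with numbered pages up to the configured search depth. A compact, standalone version of that pagination; the base URL is a placeholder and the "/page/" scheme matches the FX variant.

def build_feed_urls(base_url, search_depth):
    # First page plus /page/2 .. /page/<search_depth>, skipping duplicates.
    urls = [base_url]
    for i in range(2, search_depth + 1):
        page_url = base_url + "/page/" + str(i)
        if page_url not in urls:
            urls.append(page_url)
    return urls


print(build_feed_urls("https://example.invalid", 3))
# ['https://example.invalid', 'https://example.invalid/page/2', 'https://example.invalid/page/3']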
Exemplo n.º 25
0
def get_redirected_url(url, configfile, dbfile, scraper=False):
    config = CrawlerConfig('FeedCrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()

    db = FeedDb(dbfile, 'proxystatus')
    db_normal = FeedDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    if proxy:
        try:
            if site and "SJ" in site:
                if db.retrieve("SJ"):
                    if config.get("fallback") and not db_normal.retrieve("SJ"):
                        return scraper.get(
                            url, allow_redirects=False,
                            timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "DJ" in site:
                if db.retrieve("DJ"):
                    if config.get("fallback") and not db_normal.retrieve("DJ"):
                        return scraper.get(
                            url, allow_redirects=False,
                            timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "SF" in site:
                if db.retrieve("SF"):
                    if config.get("fallback") and not db_normal.retrieve("SF"):
                        return scraper.get(
                            url, allow_redirects=False,
                            timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "BY" in site:
                if db.retrieve("BY"):
                    if config.get("fallback") and not db_normal.retrieve("BY"):
                        return scraper.get(
                            url, allow_redirects=False,
                            timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "DW" in site:
                if db.retrieve("DW"):
                    if config.get("fallback") and not db_normal.retrieve("DW"):
                        return scraper.get(
                            url, allow_redirects=False,
                            timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "FX" in site:
                if db.retrieve("FX"):
                    if config.get("fallback") and not db_normal.retrieve("FX"):
                        return scraper.get(
                            url, allow_redirects=False,
                            timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "NK" in site:
                if db.retrieve("NK"):
                    if config.get("fallback") and not db_normal.retrieve("NK"):
                        return scraper.get(
                            url, allow_redirects=False,
                            timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "WW" in site:
                return url
            elif site and "DD" in site:
                if db.retrieve("DD"):
                    if config.get("fallback") and not db_normal.retrieve("DD"):
                        return scraper.get(
                            url, allow_redirects=False,
                            timeout=30).headers._store["location"][1]
                    else:
                        return url
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.get(url,
                                   allow_redirects=False,
                                   proxies=proxies,
                                   timeout=30).headers._store["location"][1]
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return url
    else:
        try:
            if site and "SJ" in site and db_normal.retrieve("SJ"):
                return url
            elif site and "DJ" in site and db_normal.retrieve("DJ"):
                return url
            elif site and "SF" in site and db_normal.retrieve("SF"):
                return url
            elif site and "BY" in site and db_normal.retrieve("BY"):
                return url
            elif site and "DW" in site and db_normal.retrieve("DW"):
                return url
            elif site and "FX" in site and db_normal.retrieve("FX"):
                return url
            elif site and "NK" in site and db_normal.retrieve("NK"):
                return url
            elif site and "WW" in site:
                return url
            elif site and "DD" in site and db_normal.retrieve("DD"):
                return url
            response = scraper.get(url, allow_redirects=False,
                                   timeout=30).headers._store["location"][1]
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return url
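
get_redirected_url reads the Location header through headers._store, a private attribute of the requests CaseInsensitiveDict; the public headers.get("location") accessor returns the same value. A minimal sketch of a single-redirect lookup using the public API (the URL is only an example):

import requests


def follow_one_redirect(url, timeout=30):
    # Return the redirect target, or the original URL if the response
    # carries no Location header or the request fails.
    try:
        response = requests.get(url, allow_redirects=False, timeout=timeout)
        return response.headers.get("location", url)
    except requests.RequestException:
        return url


print(follow_one_redirect("http://example.com"))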
Exemplo n.º 26
0
    def __init__(self, configfile, dbfile, device, logging, scraper, filename):
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device

        self.hostnames = CrawlerConfig('Hostnames', self.configfile)
        self.url = self.hostnames.get('dw')
        self.password = self.url.split('.')[0]

        if "List_ContentAll_Seasons" not in filename:
            self.URL = 'https://' + self.url + "/downloads/hauptkategorie/movies/"
        else:
            self.URL = 'https://' + self.url + "/downloads/hauptkategorie/serien/"
        self.FEED_URLS = [self.URL]

        self.config = CrawlerConfig("ContentAll", self.configfile)
        self.feedcrawler = CrawlerConfig("FeedCrawler", self.configfile)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.filename = filename
        self.pattern = False
        self.db = FeedDb(self.dbfile, 'FeedCrawler')
        self.hevc_retail = self.config.get("hevc_retail")
        self.retail_only = self.config.get("retail_only")
        self.hosters = CrawlerConfig("Hosters", configfile).get_section()
        self.hoster_fallback = self.config.get("hoster_fallback")
        self.prefer_dw_mirror = self.feedcrawler.get("prefer_dw_mirror")

        search = int(
            CrawlerConfig("ContentAll", self.configfile).get("search"))
        i = 2
        while i <= search:
            page_url = self.URL + "order/zeit/sort/D/seite/" + str(i) + "/"
            if page_url not in self.FEED_URLS:
                self.FEED_URLS.append(page_url)
            i += 1
        self.cdc = FeedDb(self.dbfile, 'cdc')

        self.last_set_all = self.cdc.retrieve("ALLSet-" + self.filename)
        self.headers = {
            'If-Modified-Since':
            str(self.cdc.retrieve(self._SITE + "Headers-" + self.filename))
        }

        self.last_sha = self.cdc.retrieve(self._SITE + "-" + self.filename)
        settings = [
            "quality", "search", "ignore", "regex", "cutoff", "enforcedl",
            "crawlseasons", "seasonsquality", "seasonpacks", "seasonssource",
            "imdbyear", "imdb", "hevc_retail", "retail_only", "hoster_fallback"
        ]
        self.settings = []
        self.settings.append(self.feedcrawler.get("english"))
        self.settings.append(self.feedcrawler.get("surround"))
        self.settings.append(self.feedcrawler.get("prefer_dw_mirror"))
        self.settings.append(self.hosters)
        for s in settings:
            self.settings.append(self.config.get(s))
        self.search_imdb_done = False
        self.search_regular_done = False
        self.dl_unsatisfied = False

        self.get_feed_method = dw_feed_enricher
        self.get_url_method = get_url
        self.get_url_headers_method = get_url_headers
        self.get_download_links_method = dw_get_download_links
        self.download_method = add_decrypt_instead_of_download

        try:
            self.imdb = float(self.config.get('imdb'))
        except:
            self.imdb = 0.0
Exemplo n.º 27
0
def download(configfile, dbfile, device, title, subdir, old_links, password, full_path=None, autostart=False):
    try:
        if not device or not is_device(device):
            device = get_device(configfile)

        if isinstance(old_links, list):
            links = []
            for link in old_links:
                if link not in links:
                    links.append(link)
        else:
            links = [old_links]

        links = str(links).replace(" ", "")
        crawljobs = CrawlerConfig('Crawljobs', configfile)
        usesubdir = crawljobs.get("subdir")
        priority = "DEFAULT"

        if full_path:
            path = full_path
        else:
            if usesubdir:
                path = subdir + "/<jd:packagename>"
            else:
                path = "<jd:packagename>"
        if "Remux" in path:
            priority = "LOWER"

        try:
            device.linkgrabber.add_links(params=[
                {
                    "autostart": autostart,
                    "links": links,
                    "packageName": title,
                    "extractPassword": password,
                    "priority": priority,
                    "downloadPassword": password,
                    "destinationFolder": path,
                    "comment": "FeedCrawler by rix1337",
                    "overwritePackagizerRules": False
                }])
        except feedcrawler.myjdapi.TokenExpiredException:
            device = get_device(configfile)
            if not device or not is_device(device):
                return False
            device.linkgrabber.add_links(params=[
                {
                    "autostart": autostart,
                    "links": links,
                    "packageName": title,
                    "extractPassword": password,
                    "priority": priority,
                    "downloadPassword": password,
                    "destinationFolder": path,
                    "comment": "FeedCrawler by rix1337",
                    "overwritePackagizerRules": False
                }])
        db = FeedDb(dbfile, 'crawldog')
        if db.retrieve(title):
            db.delete(title)
            db.store(title, 'retried')
        else:
            db.store(title, 'added')
        return device
    except feedcrawler.myjdapi.MYJDException as e:
        print(u"Fehler bei der Verbindung mit MyJDownloader: " + str(e))
        return False
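
The core of download() is the parameter list handed to device.linkgrabber.add_links. The sketch below only assembles that payload the way the function above does; the title, links and password are placeholders and no My JDownloader connection is made.

def build_add_links_params(title, links, password, path,
                           autostart=False, priority="DEFAULT"):
    # Assemble the single-package payload used by download() above.
    return [{
        "autostart": autostart,
        "links": str(links).replace(" ", ""),
        "packageName": title,
        "extractPassword": password,
        "priority": priority,
        "downloadPassword": password,
        "destinationFolder": path,
        "comment": "FeedCrawler by rix1337",
        "overwritePackagizerRules": False
    }]


print(build_add_links_params(
    "Some.Movie.2021.German.1080p.WEB.x264-GRP",
    ["https://example.invalid/file1", "https://example.invalid/file2"],
    "example.invalid",
    "Movies/<jd:packagename>"))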