Example #1
    def on_task_input(self, task, config):
        if not config:
            return
        config = self.prepare_config(config)
        entries = []
        queue_name = config.get('queue_name')

        with Session() as session:
            for queue_item in queue_get(session=session, downloaded=False, queue_name=queue_name):
                entry = Entry()
                # make sure the entry has IMDB fields filled
                entry['url'] = ''
                if queue_item.imdb_id:
                    entry['imdb_id'] = queue_item.imdb_id
                    entry['imdb_url'] = make_imdb_url(queue_item.imdb_id)
                if queue_item.tmdb_id:
                    entry['tmdb_id'] = queue_item.tmdb_id

                # check if title is a imdb url (leftovers from old database?)
                # TODO: maybe this should be fixed at the queue_get ...
                if 'http://' in queue_item.title:
                    plugin.get_plugin_by_name('tmdb_lookup').instance.lookup(entry)
                    log.debug('queue contains url instead of title')
                    if entry.get('movie_name'):
                        entry['title'] = entry['movie_name']
                    else:
                        log.error('Found imdb url in imdb queue, but lookup failed: %s' % queue_item.title)
                        continue
                else:
                    # normal title
                    entry['title'] = queue_item.title

                # Add the year and quality if configured to (make sure not to double it up)
                if config.get('year') and entry.get('movie_year') \
                        and str(entry['movie_year']) not in entry['title']:
                    plugin.get_plugin_by_name('tmdb_lookup').instance.lookup(entry)
                    entry['title'] += ' %s' % entry['movie_year']
                # TODO: qualities can now be ranges.. how should we handle this?
                if config.get('quality') and queue_item.quality != 'ANY':
                    log.info('quality option of emit_movie_queue is disabled while we figure out how to handle ranges')
                    # entry['title'] += ' %s' % queue_item.quality
                entries.append(entry)
                if entry.get('imdb_id'):
                    log.debug('Added title and IMDB id to new entry: %s - %s',
                              entry['title'], entry['imdb_id'])
                elif entry.get('tmdb_id'):
                    log.debug('Added title and TMDB id to new entry: %s - %s',
                              entry['title'], entry['tmdb_id'])
                else:
                    # should this ever happen though?
                    log.debug('Added title to new entry: %s', entry['title'])

        return entries
Example #2
    def entries_from_lines(self, lines):
        """

        :param lines: list of lines
        :return list: list of entries generated from lines
        """
        entries = []
        for line in lines:
            entry = Entry(irc_raw_message=line)

            # Use the message as title
            entry['title'] = line

            # find a url...
            url_match = URL_MATCHER.findall(line)
            if url_match:
                # We have one or more URLs; keep them all and use the last as canonical
                urls = list(url_match)
                url = urls[-1]
                entry.update({'urls': urls, 'url': url})

            if not entry.get('url'):
                log.error('Parsing message failed. No url found.')
                continue

            entries.append(entry)

        return entries
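A quick sketch of the URL-extraction step above; URL_MATCHER here is a hypothetical stand-in for the plugin's module-level regex:

import re

# Hypothetical stand-in for the plugin's URL_MATCHER regex.
URL_MATCHER = re.compile(r'(?:https?|ftp)://\S+')

lines = [
    'New torrent: http://tracker.example/t/1 mirror: http://mirror.example/t/1',
    'just chatter, no link',
]
for line in lines:
    urls = URL_MATCHER.findall(line)
    if urls:
        # mirrors entries_from_lines: all matches go to 'urls', the last one wins as 'url'
        print({'title': line, 'urls': urls, 'url': urls[-1]})
    else:
        print('skipped (no url):', line)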
Example #3
 def on_task_input(self, task, config):
     entries = []
     with Session() as session:
         digest_entries = session.query(DigestEntry).filter(
             DigestEntry.list == config['list'])
         # Remove any entries older than the expire time, if defined.
         if isinstance(config.get('expire'), str):
             expire_time = parse_timedelta(config['expire'])
             digest_entries.filter(
                 DigestEntry.added < datetime.now() - expire_time).delete()
         for index, digest_entry in enumerate(
                 digest_entries.order_by(DigestEntry.added.desc()).all()):
             # Just remove any entries past the limit, if set.
             if 0 < config.get('limit', -1) <= index:
                 session.delete(digest_entry)
                 continue
             entry = Entry(digest_entry.entry)
             if config.get('restore_state') and entry.get('digest_state'):
                 # Not sure this is the best way, but we don't want hooks running on this task
                 # (like backlog hooking entry.fail)
                 entry._state = entry['digest_state']
             entries.append(entry)
             # If expire is 'True', we remove it after it is output once.
             if config.get('expire', True) is True:
                 session.delete(digest_entry)
     return entries
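The expire option above does double duty: a string value is parsed as an interval and prunes old rows up front, while a literal True deletes each row as soon as it has been emitted once. A minimal sketch of the age check, with a plain timedelta standing in for parse_timedelta:

from datetime import datetime, timedelta

expire_time = timedelta(hours=12)  # roughly what parse_timedelta('12 hours') would yield
added = datetime.now() - timedelta(days=1)  # hypothetical DigestEntry.added value
# rows older than the cutoff are the ones the .delete() call removes
print(added < datetime.now() - expire_time)  # True -> this row would be deleted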
Example #4
    def __init__(self, path: str, fields: list, encoding: str):
        self.filename = path
        self.fields = fields
        self.encoding = encoding

        self.entries = []
        entries = []
        try:
            with open(self.filename, encoding=self.encoding) as content:
                # TODO: use the load from our serialization system if that goes in
                entries = load_yaml(content)
        except FileNotFoundError:
            pass
        except Exception as exc:
            raise PluginError(f'Error opening yaml file `{self.filename}`: {exc}')
        if not entries:
            return
        if isinstance(entries, list):
            for entry in entries:
                if isinstance(entry, dict):
                    entry = Entry(**entry)
                else:
                    raise PluginError(f'Elements of `{self.filename}` must be dictionaries')
                if not entry.get('url'):
                    entry['url'] = f'mock://localhost/entry_list/{random.random()}'
                self.entries.append(entry)
        else:
            raise PluginError(f'List `{self.filename}` must be a yaml list')
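For reference, the constructor expects the file to contain a YAML list of dictionaries; a minimal sketch with PyYAML standing in for the project's load_yaml (file contents hypothetical):

import yaml  # PyYAML, standing in for load_yaml above

entries = yaml.safe_load("""
- title: Some.Release.720p
  url: http://example.com/some.release
- title: Another.Release.1080p
""")
print(entries[0]['url'])       # provided explicitly
print(entries[1].get('url'))   # None -> the plugin fills in a mock://localhost/... url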
Example #5
def lookup_movie(title, session, identifiers=None):
    try:
        imdb_lookup = plugin.get('imdb_lookup', 'movie_list').lookup
    except DependencyError:
        imdb_lookup = None

    try:
        tmdb_lookup = plugin.get('tmdb_lookup', 'movie_list').lookup
    except DependencyError:
        tmdb_lookup = None

    if not (imdb_lookup or tmdb_lookup):
        return

    entry = Entry(title=title)
    if identifiers:
        for identifier in identifiers:
            for key, value in identifier.items():
                entry[key] = value
    try:
        if imdb_lookup:
            imdb_lookup(entry, session=session)
    # IMDB lookup raises PluginError instead of the normal ValueError
    except PluginError:
        pass
    # fall back to TMDB when IMDB is unavailable or its lookup failed
    if tmdb_lookup and not entry.get('movie_name'):
        tmdb_lookup(entry)

    # Return only if lookup was successful
    if entry.get('movie_name'):
        return entry
    return
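The identifiers argument is a list of single-key dicts that are copied onto the probe entry before any lookup runs; the merging step in isolation (Entry behaves like a dict here, values hypothetical):

entry = {'title': 'Brazil'}  # stand-in for Entry(title=title)
identifiers = [{'imdb_id': 'tt0088846'}, {'tmdb_id': 68}]
for identifier in identifiers:
    for key, value in identifier.items():
        entry[key] = value
print(entry)  # {'title': 'Brazil', 'imdb_id': 'tt0088846', 'tmdb_id': 68}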
Example #6
    def entries_from_lines(self, lines):
        """

        :param lines: list of lines
        :return list: list of entries generated from lines
        """
        entries = []
        for line in lines:
            entry = Entry(irc_raw_message=line)

            # Use the message as title
            entry['title'] = line

            # find a url...
            url_match = URL_MATCHER.findall(line)
            if url_match:
                # We have one or more URLs; keep them all and use the last as canonical
                urls = list(url_match)
                url = urls[-1]
                entry.update({'urls': urls, 'url': url})

            if not entry.get('url'):
                logger.error('Parsing message failed. No url found.')
                continue

            entries.append(entry)

        return entries
Example #7
    def on_task_input(self, task):
        if not task.manager.options.inject:
            return

        options = self.parse_arguments(task.manager.options.inject)

        # disable other inputs
        log.info('Disabling the rest of the input phase.')
        task.disable_phase('input')

        # create our injected entry
        entry = Entry(options['entry'], injected=True)
        if 'url' not in entry:
            entry['url'] = 'http://localhost/inject/%s' % ''.join(
                random.choice(string.ascii_letters + string.digits)
                for _ in range(1, 30)
            )
        if entry.get('immortal'):
            log.debug('Injected entry is immortal')

        task.all_entries.append(entry)

        if options.get('accept', False):
            log.debug('accepting the injection')
            entry.accept('--inject accepted')
Example #8
    def on_task_input(self, task, config):
        if not config:
            return
        config = self.prepare_config(config)
        entries = []

        with Session() as session:
            for queue_item in queue_get(session=session, downloaded=False):
                entry = Entry()
                # make sure the entry has IMDB fields filled
                entry["url"] = ""
                if queue_item.imdb_id:
                    entry["imdb_id"] = queue_item.imdb_id
                    entry["imdb_url"] = make_imdb_url(queue_item.imdb_id)
                if queue_item.tmdb_id:
                    entry["tmdb_id"] = queue_item.tmdb_id

                plugin.get_plugin_by_name("tmdb_lookup").instance.lookup(entry)
                # check if title is a imdb url (leftovers from old database?)
                # TODO: maybe this should be fixed at the queue_get ...
                if "http://" in queue_item.title:
                    log.debug("queue contains url instead of title")
                    if entry.get("movie_name"):
                        entry["title"] = entry["movie_name"]
                    else:
                        log.error("Found imdb url in imdb queue, but lookup failed: %s" % entry["title"])
                        continue
                else:
                    # normal title
                    entry["title"] = queue_item.title

                # Add the year and quality if configured to (make sure not to double it up)
                if (
                    config.get("year")
                    and entry.get("movie_year")
                    and str(entry["movie_year"]) not in entry["title"]
                ):
                    entry["title"] += " %s" % entry["movie_year"]
                # TODO: qualities can now be ranges.. how should we handle this?
                if config.get("quality") and queue_item.quality != "ANY":
                    log.info("quality option of emit_movie_queue is disabled while we figure out how to handle ranges")
                    # entry['title'] += ' %s' % queue_item.quality
                entries.append(entry)
                log.debug("Added title and IMDB id to new entry: %s - %s" % (entry["title"], entry["imdb_id"]))

        return entries
Example #9
    def on_task_input(self, task, config):
        if not config:
            return
        config = self.prepare_config(config)

        entries = []

        for queue_item in queue_get():
            entry = Entry()
            # make sure the entry has IMDB fields filled
            entry['url'] = ''
            if queue_item.imdb_id:
                entry['imdb_url'] = 'http://www.imdb.com/title/' + queue_item.imdb_id
                entry['imdb_id'] = queue_item.imdb_id
            if queue_item.tmdb_id:
                entry['tmdb_id'] = queue_item.tmdb_id

            get_plugin_by_name('tmdb_lookup').instance.lookup(entry)
            # check if title is a imdb url (leftovers from old database?)
            # TODO: maybe this should be fixed at the queue_get ...
            if 'http://' in queue_item.title:
                log.debug('queue contains url instead of title')
                if entry.get('movie_name'):
                    entry['title'] = entry['movie_name']
                else:
                log.error('Found imdb url in imdb queue, but lookup failed: %s' % queue_item.title)
                    continue
            else:
                # normal title
                entry['title'] = queue_item.title

            # Add the year and quality if configured to
            if config.get('year') and entry.get('movie_year'):
                entry['title'] += ' %s' % entry['movie_year']
            # TODO: qualities can now be ranges.. how should we handle this?
            #if config.get('quality') and queue_item.quality != 'ANY':
            #    entry['title'] += ' %s' % queue_item.quality
            entries.append(entry)
            log.debug('Added title and IMDB id to new entry: %s - %s' %
                      (entry['title'], entry['imdb_id']))

        return entries
Example #10
def parse_what(what, lookup=True, session=None):
    """
    Determines what information was provided by the search string `what`.
    If `lookup` is true, will fill in other information from tmdb.

    :param what: Can be one of:
      <Movie Title>: Search based on title
      imdb_id=<IMDB id>: search based on imdb id
      tmdb_id=<TMDB id>: search based on tmdb id
    :param bool lookup: Whether missing info should be filled in from tmdb.
    :param session: An existing session that will be used for lookups if provided.
    :rtype: dict
    :return: A dictionary with 'title', 'imdb_id' and 'tmdb_id' keys
    """

    tmdb_lookup = plugin.get_plugin_by_name('api_tmdb').instance.lookup

    result = {'title': None, 'imdb_id': None, 'tmdb_id': None}
    result['imdb_id'] = extract_id(what)
    if not result['imdb_id']:
        if isinstance(what, int):
            result['tmdb_id'] = what
        elif what.startswith('tmdb_id='):
            result['tmdb_id'] = what[8:]
        else:
            result['title'] = what

    if not lookup:
        # If not doing an online lookup we can return here
        return result

    search_entry = Entry(title=result['title'] or '')
    for field in ['imdb_id', 'tmdb_id']:
        if result.get(field):
            search_entry[field] = result[field]
    # Put lazy lookup fields on the search entry
    plugin.get_plugin_by_name('imdb_lookup').instance.register_lazy_fields(search_entry)
    plugin.get_plugin_by_name('tmdb_lookup').instance.lookup(search_entry)

    try:
        # Both ids are optional, but if movie_name was populated at least one of them will be there
        return {'title': search_entry['movie_name'], 'imdb_id': search_entry.get('imdb_id'),
                'tmdb_id': search_entry.get('tmdb_id')}
    except KeyError as e:
        raise QueueError(str(e))
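A sketch of the dispatch at the top of parse_what, runnable on its own with a hypothetical stand-in for flexget's extract_id:

import re

def extract_id(what):
    # Hypothetical stand-in for flexget's extract_id: find an imdb id in free text.
    match = re.search(r'tt\d{7,8}', str(what))
    return match.group(0) if match else None

for what in ['The Matrix', 'tt0133093', 'tmdb_id=603', 603]:
    result = {'title': None, 'imdb_id': extract_id(what), 'tmdb_id': None}
    if not result['imdb_id']:
        if isinstance(what, int):
            result['tmdb_id'] = what
        elif what.startswith('tmdb_id='):
            result['tmdb_id'] = what[8:]
        else:
            result['title'] = what
    print(result)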
Example #11
    def on_task_input(self, task, config):
        config = get_config(config)

        log.debug('Starting MyAnimeList plugin')
        # Retrieve username and remove invalid characters
        username = safe_username(config['username'])

        status = config.get('list', 'watching')

        url = self.API_URL % username
        log.verbose('Retrieving MyAnimeList on %s.', url)

        headers = {'User-Agent': config.get('user-agent', self.user_agent)}
        log.debug('Using %s', headers)

        resp = task.requests.get(url, headers=headers)
        if not resp or resp.status_code != 200:
            log.warning('No data returned from MyAnimeList.')
            return

        content_type = resp.headers.get('content-type')
        if content_type == 'application/xml; charset=UTF-8':
            data = parse_xml(resp.text.encode('utf-8'))
            log.debug('Parsed xml to list of dicts')
        else:
            log.warning('Content type not xml: %s' % content_type)
            data = ''

        if not isinstance(data, list):
            raise plugin.PluginError('Incompatible response: %r.' % data)

        entries = []
        for item in data:
            if item['my_status'] == maps['my_status'][status]:
                entry = Entry()
                entry.update_using_map(anime_map, item, ignore_none=True)

                names = item['series_synonyms']
                if names and ';' in names:
                    log.debug('Parsing series_synonyms: %s', names)
                    names = [n.strip() for n in names.split(';')]
                    names = [
                        n for n in names if n and n != item['series_title']
                    ]
                    if names:
                        entry['configure_series_alternate_name'] = names
                    log.debug('Added alternate names: %r', names)

                if entry.isvalid():
                    entries.append(entry)
                    log.debug('Appended entry: %s', entry.get('title'))
                else:
                    log.debug('Invalid entry? %s', entry)

        log.debug('Returning %s entries', len(entries))
        return entries
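The series_synonyms handling above just splits on semicolons and drops blanks and the primary title; in isolation (sample data hypothetical):

series_title = 'Main Title'
names = 'Alt Name A; Alt Name B; ; Main Title'
names = [n.strip() for n in names.split(';')]
names = [n for n in names if n and n != series_title]
print(names)  # ['Alt Name A', 'Alt Name B']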
Example #12
    def on_feed_input(self, feed, config):
        if not config:
            return
        config = self.prepare_config(config)
        
        entries = []
        imdb_entries = queue_get()

        for imdb_entry in imdb_entries:
            entry = Entry()
            # make sure the entry has IMDB fields filled
            entry['url'] = ''
            entry['imaginary'] = True
            entry['imdb_url'] = 'http://www.imdb.com/title/' + imdb_entry.imdb_id
            entry['imdb_id'] = imdb_entry.imdb_id

            get_plugin_by_name('tmdb_lookup').instance.lookup(entry)
            # check if title is a imdb url (leftovers from old database?)
            # TODO: maybe this should be fixed at the queue_get ...
            if 'http://' in imdb_entry.title:
                log.debug('queue contains url instead of title')
                if entry.get('movie_name'):
                    entry['title'] = entry['movie_name']
                else:
                    log.error('Found imdb url in imdb queue, but lookup failed: %s' % imdb_entry.title)
                    continue
            else:
                # normal title
                entry['title'] = imdb_entry.title


            # Add the year and quality if configured to
            if config.get('year') and entry.get('movie_year'):
                entry['title'] += ' %s' % entry['movie_year']
            if config.get('quality') and imdb_entry.quality != 'ANY':
                entry['title'] += ' %s' % imdb_entry.quality
            entries.append(entry)
            log.debug('Added title and IMDB id to new entry: %s - %s' %
                     (entry['title'], entry['imdb_id']))

        return entries
Example #13
def lookup_movie(title, session, identifiers=None):
    entry = Entry(title=title)
    if identifiers:
        for identifier in identifiers:
            for key, value in identifier.items():
                entry[key] = value
    try:
        imdb_lookup(entry, session=session)
    except PluginError:
        tmdb_lookup(entry)
    if entry.get('movie_name'):
        return entry
Example #14
    def on_feed_input(self, feed, config):
        if not config:
            return
        config = self.prepare_config(config)

        entries = []
        imdb_entries = queue_get()

        for imdb_entry in imdb_entries:
            entry = Entry()
            # make sure the entry has IMDB fields filled
            entry["url"] = ""
            entry["imdb_url"] = "http://www.imdb.com/title/" + imdb_entry.imdb_id
            entry["imdb_id"] = imdb_entry.imdb_id

            get_plugin_by_name("tmdb_lookup").instance.lookup(entry)
            # check if title is a imdb url (leftovers from old database?)
            # TODO: maybe this should be fixed at the queue_get ...
            if "http://" in imdb_entry.title:
                log.debug("queue contains url instead of title")
                if entry.get("movie_name"):
                    entry["title"] = entry["movie_name"]
                else:
                    log.error("Found imdb url in imdb queue, but lookup failed: %s" % entry["title"])
                    continue
            else:
                # normal title
                entry["title"] = imdb_entry.title

            # Add the year and quality if configured to
            if config.get("year") and entry.get("movie_year"):
                entry["title"] += " %s" % entry["movie_year"]
            if config.get("quality") and imdb_entry.quality != "ANY":
                entry["title"] += " %s" % imdb_entry.quality
            entries.append(entry)
            log.debug("Added title and IMDB id to new entry: %s - %s" % (entry["title"], entry["imdb_id"]))

        return entries
Example #15
    def _get_sftp_config(cls, entry: Entry):
        """
        Parses a url and returns a hashable config, source path, and destination path
        """
        # parse url
        parsed = urlparse(entry['url'])
        host: str = parsed.hostname
        username: str = parsed.username
        password: str = parsed.password
        port: int = parsed.port or DEFAULT_SFTP_PORT

        # get private key info if it exists
        private_key: str = entry.get('private_key')
        private_key_pass: str = entry.get('private_key_pass')

        config: Optional[SftpConfig] = None

        if parsed.scheme == 'sftp':
            config = SftpConfig(host, port, username, password, private_key, private_key_pass)
        else:
            logger.warning('Scheme does not match SFTP: {}', entry['url'])

        return config
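Only standard-library URL parsing is involved before the config is built; a sketch of the fields it pulls out, assuming DEFAULT_SFTP_PORT is 22 (URL hypothetical):

from urllib.parse import urlparse

DEFAULT_SFTP_PORT = 22  # assumed value; defined elsewhere in the plugin

parsed = urlparse('sftp://user:secret@seedbox.example:2222/downloads/show.mkv')
print(parsed.scheme)                     # 'sftp' -> an SftpConfig gets built
print(parsed.hostname, parsed.username)  # 'seedbox.example' 'user'
print(parsed.port or DEFAULT_SFTP_PORT)  # 2222; falls back to 22 when the URL omits it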
Example #16
    def on_task_input(self, task, config):
        config = get_config(config)

        log.debug('Starting MyAnimeList plugin')
        # Retrieve username and remove invalid characters
        username = safe_username(config['username'])

        status = config.get('list', 'watching')

        url = self.API_URL % username
        log.verbose('Retrieving MyAnimeList on %s.', url)

        headers = {'User-Agent': config.get('user-agent', self.user_agent)}
        log.debug('Using %s', headers)

        resp = task.requests.get(url, headers=headers)
        if not resp or resp.status_code != 200:
            log.warning('No data returned from MyAnimeList.')
            return

        content_type = resp.headers.get('content-type')
        if content_type == 'application/xml; charset=UTF-8':
            data = parse_xml(resp.text.encode('utf-8'))
            log.debug('Parsed xml to list of dicts')
        else:
            log.warning('Content type not xml: %s' % content_type)
            data = ''

        if not isinstance(data, list):
            raise PluginError('Incompatible items in response: %r.' % data)

        entries = []
        for item in data:
            if item['my_status'] == self.watched_map.get(status):
                entry = Entry()
                entry.update_using_map(self.anime_map, item, ignore_none=True)
                mal_url = 'http://myanimelist.net/anime/%s' % entry['mal_id']
                entry['url'] = mal_url
                entry['mal_url'] = mal_url
                entry['mal_type'] = self.type_map.get(entry['mal_type'])
                entry['mal_my_status'] = self.inv_watched_map.get(entry['mal_my_status'])
                entry['mal_status'] = self.status_map.get(entry['mal_status'])
                entries.append(entry)
                log.debug('Appended entry: %s', entry.get('title'))

        log.debug('Returning %s entries', len(entries))
        return entries
Example #17
    def on_task_input(self, task):
        if not task.manager.options.inject:
            return

        options = self.parse_arguments(task.manager.options.inject)

        # disable other inputs
        log.info('Disabling the rest of the input phase.')
        task.disable_phase('input')

        # create our injected entry
        entry = Entry(options['entry'], injected=True)
        if 'url' not in entry:
            entry['url'] = 'http://localhost/inject/%s' % ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(1, 30))
        if entry.get('immortal'):
            log.debug('Injected entry is immortal')

        task.all_entries.append(entry)

        if options.get('accept', False):
            log.debug('accepting the injection')
            task.accept(entry, '--inject accepted')
Example #18
    def on_feed_input(self, feed):
        if not InputInject.options:
            return

        # disable other inputs
        log.info('Disabling the rest of the input phase.')
        feed.disable_phase('input')

        # create our injected entry
        import string
        import random

        entry = Entry(InputInject.options['entry'], injected=True)
        if 'url' not in entry:
            entry['url'] = 'http://localhost/inject/%s' % ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(1, 30))
        if entry.get('immortal'):
            log.debug('Injected entry is immortal')

        feed.entries.append(entry)

        if InputInject.options.get('accept', False):
            log.debug('accepting the injection')
            feed.accept(entry, '--inject accepted')
Example #19
    def items(self):
        if self._items is None:
            if self.config['list'] in ['collection', 'watched'] and self.config['type'] == 'auto':
                raise plugin.PluginError('`type` cannot be `auto` for %s list.' % self.config['list'])

            endpoint = self.get_list_endpoint()

            log.verbose('Retrieving `%s` list `%s`', self.config['type'], self.config['list'])
            try:
                result = self.session.get(get_api_url(endpoint))
                try:
                    data = result.json()
                except ValueError:
                    log.debug('Could not decode json from response: %s', result.text)
                    raise plugin.PluginError('Error getting list from trakt.')
            except RequestException as e:
                raise plugin.PluginError('Could not retrieve list from trakt (%s)' % e)

            if not data:
                log.warning('No data returned from trakt for %s list %s.', self.config['type'], self.config['list'])
                return []

            entries = []
            list_type = (self.config['type']).rstrip('s')
            for item in data:
                if self.config['type'] == 'auto':
                    list_type = item['type']
                # Collection and watched lists don't return 'type' along with the items (right now)
                if 'type' in item and item['type'] != list_type:
                    log.debug('Skipping %s because it is not a %s', item[item['type']].get('title', 'unknown'),
                              list_type)
                    continue
                if list_type != 'episode' and not item[list_type]['title']:
                    # Skip shows/movies with no title
                    log.warning('Item in trakt list does not appear to have a title, skipping.')
                    continue
                entry = Entry()
                if list_type == 'episode':
                    entry['url'] = 'https://trakt.tv/shows/%s/seasons/%s/episodes/%s' % (
                        item['show']['ids']['slug'], item['episode']['season'], item['episode']['number'])
                else:
                    entry['url'] = 'https://trakt.tv/%ss/%s' % (list_type, item[list_type]['ids'].get('slug'))

                entry.update_using_map(field_maps[list_type], item)

                # get movie name translation
                language = self.config.get('language')
                if list_type == 'movie' and language:
                    endpoint = ['movies', entry['trakt_movie_id'], 'translations', language]
                    try:
                        result = self.session.get(get_api_url(endpoint))
                        try:
                            translation = result.json()
                        except ValueError:
                            raise plugin.PluginError('Error decoding movie translation from trakt: %s.' % result.text)
                    except RequestException as e:
                        raise plugin.PluginError('Could not retrieve movie translation from trakt: %s' % str(e))
                    if not translation:
                        log.warning('No translation data returned from trakt for movie %s.', entry['title'])
                    else:
                        log.verbose('Found `%s` translation for movie `%s`: %s',
                                    language, entry['movie_name'], translation[0]['title'])
                        entry['title'] = translation[0]['title']
                        if entry.get('movie_year'):
                            entry['title'] += ' (' + str(entry['movie_year']) + ')'
                        entry['movie_name'] = translation[0]['title']

                # Override the title if strip_dates is on. TODO: a better way?
                if self.config.get('strip_dates'):
                    if list_type in ['show', 'movie']:
                        entry['title'] = item[list_type]['title']
                    elif list_type == 'episode':
                        entry['title'] = '{show[title]} S{episode[season]:02}E{episode[number]:02}'.format(**item)
                        if item['episode']['title']:
                            entry['title'] += ' {episode[title]}'.format(**item)
                if entry.isvalid():
                    if self.config.get('strip_dates'):
                        # Remove year from end of name if present
                        entry['title'] = split_title_year(entry['title'])[0]
                    entries.append(entry)
                else:
                    log.debug('Invalid entry created? %s', entry)

            self._items = entries
        return self._items
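The trakt URLs are plain string templates over the item's slug and numbering; for example (item contents hypothetical):

item = {
    'show': {'ids': {'slug': 'breaking-bad'}},
    'episode': {'season': 2, 'number': 5},
}
print('https://trakt.tv/shows/%s/seasons/%s/episodes/%s' % (
    item['show']['ids']['slug'], item['episode']['season'], item['episode']['number']))
# -> https://trakt.tv/shows/breaking-bad/seasons/2/episodes/5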
Example #20
    def on_task_input(self, task, config):
        """Creates an entry for each item in your uoccin watchlist.

        Example::
            
            uoccin_emit:
              path: /path/to/gdrive/uoccin
              type: series
              tags: [ 'favorite', 'hires' ]
              check_tags: all

        Options path and type are required while the others are for filtering:
        - 'any' will include all the items marked with one or more tags in the list
        - 'all' will only include the items marked with all the listed tags
        - 'none' will only include the items not marked with any of the listed tags.

        The entries created will have a valid imdb/tvdb url and id.
        """
        imdb_lookup = plugin.get_plugin_by_name('imdb_lookup').instance
        udata = load_uoccin_data(config['path'])
        section = udata['movies'] if config['type'] == 'movies' else udata['series']
        entries = []
        for eid, itm in list(section.items()):
            if not itm['watchlist']:
                continue
            if 'tags' in config:
                n = len(set(config['tags']) & set(itm.get('tags', [])))
                if config['check_tags'] == 'any' and n <= 0:
                    continue
                if config['check_tags'] == 'all' and n != len(config['tags']):
                    continue
                if config['check_tags'] == 'none' and n > 0:
                    continue
            if config['type'] == 'movies':
                entry = Entry()
                entry['url'] = 'http://www.imdb.com/title/' + eid
                entry['imdb_id'] = eid
                if itm['name'] != 'N/A':
                    entry['title'] = itm['name']
                else:
                    try:
                        imdb_lookup.lookup(entry)
                    except plugin.PluginError as e:
                        self.log.trace('entry %s imdb failed (%s)' %
                                       (entry['imdb_id'], e.value))
                        continue
                    entry['title'] = entry.get('imdb_name')
                if 'tags' in itm:
                    entry['uoccin_tags'] = itm['tags']
                if entry.isvalid():
                    entries.append(entry)
                else:
                    self.log.debug('Invalid entry created? %s' % entry)
            else:
                sname = itm['name']
                try:
                    sname = lookup_series(tvdb_id=eid).name
                except LookupError:
                    self.log.warning(
                        'Unable to lookup series %s from tvdb, using raw name.'
                        % eid)
                surl = 'http://thetvdb.com/?tab=series&id=' + eid
                if config['type'] == 'series':
                    entry = Entry()
                    entry['url'] = surl
                    entry['title'] = sname
                    entry['tvdb_id'] = eid
                    if 'tags' in itm:
                        entry['uoccin_tags'] = itm['tags']
                    if entry.isvalid():
                        entries.append(entry)
                    else:
                        self.log.debug('Invalid entry created? %s' % entry)
                elif config['ep_flags'] == 'collected':
                    slist = itm.get('collected', {})
                    for sno in list(slist.keys()):
                        for eno in slist[sno]:
                            entry = Entry()
                            entry['url'] = surl
                            entry['title'] = '%s S%02dE%02d' % (
                                sname, int(sno), int(eno))
                            entry['tvdb_id'] = eid
                            if entry.isvalid():
                                entries.append(entry)
                            else:
                                self.log.debug('Invalid entry created? %s' %
                                               entry)
                else:
                    slist = itm.get('watched', {})
                    for sno in list(slist.keys()):
                        for eno in slist[sno]:
                            entry = Entry()
                            entry['url'] = surl
                            entry['title'] = '%s S%02dE%02d' % (sname,
                                                                int(sno), eno)
                            entry['tvdb_id'] = eid
                            if entry.isvalid():
                                entries.append(entry)
                            else:
                                self.log.debug('Invalid entry created? %s' %
                                               entry)
        entries.sort(key=lambda x: x['title'])
        return entries
Example #21
 def process(self):
     imdb_lookup = plugin.get_plugin_by_name('imdb_lookup').instance
     self.changes.sort()
     udata = load_uoccin_data(self.folder)
     for line in self.changes:
         tmp = line.split('|')
         typ = tmp[1]
         tid = tmp[2]
         fld = tmp[3]
         val = tmp[4]
         self.log.verbose('processing: type=%s, target=%s, field=%s, value=%s' % (typ, tid, fld, val))
         if typ == 'movie':
             # default
             mov = udata['movies'].setdefault(tid, 
                 {'name':'N/A', 'watchlist':False, 'collected':False, 'watched':False})
             # movie title is unknown at this time
             fake = Entry()
             fake['url'] = 'http://www.imdb.com/title/' + tid
             fake['imdb_id'] = tid
             try:
                 imdb_lookup.lookup(fake)
                 mov['name'] = fake.get('imdb_name')
             except plugin.PluginError:
                 self.log.warning('Unable to lookup movie %s from imdb, using raw name.' % tid)
             # setting
             if fld == 'watchlist':
                 mov['watchlist'] = val == 'true'
             elif fld == 'collected':
                 mov['collected'] = val == 'true'
             elif fld == 'watched':
                 mov['watched'] = val == 'true'
             elif fld == 'tags':
                  mov['tags'] = re.split(r',\s*', val)
              elif fld == 'subtitles':
                  mov['subtitles'] = re.split(r',\s*', val)
             elif fld == 'rating':
                 mov['rating'] = int(val)
             # cleaning
             if not (mov['watchlist'] or mov['collected'] or mov['watched']):
                  self.log.verbose(r'deleting unused section: movies\%s' % tid)
                 udata['movies'].pop(tid)
         elif typ == 'series':
             tmp = tid.split('.')
             sid = tmp[0]
             sno = tmp[1] if len(tmp) > 2 else None
             eno = tmp[2] if len(tmp) > 2 else None
             # default
             ser = udata['series'].setdefault(sid, {'name':'N/A', 'watchlist':False, 'collected':{}, 'watched':{}})
             # series name is unknown at this time
             try:
                 series = lookup_series(tvdb_id=sid)
                 ser['name'] = series.seriesname
             except LookupError:
                 self.log.warning('Unable to lookup series %s from tvdb, using raw name.' % sid)
             # setting
             if fld == 'watchlist':
                 ser['watchlist'] = val == 'true'
             elif fld == 'tags':
                  ser['tags'] = re.split(r',\s*', val)
             elif fld == 'rating':
                 ser['rating'] = int(val)
             elif sno is None or eno is None:
                 self.log.warning('invalid line "%s": season and episode numbers are required' % line)
             elif fld == 'collected':
                 season = ser['collected'].setdefault(sno, {})
                 if val == 'true':
                     season.setdefault(eno, [])
                 else:
                     if eno in season:
                         season.pop(eno)
                     if not season:
                          self.log.verbose(r'deleting unused section: series\%s\collected\%s' % (sid, sno))
                         ser['collected'].pop(sno)
             elif fld == 'subtitles':
                  ser['collected'].setdefault(sno, {})[eno] = re.split(r',\s*', val)
             elif fld == 'watched':
                 season = ser['watched'].setdefault(sno, [])
                 if val == 'true':
                     season = ser['watched'][sno] = list(set(season) | set([int(eno)]))
                 elif int(eno) in season:
                     season.remove(int(eno))
                 season.sort()
                 if not season:
                      self.log.debug(r'deleting unused section: series\%s\watched\%s' % (sid, sno))
                     ser['watched'].pop(sno)
             # cleaning
             if not (ser['watchlist'] or ser['collected'] or ser['watched']):
                  self.log.debug(r'deleting unused section: series\%s' % sid)
                 udata['series'].pop(sid)
         else:
             self.log.warning('invalid element type "%s"' % typ)
     # save the updated uoccin.json
     ufile = os.path.join(self.folder, 'uoccin.json')
     try:
         text = json.dumps(udata, sort_keys=True, indent=4, separators=(',', ': '))
         with open(ufile, 'w') as f:
             f.write(text)
     except Exception as err:
         self.log.debug('error writing %s: %s' % (ufile, err))
         raise plugin.PluginError('error writing %s: %s' % (ufile, err))
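Each change line processed above is pipe-delimited (timestamp|type|target|field|value), and series targets may carry season and episode numbers as dotted suffixes; the splitting in isolation (sample line hypothetical):

line = '1426000000000|series|80348.2.11|watched|true'
tmp = line.split('|')
typ, tid, fld, val = tmp[1], tmp[2], tmp[3], tmp[4]
parts = tid.split('.')
sid = parts[0]
sno = parts[1] if len(parts) > 2 else None
eno = parts[2] if len(parts) > 2 else None
print(typ, sid, sno, eno, fld, val)  # series 80348 2 11 watched true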
Example #22
    def on_task_input(self, task, config):
        config = self.prepare_config(config)
        passkeys = config.get('passkeys')
        limit = config.get('limit')
        show_detail = config.get('show_detail')

        torrent_dict, torrents_hashes = self.get_torrents_data(task, config)
        if not torrents_hashes:
            return torrents_hashes
        try:
            data = {'sign': config['iyuu'], 'version': config['version']}
            sites_response = task.requests.get(
                'http://api.iyuu.cn/index.php?s=App.Api.Sites',
                timeout=60,
                params=data).json()
            if sites_response.get('ret') != 200:
                raise plugin.PluginError(
                    'http://api.iyuu.cn/index.php?s=App.Api.Sites: {}'.format(
                        sites_response))
            sites_json = self.modify_sites(sites_response['data']['sites'])

            reseed_response = task.requests.post(
                'http://api.iyuu.cn/index.php?s=App.Api.Infohash',
                json=torrents_hashes,
                timeout=60).json()
            if reseed_response.get('ret') != 200:
                raise plugin.PluginError(
                    'http://api.iyuu.cn/index.php?s=App.Api.Infohash Error: {}'
                    .format(reseed_response))
            reseed_json = reseed_response['data']
        except (RequestException, JSONDecodeError) as e:
            raise plugin.PluginError(
                'Error when trying to send request to iyuu: {}'.format(e))

        entries = []
        site_limit = {}
        if sites_json and reseed_json:
            for info_hash, seeds_data in reseed_json.items():
                client_torrent = torrent_dict[info_hash]
                for torrent in seeds_data['torrent']:
                    site = sites_json.get(str(torrent['sid']))
                    if not site:
                        continue
                    if torrent['info_hash'] in torrent_dict.keys():
                        continue
                    site_name = self._get_site_name(site['base_url'])
                    passkey = passkeys.get(site_name)
                    if not passkey:
                        if show_detail:
                            logger.info(
                                'no passkey, skip site: {}, title: {}'.format(
                                    site_name, client_torrent['title']))
                        continue
                    if not site_limit.get(site_name):
                        site_limit[site_name] = 1
                    else:
                        if site_limit[site_name] >= limit:
                            logger.info(
                                'site_limit:{} >= limit: {}, skip site: {}, title: {}'
                                .format(site_limit[site_name], limit,
                                        site_name, client_torrent['title']))
                            continue
                        site_limit[site_name] = site_limit[site_name] + 1
                    torrent_id = str(torrent['torrent_id'])
                    entry = Entry(title=client_torrent['title'],
                                  torrent_info_hash=torrent['info_hash'])
                    entry['autoTMM'] = client_torrent['qbittorrent_auto_tmm']
                    entry['category'] = client_torrent['qbittorrent_category']
                    entry['savepath'] = client_torrent['qbittorrent_save_path']
                    entry['paused'] = 'true'
                    entry['class_name'] = site_name
                    Executor.build_reseed(entry, config, site, passkey,
                                          torrent_id)
                    if show_detail:
                        logger.info(
                            f"accept site: {site_name}, title: {client_torrent['title']}, url: {entry.get('url', None)}"
                        )
                    if entry.get('url'):
                        entries.append(entry)
        return entries
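The per-site throttle above counts how many reseed candidates a tracker has produced and skips further ones once limit is hit; the counting pattern on its own (site names hypothetical):

site_limit = {}
limit = 2  # hypothetical config value
for site_name in ['siteA', 'siteA', 'siteA', 'siteB']:
    if not site_limit.get(site_name):
        site_limit[site_name] = 1
    elif site_limit[site_name] >= limit:
        print('skip', site_name)  # over the cap for this tracker
        continue
    else:
        site_limit[site_name] += 1
print(site_limit)  # {'siteA': 2, 'siteB': 1}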
Example #23
    def on_task_input(self, task, config):
        config = self.build_config(config)

        log.debug("Requesting task `%s` url `%s`", task.name, config["url"])

        # Used to identify which etag/modified to use
        url_hash = str(hash(config["url"]))

        # set etag and last modified headers if config has not changed since
        # last run and if caching wasn't disabled with --no-cache argument.
        all_entries = (
            config["all_entries"] or task.config_modified or task.manager.options.nocache or task.manager.options.retry
        )
        headers = {}
        if not all_entries:
            etag = task.simple_persistence.get("%s_etag" % url_hash, None)
            if etag:
                log.debug("Sending etag %s for task %s", etag, task.name)
                headers["If-None-Match"] = etag
            modified = task.simple_persistence.get("%s_modified" % url_hash, None)
            if modified:
                if not isinstance(modified, str):
                    log.debug("Invalid date was stored for last modified time.")
                else:
                    headers["If-Modified-Since"] = modified
                    log.debug("Sending last-modified %s for task %s", headers["If-Modified-Since"], task.name)

        # Get the feed content
        if config["url"].startswith(("http", "https", "ftp", "file")):
            # Get feed using requests library
            auth = None
            if "username" in config and "password" in config:
                auth = (config["username"], config["password"])
            try:
                # Use the raw response so feedparser can read the headers and status values
                response = task.requests.get(config["url"], timeout=60, headers=headers, raise_status=False, auth=auth)
                content = response.content
            except RequestException as e:
                raise PluginError("Unable to download the RSS for task %s (%s): %s" % (task.name, config["url"], e))
            if config.get("ascii"):
                # convert content to ascii (cleanup), can also help with parsing problems on malformed feeds
                content = response.text.encode("ascii", "ignore")

            # status checks
            status = response.status_code
            if status == 304:
                log.verbose("%s hasn't changed since last run. Not creating entries.", config["url"])
                # Let details plugin know that it is ok if this feed doesn't produce any entries
                task.no_entries_ok = True
                return []
            elif status == 401:
                raise PluginError(
                    "Authentication needed for task %s (%s): %s"
                    % (task.name, config["url"], response.headers["www-authenticate"]),
                    log,
                )
            elif status == 404:
                raise PluginError("RSS Feed %s (%s) not found" % (task.name, config["url"]), log)
            elif status == 500:
                raise PluginError("Internal server exception on task %s (%s)" % (task.name, config["url"]), log)
            elif status != 200:
                raise PluginError("HTTP error %s received from %s" % (status, config["url"]), log)

            # update etag and last modified
            if not config["all_entries"]:
                etag = response.headers.get("etag")
                if etag:
                    task.simple_persistence["%s_etag" % url_hash] = etag
                    log.debug("etag %s saved for task %s", etag, task.name)
                if response.headers.get("last-modified"):
                    modified = response.headers["last-modified"]
                    task.simple_persistence["%s_modified" % url_hash] = modified
                    log.debug("last modified %s saved for task %s", modified, task.name)
        else:
            # This is a file, open it
            with open(config["url"], "rb") as f:
                content = f.read()
            if config.get("ascii"):
                # Just assuming utf-8 file in this case
                content = content.decode("utf-8", "ignore").encode("ascii", "ignore")

        if not content:
            log.error("No data recieved for rss feed.")
            return
        try:
            rss = feedparser.parse(content)
        except LookupError as e:
            raise PluginError("Unable to parse the RSS (from %s): %s" % (config["url"], e))

        # check for bozo
        ex = rss.get("bozo_exception", False)
        if ex or rss.get("bozo"):
            if rss.entries:
                msg = "Bozo error %s while parsing feed, but entries were produced, ignoring the error." % type(ex)
                if config.get("silent", False):
                    log.debug(msg)
                else:
                    log.verbose(msg)
            else:
                if isinstance(ex, feedparser.NonXMLContentType):
                    # see: http://www.feedparser.org/docs/character-encoding.html#advanced.encoding.nonxml
                    log.debug("ignoring feedparser.NonXMLContentType")
                elif isinstance(ex, feedparser.CharacterEncodingOverride):
                    # see: ticket 88
                    log.debug("ignoring feedparser.CharacterEncodingOverride")
                elif isinstance(ex, UnicodeEncodeError):
                    raise PluginError("Feed has UnicodeEncodeError while parsing...")
                elif isinstance(ex, (xml.sax._exceptions.SAXParseException, xml.sax._exceptions.SAXException)):
                    # save invalid data for review; this is a bit ugly, but users seem to be really
                    # confused when html pages (login pages) are received
                    self.process_invalid_content(task, content, config["url"])
                    if task.manager.options.debug:
                        log.exception(ex)
                    raise PluginError("Received invalid RSS content from task %s (%s)" % (task.name, config["url"]))
                elif isinstance(ex, httplib.BadStatusLine) or isinstance(ex, IOError):
                    raise ex  # let the @internet decorator handle
                else:
                    # all other bozo errors
                    self.process_invalid_content(task, content, config["url"])
                    raise PluginError(
                        "Unhandled bozo_exception. Type: %s (task: %s)" % (ex.__class__.__name__, task.name), log
                    )

        log.debug("encoding %s", rss.encoding)

        last_entry_id = ""
        if not all_entries:
            # Test to make sure entries are in descending order
            if rss.entries and rss.entries[0].get("published_parsed") and rss.entries[-1].get("published_parsed"):
                if rss.entries[0]["published_parsed"] < rss.entries[-1]["published_parsed"]:
                    # Sort them if they are not
                    rss.entries.sort(key=lambda x: x["published_parsed"], reverse=True)
            last_entry_id = task.simple_persistence.get("%s_last_entry" % url_hash)

        # new entries to be created
        entries = []

        # field name for url can be configured by setting link.
        # default value is auto but for example guid is used in some feeds
        ignored = 0
        for entry in rss.entries:

            # Check if title field is overridden in config
            title_field = config.get("title", "title")
            # ignore entries without title
            if not entry.get(title_field):
                log.debug("skipping entry without title")
                ignored += 1
                continue

            # Set the title from the source field
            entry.title = entry[title_field]

            # Check we haven't already processed this entry in a previous run
            if last_entry_id == entry.title + entry.get("guid", ""):
                log.verbose("Not processing entries from last run.")
                # Let details plugin know that it is ok if this task doesn't produce any entries
                task.no_entries_ok = True
                break

            # remove annoying zero width spaces
            entry.title = entry.title.replace(u"\u200B", u"")

            # Dict with fields to grab mapping from rss field name to FlexGet field name
            fields = {"guid": "guid", "author": "author", "description": "description", "infohash": "torrent_info_hash"}
            # extend the dict of fields to grab with other_fields list in config
            for field_map in config.get("other_fields", []):
                fields.update(field_map)

            # helper
            # TODO: confusing? refactor into class member ...

            def add_entry(ea):
                ea["title"] = entry.title

                for rss_field, flexget_field in fields.iteritems():
                    if rss_field in entry:
                        if not isinstance(getattr(entry, rss_field), basestring):
                            # Error if this field is not a string
                            log.error("Cannot grab non text field `%s` from rss.", rss_field)
                            # Remove the field mapping so the error is not repeated for every entry
                            config["other_fields"] = [fm for fm in config.get("other_fields", []) if rss_field not in fm]
                            continue
                        if not getattr(entry, rss_field):
                            log.debug("Not grabbing blank field %s from rss for %s.", rss_field, ea["title"])
                            continue
                        try:
                            ea[flexget_field] = decode_html(entry[rss_field])
                            if rss_field in config.get("other_fields", []):
                                # Print a debug message for custom added fields
                                log.debug("Field `%s` set to `%s` for `%s`", rss_field, ea[rss_field], ea["title"])
                        except UnicodeDecodeError:
                            log.warning("Failed to decode entry `%s` field `%s`", ea["title"], rss_field)
                # Also grab pubdate if available
                if hasattr(entry, "published_parsed") and entry.published_parsed:
                    ea["rss_pubdate"] = datetime(*entry.published_parsed[:6])
                # store basic auth info
                if "username" in config and "password" in config:
                    ea["basic_auth_username"] = config["username"]
                    ea["basic_auth_password"] = config["password"]
                entries.append(ea)

            # create from enclosures if present
            enclosures = entry.get("enclosures", [])

            if len(enclosures) > 1 and not config.get("group_links"):
                # There is more than 1 enclosure, create an Entry for each of them
                log.debug("adding %i entries from enclosures", len(enclosures))
                for enclosure in enclosures:
                    if not "href" in enclosure:
                        log.debug("RSS-entry `%s` enclosure does not have URL", entry.title)
                        continue
                    # There is a valid url for this enclosure, create an Entry for it
                    ee = Entry()
                    self.add_enclosure_info(ee, enclosure, config.get("filename", True), True)
                    add_entry(ee)
                # If we created entries for enclosures, we should not create an Entry for the main rss item
                continue

            # create flexget entry
            e = Entry()

            if not isinstance(config.get("link"), list):
                # If the link field is not a list, search for first valid url
                if config["link"] == "auto":
                    # Auto mode, check for a single enclosure url first
                    if len(entry.get("enclosures", [])) == 1 and entry["enclosures"][0].get("href"):
                        self.add_enclosure_info(e, entry["enclosures"][0], config.get("filename", True))
                    else:
                        # If there is no enclosure url, check link, then guid field for urls
                        for field in ["link", "guid"]:
                            if entry.get(field):
                                e["url"] = entry[field]
                                break
                else:
                    if entry.get(config["link"]):
                        e["url"] = entry[config["link"]]
            else:
                # If link was passed as a list, we create a list of urls
                for field in config["link"]:
                    if entry.get(field):
                        e.setdefault("url", entry[field])
                        if entry[field] not in e.setdefault("urls", []):
                            e["urls"].append(entry[field])

            if config.get("group_links"):
                # Append a list of urls from enclosures to the urls field if group_links is enabled
                e.setdefault("urls", [e["url"]]).extend(
                    [enc.href for enc in entry.get("enclosures", []) if enc.get("href") not in e["urls"]]
                )

            if not e.get("url"):
                log.debug("%s does not have link (%s) or enclosure", entry.title, config["link"])
                ignored += 1
                continue

            add_entry(e)

        # Save last spot in rss
        if rss.entries:
            log.debug("Saving location in rss feed.")
            task.simple_persistence["%s_last_entry" % url_hash] = rss.entries[0].title + rss.entries[0].get("guid", "")

        if ignored:
            if not config.get("silent"):
                log.warning("Skipped %s RSS-entries without required information (title, link or enclosures)", ignored)

        return entries
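
The bozo checks above separate recoverable parser complaints from fatal ones: feedparser still returns whatever entries it managed to extract, while bozo and bozo_exception describe what went wrong. A minimal standalone sketch of the same pattern; the set of tolerated exception types here is illustrative, not the plugin's full policy.

import feedparser

def parse_feed_tolerantly(content):
    """Parse RSS content, tolerating bozo errors when entries still came through."""
    rss = feedparser.parse(content)
    ex = rss.get('bozo_exception')
    if rss.get('bozo') and not rss.entries:
        # Nothing was parsed; decide per exception type, as the plugin does above
        if isinstance(ex, feedparser.CharacterEncodingOverride):
            pass  # harmless: the declared encoding was wrong but content was readable
        else:
            raise ValueError('feed could not be parsed: %r' % ex)
    return rss.entries
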
Пример #29
0
 def process(self):
     imdb_lookup = plugin.get_plugin_by_name('imdb_lookup').instance
     self.changes.sort()
     udata = load_uoccin_data(self.folder)
     for line in self.changes:
         tmp = line.split('|')
         typ = tmp[1]
         tid = tmp[2]
         fld = tmp[3]
         val = tmp[4]
         self.log.verbose(
             'processing: type=%s, target=%s, field=%s, value=%s' %
             (typ, tid, fld, val))
         if typ == 'movie':
             # default
             mov = udata['movies'].setdefault(
                 tid, {
                     'name': 'N/A',
                     'watchlist': False,
                     'collected': False,
                     'watched': False
                 })
             # movie title is unknown at this time
             fake = Entry()
             fake['url'] = 'http://www.imdb.com/title/' + tid
             fake['imdb_id'] = tid
             try:
                 imdb_lookup.lookup(fake)
                 mov['name'] = fake.get('imdb_name')
             except plugin.PluginError:
                 self.log.warning(
                     'Unable to lookup movie %s from imdb, using raw name.'
                     % tid)
             # setting
             if fld == 'watchlist':
                 mov['watchlist'] = val == 'true'
             elif fld == 'collected':
                 mov['collected'] = val == 'true'
             elif fld == 'watched':
                 mov['watched'] = val == 'true'
             elif fld == 'tags':
                 mov['tags'] = re.split(r',\s*', val)
             elif fld == 'subtitles':
                 mov['subtitles'] = re.split(r',\s*', val)
             elif fld == 'rating':
                 mov['rating'] = int(val)
             # cleaning
             if not (mov['watchlist'] or mov['collected']
                     or mov['watched']):
                 self.log.verbose(r'deleting unused section: movies\%s' %
                                  tid)
                 udata['movies'].pop(tid)
         elif typ == 'series':
             tmp = tid.split('.')
             sid = tmp[0]
             sno = tmp[1] if len(tmp) > 2 else None
             eno = tmp[2] if len(tmp) > 2 else None
             # default
             ser = udata['series'].setdefault(
                 sid, {
                     'name': 'N/A',
                     'watchlist': False,
                     'collected': {},
                     'watched': {}
                 })
             # series name is unknown at this time
             try:
                 series = lookup_series(tvdb_id=sid)
                 ser['name'] = series.name
             except LookupError:
                 self.log.warning(
                     'Unable to lookup series %s from tvdb, using raw name.'
                     % sid)
             # setting
             if fld == 'watchlist':
                 ser['watchlist'] = val == 'true'
             elif fld == 'tags':
                 ser['tags'] = re.split(r',\s*', val)
             elif fld == 'rating':
                 ser['rating'] = int(val)
             elif sno is None or eno is None:
                 self.log.warning(
                     'invalid line "%s": season and episode numbers are required'
                     % line)
             elif fld == 'collected':
                 season = ser['collected'].setdefault(sno, {})
                 if val == 'true':
                     season.setdefault(eno, [])
                 else:
                     if eno in season:
                         season.pop(eno)
                     if not season:
                         self.log.verbose(
                             r'deleting unused section: series\%s\collected\%s'
                             % (sid, sno))
                         ser['collected'].pop(sno)
             elif fld == 'subtitles':
                 ser['collected'].setdefault(sno, {})[eno] = re.split(
                     r',\s*', val)
             elif fld == 'watched':
                 season = ser['watched'].setdefault(sno, [])
                 if val == 'true':
                     season = ser['watched'][sno] = list(
                         set(season) | set([int(eno)]))
                 elif int(eno) in season:
                     season.remove(int(eno))
                 season.sort()
                 if not season:
                     self.log.debug(
                         r'deleting unused section: series\%s\watched\%s' %
                         (sid, sno))
                     ser['watched'].pop(sno)
             # cleaning
             if not (ser['watchlist'] or ser['collected']
                     or ser['watched']):
             self.log.debug(r'deleting unused section: series\%s' % sid)
                 udata['series'].pop(sid)
         else:
             self.log.warning('invalid element type "%s"' % typ)
     # save the updated uoccin.json
     ufile = os.path.join(self.folder, 'uoccin.json')
     try:
         text = json.dumps(udata,
                           sort_keys=True,
                           indent=4,
                           separators=(',', ': '))
         with open(ufile, 'w') as f:
             f.write(text)
     except Exception as err:
         self.log.debug('error writing %s: %s' % (ufile, err))
         raise plugin.PluginError('error writing %s: %s' % (ufile, err))
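
Each element of self.changes above is a pipe-delimited string; judging from the split, the layout is prefix|type|target|field|value, with tmp[0] (presumably a timestamp) ignored. A tiny sketch with a made-up line:

# Hypothetical change line; the first element is skipped by process()
line = '1438958400|movie|tt0133093|watchlist|true'
_, typ, tid, fld, val = line.split('|')

assert (typ, tid, fld, val) == ('movie', 'tt0133093', 'watchlist', 'true')
# Boolean values travel as the strings 'true'/'false', hence comparisons like:
watchlist = val == 'true'
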
Пример #30
0
            else:
                # If link was passed as a list, we create a list of urls
                for field in config['link']:
                    if entry.get(field):
                        e.setdefault('url', entry[field])
                        if entry[field] not in e.setdefault('urls', []):
                            e['urls'].append(entry[field])

            if config.get('group_links'):
                # Append a list of urls from enclosures to the urls field if group_links is enabled
                e.setdefault('urls', [e['url']]).extend([
                    enc.href for enc in entry.get('enclosures', [])
                    if enc.get('href') not in e['urls']
                ])

            if not e.get('url'):
                log.debug('%s does not have link (%s) or enclosure' %
                          (entry.title, config['link']))
                ignored += 1
                continue

            add_entry(e)

        # Save last spot in rss
        if rss.entries:
            log.debug('Saving location in rss feed.')
            task.simple_persistence[
                '%s_last_entry' %
                url_hash] = rss.entries[0].title + rss.entries[0].get(
                    'guid', '')
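
The fragment above leans on dict.setdefault to keep a primary url alongside a deduplicated urls list. A self-contained illustration with plain dicts (the field values are made up):

entry = {'link': 'http://example.com/a.torrent',
         'guid': 'http://example.com/a.torrent'}
e = {}
for field in ['link', 'guid']:  # config['link'] given as a list
    if entry.get(field):
        e.setdefault('url', entry[field])  # first hit becomes the primary url
        if entry[field] not in e.setdefault('urls', []):
            e['urls'].append(entry[field])  # every distinct value is collected

assert e == {'url': 'http://example.com/a.torrent',
             'urls': ['http://example.com/a.torrent']}
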
Пример #31
0
    def on_task_input(self, task, config):
        config = self.build_config(config)

        log.debug('Requesting task `%s` url `%s`', task.name, config['url'])

        # Used to identify which etag/modified to use
        url_hash = hashlib.md5(config['url'].encode('utf-8')).hexdigest()

        # set etag and last modified headers if config has not changed since
        # last run and if caching wasn't disabled with --no-cache argument.
        all_entries = (config['all_entries'] or task.config_modified
                       or task.options.nocache or task.options.retry)
        headers = task.requests.headers
        if not all_entries:
            etag = task.simple_persistence.get('%s_etag' % url_hash, None)
            if etag:
                log.debug('Sending etag %s for task %s', etag, task.name)
                headers['If-None-Match'] = etag
            modified = task.simple_persistence.get('%s_modified' % url_hash,
                                                   None)
            if modified:
                if not isinstance(modified, str):
                    log.debug(
                        'Invalid date was stored for last modified time.')
                else:
                    headers['If-Modified-Since'] = modified
                    log.debug(
                        'Sending last-modified %s for task %s',
                        headers['If-Modified-Since'],
                        task.name,
                    )

        # Get the feed content
        if config['url'].startswith(('http', 'https', 'ftp', 'file')):
            # Get feed using requests library
            auth = None
            if 'username' in config and 'password' in config:
                auth = (config['username'], config['password'])
            try:
                # Use the raw response so feedparser can read the headers and status values
                response = task.requests.get(config['url'],
                                             timeout=60,
                                             headers=headers,
                                             raise_status=False,
                                             auth=auth)
                content = response.content
            except RequestException as e:
                raise plugin.PluginError(
                    'Unable to download the RSS for task %s (%s): %s' %
                    (task.name, config['url'], e))
            if config.get('ascii'):
                # convert content to ascii (cleanup), can also help with parsing problems on malformed feeds
                content = response.text.encode('ascii', 'ignore')

            # status checks
            status = response.status_code
            if status == 304:
                log.verbose(
                    '%s hasn\'t changed since last run. Not creating entries.',
                    config['url'])
                # Let details plugin know that it is ok if this feed doesn't produce any entries
                task.no_entries_ok = True
                return []
            elif status == 401:
                raise plugin.PluginError(
                    'Authentication needed for task %s (%s): %s' %
                    (task.name, config['url'],
                     response.headers['www-authenticate']),
                    log,
                )
            elif status == 404:
                raise plugin.PluginError(
                    'RSS Feed %s (%s) not found' % (task.name, config['url']),
                    log)
            elif status == 500:
                raise plugin.PluginError(
                    'Internal server exception on task %s (%s)' %
                    (task.name, config['url']), log)
            elif status != 200:
                raise plugin.PluginError(
                    'HTTP error %s received from %s' % (status, config['url']),
                    log)

            # update etag and last modified
            if not config['all_entries']:
                etag = response.headers.get('etag')
                if etag:
                    task.simple_persistence['%s_etag' % url_hash] = etag
                    log.debug('etag %s saved for task %s', etag, task.name)
                if response.headers.get('last-modified'):
                    modified = response.headers['last-modified']
                    task.simple_persistence['%s_modified' %
                                            url_hash] = modified
                    log.debug('last modified %s saved for task %s', modified,
                              task.name)
        else:
            # This is a file, open it
            with open(config['url'], 'rb') as f:
                content = f.read()
            if config.get('ascii'):
                # Just assuming utf-8 file in this case
                content = content.decode('utf-8',
                                         'ignore').encode('ascii', 'ignore')

        if not content:
            log.error('No data received for rss feed.')
            return []
        if config.get('escape'):
            log.debug("Trying to escape unescaped in RSS")
            content = self.escape_content(content)
        try:
            rss = feedparser.parse(content)
        except LookupError as e:
            raise plugin.PluginError('Unable to parse the RSS (from %s): %s' %
                                     (config['url'], e))

        # check for bozo
        ex = rss.get('bozo_exception', False)
        if ex or rss.get('bozo'):
            if rss.entries:
                msg = (
                    'Bozo error %s while parsing feed, but entries were produced, ignoring the error.'
                    % type(ex))
                if config.get('silent', False):
                    log.debug(msg)
                else:
                    log.verbose(msg)
            else:
                if isinstance(ex, feedparser.NonXMLContentType):
                    # see: http://www.feedparser.org/docs/character-encoding.html#advanced.encoding.nonxml
                    log.debug('ignoring feedparser.NonXMLContentType')
                elif isinstance(ex, feedparser.CharacterEncodingOverride):
                    # see: ticket 88
                    log.debug('ignoring feedparser.CharacterEncodingOverride')
                elif isinstance(ex, UnicodeEncodeError):
                    raise plugin.PluginError(
                        'Feed has UnicodeEncodeError while parsing...')
                elif isinstance(ex, (xml.sax._exceptions.SAXParseException,
                                     xml.sax._exceptions.SAXException)):
                    # save invalid data for review; this is a bit ugly, but users seem to be really
                    # confused when html pages (login pages) are received
                    self.process_invalid_content(task, content, config['url'])
                    if task.options.debug:
                        log.error('bozo error parsing rss: %s' % ex)
                    raise plugin.PluginError(
                        'Received invalid RSS content from task %s (%s)' %
                        (task.name, config['url']))
                elif isinstance(ex, http.client.BadStatusLine) or isinstance(
                        ex, IOError):
                    raise ex  # let the @internet decorator handle
                else:
                    # all other bozo errors
                    self.process_invalid_content(task, content, config['url'])
                    raise plugin.PluginError(
                        'Unhandled bozo_exception. Type: %s (task: %s)' %
                        (ex.__class__.__name__, task.name),
                        log,
                    )

        log.debug('encoding %s', rss.encoding)

        last_entry_id = ''
        if not all_entries:
            # Test to make sure entries are in descending order
            if (rss.entries and rss.entries[0].get('published_parsed')
                    and rss.entries[-1].get('published_parsed')):
                if rss.entries[0]['published_parsed'] < rss.entries[-1][
                        'published_parsed']:
                    # Sort them if they are not
                    rss.entries.sort(key=lambda x: x['published_parsed'],
                                     reverse=True)
            last_entry_id = task.simple_persistence.get('%s_last_entry' %
                                                        url_hash)

        # new entries to be created
        entries = []

        # Dict with fields to grab mapping from rss field name to FlexGet field name
        fields = {
            'guid': 'guid',
            'author': 'author',
            'description': 'description',
            'infohash': 'torrent_info_hash',
        }
        # extend the dict of fields to grab with other_fields list in config
        for field_map in config.get('other_fields', []):
            fields.update(field_map)

        # field name for url can be configured by setting link.
        # default value is auto but for example guid is used in some feeds
        ignored = 0
        for entry in rss.entries:

            # Check if title field is overridden in config
            title_field = config.get('title', 'title')
            # ignore entries without title
            if not entry.get(title_field):
                log.debug('skipping entry without title')
                ignored += 1
                continue

            # Set the title from the source field
            entry.title = entry[title_field]

            # Check we haven't already processed this entry in a previous run
            if last_entry_id == entry.title + entry.get('guid', ''):
                log.verbose('Not processing entries from last run.')
                # Let details plugin know that it is ok if this task doesn't produce any entries
                task.no_entries_ok = True
                break

            # remove annoying zero width spaces
            entry.title = entry.title.replace(u'\u200B', u'')

            # helper
            # TODO: confusing? refactor into class member ...

            def add_entry(ea):
                ea['title'] = entry.title

                # fields dict may be modified during this loop, so loop over a copy (fields.items())
                for rss_field, flexget_field in list(fields.items()):
                    if rss_field in entry:
                        if rss_field == 'content':
                            content_str = ''
                            for content in entry[rss_field]:
                                try:
                                    content_str += decode_html(content.value)
                                except UnicodeDecodeError:
                                    log.warning(
                                        'Failed to decode entry `%s` field `%s`',
                                        ea['title'],
                                        rss_field,
                                    )
                            ea[flexget_field] = content_str
                            log.debug(
                                'Field `%s` set to `%s` for `%s`',
                                rss_field,
                                ea[flexget_field],
                                ea['title'],
                            )
                            continue
                        if not isinstance(getattr(entry, rss_field), str):
                            # Error if this field is not a string
                            log.error(
                                'Cannot grab non text field `%s` from rss.',
                                rss_field)
                            # Remove field from list of fields to avoid repeated error
                            del fields[rss_field]
                            continue
                        if not getattr(entry, rss_field):
                            log.debug(
                                'Not grabbing blank field %s from rss for %s.',
                                rss_field,
                                ea['title'],
                            )
                            continue
                        try:
                            ea[flexget_field] = decode_html(entry[rss_field])
                            if any(rss_field in fm for fm in config.get('other_fields', [])):
                                # Print a debug message for custom added fields
                                log.debug(
                                    'Field `%s` set to `%s` for `%s`',
                                    rss_field,
                                    ea[flexget_field],
                                    ea['title'],
                                )
                        except UnicodeDecodeError:
                            log.warning(
                                'Failed to decode entry `%s` field `%s`',
                                ea['title'], rss_field)
                # Also grab pubdate if available
                if hasattr(entry,
                           'published_parsed') and entry.published_parsed:
                    ea['rss_pubdate'] = datetime(*entry.published_parsed[:6])
                # store basic auth info
                if 'username' in config and 'password' in config:
                    ea['download_auth'] = (config['username'],
                                           config['password'])
                entries.append(ea)

            # create from enclosures if present
            enclosures = entry.get('enclosures', [])

            if len(enclosures) > 1 and not config.get('group_links'):
                # There is more than 1 enclosure, create an Entry for each of them
                log.debug('adding %i entries from enclosures', len(enclosures))
                for enclosure in enclosures:
                    if 'href' not in enclosure:
                        log.debug('RSS-entry `%s` enclosure does not have URL',
                                  entry.title)
                        continue
                    # There is a valid url for this enclosure, create an Entry for it
                    ee = Entry()
                    self.add_enclosure_info(ee, enclosure,
                                            config.get('filename', True), True)
                    add_entry(ee)
                # If we created entries for enclosures, we should not create an Entry for the main rss item
                continue

            # create flexget entry
            e = Entry()

            if not isinstance(config.get('link'), list):
                # If the link field is not a list, search for first valid url
                if config['link'] == 'auto':
                    # Auto mode, check for a single enclosure url first
                    if len(entry.get(
                            'enclosures',
                        [])) == 1 and entry['enclosures'][0].get('href'):
                        self.add_enclosure_info(e, entry['enclosures'][0],
                                                config.get('filename', True))
                    else:
                        # If there is no enclosure url, check link, then guid field for urls
                        for field in ['link', 'guid']:
                            if entry.get(field):
                                e['url'] = entry[field]
                                break
                else:
                    if entry.get(config['link']):
                        e['url'] = entry[config['link']]
            else:
                # If link was passed as a list, we create a list of urls
                for field in config['link']:
                    if entry.get(field):
                        e.setdefault('url', entry[field])
                        if entry[field] not in e.setdefault('urls', []):
                            e['urls'].append(entry[field])

            if config.get('group_links'):
                # Append a list of urls from enclosures to the urls field if group_links is enabled
                e.setdefault('urls', [e['url']]).extend([
                    enc.href for enc in entry.get('enclosures', [])
                    if enc.get('href') not in e['urls']
                ])

            if not e.get('url'):
                log.debug('%s does not have link (%s) or enclosure',
                          entry.title, config['link'])
                ignored += 1
                continue

            add_entry(e)

        # Save last spot in rss
        if rss.entries:
            log.debug('Saving location in rss feed.')

            try:
                entry_id = rss.entries[0].title + rss.entries[0].get(
                    'guid', '')
            except AttributeError:
                entry_id = ''

            if entry_id.strip():
                task.simple_persistence['%s_last_entry' % url_hash] = entry_id
            else:
                log.debug(
                    'rss feed location saving skipped: no title information in first entry'
                )

        if ignored:
            if not config.get('silent'):
                log.warning(
                    'Skipped %s RSS-entries without required information (title, link or enclosures)',
                    ignored,
                )

        return entries
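
The etag and last-modified bookkeeping in this version implements an HTTP conditional GET: send the stored validators back, and a 304 response means the feed is unchanged. A minimal sketch with plain requests, using a dict in place of task.simple_persistence:

import requests

def fetch_if_changed(url, cache):
    """Fetch url, honouring a previously stored ETag / Last-Modified pair."""
    headers = {}
    if cache.get('etag'):
        headers['If-None-Match'] = cache['etag']
    if cache.get('modified'):
        headers['If-Modified-Since'] = cache['modified']
    response = requests.get(url, headers=headers, timeout=60)
    if response.status_code == 304:
        return None  # unchanged since the last run
    # Store the new validators for the next run
    if response.headers.get('etag'):
        cache['etag'] = response.headers['etag']
    if response.headers.get('last-modified'):
        cache['modified'] = response.headers['last-modified']
    return response.content
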
Пример #32
0
    def parse_privmsg(self, nickname, channel, message):
        """
        Parses a public message and generates an entry if the message matches the regex or contains a url
        :param nickname: Nickname of who sent the message
        :param channel: Channel where the message was received
        :param message: Message text
        :return:
        """

        # Clean up the message
        message = MESSAGE_CLEAN.sub('', message)

        # If we have announcers defined, ignore any messages not from them
        if self.announcer_list and nickname not in self.announcer_list:
            log.debug('Ignoring message: from non-announcer %s', nickname)
            return

        # If it's listed in ignore lines, skip it
        for (rx, expected) in self.ignore_lines:
            if rx.match(message) and expected:
                log.debug('Ignoring message: matched ignore line')
                return

        # Create the entry
        entry = Entry(irc_raw_message=message)

        # Run the config regex patterns over the message
        matched_linepatterns = self.match_message_patterns(self.linepatterns, message)
        matched_multilinepatterns = self.match_message_patterns(self.multilinepatterns, message, multiline=True)

        if matched_linepatterns:
            match_found = True
            entry.update(matched_linepatterns)
        elif matched_multilinepatterns:
            match_found = True
            entry.update(matched_multilinepatterns)
        else:
            log.warning('Received message doesn\'t match any regexes.')
            return None

        # Generate the entry and process it through the linematched rules
        if self.tracker_config is not None and match_found:
            entry.update(self.process_tracker_config_rules(entry))
        elif self.tracker_config is not None:
            log.error('Failed to parse message. Skipping.')
            return None

        # If we have a torrentname, use it as the title
        entry['title'] = entry.get('irc_torrentname', message)

        # If we have a URL, use it
        if 'irc_torrenturl' in entry:
            entry['url'] = entry['irc_torrenturl']
        else:
            # find a url...
            url_match = URL_MATCHER.findall(message)
            if url_match:
                # We have a URL(s)!, generate an entry
                urls = list(url_match)
                url = urls[-1]
                entry.update({
                    'urls': urls,
                    'url': url,
                })

        log.debug('Entry after processing: %s', dict(entry))
        if not entry.get('url'):
            log.error('Parsing message failed. No url found.')
            return None
        return entry
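
URL_MATCHER itself is defined elsewhere in the plugin; what matters here is that findall collects every URL in the message and the last one wins. A rough stand-in (this pattern is a simplification, not the plugin's actual regex):

import re

URL_MATCHER = re.compile(r'https?://\S+')  # simplified stand-in

message = 'New release! info https://tracker.example/info get https://tracker.example/dl/42'
urls = URL_MATCHER.findall(message)
if urls:
    url = urls[-1]  # the last URL in the message is taken as the download link
    assert url == 'https://tracker.example/dl/42'
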
Пример #33
0
    def items(self):
        if self._items is None:
            if self.config['list'] in ['collection', 'watched', 'trending', 'popular'] and self.config['type'] == 'auto':
                raise plugin.PluginError(
                    '`type` cannot be `auto` for %s list.' % self.config['list']
                )

            endpoint = self.get_list_endpoint()

            log.verbose('Retrieving `%s` list `%s`', self.config['type'], self.config['list'])
            try:
                result = self.session.get(db.get_api_url(endpoint))
                try:
                    data = result.json()
                except ValueError:
                    log.debug('Could not decode json from response: %s', result.text)
                    raise plugin.PluginError('Error getting list from trakt.')

                current_page = int(result.headers.get('X-Pagination-Page', 1))
                current_page_count = int(result.headers.get('X-Pagination-Page-Count', 1))
                if current_page < current_page_count:
                    # Response is paginated; fetch every page (up to 1000 items per page),
                    # rebuilding the data list from the beginning
                    data = []

                    limit = 1000
                    pagination_item_count = int(result.headers.get('X-Pagination-Item-Count', 0))
                    number_of_pages = math.ceil(pagination_item_count / limit)
                    log.debug(
                        'Response is paginated. Number of items: %s, number of pages: %s',
                        pagination_item_count,
                        number_of_pages,
                    )
                    page = int(result.headers.get('X-Pagination-Page'))
                    while page <= number_of_pages:
                        paginated_result = self.session.get(
                            db.get_api_url(endpoint), params={'limit': limit, 'page': page}
                        )
                        page += 1
                        try:
                            data.extend(paginated_result.json())
                        except ValueError:
                            log.debug(
                                'Could not decode json from response: %s', paginated_result.text
                            )
                            raise plugin.PluginError('Error getting list from trakt.')

            except RequestException as e:
                raise plugin.PluginError('Could not retrieve list from trakt (%s)' % e)

            if not data:
                log.warning(
                    'No data returned from trakt for %s list %s.',
                    self.config['type'],
                    self.config['list'],
                )
                return []

            entries = []
            list_type = (self.config['type']).rstrip('s')
            for item in data:
                if self.config['type'] == 'auto':
                    list_type = item['type']
                if self.config['list'] == 'popular':
                    item = {list_type: item}
                # Collection and watched lists don't return 'type' along with the items (right now)
                if 'type' in item and item['type'] != list_type:
                    log.debug(
                        'Skipping %s because it is not a %s',
                        item[item['type']].get('title', 'unknown'),
                        list_type,
                    )
                    continue
                if list_type != 'episode' and not item[list_type]['title']:
                    # Skip shows/movies with no title
                    log.warning('Item in trakt list does not appear to have a title, skipping.')
                    continue
                entry = Entry()
                if list_type == 'episode':
                    entry['url'] = 'https://trakt.tv/shows/%s/seasons/%s/episodes/%s' % (
                        item['show']['ids']['slug'],
                        item['episode']['season'],
                        item['episode']['number'],
                    )
                else:
                    entry['url'] = 'https://trakt.tv/%ss/%s' % (
                        list_type,
                        item[list_type]['ids'].get('slug'),
                    )

                entry.update_using_map(field_maps[list_type], item)

                # get movie name translation
                language = self.config.get('language')
                if list_type == 'movie' and language:
                    endpoint = ['movies', entry['trakt_movie_id'], 'translations', language]
                    try:
                        result = self.session.get(db.get_api_url(endpoint))
                        try:
                            translation = result.json()
                        except ValueError:
                            raise plugin.PluginError(
                                'Error decoding movie translation from trakt: %s.' % result.text
                            )
                    except RequestException as e:
                        raise plugin.PluginError(
                            'Could not retrieve movie translation from trakt: %s' % str(e)
                        )
                    if not translation:
                        log.warning(
                            'No translation data returned from trakt for movie %s.', entry['title']
                        )
                    else:
                        log.verbose(
                            'Found `%s` translation for movie `%s`: %s',
                            language,
                            entry['movie_name'],
                            translation[0]['title'],
                        )
                        entry['title'] = translation[0]['title']
                        if entry.get('movie_year'):
                            entry['title'] += ' (' + str(entry['movie_year']) + ')'
                        entry['movie_name'] = translation[0]['title']

                # Override the title if strip_dates is on. TODO: a better way?
                if self.config.get('strip_dates'):
                    if list_type in ['show', 'movie']:
                        entry['title'] = item[list_type]['title']
                    elif list_type == 'episode':
                        entry[
                            'title'
                        ] = '{show[title]} S{episode[season]:02}E{episode[number]:02}'.format(
                            **item
                        )
                        if item['episode']['title']:
                            entry['title'] += ' {episode[title]}'.format(**item)
                if entry.isvalid():
                    if self.config.get('strip_dates'):
                        # Remove year from end of name if present
                        entry['title'] = split_title_year(entry['title'])[0]
                    entries.append(entry)

                    if self.config.get('limit') and len(entries) >= self.config.get('limit'):
                        break
                else:
                    log.debug('Invalid entry created? %s', entry)

            self._items = entries
        return self._items
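
The paging above is driven entirely by trakt's X-Pagination-* response headers. A compact sketch of the same loop against a generic paginated endpoint (session and url are assumed to exist; the header names follow trakt's convention):

def fetch_all_pages(session, url, limit=1000):
    """Collect items from an API that reports X-Pagination-* headers."""
    items = []
    page = 1
    while True:
        result = session.get(url, params={'limit': limit, 'page': page})
        items.extend(result.json())
        total_pages = int(result.headers.get('X-Pagination-Page-Count', 1))
        if page >= total_pages:
            return items
        page += 1
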
Пример #34
0
    def items(self):
        if self._items is None:
            if self.config['list'] in ['collection', 'watched'
                                       ] and self.config['type'] == 'auto':
                raise plugin.PluginError(
                    '`type` cannot be `auto` for %s list.' %
                    self.config['list'])

            endpoint = self.get_list_endpoint()

            log.verbose('Retrieving `%s` list `%s`', self.config['type'],
                        self.config['list'])
            try:
                result = self.session.get(get_api_url(endpoint))
                try:
                    data = result.json()
                except ValueError:
                    log.debug('Could not decode json from response: %s',
                              result.text)
                    raise plugin.PluginError('Error getting list from trakt.')
            except RequestException as e:
                raise plugin.PluginError(
                    'Could not retrieve list from trakt (%s)' % e)

            if not data:
                log.warning('No data returned from trakt for %s list %s.',
                            self.config['type'], self.config['list'])
                return []

            entries = []
            list_type = (self.config['type']).rstrip('s')
            for item in data:
                if self.config['type'] == 'auto':
                    list_type = item['type']
                # Collection and watched lists don't return 'type' along with the items (right now)
                if 'type' in item and item['type'] != list_type:
                    log.debug('Skipping %s because it is not a %s',
                              item[item['type']].get('title',
                                                     'unknown'), list_type)
                    continue
                if list_type != 'episode' and not item[list_type]['title']:
                    # Skip shows/movies with no title
                    log.warning(
                        'Item in trakt list does not appear to have a title, skipping.'
                    )
                    continue
                entry = Entry()
                if list_type == 'episode':
                    entry[
                        'url'] = 'https://trakt.tv/shows/%s/seasons/%s/episodes/%s' % (
                            item['show']['ids']['slug'],
                            item['episode']['season'],
                            item['episode']['number'])
                else:
                    entry['url'] = 'https://trakt.tv/%ss/%s' % (
                        list_type, item[list_type]['ids'].get('slug'))

                entry.update_using_map(field_maps[list_type], item)

                # get movie name translation
                language = self.config.get('language')
                if list_type == 'movie' and language:
                    endpoint = [
                        'movies', entry['trakt_movie_id'], 'translations',
                        language
                    ]
                    try:
                        result = self.session.get(get_api_url(endpoint))
                        try:
                            translation = result.json()
                        except ValueError:
                            raise plugin.PluginError(
                                'Error decoding movie translation from trakt: %s.'
                                % result.text)
                    except RequestException as e:
                        raise plugin.PluginError(
                            'Could not retrieve movie translation from trakt: %s'
                            % str(e))
                    if not translation:
                        log.warning(
                            'No translation data returned from trakt for movie %s.',
                            entry['title'])
                    else:
                        log.verbose(
                            'Found `%s` translation for movie `%s`: %s',
                            language, entry['movie_name'],
                            translation[0]['title'])
                        entry['title'] = translation[0]['title']
                        if entry.get('movie_year'):
                            entry['title'] += ' (' + str(
                                entry['movie_year']) + ')'
                        entry['movie_name'] = translation[0]['title']

                # Override the title if strip_dates is on. TODO: a better way?
                if self.config.get('strip_dates'):
                    if list_type in ['show', 'movie']:
                        entry['title'] = item[list_type]['title']
                    elif list_type == 'episode':
                        entry[
                            'title'] = '{show[title]} S{episode[season]:02}E{episode[number]:02}'.format(
                                **item)
                        if item['episode']['title']:
                            entry['title'] += ' {episode[title]}'.format(
                                **item)
                if entry.isvalid():
                    if self.config.get('strip_dates'):
                        # Remove year from end of name if present
                        entry['title'] = split_title_year(entry['title'])[0]
                    entries.append(entry)
                else:
                    log.debug('Invalid entry created? %s', entry)

            self._items = entries
        return self._items
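
Both trakt variants singularize the configured type with rstrip('s'). Worth noting: rstrip removes trailing characters, not a suffix, which is safe for the list types used here but would misbehave on a word ending in a double 's':

assert 'movies'.rstrip('s') == 'movie'
assert 'shows'.rstrip('s') == 'show'
assert 'episodes'.rstrip('s') == 'episode'
# Caveat: rstrip strips *all* trailing 's' characters:
assert 'actress'.rstrip('s') == 'actre'
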
Пример #35
0
 def on_task_input(self, task, config):
     """Creates an entry for each item in your uoccin watchlist.
     
     Example::
         
         from_uoccin:
           path: /path/to/gdrive/uoccin
           type: series
           tags: [ 'favorite', 'hires' ]
           check_tags: all
     
     Options path and type are required while the others are for filtering:
     - 'any' will include all the items marked with one or more tags in the list
     - 'all' will only include the items marked with all the listed tags
     - 'none' will only include the items not marked with any of the listed tags.
     
     The entries created will have a valid imdb/tvdb url and id.
     """
     imdb_lookup = plugin.get_plugin_by_name('imdb_lookup').instance
     udata = load_uoccin_data(config['path'])
     section = udata['movies'] if config['type'] == 'movies' else udata['series']
     entries = []
     for eid, itm in list(section.items()):
         if not itm['watchlist']:
             continue
         if 'tags' in config:
             n = len(set(config['tags']) & set(itm.get('tags', [])))
             if config['check_tags'] == 'any' and n <= 0:
                 continue
             if config['check_tags'] == 'all' and n != len(config['tags']):
                 continue
             if config['check_tags'] == 'none' and n > 0:
                 continue
         if config['type'] == 'movies':
             entry = Entry()
             entry['url'] = 'http://www.imdb.com/title/' + eid
             entry['imdb_id'] = eid
             if itm['name'] != 'N/A':
                 entry['title'] = itm['name']
             else:
                 try:
                     imdb_lookup.lookup(entry)
                 except plugin.PluginError as e:
                     self.log.trace('entry %s imdb failed (%s)' % (entry['imdb_id'], e.value))
                     continue
                 entry['title'] = entry.get('imdb_name')
             if 'tags' in itm:
                 entry['uoccin_tags'] = itm['tags']
             if entry.isvalid():
                 entries.append(entry)
             else:
                 self.log.debug('Invalid entry created? %s' % entry)
         else:
             sname = itm['name']
             try:
                 sname = lookup_series(tvdb_id=eid).seriesname
             except LookupError:
                 self.log.warning('Unable to lookup series %s from tvdb, using raw name.' % eid)
             surl = 'http://thetvdb.com/?tab=series&id=' + eid
             if config['type'] == 'series':
                 entry = Entry()
                 entry['url'] = surl
                 entry['title'] = sname
                 entry['tvdb_id'] = eid
                 if 'tags' in itm:
                     entry['uoccin_tags'] = itm['tags']
                 if entry.isvalid():
                     entries.append(entry)
                 else:
                     self.log.debug('Invalid entry created? %s' % entry)
             elif config['ep_flags'] == 'collected':
                 slist = itm.get('collected', {})
                 for sno in list(slist.keys()):
                     for eno in slist[sno]:
                         entry = Entry()
                         entry['url'] = surl
                         entry['title'] = '%s S%02dE%02d' % (sname, int(sno), int(eno))
                         entry['tvdb_id'] = eid
                         if entry.isvalid():
                             entries.append(entry)
                         else:
                             self.log.debug('Invalid entry created? %s' % entry)
             else:
                 slist = itm.get('watched', {})
                 for sno in list(slist.keys()):
                     for eno in slist[sno]:
                         entry = Entry()
                         entry['url'] = surl
                         entry['title'] = '%s S%02dE%02d' % (sname, int(sno), eno)
                         entry['tvdb_id'] = eid
                         if entry.isvalid():
                             entries.append(entry)
                         else:
                             self.log.debug('Invalid entry created? %s' % entry)
     entries.sort(key=lambda x: x['title'])
     return entries
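
The check_tags filter above boils down to counting the overlap between the configured tags and the item's tags. A standalone sketch of the three modes:

def keep_item(item_tags, wanted, mode):
    """Mirror the any/all/none tag filter used by from_uoccin."""
    n = len(set(wanted) & set(item_tags))
    if mode == 'any':
        return n > 0             # at least one wanted tag present
    if mode == 'all':
        return n == len(wanted)  # every wanted tag present
    return n == 0                # mode == 'none': no wanted tag present

assert keep_item(['favorite', 'hires'], ['favorite'], 'any')
assert not keep_item(['hires'], ['favorite', 'hires'], 'all')
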
Пример #37
0
    def get_items(self):
        """Iterator over etrieved itesms from the trakt api."""
        if (
            self.config['list'] in ['collection', 'watched', 'trending', 'popular']
            and self.config['type'] == 'auto'
        ):
            raise plugin.PluginError('`type` cannot be `auto` for %s list.' % self.config['list'])

        limit_per_page = 1000

        endpoint = self.get_list_endpoint()

        list_type = (self.config['type']).rstrip('s')

        log.verbose('Retrieving `%s` list `%s`', self.config['type'], self.config['list'])
        try:
            page = 1
            collecting_finished = False
            while not collecting_finished:
                result = self.session.get(
                    db.get_api_url(endpoint), params={'limit': limit_per_page, 'page': page}
                )
                page = int(result.headers.get('X-Pagination-Page', 1))
                number_of_pages = int(result.headers.get('X-Pagination-Page-Count', 1))
                if page == 2:
                    # If there is more than one page (more than 1000 items) warn user they may want to limit
                    log.verbose(
                        'There are a large number of items in trakt `%s` list. You may want to enable `limit`'
                        ' plugin to reduce the amount of entries in this task.',
                        self.config['list'],
                    )

                collecting_finished = page >= number_of_pages
                page += 1

                try:
                    trakt_items = result.json()
                except ValueError:
                    log.debug('Could not decode json from response: %s', result.text)
                    raise plugin.PluginError('Error getting list from trakt.')
                if not trakt_items:
                    log.warning(
                        'No data returned from trakt for %s list %s.',
                        self.config['type'],
                        self.config['list'],
                    )
                    return

                for item in trakt_items:
                    if self.config['type'] == 'auto':
                        list_type = item['type']
                    if self.config['list'] == 'popular':
                        item = {list_type: item}
                    # Collection and watched lists don't return 'type' along with the items (right now)
                    if 'type' in item and item['type'] != list_type:
                        log.debug(
                            'Skipping %s because it is not a %s',
                            item[item['type']].get('title', 'unknown'),
                            list_type,
                        )
                        continue
                    if list_type not in item:
                        # Issue 2445
                        log.warning("Item type can not be determined, skipping item %s", item)
                        continue
                    if list_type != 'episode' and not item[list_type]['title']:
                        # Skip shows/movies with no title
                        log.warning(
                            'Item in trakt list does not appear to have a title, skipping.'
                        )
                        continue
                    entry = Entry()
                    if list_type == 'episode':
                        entry['url'] = 'https://trakt.tv/shows/%s/seasons/%s/episodes/%s' % (
                            item['show']['ids']['slug'],
                            item['episode']['season'],
                            item['episode']['number'],
                        )
                    else:
                        entry['url'] = 'https://trakt.tv/%ss/%s' % (
                            list_type,
                            item[list_type]['ids'].get('slug'),
                        )

                    # Pass the strip dates option in so it can be used in the update maps
                    item['strip_dates'] = self.config.get('strip_dates')
                    entry.update_using_map(field_maps[list_type], item)

                    # get movie name translation
                    language = self.config.get('language')
                    if list_type == 'movie' and language:
                        endpoint = ['movies', entry['trakt_movie_id'], 'translations', language]
                        try:
                            result = self.session.get(db.get_api_url(endpoint))
                            try:
                                translation = result.json()
                            except ValueError:
                                raise plugin.PluginError(
                                    'Error decoding movie translation from trakt: %s.'
                                    % result.text
                                )
                        except RequestException as e:
                            raise plugin.PluginError(
                                'Could not retrieve movie translation from trakt: %s' % str(e)
                            )
                        if not translation:
                            log.warning(
                                'No translation data returned from trakt for movie %s.',
                                entry['title'],
                            )
                        else:
                            log.verbose(
                                'Found `%s` translation for movie `%s`: %s',
                                language,
                                entry['movie_name'],
                                translation[0]['title'],
                            )
                            entry['title'] = translation[0]['title']
                            if entry.get('movie_year') and not self.config.get('strip_dates'):
                                entry['title'] += ' ({})'.format(entry['movie_year'])
                            entry['movie_name'] = translation[0]['title']

                    if entry.isvalid():
                        yield entry
                    else:
                        log.debug('Invalid entry created? %s', entry)

        except RequestException as e:
            raise plugin.PluginError('Could not retrieve list from trakt (%s)' % e)
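
The pagination in get_items is driven entirely by Trakt's X-Pagination-* response headers. A minimal sketch of that loop with plain requests, using an illustrative url rather than FlexGet's db.get_api_url helper:

import requests

def fetch_all_items(url, limit=1000):
    """Yield items from a paginated API that reports its state in
    X-Pagination-Page / X-Pagination-Page-Count headers, as Trakt does."""
    page = 1
    while True:
        response = requests.get(url, params={'limit': limit, 'page': page})
        response.raise_for_status()
        yield from response.json()
        # Trust the server's idea of the current page, as the code above does
        page = int(response.headers.get('X-Pagination-Page', page))
        page_count = int(response.headers.get('X-Pagination-Page-Count', 1))
        if page >= page_count:
            break
        page += 1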
Example #38
    def on_task_input(self, task, config):
        config = self.build_config(config)

        log.debug('Requesting task `%s` url `%s`', task.name, config['url'])

        # Used to identify which etag/modified to use
        url_hash = str(hash(config['url']))

        # set etag and last modified headers if config has not changed since
        # last run and if caching wasn't disabled with --no-cache argument.
        all_entries = (config['all_entries'] or task.config_modified or
                       task.options.nocache or task.options.retry)
        headers = {}
        if not all_entries:
            etag = task.simple_persistence.get('%s_etag' % url_hash, None)
            if etag:
                log.debug('Sending etag %s for task %s', etag, task.name)
                headers['If-None-Match'] = etag
            modified = task.simple_persistence.get('%s_modified' % url_hash, None)
            if modified:
                if not isinstance(modified, basestring):
                    log.debug('Invalid date was stored for last modified time.')
                else:
                    headers['If-Modified-Since'] = modified
                    log.debug('Sending last-modified %s for task %s', headers['If-Modified-Since'], task.name)

        # Get the feed content
        if config['url'].startswith(('http', 'https', 'ftp', 'file')):
            # Get feed using requests library
            auth = None
            if 'username' in config and 'password' in config:
                auth = (config['username'], config['password'])
            try:
                # Use the raw response so feedparser can read the headers and status values
                response = task.requests.get(config['url'], timeout=60, headers=headers, raise_status=False, auth=auth)
                content = response.content
            except RequestException as e:
                raise plugin.PluginError('Unable to download the RSS for task %s (%s): %s' %
                                         (task.name, config['url'], e))
            if config.get('ascii'):
                # convert content to ascii (cleanup); this can also help with parsing problems on malformed feeds
                content = response.text.encode('ascii', 'ignore')

            # status checks
            status = response.status_code
            if status == 304:
                log.verbose('%s hasn\'t changed since last run. Not creating entries.', config['url'])
                # Let details plugin know that it is ok if this feed doesn't produce any entries
                task.no_entries_ok = True
                return []
            elif status == 401:
                raise plugin.PluginError('Authentication needed for task %s (%s): %s' %
                                         (task.name, config['url'], response.headers['www-authenticate']), log)
            elif status == 404:
                raise plugin.PluginError('RSS Feed %s (%s) not found' % (task.name, config['url']), log)
            elif status == 500:
                raise plugin.PluginError('Internal server exception on task %s (%s)' % (task.name, config['url']), log)
            elif status != 200:
                raise plugin.PluginError('HTTP error %s received from %s' % (status, config['url']), log)

            # update etag and last modified
            if not config['all_entries']:
                etag = response.headers.get('etag')
                if etag:
                    task.simple_persistence['%s_etag' % url_hash] = etag
                    log.debug('etag %s saved for task %s', etag, task.name)
                if response.headers.get('last-modified'):
                    modified = response.headers['last-modified']
                    task.simple_persistence['%s_modified' % url_hash] = modified
                    log.debug('last modified %s saved for task %s', modified, task.name)
        else:
            # This is a file, open it
            with open(config['url'], 'rb') as f:
                content = f.read()
            if config.get('ascii'):
                # Just assuming utf-8 file in this case
                content = content.decode('utf-8', 'ignore').encode('ascii', 'ignore')

        if not content:
            log.error('No data received for rss feed.')
            return []
        try:
            rss = feedparser.parse(content)
        except LookupError as e:
            raise plugin.PluginError('Unable to parse the RSS (from %s): %s' % (config['url'], e))

        # check for bozo
        ex = rss.get('bozo_exception', False)
        if ex or rss.get('bozo'):
            if rss.entries:
                msg = 'Bozo error %s while parsing feed, but entries were produced, ignoring the error.' % type(ex)
                if config.get('silent', False):
                    log.debug(msg)
                else:
                    log.verbose(msg)
            else:
                if isinstance(ex, feedparser.NonXMLContentType):
                    # see: http://www.feedparser.org/docs/character-encoding.html#advanced.encoding.nonxml
                    log.debug('ignoring feedparser.NonXMLContentType')
                elif isinstance(ex, feedparser.CharacterEncodingOverride):
                    # see: ticket 88
                    log.debug('ignoring feedparser.CharacterEncodingOverride')
                elif isinstance(ex, UnicodeEncodeError):
                    raise plugin.PluginError('Feed has UnicodeEncodeError while parsing...')
                elif isinstance(ex, (xml.sax._exceptions.SAXParseException, xml.sax._exceptions.SAXException)):
                    # save invalid data for review; this is a bit ugly, but users seem to be really
                    # confused when html pages (login pages) are received
                    self.process_invalid_content(task, content, config['url'])
                    if task.options.debug:
                        log.error('bozo error parsing rss: %s' % ex)
                    raise plugin.PluginError('Received invalid RSS content from task %s (%s)' % (task.name,
                                                                                                 config['url']))
                elif isinstance(ex, http.client.BadStatusLine) or isinstance(ex, IOError):
                    raise ex  # let the @internet decorator handle
                else:
                    # all other bozo errors
                    self.process_invalid_content(task, content, config['url'])
                    raise plugin.PluginError('Unhandled bozo_exception. Type: %s (task: %s)' %
                                             (ex.__class__.__name__, task.name), log)

        log.debug('encoding %s', rss.encoding)

        last_entry_id = ''
        if not all_entries:
            # Test to make sure entries are in descending order
            if rss.entries and rss.entries[0].get('published_parsed') and rss.entries[-1].get('published_parsed'):
                if rss.entries[0]['published_parsed'] < rss.entries[-1]['published_parsed']:
                    # Sort them if they are not
                    rss.entries.sort(key=lambda x: x['published_parsed'], reverse=True)
            last_entry_id = task.simple_persistence.get('%s_last_entry' % url_hash)

        # new entries to be created
        entries = []

        # Dict with fields to grab mapping from rss field name to FlexGet field name
        fields = {'guid': 'guid',
                  'author': 'author',
                  'description': 'description',
                  'infohash': 'torrent_info_hash'}
        # extend the dict of fields to grab with other_fields list in config
        for field_map in config.get('other_fields', []):
            fields.update(field_map)

        # The field name for the url can be configured with the link setting.
        # The default value is auto, but some feeds use e.g. guid instead.
        ignored = 0
        for entry in rss.entries:

            # Check if title field is overridden in config
            title_field = config.get('title', 'title')
            # ignore entries without title
            if not entry.get(title_field):
                log.debug('skipping entry without title')
                ignored += 1
                continue

            # Set the title from the source field
            entry.title = entry[title_field]

            # Check we haven't already processed this entry in a previous run
            if last_entry_id == entry.title + entry.get('guid', ''):
                log.verbose('Not processing entries from last run.')
                # Let details plugin know that it is ok if this task doesn't produce any entries
                task.no_entries_ok = True
                break

            # remove annoying zero width spaces
            entry.title = entry.title.replace(u'\u200B', u'')

            # helper
            # TODO: confusing? refactor into class member ...

            def add_entry(ea):
                ea['title'] = entry.title

                # fields dict may be modified during this loop, so loop over a copy (list(fields.items()))
                for rss_field, flexget_field in list(fields.items()):
                    if rss_field in entry:
                        if not isinstance(getattr(entry, rss_field), basestring):
                            # Error if this field is not a string
                            log.error('Cannot grab non text field `%s` from rss.', rss_field)
                            # Remove field from list of fields to avoid repeated error
                            del fields[rss_field]
                            continue
                        if not getattr(entry, rss_field):
                            log.debug('Not grabbing blank field %s from rss for %s.', rss_field, ea['title'])
                            continue
                        try:
                            ea[flexget_field] = decode_html(entry[rss_field])
                            if rss_field in config.get('other_fields', []):
                                # Print a debug message for custom added fields
                                log.debug('Field `%s` set to `%s` for `%s`', rss_field, ea[flexget_field], ea['title'])
                        except UnicodeDecodeError:
                            log.warning('Failed to decode entry `%s` field `%s`', ea['title'], rss_field)
                # Also grab pubdate if available
                if hasattr(entry, 'published_parsed') and entry.published_parsed:
                    ea['rss_pubdate'] = datetime(*entry.published_parsed[:6])
                # store basic auth info
                if 'username' in config and 'password' in config:
                    ea['download_auth'] = (config['username'], config['password'])
                entries.append(ea)

            # create from enclosures if present
            enclosures = entry.get('enclosures', [])

            if len(enclosures) > 1 and not config.get('group_links'):
                # There is more than 1 enclosure, create an Entry for each of them
                log.debug('adding %i entries from enclosures', len(enclosures))
                for enclosure in enclosures:
                    if 'href' not in enclosure:
                        log.debug('RSS-entry `%s` enclosure does not have URL', entry.title)
                        continue
                    # There is a valid url for this enclosure, create an Entry for it
                    ee = Entry()
                    self.add_enclosure_info(ee, enclosure, config.get('filename', True), True)
                    add_entry(ee)
                # If we created entries for enclosures, we should not create an Entry for the main rss item
                continue

            # create flexget entry
            e = Entry()

            if not isinstance(config.get('link'), list):
                # If the link field is not a list, search for first valid url
                if config['link'] == 'auto':
                    # Auto mode, check for a single enclosure url first
                    if len(entry.get('enclosures', [])) == 1 and entry['enclosures'][0].get('href'):
                        self.add_enclosure_info(e, entry['enclosures'][0], config.get('filename', True))
                    else:
                        # If there is no enclosure url, check link, then guid field for urls
                        for field in ['link', 'guid']:
                            if entry.get(field):
                                e['url'] = entry[field]
                                break
                else:
                    if entry.get(config['link']):
                        e['url'] = entry[config['link']]
            else:
                # If link was passed as a list, we create a list of urls
                for field in config['link']:
                    if entry.get(field):
                        e.setdefault('url', entry[field])
                        if entry[field] not in e.setdefault('urls', []):
                            e['urls'].append(entry[field])

            if config.get('group_links'):
                # Append a list of urls from enclosures to the urls field if group_links is enabled
                e.setdefault('urls', [e['url']]).extend(
                    [enc.href for enc in entry.get('enclosures', []) if enc.get('href') not in e['urls']])

            if not e.get('url'):
                log.debug('%s does not have link (%s) or enclosure', entry.title, config['link'])
                ignored += 1
                continue

            add_entry(e)

        # Save last spot in rss
        if rss.entries:
            log.debug('Saving location in rss feed.')
            try:
                task.simple_persistence['%s_last_entry' % url_hash] = (rss.entries[0].title +
                                                                       rss.entries[0].get('guid', ''))
            except AttributeError:
                log.debug('rss feed location saving skipped: no title information in first entry')

        if ignored:
            if not config.get('silent'):
                log.warning('Skipped %s RSS-entries without required information (title, link or enclosures)', ignored)

        return entries
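
The etag / last-modified handling above is ordinary HTTP conditional fetching. A minimal standalone sketch with requests and a plain dict as the cache (FlexGet persists these values per task in simple_persistence):

import requests

def fetch_if_changed(url, cache):
    """Fetch url, honoring a previously stored etag / last-modified pair.
    Returns the body, or None if the server answered 304 Not Modified."""
    headers = {}
    if cache.get('etag'):
        headers['If-None-Match'] = cache['etag']
    if cache.get('modified'):
        headers['If-Modified-Since'] = cache['modified']
    response = requests.get(url, headers=headers, timeout=60)
    if response.status_code == 304:
        return None
    # Remember the validators for the next run
    if response.headers.get('etag'):
        cache['etag'] = response.headers['etag']
    if response.headers.get('last-modified'):
        cache['modified'] = response.headers['last-modified']
    return response.content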
Example #39
                else:
                    if entry.get(config['link']):
                        e['url'] = entry[config['link']]
            else:
                # If link was passed as a list, we create a list of urls
                for field in config['link']:
                    if entry.get(field):
                        e.setdefault('url', entry[field])
                        if entry[field] not in e.setdefault('urls', []):
                            e['urls'].append(entry[field])

            if config.get('group_links'):
                # Append a list of urls from enclosures to the urls field if group_links is enabled
                e.setdefault('urls', [e['url']]).extend(
                        [enc.href for enc in entry.get('enclosures', []) if enc.get('href') not in e['urls']])

            if not e.get('url'):
                log.debug('%s does not have link (%s) or enclosure' % (entry.title, config['link']))
                ignored += 1
                continue

            add_entry(e)

        if ignored:
            if not config.get('silent'):
                log.warning('Skipped %s RSS-entries without required information (title, link or enclosures)' % ignored)

        return entries

register_plugin(InputRSS, 'rss', api_ver=2)
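
For completeness, feedparser's bozo flag, which Example #38 special-cases at length, can be exercised in isolation. A small sketch of the same tolerate-if-entries-survive policy, not FlexGet code:

import feedparser

def parse_feed(content):
    """Parse feed content, tolerating bozo errors when entries still come out,
    in the spirit of the handling in Example #38."""
    rss = feedparser.parse(content)
    if rss.get('bozo'):
        ex = rss.get('bozo_exception')
        if not rss.entries:
            raise ValueError('Feed could not be parsed: %r' % ex)
        # Entries were produced despite the error; log-and-continue territory
    return rss.entries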