Пример #1
0
    def filename_from_headers(self, entry, response):
        """Set entry['filename'] from the content-disposition header, if any."""
        header = response.headers.get('content-disposition')
        if not header:
            # Without a content-disposition header there is nothing to extract
            return
        filename = parse_header(header)[1].get('filename')
        if not filename:
            return
        # try to decode to unicode, specs allow latin1, some may do utf-8 anyway
        for codec, message in (('latin1', 'filename header latin1 decoded'),
                               ('utf-8', 'filename header UTF-8 decoded')):
            try:
                filename = native_str_to_text(filename, encoding=codec)
                log.debug(message)
                break
            except UnicodeError:
                continue
        filename = decode_html(filename)
        log.debug('Found filename from headers: %s', filename)
        if 'filename' in entry:
            log.debug(
                'Overriding filename %s with %s from content-disposition',
                entry['filename'], filename)
        entry['filename'] = filename
Пример #2
0
    def filename_from_headers(self, entry, response):
        """Set entry['filename'] from the response's content-disposition header.

        Does nothing when the header is absent or carries no filename
        parameter; logs when an existing entry filename is overridden.
        """
        if not response.headers.get('content-disposition'):
            # No content disposition header, nothing we can do
            return
        filename = parse_header(response.headers['content-disposition'])[1].get('filename')

        if filename:
            # try to decode to unicode, specs allow latin1, some may do utf-8 anyway
            try:
                filename = native_str_to_text(filename, encoding='latin1')
                log.debug('filename header latin1 decoded')
            except UnicodeError:
                try:
                    filename = native_str_to_text(filename, encoding='utf-8')
                    log.debug('filename header UTF-8 decoded')
                except UnicodeError:
                    # Neither decoding worked; keep the raw value as-is
                    pass
            # Unescape any HTML entities present in the header value
            filename = decode_html(filename)
            log.debug('Found filename from headers: %s', filename)
            if 'filename' in entry:
                log.debug(
                    'Overriding filename %s with %s from content-disposition',
                    entry['filename'],
                    filename,
                )
            entry['filename'] = filename
Пример #3
0
def filter_formatdate(val, format):
    """Return `val` rendered through strftime with `format`.

    Non-datetime/date/time values pass through unchanged. The format string
    is converted to the native str type in the locale's preferred encoding
    before strftime, and the result is decoded back to text the same way.
    """
    if not isinstance(val, (datetime, date, time)):
        return val
    encoding = locale.getpreferredencoding()
    native_format = text_to_native_str(format, encoding=encoding)
    return native_str_to_text(val.strftime(native_format), encoding=encoding)
Пример #4
0
 def execute_cmd(self, cmd, allow_background, encoding):
     """Run `cmd` through the shell and return its exit status.

     Unless backgrounded, the command's combined stdout/stderr is read to
     completion, decoded with `encoding` (undecodable bytes replaced) and
     logged at info level.
     """
     log.verbose('Executing: %s' % cmd)
     # if PY2: cmd = cmd.encode(encoding) ?
     proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                             close_fds=False)
     if not allow_background:
         stdout_pipe, stdin_pipe = proc.stdout, proc.stdin
         # stderr is redirected into stdout above, so this captures both
         output = native_str_to_text(stdout_pipe.read(), encoding=encoding, errors='replace')
         stdout_pipe.close()
         stdin_pipe.close()
         if output:
             log.info('Stdout: %s' % output)
     return proc.wait()
Пример #5
0
 def execute_cmd(self, cmd, allow_background, encoding):
     """Run `cmd` through the shell and return its exit status.

     When `allow_background` is false, the command's combined stdout/stderr
     is read to completion, decoded with `encoding` (undecodable bytes
     replaced) and logged at info level.
     """
     log.verbose('Executing: %s' % cmd)
     # if PY2: cmd = cmd.encode(encoding) ?
     p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT, close_fds=False)
     if not allow_background:
         (r, w) = (p.stdout, p.stdin)
         # stderr is redirected into stdout above, so this captures both
         response = native_str_to_text(r.read(), encoding=encoding, errors='replace')
         r.close()
         w.close()
         if response:
             log.info('Stdout: %s' % response)
     return p.wait()
Пример #6
0
    def _handle_path(self, entries, ftp, baseurl, path='', mlst_supported=False, files_only=False, recursive=False,
                     get_size=True, encoding=None):
        """Walk `path` on the FTP server, appending one Entry per item to `entries`.

        Uses the MLST command for type/size facts when the server supports it,
        otherwise probes each item with is_directory(). Recurses into
        subdirectories when `recursive` is set; with `files_only`, only files
        produce entries. Sizes are reported in MiB when `get_size` is enabled.
        """
        dirs = self.list_directory(ftp, path)

        for p in dirs:
            if encoding:
                # Server listings may be raw bytes; decode with the configured encoding
                p = native_str_to_text(p, encoding=encoding)

            # Clean file list when subdirectories are used
            p = p.replace(path + '/', '')

            mlst = {}
            if mlst_supported:
                mlst_output = ftp.sendcmd('MLST ' + path + '/' + p)
                # The second line of the MLST reply holds the facts for the item
                clean_mlst_output = [line.strip().lower() for line in mlst_output.splitlines()][1]
                mlst = self.parse_mlst(clean_mlst_output)
            else:
                element_is_directory = self.is_directory(ftp, path + '/' + p)
                if element_is_directory:
                    mlst['type'] = 'dir'
                    log.debug('%s is a directory', p)
                else:
                    mlst['type'] = 'file'
                    log.debug('%s is a file', p)

            if recursive and mlst.get('type') == 'dir':
                self._handle_path(entries, ftp, baseurl, path + '/' + p, mlst_supported, files_only,
                                  recursive, get_size, encoding)

            if not files_only or mlst.get('type') == 'file':
                url = baseurl + quote(path) + '/' + quote(p)
                log.debug("Encoded URL: " + url)
                title = os.path.basename(p)
                log.info('Accepting entry "%s" [%s]' % (path + '/' + p, mlst.get('type') or "unknown",))
                entry = Entry(title, url)
                if get_size and 'size' not in mlst:
                    # MLST gave no size (or is unsupported): query the server instead
                    if mlst.get('type') == 'file':
                        entry['content_size'] = old_div(ftp.size(path + '/' + p), (1024 * 1024))
                        log.debug('(FILE) Size = %s', entry['content_size'])
                    elif mlst.get('type') == 'dir':
                        entry['content_size'] = self.get_folder_size(ftp, path, p)
                        log.debug('(DIR) Size = %s', entry['content_size'])
                elif get_size:
                    # Size came straight from the MLST facts (bytes -> MiB)
                    entry['content_size'] = old_div(float(mlst.get('size')), (1024 * 1024))
                entries.append(entry)
Пример #7
0
    def parse_response(self, response):
        """Read an SCGI response, strip its headers and feed the body to the parser.

        Returns whatever the unmarshaller's close() yields.
        """
        p, u = self.getparser()

        # Accumulate the raw bytes first and decode once at the end: decoding
        # each 1024-byte chunk independently (as before) can split a
        # multi-byte UTF-8 sequence across a chunk boundary and raise
        # UnicodeDecodeError.
        raw = bytearray()
        while True:
            data = response.read(1024)
            if not data:
                break
            raw.extend(data)
        response_body = native_str_to_text(bytes(raw), encoding='utf-8')

        if self.verbose:
            log.info('body: %s', repr(response_body))

        # Remove SCGI headers from the response (headers and body are
        # separated by the first blank line).
        _, response_body = re.split(r'\n\s*?\n', response_body, maxsplit=1)
        p.feed(response_body.encode('utf-8'))
        p.close()

        return u.close()
Пример #8
0
    def search(self, task, entry, config=None):
        """
        Search for name from torrent411 (t411.ch).

        Builds a category/sub-category filter from `config`, adds season and
        episode terms when present on the entry, scrapes the results table
        and returns entries sorted by torrent availability.
        """
        url_base = "http://www.t411.ch"

        if not isinstance(config, dict):
            config = {}

        category = config.get("category")
        if category in list(CATEGORIES):
            category = CATEGORIES[category]

        sub_categories = config.get("sub_category")
        if not isinstance(sub_categories, list):
            sub_categories = [sub_categories]

        filter_url = ""
        if isinstance(category, int):
            filter_url = "&cat=%s" % str(category)

            if sub_categories[0] is not None:
                sub_categories = [SUB_CATEGORIES[c] for c in sub_categories]
                # quote_plus already returns text on Python 3; encoding it to
                # bytes here (as before) made the '+' concatenation with str
                # raise TypeError
                filter_url = (
                    filter_url
                    + "&"
                    + "&".join(
                        [
                            urllib.parse.quote_plus("term[%s][]" % c[0]) + "=" + str(c[1])
                            for c in sub_categories
                        ]
                    )
                )

        if "series_season" in entry and "series_episode" in entry:
            season = entry["series_season"]
            if season in list(SEASONS):
                filter_url = filter_url + "&term[%d][]" % SEASONS[season][0] + "=" + str(SEASONS[season][1])

            episode = entry["series_episode"]
            if episode in list(EPISODES):
                filter_url = filter_url + "&term[%d][]" % EPISODES[episode][0] + "=" + str(EPISODES[episode][1])

        entries = set()
        for search_string in entry.get("search_strings", [entry["title"]]):
            query = normalize_unicode(search_string)
            url_search = (
                "/torrents/search/?search=%40name+" + urllib.parse.quote_plus(query.encode("utf-8")) + filter_url
            )

            opener = urllib.request.build_opener()
            opener.addheaders = [("User-agent", "Mozilla/5.0")]
            response = opener.open(url_base + url_search)

            data = response.read()
            soup = get_soup(data)
            tb = soup.find("table", class_="results")
            if not tb:
                continue

            for tr in tb.findAll("tr")[1:][:-1]:
                entry = Entry()
                nfo_link_res = re.search(r"torrents/nfo/\?id=(\d+)", str(tr))
                if nfo_link_res is None:
                    # Without a torrent id there is no download URL; previously
                    # `tid` would be undefined (NameError) or stale here
                    continue
                tid = nfo_link_res.group(1)
                title_res = re.search(
                    '<a href="//www.t411.ch/torrents/([-A-Za-z0-9+&@#/%|?=~_|!:,.;]+)" title="([^"]*)">', str(tr)
                )
                if title_res is not None:
                    entry["title"] = native_str_to_text(title_res.group(2), encoding="utf-8")
                size = tr("td")[5].contents[0]
                entry["url"] = "http://www.t411.ch/torrents/download/?id=%s" % tid
                entry["torrent_seeds"] = tr("td")[7].contents[0]
                entry["torrent_leeches"] = tr("td")[8].contents[0]
                entry["search_sort"] = torrent_availability(entry["torrent_seeds"], entry["torrent_leeches"])
                # NOTE(review): assumes the size cell always matches this pattern — confirm
                size = re.search(r"([\.\d]+) ([GMK]?)B", size)

                entry["content_size"] = parse_filesize(size.group(0))

                auth_handler = t411Auth(config["username"], config["password"])

                entry["download_auth"] = auth_handler
                entries.add(entry)

            # NOTE(review): this return sits inside the search-string loop, so only
            # the first search string that yields a results table is processed —
            # confirm whether it should be dedented to cover all search strings
            return sorted(entries, reverse=True, key=lambda x: x.get("search_sort"))
Пример #9
0
    def _handle_path(self,
                     entries,
                     ftp,
                     baseurl,
                     path='',
                     mlst_supported=False,
                     files_only=False,
                     recursive=False,
                     get_size=True,
                     encoding=None):
        """Walk `path` on the FTP server, appending one Entry per item to `entries`.

        Uses the MLST command for type/size facts when the server supports it,
        otherwise probes each item with is_directory(). Recurses into
        subdirectories when `recursive` is set; with `files_only`, only files
        produce entries. Sizes are reported in MiB when `get_size` is enabled.
        """
        dirs = self.list_directory(ftp, path)

        for p in dirs:
            if encoding:
                # Server listings may be raw bytes; decode with the configured encoding
                p = native_str_to_text(p, encoding=encoding)

            # Clean file list when subdirectories are used
            p = p.replace(path + '/', '')

            mlst = {}
            if mlst_supported:
                mlst_output = ftp.sendcmd('MLST ' + path + '/' + p)
                # The second line of the MLST reply holds the facts for the item
                clean_mlst_output = [
                    line.strip().lower() for line in mlst_output.splitlines()
                ][1]
                mlst = self.parse_mlst(clean_mlst_output)
            else:
                element_is_directory = self.is_directory(ftp, path + '/' + p)
                if element_is_directory:
                    mlst['type'] = 'dir'
                    log.debug('%s is a directory', p)
                else:
                    mlst['type'] = 'file'
                    log.debug('%s is a file', p)

            if recursive and mlst.get('type') == 'dir':
                self._handle_path(entries, ftp, baseurl, path + '/' + p,
                                  mlst_supported, files_only, recursive,
                                  get_size, encoding)

            if not files_only or mlst.get('type') == 'file':
                url = baseurl + quote(path) + '/' + quote(p)
                log.debug("Encoded URL: " + url)
                title = os.path.basename(p)
                log.info('Accepting entry "%s" [%s]' % (
                    path + '/' + p,
                    mlst.get('type') or "unknown",
                ))
                entry = Entry(title, url)
                if get_size and 'size' not in mlst:
                    # MLST gave no size (or is unsupported): query the server instead
                    if mlst.get('type') == 'file':
                        entry['content_size'] = old_div(
                            ftp.size(path + '/' + p), (1024 * 1024))
                        log.debug('(FILE) Size = %s', entry['content_size'])
                    elif mlst.get('type') == 'dir':
                        entry['content_size'] = self.get_folder_size(
                            ftp, path, p)
                        log.debug('(DIR) Size = %s', entry['content_size'])
                elif get_size:
                    # Size came straight from the MLST facts (bytes -> MiB)
                    entry['content_size'] = old_div(float(mlst.get('size')),
                                                    (1024 * 1024))
                entries.append(entry)
Пример #10
0
    def search(self, task, entry, config=None):
        """
        Search for name from torrent411 (t411.li).

        Builds a category/sub-category filter from `config`, adds season and
        episode terms when present on the entry, scrapes the results table
        and returns entries sorted by torrent availability.
        """
        url_base = 'http://www.t411.li'

        if not isinstance(config, dict):
            config = {}

        category = config.get('category')
        if category in list(CATEGORIES):
            category = CATEGORIES[category]

        sub_categories = config.get('sub_category')
        if not isinstance(sub_categories, list):
            sub_categories = [sub_categories]

        filter_url = ''
        if isinstance(category, int):
            filter_url = '&cat=%s' % str(category)

            if sub_categories[0] is not None:
                sub_categories = [SUB_CATEGORIES[c] for c in sub_categories]
                # quote_plus already returns text on Python 3; encoding it to
                # bytes here (as before) made the '+' concatenation with str
                # raise TypeError
                filter_url = filter_url + '&' + '&'.join(
                    [urllib.parse.quote_plus('term[%s][]' % c[0]) + '=' + str(c[1])
                     for c in sub_categories])

        if 'series_season' in entry and 'series_episode' in entry:
            season = entry['series_season']
            if season in list(SEASONS):
                filter_url = filter_url + '&term[%d][]' % SEASONS[season][0] + '=' + str(SEASONS[season][1])

            episode = entry['series_episode']
            if episode in list(EPISODES):
                filter_url = filter_url + '&term[%d][]' % EPISODES[episode][0] + '=' + str(EPISODES[episode][1])

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            url_search = ('/torrents/search/?search=%40name+' +
                          urllib.parse.quote_plus(query.encode('utf-8')) +
                          filter_url)

            opener = urllib.request.build_opener()
            opener.addheaders = [('User-agent', 'Mozilla/5.0')]
            response = opener.open(url_base + url_search)

            data = response.read()
            soup = get_soup(data)
            tb = soup.find("table", class_="results")
            if not tb:
                continue

            for tr in tb.findAll('tr')[1:][:-1]:
                entry = Entry()
                nfo_link_res = re.search(r'torrents/nfo/\?id=(\d+)', str(tr))
                if nfo_link_res is None:
                    # Without a torrent id there is no download URL; previously
                    # `tid` would be undefined (NameError) or stale here
                    continue
                tid = nfo_link_res.group(1)
                title_res = re.search(
                    '<a href="//www.t411.li/torrents/([-A-Za-z0-9+&@#/%|?=~_|!:,.;]+)" title="([^"]*)">',
                    str(tr))
                if title_res is not None:
                    entry['title'] = native_str_to_text(title_res.group(2), encoding='utf-8')
                size = tr('td')[5].contents[0]
                entry['url'] = 'http://www.t411.li/torrents/download/?id=%s' % tid
                entry['torrent_seeds'] = tr('td')[7].contents[0]
                entry['torrent_leeches'] = tr('td')[8].contents[0]
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches'])
                # NOTE(review): assumes the size cell always matches this pattern — confirm
                size = re.search(r'([\.\d]+) ([GMK]?)B', size)

                entry['content_size'] = parse_filesize(size.group(0))

                auth_handler = t411Auth(config['username'],
                                        config['password'])

                entry['download_auth'] = auth_handler
                entries.add(entry)

            # NOTE(review): this return sits inside the search-string loop, so only
            # the first search string that yields a results table is processed —
            # confirm whether it should be dedented to cover all search strings
            return sorted(entries, reverse=True,
                          key=lambda x: x.get('search_sort'))
Пример #11
0
    def search(self, task, entry, config=None):
        """
        Search for name from torrent411 (t411.al).

        Builds a category/sub-category filter from `config`, adds season and
        episode terms when present on the entry, scrapes the results table
        and returns entries sorted by torrent availability.
        """
        url_base = 'https://www.t411.al'

        if not isinstance(config, dict):
            config = {}

        category = config.get('category')
        if category in list(CATEGORIES):
            category = CATEGORIES[category]

        sub_categories = config.get('sub_category')
        if not isinstance(sub_categories, list):
            sub_categories = [sub_categories]

        filter_url = ''
        if isinstance(category, int):
            filter_url = '&cat=%s' % str(category)

            if sub_categories[0] is not None:
                sub_categories = [SUB_CATEGORIES[c] for c in sub_categories]
                # quote_plus already returns text on Python 3; encoding it to
                # bytes here (as before) made the '+' concatenation with str
                # raise TypeError
                filter_url = filter_url + '&' + '&'.join([
                    urllib.parse.quote_plus('term[%s][]' % c[0]) + '=' + str(c[1])
                    for c in sub_categories
                ])

        if 'series_season' in entry and 'series_episode' in entry:
            season = entry['series_season']
            if season in list(SEASONS):
                filter_url = filter_url + '&term[%d][]' % SEASONS[season][
                    0] + '=' + str(SEASONS[season][1])

            episode = entry['series_episode']
            if episode in list(EPISODES):
                filter_url = filter_url + '&term[%d][]' % EPISODES[episode][
                    0] + '=' + str(EPISODES[episode][1])

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            url_search = ('/torrents/search/?search=%40name+' +
                          urllib.parse.quote_plus(query.encode('utf-8')) +
                          filter_url)

            opener = urllib.request.build_opener()
            opener.addheaders = [('User-agent', 'Mozilla/5.0')]
            response = opener.open(url_base + url_search)

            data = response.read()
            soup = get_soup(data)
            tb = soup.find("table", class_="results")
            if not tb:
                continue

            for tr in tb.findAll('tr')[1:][:-1]:
                entry = Entry()
                nfo_link_res = re.search(r'torrents/nfo/\?id=(\d+)', str(tr))
                if nfo_link_res is None:
                    # Without a torrent id there is no download URL; previously
                    # `tid` would be undefined (NameError) or stale here
                    continue
                tid = nfo_link_res.group(1)
                title_res = re.search(
                    '<a href="//www.t411.al/torrents/([-A-Za-z0-9+&@#/%|?=~_|!:,.;]+)" title="([^"]*)">',
                    str(tr))
                if title_res is not None:
                    entry['title'] = native_str_to_text(title_res.group(2),
                                                        encoding='utf-8')
                size = tr('td')[5].contents[0]
                entry[
                    'url'] = 'https://www.t411.al/torrents/download/?id=%s' % tid
                entry['torrent_seeds'] = tr('td')[7].contents[0]
                entry['torrent_leeches'] = tr('td')[8].contents[0]
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])
                # NOTE(review): assumes the size cell always matches this pattern — confirm
                size = re.search(r'([\.\d]+) ([GMK]?)B', size)

                entry['content_size'] = parse_filesize(size.group(0))

                auth_handler = t411Auth(config['username'], config['password'])

                entry['download_auth'] = auth_handler
                entries.add(entry)

            # NOTE(review): this return sits inside the search-string loop, so only
            # the first search string that yields a results table is processed —
            # confirm whether it should be dedented to cover all search strings
            return sorted(entries,
                          reverse=True,
                          key=lambda x: x.get('search_sort'))
Пример #12
0
    def on_task_input(self, task, config):
        """Tail the configured file from the last remembered position and build entries.

        Entry fields are extracted line-by-line via the regexps in
        config['entry']; the read position is persisted per (task, filename)
        in TailPosition so subsequent runs only see newly appended lines.
        Returns the list of completed entries.

        Raises plugin.PluginError when a line cannot be decoded with the
        configured encoding.
        """

        # Let details plugin know that it is ok if this task doesn't produce any entries
        task.no_entries_ok = True

        filename = os.path.expanduser(config['file'])
        encoding = config.get('encoding', None)
        with Session() as session:
            # Previously persisted tail position for this task/file pair, if any
            db_pos = (session.query(TailPosition).
                      filter(TailPosition.task == task.name).filter(TailPosition.filename == filename).first())
            if db_pos:
                last_pos = db_pos.position
            else:
                last_pos = 0

            with open(filename, 'r') as file:
                # --tail-reset on the CLI forces a re-read from the beginning
                if task.options.tail_reset == filename or task.options.tail_reset == task.name:
                    if last_pos == 0:
                        log.info('Task %s tail position is already zero' % task.name)
                    else:
                        log.info('Task %s tail position (%s) reset to zero' % (task.name, last_pos))
                        last_pos = 0

                # File was truncated/rotated since last run; start over
                if os.path.getsize(filename) < last_pos:
                    log.info('File size is smaller than in previous execution, resetting to beginning of the file')
                    last_pos = 0

                file.seek(last_pos)

                log.debug('continuing from last position %s' % last_pos)

                entry_config = config.get('entry')
                format_config = config.get('format', {})

                # keep track what fields have been found
                used = {}
                entries = []
                entry = Entry()

                # now parse text

                while True:
                    line = file.readline()
                    if encoding:
                        # NOTE(review): the file is opened in text mode, so `line`
                        # may already be text here — confirm this matches
                        # native_str_to_text's handling of str input
                        try:
                            line = native_str_to_text(line, encoding=encoding)
                        except UnicodeError:
                            raise plugin.PluginError('Failed to decode file using %s. Check encoding.' % encoding)

                    if not line:
                        break

                    for field, regexp in entry_config.items():
                        # log.debug('search field: %s regexp: %s' % (field, regexp))
                        match = re.search(regexp, line)
                        if match:
                            # check if used field detected, in such case start with new entry
                            if field in used:
                                if entry.isvalid():
                                    log.info('Found field %s again before entry was completed. \
                                              Adding current incomplete, but valid entry and moving to next.' % field)
                                    self.format_entry(entry, format_config)
                                    entries.append(entry)
                                else:
                                    log.info(
                                        'Invalid data, entry field %s is already found once. Ignoring entry.' % field)
                                # start new entry
                                entry = Entry()
                                used = {}

                            # add field to entry
                            entry[field] = match.group(1)
                            used[field] = True
                            log.debug('found field: %s value: %s' % (field, entry[field]))

                        # if all fields have been found
                        if len(used) == len(entry_config):
                            # check that entry has at least title and url
                            if not entry.isvalid():
                                log.info('Invalid data, constructed entry is missing mandatory fields (title or url)')
                            else:
                                self.format_entry(entry, format_config)
                                entries.append(entry)
                                log.debug('Added entry %s' % entry)
                                # start new entry
                                entry = Entry()
                                used = {}
                last_pos = file.tell()
            # Persist the new position for the next run
            if db_pos:
                db_pos.position = last_pos
            else:
                session.add(TailPosition(task=task.name, filename=filename, position=last_pos))
        return entries