def filename_from_headers(self, entry, response):
    """Checks entry filename if it's found from content-disposition"""
    if not response.headers.get('content-disposition'):
        # No content-disposition header, nothing we can do
        return
    filename = parse_header(response.headers['content-disposition'])[1].get('filename')
    if filename:
        # try to decode to unicode, specs allow latin1, some may do utf-8 anyway
        try:
            filename = native_str_to_text(filename, encoding='latin1')
            log.debug('filename header latin1 decoded')
        except UnicodeError:
            try:
                filename = native_str_to_text(filename, encoding='utf-8')
                log.debug('filename header UTF-8 decoded')
            except UnicodeError:
                pass
        filename = decode_html(filename)
        log.debug('Found filename from headers: %s', filename)
        if 'filename' in entry:
            log.debug('Overriding filename %s with %s from content-disposition',
                      entry['filename'], filename)
        entry['filename'] = filename
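# Illustration of the parsing step above, assuming parse_header is the stdlib
# cgi.parse_header (this snippet's imports are not shown). It splits a
# Content-Disposition value into the main value and a parameter dict:
from cgi import parse_header

main_value, params = parse_header('attachment; filename="Some.Show.S01E01.torrent"')
# main_value == 'attachment'
# params.get('filename') == 'Some.Show.S01E01.torrent'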
def filter_formatdate(val, format):
    """Returns a string representation of a datetime object according to format string."""
    encoding = locale.getpreferredencoding()
    if not isinstance(val, (datetime, date, time)):
        return val
    return native_str_to_text(val.strftime(text_to_native_str(format, encoding=encoding)),
                              encoding=encoding)
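# A minimal usage sketch: non-date values pass through unchanged, while real
# datetime/date/time objects are formatted with strftime:
from datetime import datetime

filter_formatdate(datetime(2017, 3, 14, 9, 30), '%Y-%m-%d %H:%M')  # -> '2017-03-14 09:30'
filter_formatdate('not a date', '%Y-%m-%d')                        # -> 'not a date'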
def execute_cmd(self, cmd, allow_background, encoding):
    log.verbose('Executing: %s' % cmd)
    # if PY2: cmd = cmd.encode(encoding) ?
    p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT, close_fds=False)
    if not allow_background:
        (r, w) = (p.stdout, p.stdin)
        response = native_str_to_text(r.read(), encoding=encoding, errors='replace')
        r.close()
        w.close()
        if response:
            log.info('Stdout: %s' % response)
    return p.wait()
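# A hypothetical stdlib-only equivalent of the capture-and-decode pattern above
# (run_and_decode is not part of the plugin; it just shows the same
# errors='replace' policy without the compat helpers):
import subprocess

def run_and_decode(cmd, encoding='utf-8'):
    proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    out, _ = proc.communicate()
    # undecodable bytes become U+FFFD instead of raising UnicodeDecodeError
    return proc.returncode, out.decode(encoding, errors='replace')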
def _handle_path(self, entries, ftp, baseurl, path='', mlst_supported=False,
                 files_only=False, recursive=False, get_size=True, encoding=None):
    dirs = self.list_directory(ftp, path)
    for p in dirs:
        if encoding:
            p = native_str_to_text(p, encoding=encoding)
        # Clean file list when subdirectories are used
        p = p.replace(path + '/', '')
        mlst = {}
        if mlst_supported:
            mlst_output = ftp.sendcmd('MLST ' + path + '/' + p)
            clean_mlst_output = [line.strip().lower() for line in mlst_output.splitlines()][1]
            mlst = self.parse_mlst(clean_mlst_output)
        else:
            element_is_directory = self.is_directory(ftp, path + '/' + p)
            if element_is_directory:
                mlst['type'] = 'dir'
                log.debug('%s is a directory', p)
            else:
                mlst['type'] = 'file'
                log.debug('%s is a file', p)

        if recursive and mlst.get('type') == 'dir':
            self._handle_path(entries, ftp, baseurl, path + '/' + p, mlst_supported,
                              files_only, recursive, get_size, encoding)

        if not files_only or mlst.get('type') == 'file':
            url = baseurl + quote(path) + '/' + quote(p)
            log.debug('Encoded URL: ' + url)
            title = os.path.basename(p)
            log.info('Accepting entry "%s" [%s]' % (path + '/' + p, mlst.get('type') or 'unknown'))
            entry = Entry(title, url)
            if get_size and 'size' not in mlst:
                if mlst.get('type') == 'file':
                    entry['content_size'] = old_div(ftp.size(path + '/' + p), (1024 * 1024))
                    log.debug('(FILE) Size = %s', entry['content_size'])
                elif mlst.get('type') == 'dir':
                    entry['content_size'] = self.get_folder_size(ftp, path, p)
                    log.debug('(DIR) Size = %s', entry['content_size'])
            elif get_size:
                entry['content_size'] = old_div(float(mlst.get('size')), (1024 * 1024))
            entries.append(entry)
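# A hedged sketch of what the parse_mlst helper might look like (the real
# implementation lives elsewhere in the plugin). Per RFC 3659, an MLST fact
# line is a series of 'key=value;' pairs followed by a space and the pathname:
def parse_mlst_sketch(mlst_line):
    facts = {}
    fact_part, _, _pathname = mlst_line.partition(' ')
    for fact in fact_part.split(';'):
        if '=' in fact:
            key, _, value = fact.partition('=')
            facts[key] = value
    return facts

# parse_mlst_sketch('type=file;size=1048576; video.mkv')
# -> {'type': 'file', 'size': '1048576'}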
def parse_response(self, response):
    p, u = self.getparser()
    response_body = ''
    while True:
        data = response.read(1024)
        if not data:
            break
        response_body += native_str_to_text(data, encoding='utf-8')

    if self.verbose:
        log.info('body: %s', repr(response_body))
    # Remove SCGI headers from the response.
    _, response_body = re.split(r'\n\s*?\n', response_body, maxsplit=1)
    p.feed(response_body.encode('utf-8'))
    p.close()
    return u.close()
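# Illustration of the header split above: an SCGI XML-RPC response is headers,
# a blank line, then the XML payload, so splitting on the first blank line
# isolates the body (the sample response below is made up):
import re

raw = 'Status: 200 OK\r\nContent-Type: text/xml\r\n\r\n<?xml version="1.0"?><methodResponse/>'
_, body = re.split(r'\n\s*?\n', raw, maxsplit=1)
# body == '<?xml version="1.0"?><methodResponse/>'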
def search(self, task, entry, config=None):
    """Search for name from torrent411."""
    url_base = 'http://www.t411.ch'
    if not isinstance(config, dict):
        config = {}
    category = config.get('category')
    if category in list(CATEGORIES):
        category = CATEGORIES[category]
    sub_categories = config.get('sub_category')
    if not isinstance(sub_categories, list):
        sub_categories = [sub_categories]

    filter_url = ''
    if isinstance(category, int):
        filter_url = '&cat=%s' % str(category)
        if sub_categories[0] is not None:
            sub_categories = [SUB_CATEGORIES[c] for c in sub_categories]
            # quote_plus already returns text, so the terms can be joined directly
            filter_url = filter_url + '&' + '&'.join(
                urllib.parse.quote_plus('term[%s][]' % c[0]) + '=' + str(c[1])
                for c in sub_categories)

    if 'series_season' in entry and 'series_episode' in entry:
        season = entry['series_season']
        if season in list(SEASONS):
            filter_url = filter_url + '&term[%d][]' % SEASONS[season][0] + '=' + str(SEASONS[season][1])
        episode = entry['series_episode']
        if episode in list(EPISODES):
            filter_url = filter_url + '&term[%d][]' % EPISODES[episode][0] + '=' + str(EPISODES[episode][1])

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        url_search = ('/torrents/search/?search=%40name+' +
                      urllib.parse.quote_plus(query.encode('utf-8')) +
                      filter_url)

        opener = urllib.request.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(url_base + url_search)

        data = response.read()
        soup = get_soup(data)
        tb = soup.find('table', class_='results')
        if not tb:
            continue

        for tr in tb.findAll('tr')[1:][:-1]:
            entry = Entry()
            nfo_link_res = re.search(r'torrents/nfo/\?id=(\d+)', str(tr))
            if nfo_link_res is not None:
                tid = nfo_link_res.group(1)
            title_res = re.search(
                r'<a href="//www.t411.ch/torrents/([-A-Za-z0-9+&@#/%|?=~_|!:,.;]+)" title="([^"]*)">',
                str(tr))
            if title_res is not None:
                entry['title'] = native_str_to_text(title_res.group(2), encoding='utf-8')
            size = tr('td')[5].contents[0]
            entry['url'] = 'http://www.t411.ch/torrents/download/?id=%s' % tid
            entry['torrent_seeds'] = tr('td')[7].contents[0]
            entry['torrent_leeches'] = tr('td')[8].contents[0]
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            size = re.search(r'([\.\d]+) ([GMK]?)B', size)
            entry['content_size'] = parse_filesize(size.group(0))
            auth_handler = t411Auth(config['username'], config['password'])
            entry['download_auth'] = auth_handler
            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
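# The size cell scraped above is free text such as '1.4 GB'; the regex pulls
# out the '<number> <unit>B' token that parse_filesize expects (sample input
# is made up):
import re

m = re.search(r'([\.\d]+) ([GMK]?)B', 'Taille: 1.4 GB')
# m.group(0) == '1.4 GB'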
def on_task_input(self, task, config):
    # Let details plugin know that it is ok if this task doesn't produce any entries
    task.no_entries_ok = True
    filename = os.path.expanduser(config['file'])
    encoding = config.get('encoding', None)
    with Session() as session:
        db_pos = (session.query(TailPosition)
                  .filter(TailPosition.task == task.name)
                  .filter(TailPosition.filename == filename)
                  .first())
        if db_pos:
            last_pos = db_pos.position
        else:
            last_pos = 0

        with open(filename, 'r') as file:
            if task.options.tail_reset == filename or task.options.tail_reset == task.name:
                if last_pos == 0:
                    log.info('Task %s tail position is already zero' % task.name)
                else:
                    log.info('Task %s tail position (%s) reset to zero' % (task.name, last_pos))
                    last_pos = 0

            if os.path.getsize(filename) < last_pos:
                log.info('File size is smaller than in previous execution, resetting to beginning of the file')
                last_pos = 0

            file.seek(last_pos)
            log.debug('continuing from last position %s' % last_pos)

            entry_config = config.get('entry')
            format_config = config.get('format', {})

            # keep track of which fields have been found
            used = {}
            entries = []
            entry = Entry()

            # now parse the text line by line
            while True:
                line = file.readline()
                if encoding:
                    try:
                        line = native_str_to_text(line, encoding=encoding)
                    except UnicodeError:
                        raise plugin.PluginError('Failed to decode file using %s. Check encoding.' % encoding)

                if not line:
                    break

                for field, regexp in entry_config.items():
                    # log.debug('search field: %s regexp: %s' % (field, regexp))
                    match = re.search(regexp, line)
                    if match:
                        # if this field was already found, start a new entry
                        if field in used:
                            if entry.isvalid():
                                log.info('Found field %s again before entry was completed. '
                                         'Adding current incomplete, but valid entry and moving to next.' % field)
                                self.format_entry(entry, format_config)
                                entries.append(entry)
                            else:
                                log.info('Invalid data, entry field %s is already found once. Ignoring entry.' % field)
                            # start new entry
                            entry = Entry()
                            used = {}

                        # add field to entry
                        entry[field] = match.group(1)
                        used[field] = True
                        log.debug('found field: %s value: %s' % (field, entry[field]))

                    # if all fields have been found
                    if len(used) == len(entry_config):
                        # check that entry has at least title and url
                        if not entry.isvalid():
                            log.info('Invalid data, constructed entry is missing mandatory fields (title or url)')
                        else:
                            self.format_entry(entry, format_config)
                            entries.append(entry)
                            log.debug('Added entry %s' % entry)
                        # start new entry
                        entry = Entry()
                        used = {}

            last_pos = file.tell()

        if db_pos:
            db_pos.position = last_pos
        else:
            session.add(TailPosition(task=task.name, filename=filename, position=last_pos))
    return entries
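# Sketch of the per-line field extraction the loop above performs, with a
# made-up entry_config mapping entry fields to regexps (one capture group each):
import re

entry_config = {'title': r'NOTICE: (.+?) ready', 'url': r'(https?://\S+)'}
line = 'NOTICE: Some.Release ready at http://example.com/some.release.torrent'
fields = {}
for field, regexp in entry_config.items():
    match = re.search(regexp, line)
    if match:
        fields[field] = match.group(1)
# fields == {'title': 'Some.Release',
#            'url': 'http://example.com/some.release.torrent'}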