def filename_from_url(url, clean=False):
    """Build a reasonable filename under which ``url`` could be saved.

    The name is assembled from the filename and query groups that
    ``URI_PATTERN`` extracts from the url:

    * only one of them present -> that one is used;
    * both present -> ``<root>-<query><ext>``, so the extension of the
      filename stays at the end.

    The result is passed through ``unquote`` and then through
    ``clean_filename`` (when ``clean`` is true) or ``unicode_to_filename``.
    If one of the handled exceptions is raised along the way, the
    filename for ``u'unknown'`` is returned instead.
    """
    check_u(url)
    try:
        parsed = URI_PATTERN.match(url)
        if parsed is None:
            # This code path will never be executed.
            return unicode_to_filename(url)

        filename, query = parsed.group(2), parsed.group(4)
        if filename and query:
            # Insert the query between the root and the extension.
            root, ext = os.path.splitext(filename)
            ret = u"%s-%s%s" % (root, query, ext)
        elif filename:
            ret = filename
        else:
            ret = query

        ret = unquote(ret)
        if ret is None:
            ret = u'unknown'

        converter = clean_filename if clean else unicode_to_filename
        return converter(ret)
    except (TypeError, KeyError, AttributeError, UnicodeDecodeError):
        return unicode_to_filename(u'unknown')
def setup_new(self, url, item, content_type=None, channel_name=None):
    """Initialise a freshly created downloader for ``url`` and start it.

    :param url: url to download (validated with ``check_u``).
    :param item: item the download belongs to; only its
        ``enclosure_type`` attribute is read here.
    :param content_type: content type if already known, else ``None``.
    :param channel_name: stored unchanged on ``self.channel_name``.

    When no usable content type is available ``get_content_type()`` is
    called, otherwise ``run_downloader()`` is called directly.
    """
    check_u(url)
    if content_type:
        check_u(content_type)

    self.orig_url = url
    self.url = url
    self.item_list = []
    self.child_deleted = False
    self.main_item_id = None
    self.dlid = generate_dlid()

    # HACK: Some servers report the wrong content-type for torrent
    # files.  We work around that by trusting the enclosure: if it says
    # the file is a torrent, we treat it as a torrent.
    # Thanks to [email protected].
    if (content_type is None
            and item.enclosure_type == u'application/x-bittorrent'):
        content_type = item.enclosure_type

    self.content_type = u""
    self.delete_files = True
    self.channel_name = channel_name
    self.manualUpload = False
    self._update_retry_time_dc = None
    self.status_updates_frozen = False
    self.last_update = time.time()
    self.reset_status_attributes()

    # A missing content type is stored as the empty string.
    self.content_type = u"" if content_type is None else content_type
    if self.content_type == u'':
        self.get_content_type()
    else:
        self.run_downloader()
def unicode_to_filename(filename, path=None):
    """Turn a unicode filename (NOT a file path) into a valid encoded
    filename, attempting to preserve the extension.

    Characters that are troublesome in filenames are replaced with
    ``_`` and the name is shortened with ``shorten_fn`` until its
    encoded form fits the filesystem's name-length limit for ``path``
    (the current working directory when ``path`` is not given).

    .. Note::

       This is not guaranteed to give the same results every time it
       is run, nor is it guaranteed to reverse the results of
       filename_to_unicode.
    """
    check_u(filename)
    if not path:
        path = os.getcwd()
    else:
        check_b(path)

    # Stay a little below the maximum length so that a number can
    # still be appended to the end.
    max_len = os.statvfs(path)[statvfs.F_NAMEMAX] - 5

    bad_chars = ("/", "\000", "\\", ":", "*", "?", "\"", "'",
                 "<", ">", "|", "&", "\r", "\n")
    for bad_char in bad_chars:
        filename = filename.replace(bad_char, "_")

    # Trim the unicode name until its encoded form is short enough.
    while True:
        encoded = encode_fn(filename)
        if len(encoded) <= max_len:
            return encoded
        filename = shorten_fn(filename)
def _scrape_youtube_url(url, callback):
    """Start scraping the video behind a YouTube ``url``.

    The video id is read from the ``v`` query parameter of a ``/watch``
    url or from the path of a ``/v/<id>`` url.  With an id, the
    ``get_video_info`` page is requested through ``httpclient.grab_url``
    and handling continues in ``_youtube_callback_step2`` /
    ``_youtube_errback``.  Without an id, or if starting the request
    raises, ``callback(None)`` is called.
    """
    check_u(url)
    parts = urlparse.urlsplit(url)
    params = cgi.parse_qs(parts[3])
    path = parts[2]

    video_id = None
    if path == u'/watch' and 'v' in params:
        try:
            video_id = params['v'][0]
        except IndexError:
            pass
    elif path.startswith('/v/'):
        found = re.match(r'/v/([\w-]+)', path)
        if found is not None:
            video_id = found.group(1)

    if video_id is None:
        logging.warning('_scrape_youtube_url: unable to scrape YouTube Video URL')
        callback(None)
        return

    def on_success(info):
        return _youtube_callback_step2(info, video_id, callback)

    def on_error(error):
        return _youtube_errback(error, callback)

    try:
        info_url = u"http://www.youtube.com/get_video_info?video_id=%s&el=embedded&ps=default&eurl=" % video_id
        httpclient.grab_url(info_url, on_success, on_error)
    except StandardError:
        logging.exception("youtube_callback: unable to scrape YouTube Video URL")
        callback(None)
def create_downloader(url, contentType, dlid):
    """Return the downloader object matching ``contentType``.

    A ``BTDownloader`` is created for ``application/x-bittorrent``; any
    other content type gets an ``HTTPDownloader`` that is told which
    content type to expect.
    """
    check_u(url)
    check_u(contentType)
    if contentType != u'application/x-bittorrent':
        return HTTPDownloader(url, dlid, expectedContentType=contentType)
    return BTDownloader(url, dlid)
def __setitem__(self, key, value):
    """Store ``value`` under ``key`` and signal that the data changed.

    The change notification is sent through the parent database when
    there is one, otherwise through this database itself.
    """
    check_u(key)
    super(DeviceDatabase, self).__setitem__(key, value)
    notifier = self.parent or self
    notifier.notify_changed()
def filename_from_url(url, clean=False):
    """Build a reasonable filename under which ``url`` could be saved.

    The name is assembled from the filename and query groups that
    ``URI_PATTERN`` extracts from the url:

    * only one of them present -> that one is used;
    * both present -> ``<root>-<query><ext>``, so the extension of the
      filename stays at the end.

    The result is passed through ``unquote`` and then through
    ``clean_filename`` (when ``clean`` is true) or ``unicode_to_filename``.
    Any failure other than ``SystemExit`` / ``KeyboardInterrupt`` falls
    back to the filename for ``u'unknown'``.
    """
    check_u(url)
    try:
        parsed = URI_PATTERN.match(url)
        if parsed is None:
            # This code path will never be executed.
            return unicode_to_filename(url)

        filename, query = parsed.group(2), parsed.group(4)
        if filename and query:
            # Insert the query between the root and the extension.
            root, ext = os.path.splitext(filename)
            ret = u"%s-%s%s" % (root, query, ext)
        elif filename:
            ret = filename
        else:
            ret = query

        ret = unquote(ret)
        if ret is None:
            ret = u'unknown'

        converter = clean_filename if clean else unicode_to_filename
        return converter(ret)
    except (SystemExit, KeyboardInterrupt):
        # Never swallow requests to shut down.
        raise
    except:
        return unicode_to_filename(u'unknown')
def setup_new(self, url, item, content_type=None, channel_name=None):
    """Initialise a freshly created downloader for ``url`` and start it.

    :param url: url to download (validated with ``check_u``).
    :param item: item the download belongs to; only its
        ``enclosure_type`` attribute is read here.
    :param content_type: content type if already known, else ``None``.
    :param channel_name: stored unchanged on ``self.channel_name``.

    When no usable content type is available ``get_content_type()`` is
    called, otherwise ``run_downloader()`` is called directly.
    """
    check_u(url)
    if content_type:
        check_u(content_type)

    self.orig_url = url
    self.url = url
    self.item_list = []
    self.child_deleted = False
    self.main_item_id = None
    self.dlid = generate_dlid()

    # HACK: Some servers report the wrong content-type for torrent
    # files.  We work around that by trusting the enclosure: if it says
    # the file is a torrent, we treat it as a torrent.
    # Thanks to [email protected].
    if (content_type is None
            and item.enclosure_type == u'application/x-bittorrent'):
        content_type = item.enclosure_type

    self.content_type = u""
    self.delete_files = True
    self.channel_name = channel_name
    self.manualUpload = False
    self.status_updates_frozen = False
    self.last_update = time.time()
    self.reset_status_attributes()

    # A missing content type is stored as the empty string.
    self.content_type = u"" if content_type is None else content_type
    if self.content_type == u'':
        self.get_content_type()
    else:
        self.run_downloader()
def __getitem__(self, key):
    """Return the value stored under ``key``.

    A plain ``dict`` value is wrapped in a ``DeviceDatabase`` (sharing
    this database's parent, or using this database as the parent) and
    the wrapper is stored back so later lookups return the same kind
    of object.
    """
    check_u(key)
    base = super(DeviceDatabase, self)
    value = base.__getitem__(key)
    is_plain_dict = (isinstance(value, dict)
                     and not isinstance(value, DeviceDatabase))
    if not is_plain_dict:
        return value

    wrapped = DeviceDatabase(value, self.parent or self)
    # Go through the base class so the changed signal is not tripped.
    base.__setitem__(key, wrapped)
    return wrapped
def shorten_fn(filename):
    """Return ``filename`` with one character removed.

    The character is dropped from the end of the part before the
    extension; when that part is empty, it is dropped from the end of
    the extension instead.
    """
    check_u(filename)
    stem, ext = os.path.splitext(filename)
    if not stem:
        return unicode(ext[:-1])
    return u"".join([stem[:-1], ext])
def try_scraping_url(url, callback):
    """Scrape ``url`` when a scraper is registered for it.

    If ``_get_scrape_function_for`` finds a scraper, it is run and its
    result is forwarded to ``callback`` via ``_actual_url_callback``.
    Otherwise ``callback`` is called straight away with the original
    ``url``.
    """
    check_u(url)
    scrape = _get_scrape_function_for(url)
    if scrape is None:
        callback(url)
        return

    def on_scraped(newurl, content_type=u"video/x-flv", title=None):
        return _actual_url_callback(url, callback, newurl, content_type,
                                    title)

    scrape(url, on_scraped)
def filename_to_unicode(filename, path=None):
    """Return the unicode representation of ``filename``.

    In this implementation the filename is validated with ``check_u``
    (as is ``path`` when one is given) and returned unchanged.

    This is not guaranteed to give the same results every time it is
    run, nor is it guaranteed to reverse the results of
    unicode_to_filename.
    """
    if path:
        check_u(path)
    check_u(filename)
    return filename
def setup_new(self, typ):
    """Construct a TabOrder.

    ``typ`` should be either ``channel`` or ``playlist``.  After the
    views are set up, ``tab_ids`` is filled with the ids of all tabs in
    ``id_to_tab``, ordered by lower-cased title.
    """
    check_u(typ)
    self.type = typ
    self.tab_ids = []
    self._setup_views()

    by_title = sorted(self.id_to_tab.values(),
                      key=lambda tab: tab.get_title().lower())
    self.tab_ids.extend(tab.id for tab in by_title)
def start_new_download(url, dlid, contentType, channelName):
    """Create a new downloader object and register it under ``dlid``.

    The downloader comes from ``create_downloader``, is tagged with
    ``channelName`` and is stored in ``_downloads``.  There is no
    explicit return value.
    """
    check_u(url)
    check_u(contentType)
    if channelName:
        check_f(channelName)

    downloader = create_downloader(url, contentType, dlid)
    downloader.channelName = channelName
    _downloads[dlid] = downloader
def filename_to_unicode(filename, path=None):
    """Return the unicode representation of ``filename``.

    In this implementation the filename is validated with ``check_u``
    (as is ``path`` when one is given) and returned unchanged.

    This is not guaranteed to give the same results every time it is
    run, nor is it guaranteed to reverse the results of
    unicode_to_filename.
    """
    if path:
        check_u(path)
    check_u(filename)
    return filename
def shortenFilename(filename):
    """Return ``filename`` with one character removed.

    The text after the last dot is left alone: the character is cut
    from the end of whatever precedes that dot.  When nothing precedes
    it (no dot at all, or a leading dot only), the last character of
    the whole name is cut instead.
    """
    check_u(filename)
    # Split at the last dot; ``stem`` is empty when there is no dot.
    stem, _dot, extension = filename.rpartition(u".")
    if stem:
        return u"%s.%s" % (stem[:-1], extension)
    return filename[:-1]
def shortenFilename(filename):
    """Return ``filename`` with one character removed.

    The text after the last dot is left alone: the character is cut
    from the end of whatever precedes that dot.  When nothing precedes
    it (no dot at all, or a leading dot only), the last character of
    the whole name is cut instead.
    """
    check_u(filename)
    # Split at the last dot; ``stem`` is empty when there is no dot.
    stem, _dot, extension = filename.rpartition(u".")
    if stem:
        return u"%s.%s" % (stem[:-1], extension)
    return filename[:-1]
def unicode_to_filename(filename, path=None):
    """Turn a unicode filename into one that is valid on disk,
    attempting to preserve the extension.

    Troublesome characters are replaced with ``_`` and the name is then
    shortened, one character at a time, until it is at most 200
    characters long.

    This is not guaranteed to give the same results every time it is
    run, nor is it guaranteed to reverse the results of
    filename_to_unicode.
    """
    @returns_unicode
    def shorten_filename(filename):
        check_u(filename)
        # Split at the last dot; ``stem`` is empty when there is no dot.
        stem, _dot, extension = filename.rpartition(u".")
        # Prefer trimming the part before the extension; fall back to
        # trimming the whole name.
        if stem:
            return u"%s.%s" % (stem[:-1], extension)
        return filename[:-1]

    check_u(filename)
    if not path:
        path = os.getcwd()
    else:
        check_u(path)

    # Keep this a little shorter than the max length, so we can run
    # nextFilename
    MAX_LEN = 200

    badchars = ('/', '\000', '\\', ':', '*', '?', "'", '"', '<', '>', '|',
                "\n", "\r")
    for badchar in badchars:
        filename = filename.replace(badchar, "_")

    while len(filename) > MAX_LEN:
        filename = shorten_filename(filename)
    return filename
def unicode_to_filename(filename, path=None):
    """Turn a unicode filename into a valid utf-8 encoded filename.

    Troublesome characters are replaced with ``_`` and the name is
    shortened with ``shortenFilename`` until its encoded form fits the
    filesystem's name-length limit for ``path`` (the current working
    directory when ``path`` is not given).
    """
    check_u(filename)
    if not path:
        path = os.getcwd()
    else:
        check_b(path)

    # Keep this a little shorter than the max length, so we can run
    # nextFilename
    MAX_LEN = os.statvfs(path)[statvfs.F_NAMEMAX] - 5

    bad_chars = ("/", "\000", "\\", ":", "*", "?", "'", "\"",
                 "<", ">", "|", "&", "\r", "\n")
    for bad_char in bad_chars:
        filename = filename.replace(bad_char, "_")

    # Trim the unicode name until its encoded form is short enough.
    while True:
        encoded = filename.encode('utf-8', 'replace')
        if len(encoded) <= MAX_LEN:
            return encoded
        filename = shortenFilename(filename)
def setup_new(self, url, allowedURLs=None):
    """Initialise a new guide for ``url`` and start downloading it.

    All per-guide attributes are reset, the browsing history starts
    with ``url`` when one is given (and is empty otherwise), and
    ``download_guide()`` is called at the end.  The ``allowedURLs``
    argument is currently not used.
    """
    check_u(url)
    # FIXME - clean up the allowedURLs thing here
    self.allowedURLs = []
    self.url = url
    self.updated_url = url
    self.title = None
    self.userTitle = None
    self.client = None
    self.lastVisitedURL = None
    self.setup_new_icon_cache()
    self.favicon = None
    self.firstTime = True

    if not url:
        # Nothing to visit yet: empty history without a position.
        self.historyLocation = None
        self.history = []
    else:
        self.historyLocation = 0
        self.history = [self.url]

    self.download_guide()
def unicode_to_filename(filename, path=None):
    """Turn a unicode filename into a valid utf-8 encoded filename.

    Troublesome characters are replaced with ``_`` and the name is
    shortened with ``shortenFilename`` until its encoded form fits the
    filesystem's name-length limit for ``path`` (the current working
    directory when ``path`` is not given).
    """
    check_u(filename)
    if not path:
        path = os.getcwd()
    else:
        check_b(path)

    # Keep this a little shorter than the max length, so we can run
    # nextFilename
    MAX_LEN = os.statvfs(path)[statvfs.F_NAMEMAX] - 5

    bad_chars = ("/", "\000", "\\", ":", "*", "?", "'", "\"",
                 "<", ">", "|", "&", "\r", "\n")
    for bad_char in bad_chars:
        filename = filename.replace(bad_char, "_")

    # Trim the unicode name until its encoded form is short enough.
    while True:
        encoded = filename.encode('utf-8', 'replace')
        if len(encoded) <= MAX_LEN:
            return encoded
        filename = shortenFilename(filename)
def test_check_u(self):
    """check_u accepts None and unicode strings, rejects byte strings."""
    accepted = (None, u'abc', u'&*@!#)*) !@)( !@# !)@(#')
    for value in accepted:
        util.check_u(value)

    rejected = ('abc', '&*@!#)*) !@)( !@# !)@(#')
    for value in rejected:
        self.assertRaises(util.MiroUnicodeError, util.check_u, value)
def setup_new(self, url, allowedURLs=None):
    """Initialise a new guide for ``url`` and start downloading it.

    All per-guide attributes are reset, the browsing history starts
    with ``url`` when one is given (and is empty otherwise), then
    ``setup_common()`` and ``download_guide()`` are called.  The
    ``allowedURLs`` argument is currently not used.
    """
    check_u(url)
    # FIXME - clean up the allowedURLs thing here
    self.allowedURLs = []
    self.url = url
    self.updated_url = url
    self.title = None
    self.userTitle = None
    self.client = None
    self.lastVisitedURL = None
    self.setup_new_icon_cache()
    self.favicon = None
    self.firstTime = True
    self.store = self.STORE_NOT_STORE

    if not url:
        # Nothing to visit yet: empty history without a position.
        self.historyLocation = None
        self.history = []
    else:
        self.historyLocation = 0
        self.history = [self.url]

    self.setup_common()
    self.download_guide()
def __init__(self, name, title, url, sort_order=0, filename=None):
    """Store the given attributes on the instance.

    ``name``, ``title`` and ``url`` are validated with ``check_u``.
    ``filename`` is optional; when given it is stored in
    ``os.path.normcase`` form.
    """
    for text in (name, title, url):
        check_u(text)

    self.name = name
    self.title = title
    self.url = url
    self.sort_order = sort_order
    # used for changing icon location on themed searches
    self.filename = None if filename is None else os.path.normcase(filename)
def unmake_url_safe(s):
    """Undo make_url_safe (assuming it was passed a FilenameType).

    ``s`` is encoded to ascii and the resulting byte string is
    percent-unquoted and returned.
    """
    check_u(s)
    ascii_bytes = s.encode("ascii")
    return urllib.unquote(ascii_bytes)
def make_url_safe(string, safe='/'):
    """Percent-quote ``string`` so that it can be used in a URL.

    The string is encoded as utf-8 before quoting.  Characters listed
    in ``safe`` are left unquoted.  The quoted result is returned as a
    unicode string.
    """
    check_u(string)
    utf8_bytes = string.encode('utf_8')
    quoted = urllib.quote(utf8_bytes, safe=safe)
    return quoted.decode('ascii')
def unmake_url_safe(string):
    """Undo make_url_safe.

    ``string`` is encoded to ascii, percent-unquoted, and the result is
    decoded from utf-8 back into a unicode string.
    """
    check_u(string)
    ascii_bytes = string.encode('ascii')
    unquoted = urllib.unquote(ascii_bytes)
    return unquoted.decode('utf_8')
def _actual_url_callback(url, callback, new_url, content_type, title): if new_url: check_u(new_url) callback(new_url, content_type=content_type, title=title)
def _get_scrape_function_for(url):
    """Return the scrape function registered for ``url``, or ``None``.

    ``SCRAPER_INFO_MAP`` is searched in order; the ``func`` of the first
    entry whose ``pattern`` matches ``url`` is returned.
    """
    check_u(url)
    for entry in SCRAPER_INFO_MAP:
        if entry['pattern'].match(url) is None:
            continue
        return entry['func']
    return None
def unmake_url_safe(string):
    """Undo make_url_safe (assuming it was passed a filenameType).

    ``string`` is encoded to ascii and the resulting byte string is
    percent-unquoted and returned.
    """
    check_u(string)
    ascii_bytes = string.encode('ascii')
    return urllib.unquote(ascii_bytes)