def fetchPackages(self, destination=None):
    """Downloads packages to destination directory"""
    from urlgrabber.grabber import URLGrabber
    from urlgrabber.progress import TextMeter
    from os import path, chdir

    if destination:
        chdir(destination)
    else:
        chdir(self.dest_dir)

    ### URLGrabber objects ###
    t = TextMeter()
    g = URLGrabber(progress_obj=t)

    ### Start iteration over the list of package URIs ###
    for uri in self.getPackageList():
        pisifile = uri.split("/")[-1]
        if path.exists(pisifile):
            print pisifile, "--- No Update! Skipping..."
            continue
        try:
            g.urlgrab(uri)
        except:
            print "Error while downloading file %s" % pisifile
            break
    print "Finished."
class Fetcher(object):
    def __init__(self, remote):
        self.remote = remote
        self.g = URLGrabber(prefix=self.remote)

    def fetch_to_file(self, src, dest):
        tmp = dest + '.part'
        try:
            self.g.urlgrab(src, filename=tmp, copy_local=1, user_agent='lsd-fetch/1.0')
        except URLGrabError as e:
            raise IOError(str(e))
        os.rename(tmp, dest)

    def fetch(self, src='/'):
        try:
            contents = self.g.urlread(src).strip()
        except URLGrabError as e:
            raise IOError(str(e))
        return contents

    def listdir(self, dir='/'):
        lfn = os.path.join(dir, '.listing')
        contents = self.fetch(lfn)
        return [s.strip() for s in contents.split() if s.strip() != '']

    # Pickling support -- only pickle the remote URL
    def __getstate__(self):
        return self.remote

    def __setstate__(self, remote):
        self.__init__(remote)
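# A minimal usage sketch for the Fetcher class above -- not part of the original
# snippet. The remote base URL, file names, and local paths are placeholders; it
# assumes the server publishes a '.listing' file, which listdir() relies on.
import os

fetcher = Fetcher('http://example.com/catalog/')

# Mirror every listed entry locally; fetch_to_file() writes to '<dest>.part'
# first and renames on success, so an interrupted download never leaves a
# partial file under the final name.
try:
    for name in fetcher.listdir('/'):
        fetcher.fetch_to_file(name, os.path.join('/tmp', name))
except IOError as e:
    print('fetch failed: %s' % e)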
def update_categories(username, subscriptions):
    g = URLGrabber()
    folder = BASE + '/' + username
    if not os.path.exists(folder):
        os.mkdir(folder)
    cats = get_categories(username)
    visited = set()
    for sub in subscriptions:
        if sub.name in visited:
            continue
        elif sub.name in cats:
            del cats[sub.name]
            visited.add(sub.name)
            continue
        else:
            print 'Downloading thumbnail for %s/%s' % (sub.name, sub.dname)
            ft = sub.thumbnail[-3:]
            nf = '%s/%s%s%s.%s' % (folder, sub.name, SPLITTER, sub.dname, ft)
            g.urlgrab(sub.thumbnail, filename=nf)
    for sub in cats:
        print 'Removing thumbnail for %s' % sub
        if cats[sub] is None:
            old_fn = '%s/%s*' % (folder, sub)
        else:
            old_fn = '%s/%s/%s*' % (folder, cats[sub], sub)
        for fl in glob.glob(old_fn):
            print '\t', fl
            os.remove(fl)
def downloadFile(url, filename, subdir):
    BongEnvironment.logger.info("starting download of {!s} to {!s}/{!s}".format(url, subdir, filename))

    maxBytesPerSecond = 0   # 2**19 ==> 0.5 MByte/s
                            #     0 ==> not restricted
    grabber = URLGrabber(progress_obj=None,
                         throttle=maxBytesPerSecond,
                         reget='simple',
                         retry=5,
                         retrycodes=[-1, 4, 5, 6, 7, 12, 14],
                         timeout=30,
                         user_agent='bong download manager/1.0')

    statinfo = os.stat(BongEnvironment.settings['recdir'])
    targetdir = os.path.join(BongEnvironment.settings['recdir'], subdir)
    if not os.path.isdir(targetdir):
        os.mkdir(targetdir)
        if os.name == 'posix':
            os.chmod(targetdir, 0777)
            os.chown(targetdir, statinfo.st_uid, statinfo.st_gid)

    targetfile = os.path.join(targetdir, filename)

    t1 = time.time()
    try:
        local_filename = grabber.urlgrab(url, targetfile)
    except URLGrabError, e:
        BongEnvironment.logger.warning('exception {!s} trying to download {!s} to {!s}'.format(e, url, targetfile))
        return False
def _retrievePublicKey(self, keyurl, repo=None):
    """
    Retrieve a key file
    @param keyurl: url to the key to retrieve
    Returns a list of dicts with all the keyinfo
    """
    key_installed = False

    # Go get the GPG key from the given URL
    try:
        url = yum.misc.to_utf8(keyurl)
        if repo is None:
            rawkey = urlgrabber.urlread(url, limit=9999)
        else:
            # If we have a repo, use the proxy etc. configuration for it.
            # In theory we have a global proxy config too, but meh...
            # external callers should just update.
            ug = URLGrabber(bandwidth=repo.bandwidth,
                            retry=repo.retries,
                            throttle=repo.throttle,
                            progress_obj=repo.callback,
                            proxies=repo.proxy_dict)
            ug.opts.user_agent = default_grabber.opts.user_agent
            rawkey = ug.urlread(url, text=repo.id + "/gpgkey")
    except urlgrabber.grabber.URLGrabError, e:
        raise ChannelException('GPG key retrieval failed: ' +
                               yum.i18n.to_unicode(str(e)))
def __init__(self, awsAccessKey, awsSecretKey, baseurl):
    self.logger = logging.getLogger("yum.verbose.main")
    self.logger.log(logginglevels.DEBUG_4, "s3: creating empty URLGrabber instance")
    URLGrabber.__init__(self)
    self.logger.log(logginglevels.DEBUG_4, "s3: BotoGrabber init BASE_URL=%s" % baseurl)
    if not baseurl:
        raise Exception("s3: BotoGrabberInit got blank baseurl")
    try:
        baseurl = baseurl[0]
    except:
        pass
    self.s3 = boto.connect_s3(awsAccessKey, awsSecretKey)
    self.baseurl = urlparse(baseurl)
    if hasattr(self.baseurl, 'netloc'):
        self.bucket_name = self.baseurl.netloc
        self.key_prefix = self.baseurl.path[1:]
    else:
        self.bucket_name = self.baseurl[1]
        self.key_prefix = self.baseurl[2]
    if self.key_prefix.startswith("/"):
        self.key_prefix = self.key_prefix[1:]
    m = re.match(r'(.*)\.s3.*\.amazonaws\.com', self.bucket_name)
    if m:
        self.bucket_name = m.group(1)
    if sys.stdout.isatty():
        print "%s - %s" % (self.bucket_name, self.key_prefix)
def moosWeb2dict(vehicle_host, vehicle_port):
    def moosHTML2dict(data):
        soup = BeautifulSoup(data)
        istrtd = lambda tag: tag.name == "tr" and len(tag.findAll("td")) > 0
        ret = {}
        for tr in soup.table.table.findAll(istrtd):
            tds = tr.findAll("td")
            vartag = tds[0].a
            if 0 < len(vartag) and "pending" != tds[2].contents[0]:
                key = vartag.contents[0]
                val = tds[6].contents[0]
                ret[str(key)] = str(val)
        return ret

    UG = URLGrabber()

    # fetch new page
    data = UG.urlread("http://" + vehicle_host + ":" + str(vehicle_port))

    # the served HTML has unquoted href attributes; quote them so
    # BeautifulSoup can parse the page
    p = re.compile('<A href = ([^>]*)>')
    fixed_data = p.sub(r'<A href="\1">', data)

    return moosHTML2dict(fixed_data)
class WebGrabber(Singleton):
    g = None

    def __init__(self, config={}):
        self.gotLibUrlGrabber = False
        try:
            from urlgrabber.grabber import URLGrabber
            self.gotLibUrlGrabber = True
        except ImportError:
            writeError('This script is better with URLGrabber.')
            writeError('See http://linux.duke.edu/projects/urlgrabber/')
        if not self.gotLibUrlGrabber:
            return

        if config.has_key('proxy'):
            writeInfo("URLGrabberWithProxy : %s" % config['proxy'])
            self.g = URLGrabber(proxies={'http': config['proxy']})
        else:
            writeInfo("URLGrabber sans proxy")
            self.g = URLGrabber()

    def getWebFile(self, url, dest):
        if not self.gotLibUrlGrabber:
            # Fall back to plain urllib when urlgrabber is not installed.
            import urllib
            fd = open(dest, "wb")
            fd.write(urllib.urlopen(url).read())
            fd.close()
        else:
            # urllib.urlretrieve("http://www.example.com/songs/mp3.mp3", "mp3.mp3")  # leftover sample call, disabled
            self.g.urlgrab(url, filename=dest)
def test_make_callback(self):
    """grabber.URLGrabber._make_callback() tests"""
    def cb(e):
        pass
    tup_cb = (cb, ('stuff'), {'some': 'dict'})
    g = URLGrabber()
    self.assertEquals(g._make_callback(cb), (cb, (), {}))
    self.assertEquals(g._make_callback(tup_cb), tup_cb)
def threaded_download(single_download, logfile=None):
    """
    This function is meant to run as a thread from a thread pool, one URL per
    call. It is not thread-safe on its own; synchronization is the caller's
    responsibility.

    Download location: <current directory>
    single_download --> complete download link
    logfile         --> use the default logfile if not supplied
    """
    # registering CTRL+C as UserInterrupt
    # signal.signal(signal.SIGINT, signal.SIG_IGN)
    response = "Not Downloaded"
    try:
        download_size = int((u2.urlopen(single_download)).info().getheaders("Content-Length")[0])
        print "Starting: " + str(single_download) + " :: Download target's size: %s KB" % (download_size / 1024)
        g = URLGrabber(reget='simple', retry=default_retry,
                       timeout=default_timeout, proxies=default_proxy)
        response = g.urlgrab(single_download)
        print "Completed: " + response
    except URLGrabError as ue:
        print str(ue) + "\nskipping: " + single_download
    else:
        return response  # response --> downloaded file's name, if download is successful
def __init__(self, pakfire, *args, **kwargs):
    kwargs.update({
        "quote"           : 0,
        "user_agent"      : "pakfire/%s" % PAKFIRE_VERSION,
        "ssl_verify_host" : False,
        "ssl_verify_peer" : False,
    })

    if isinstance(pakfire, _Config):
        config = pakfire
    else:
        config = pakfire.config
    self.config = config

    # Set throttle setting.
    bandwidth_throttle = config.get("downloader", "bandwidth_throttle")
    if bandwidth_throttle:
        try:
            bandwidth_throttle = int(bandwidth_throttle)
        except ValueError:
            log.error("Configuration value for bandwidth_throttle is invalid.")
            bandwidth_throttle = 0

        kwargs.update({ "throttle" : bandwidth_throttle })

    # Configure HTTP proxy.
    http_proxy = config.get("downloader", "http_proxy")
    if http_proxy:
        kwargs.update({ "proxies" : { "http" : http_proxy, "https" : http_proxy }})

    URLGrabber.__init__(self, *args, **kwargs)
def __init__(self, awsAccessKey, awsSecretKey, baseurl):
    self.logger.debug("BotoGrabber init BASE_URL=%s" % baseurl)
    URLGrabber.__init__(self)
    self._handle_baseurl(baseurl)
    self._handle_s3(awsAccessKey, awsSecretKey)
    self._dump_attributes()
    interactive_notify("%s - %s" % (self.bucket_name, self.key_prefix))
def mediaHandler(self, *args, **kwargs):
    relative = kwargs["relative"]

    ug = URLGrabber(checkfunc=kwargs["checkfunc"])
    ug.urlgrab("%s/%s" % (self.tree, kwargs["relative"]),
               kwargs["local"], text=kwargs["text"],
               range=kwargs["range"], copy_local=1)
    return kwargs["local"]
def _retrievePublicKey(self, keyurl, repo=None):
    """
    Retrieve a key file
    @param keyurl: url to the key to retrieve
    Returns a list of dicts with all the keyinfo
    """
    key_installed = False

    # Go get the GPG key from the given URL
    try:
        url = yum.misc.to_utf8(keyurl)
        if repo is None:
            rawkey = urlgrabber.urlread(url, limit=9999)
        else:
            # If we have a repo, use the proxy etc. configuration for it.
            # In theory we have a global proxy config too, but meh...
            # external callers should just update.
            ug = URLGrabber(bandwidth=repo.bandwidth,
                            retry=repo.retries,
                            throttle=repo.throttle,
                            progress_obj=repo.callback,
                            proxies=repo.proxy_dict)
            ug.opts.user_agent = default_grabber.opts.user_agent
            rawkey = ug.urlread(url, text=repo.id + "/gpgkey")
    except urlgrabber.grabber.URLGrabError as e:
        raise ChannelException('GPG key retrieval failed: ' +
                               yum.i18n.to_unicode(str(e)))

    # Parse the key
    try:
        keys_info = yum.misc.getgpgkeyinfo(rawkey, multiple=True)
    except ValueError as err:
        raise ChannelException(
            'GPG key information retrieval failed: {}'.format(err))
    except Exception as err:
        raise ChannelException(
            'Unhandled GPG key failure occurred: {}'.format(err))

    keys = []
    for keyinfo in keys_info:
        thiskey = {}
        for info in ('keyid', 'timestamp', 'userid', 'fingerprint', 'raw_key'):
            if not keyinfo.has_key(info):
                raise ChannelException(
                    'GPG key parsing failed: key does not have value %s' % info)
            thiskey[info] = keyinfo[info]
        thiskey['keyid'] = str(
            "%016x" % (thiskey['keyid'] & 0xffffffffffffffff)).upper()
        thiskey['hexkeyid'] = yum.misc.keyIdToRPMVer(keyinfo['keyid']).upper()
        keys.append(thiskey)

    return keys
class ProxyHTTPAuthTests(BaseProxyTests):
    def setUp(self):
        self.url = ref_http
        if not self.have_proxy():
            self.skip()
        self.g = URLGrabber()

    def test_good_password(self):
        self.g.urlopen(self.url, proxies=self.good_proxies)

    def test_bad_password(self):
        self.assertRaises(URLGrabError, self.g.urlopen,
                          self.url, proxies=self.bad_proxies)
def _getTreeInfo(self, url, proxy_url, sslverify):
    """ Retrieve treeinfo and return the path to the local file.

        :param url: url of the repo
        :type url: string
        :param proxy_url: optional full proxy URL, or ""
        :type proxy_url: string
        :param sslverify: True if the SSL certificate should be verified
        :type sslverify: bool
        :returns: Path to the retrieved .treeinfo file or None
        :rtype: string or None
    """
    if not url:
        return None

    log.debug("retrieving treeinfo from %s (proxy: %s ; sslverify: %s)",
              url, proxy_url, sslverify)

    ugopts = {"ssl_verify_peer": sslverify,
              "ssl_verify_host": sslverify}

    proxies = {}
    if proxy_url:
        try:
            proxy = ProxyString(proxy_url)
            proxies = {"http": proxy.url,
                       "https": proxy.url}
        except ProxyStringError as e:
            log.info("Failed to parse proxy for _getTreeInfo %s: %s",
                     proxy_url, e)

    ug = URLGrabber()
    try:
        treeinfo = ug.urlgrab("%s/.treeinfo" % url, "/tmp/.treeinfo",
                              copy_local=True, proxies=proxies, **ugopts)
    except URLGrabError as e:
        try:
            treeinfo = ug.urlgrab("%s/treeinfo" % url, "/tmp/.treeinfo",
                                  copy_local=True, proxies=proxies, **ugopts)
        except URLGrabError as e:
            log.info("Error downloading treeinfo: %s", e)
            treeinfo = None

    return treeinfo
def urlgrab(self, url, *args, **kwargs):
    self.check_offline_mode()

    # This is for older versions of urlgrabber which are packaged in Debian
    # and Ubuntu and cannot handle filenames as a normal Python string but
    # need a unicode string.
    return URLGrabber.urlgrab(self, url.encode("utf-8"), *args, **kwargs)
def setUp(self):
    def server():
        import socket
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(('localhost', 2000))
        s.listen(1)
        while 1:
            c, a = s.accept()
            while not c.recv(4096).endswith('\r\n\r\n'):
                pass
            c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
            c.close()

    import thread
    self.reply = 503, "Busy"
    thread.start_new_thread(server, ())

    def failure(obj):
        self.code = getattr(obj.exception, 'code', None)
        return {}

    self.g = URLGrabber()
    self.mg = MirrorGroup(self.g, ['http://localhost:2000/'],
                          failure_callback=failure)
def download(url, filename=None, associated_task=None, web_proxy=None):
    if associated_task:
        associated_task.description = _("Downloading %s") % os.path.basename(url)
        associated_task.unit = "KB"
    log.debug("downloading %s > %s" % (url, filename))
    progress_obj = DownloadProgress(associated_task)
    if web_proxy:
        web_proxy = {'http': web_proxy}
    urlgrabber = URLGrabber(
        reget='simple',
        proxies=web_proxy,
        progress_obj=progress_obj)
    if os.path.isdir(filename):
        basename = os.path.basename(url)
        filename = os.path.join(filename, basename)
    filename = urlgrabber.urlgrab(url, filename=filename)
    return filename
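# Hedged example call for download() above -- not part of the original snippet.
# The URL, proxy, and target directory are placeholders; because '/tmp' is a
# directory, the function derives the local basename from the URL itself.
iso_path = download('http://example.com/images/boot.iso',
                    filename='/tmp',
                    web_proxy='http://proxy.example.com:3128')
print(iso_path)  # e.g. /tmp/boot.iso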
def __init__(self, config={}):
    self.gotLibUrlGrabber = False
    try:
        from urlgrabber.grabber import URLGrabber
        self.gotLibUrlGrabber = True
    except ImportError:
        writeError('This script is better with URLGrabber.')
        writeError('See http://linux.duke.edu/projects/urlgrabber/')
    if not self.gotLibUrlGrabber:
        return

    if config.has_key('proxy'):
        writeInfo("URLGrabberWithProxy : %s" % config['proxy'])
        self.g = URLGrabber(proxies={'http': config['proxy']})
    else:
        writeInfo("URLGrabber sans proxy")
        self.g = URLGrabber()
def setUp(self):
    self.g = URLGrabber()
    fullmirrors = [base_mirror_url + m + '/' for m in (bad_mirrors + good_mirrors)]
    if hasattr(urlgrabber.grabber, '_TH'):
        # test assumes mirrors are not re-ordered
        urlgrabber.grabber._TH.hosts.clear()
    self.mg = MirrorGroup(self.g, fullmirrors)
def setUp(self):
    self.url = ref_ftp
    if not self.have_proxy():
        self.skip()
    try:
        fo = urllib.request.urlopen(self.url).close()
    except IOError:
        self.skip()
    self.g = URLGrabber()
def validConnection(szURL, szVersion, bsupgrade):
    try:
        upgrade_tarball = "nsg-upgrade.tar.gz"
        baseURL = re.sub(r'/[^/]+$', '', szURL)
        bootstrap_url = baseURL + "/nsg-upgrade/" + upgrade_tarball
        grabber = URLGrabber(timeout=30.0)
        bsupgrade = grabber.urlgrab(bootstrap_url, "/tmp/" + upgrade_tarball)
    except URLGrabError, e:
        if e[0] == 4:
            aszHost = szURL.split("/")
            return "ERROR Connection check failed: Host %s is not responding" % (aszHost[2])
        elif e[0] == 14:
            return "ERROR Connection check failed: nsg-upgrade directory was not found in url %s" % szURL
        else:
            return "ERROR Checking Connection: %d %s" % (e[0], e[1])
        return "ERROR " + e.strerror
def download_file(url, dirname):
    """
    Download @url and save to @dirname.
    @return - filename of saved file
    """
    # pycurl is picky about Unicode URLs, see rhbz #515797
    url = url.encode('ascii', 'ignore')

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    basename = os.path.basename(url)
    filename = "%s/%s" % (dirname, basename)

    if os.path.exists(filename):
        raise Exception("File %s already exists! Not downloading!" % filename)

    g = URLGrabber(reget=None)
    local_filename = g.urlgrab(url, filename)
    return local_filename
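# Hedged example call for download_file() above -- not part of the original
# snippet; the URL and directory are placeholders. With reget=None an already
# existing target file is never resumed: the function raises instead.
try:
    path = download_file(u"http://example.com/pub/some-package.rpm", "/tmp/downloads")
    print("saved to %s" % path)
except Exception as e:
    print("not downloaded: %s" % e)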
def test_parse_url_with_prefix(self):
    """grabber.URLParser.parse() with opts.prefix"""
    base = 'http://foo.com/dir'
    bases = [base, base + '/']
    filename = 'bar/baz'
    target = base + '/' + filename

    for b in bases:
        g = URLGrabber(prefix=b)
        (url, parts) = g.opts.urlparser.parse(filename, g.opts)
        self.assertEquals(url, target)
def run(self):
    # Check if file exists
    if os.path.isfile(self.file):
        os.chmod(self.file, stat.S_IWUSR)
        os.remove(self.file)

    ## Init url/path pointers
    #response = urllib2.urlopen(self.url)
    #total_size = response.info().getheader('Content-Length').strip()
    #self.total_size = int(total_size)

    # freespace
    #freespace = get_free_space(self.app, path)

    # check if enough freespace
    #if self.freespace < total_size and self.freespace != 0:
    #    self.app.gui.ShowDialogNotification('Not enough freespace to download the item')
    #    self.active = False
    #    return

    self.app.gui.SetVisible(4000, True)
    progress = TextMeter(self.app)

    try:
        Log(self.app, 'Download started')
        g = URLGrabber(reget='simple')
        g.urlgrab(self.url, filename=self.file, reget='simple',
                  progress_obj=progress, text=self.filename)

        # Create info file as json
        json_dumps(self.infodata, self.infopath)
        self.app.gui.ShowDialogNotification('Download Complete')
    except:
        Log(self.app, traceback.format_exc())
        self.app.gui.ShowDialogNotification('Error during download')

    self.app.gui.SetVisible(4000, False)
    self.active = False
    Log(self.app, 'Download finished')
def chunk_get(process_no, dest_dir, file_url, file_size):
    file_name = file_url.split('/')[-1]
    url = "ftp://localhost:2121/" + file_url
    file_path = dest_dir + file_name + ".part" + str(process_no)
    file_dir = file_url.rsplit('/', 1)[0]
    try:
        if not os.path.isfile(file_path):
            # no partial chunk on disk yet -- handled by the except branch below
            raise Exception('')
        else:
            # a partial chunk exists: fetch more bytes into a temp file and
            # append them to the existing .part file
            g = URLGrabber(reget="simple")
            start_byte = os.stat(file_path).st_size
            if start_byte < process_no * file_size / 5:
                if process_no == 4:
                    end_byte = file_size
                else:
                    end_byte = process_no * file_size / 5
                file_temp_path = file_path + ".tmp"
                local_file = g.urlgrab(url, filename=file_temp_path,
                                       range=(start_byte, end_byte), retry=0)
                file(file_path, 'ab').write(file(file_temp_path, 'rb').read())
                os.remove(file_temp_path)
    except:
        # fresh download of this chunk: one fifth of the file,
        # with the last chunk (process_no == 4) taking the remainder
        g = URLGrabber(reget="simple")
        start_byte = process_no * file_size / 5
        if process_no == 4:
            end_byte = file_size
        else:
            end_byte = start_byte + file_size / 5
        local_file = g.urlgrab(url, filename=file_path,
                               range=(start_byte, end_byte), retry=0)
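# Hypothetical driver for chunk_get() above -- an assumption, not part of the
# original snippet. It launches one process per fifth of the file and then
# concatenates the resulting .part0..part4 pieces; names and paths are placeholders.
from multiprocessing import Process

def parallel_get(dest_dir, file_url, file_size):
    workers = [Process(target=chunk_get, args=(i, dest_dir, file_url, file_size))
               for i in range(5)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()

    # Stitch the chunks back together in order.
    file_name = file_url.split('/')[-1]
    with open(dest_dir + file_name, 'wb') as out:
        for i in range(5):
            with open(dest_dir + file_name + ".part" + str(i), 'rb') as part:
                out.write(part.read())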
def __init__(self, awsAccessKey, awsSecretKey, baseurl):
    if self.DEBUG:
        print "creating empty URLGrabber instance"
    URLGrabber.__init__(self)
    if self.DEBUG:
        print "BotoGrabber init BASE_URL=%s" % baseurl
    if not baseurl:
        raise Exception("BotoGrabberInit got blank baseurl")
    try:
        baseurl = baseurl[0]
    except:
        pass
    self.s3 = boto.connect_s3(awsAccessKey, awsSecretKey)
    self.baseurl = urlparse(baseurl)
    if hasattr(self.baseurl, 'netloc'):
        self.bucket_name = self.baseurl.netloc
        self.key_prefix = self.baseurl.path[1:]
    else:
        self.bucket_name = self.baseurl[1]
        self.key_prefix = self.baseurl[2]
    m = re.match(r'(.*)\.s3.*\.amazonaws\.com', self.bucket_name)
    if m:
        self.bucket_name = m.group(1)
    if sys.stdout.isatty():
        print "%s - %s" % (self.bucket_name, self.key_prefix)
def _preInstall_url_image(self):
    """ Download the image using urlgrabber """
    # Setup urlgrabber and call back to download image to sysroot
    progress = URLGrabberProgress()
    ugopts = {"ssl_verify_peer": not self.data.method.noverifyssl,
              "ssl_verify_host": not self.data.method.noverifyssl,
              "proxies": self._proxies,
              "progress_obj": progress,
              "copy_local": True}

    error = None
    try:
        ug = URLGrabber()
        ug.urlgrab(self.data.method.url, self.image_path, **ugopts)
    except URLGrabError as e:
        log.error("Error downloading liveimg: %s", e)
        error = e
    else:
        if not os.path.exists(self.image_path):
            error = "Failed to download %s, file doesn't exist" % self.data.method.url
            log.error(error)
def testKeywordArgs(self):
    """grabber.URLGrabber.__init__() **kwargs handling.

    This is a simple test that just passes some arbitrary values into the
    URLGrabber constructor and checks that they've been set properly.
    """
    opener = urllib2.OpenerDirector()
    g = URLGrabber(progress_obj=self.meter,
                   throttle=0.9,
                   bandwidth=20,
                   retry=20,
                   retrycodes=[5, 6, 7],
                   copy_local=1,
                   close_connection=1,
                   user_agent='test ua/1.0',
                   proxies={'http': 'http://www.proxy.com:9090'},
                   opener=opener)
    opts = g.opts
    self.assertEquals(opts.progress_obj, self.meter)
    self.assertEquals(opts.throttle, 0.9)
    self.assertEquals(opts.bandwidth, 20)
    self.assertEquals(opts.retry, 20)
    self.assertEquals(opts.retrycodes, [5, 6, 7])
    self.assertEquals(opts.copy_local, 1)
    self.assertEquals(opts.close_connection, 1)
    self.assertEquals(opts.user_agent, 'test ua/1.0')
    self.assertEquals(opts.proxies, {'http': 'http://www.proxy.com:9090'})
    self.assertEquals(opts.opener, opener)

    nopts = grabber.URLGrabberOptions(delegate=opts,
                                      throttle=0.5,
                                      copy_local=0)
    self.assertEquals(nopts.progress_obj, self.meter)
    self.assertEquals(nopts.throttle, 0.5)
    self.assertEquals(nopts.bandwidth, 20)
    self.assertEquals(nopts.retry, 20)
    self.assertEquals(nopts.retrycodes, [5, 6, 7])
    self.assertEquals(nopts.copy_local, 0)
    self.assertEquals(nopts.close_connection, 1)
    self.assertEquals(nopts.user_agent, 'test ua/1.0')
    self.assertEquals(nopts.proxies, {'http': 'http://www.proxy.com:9090'})
    nopts.opener = None
    self.assertEquals(nopts.opener, None)
def _test_url(self, urllist):
    g = URLGrabber()
    try:
        quote = urllist[3]
    except IndexError:
        quote = None
    g.opts.quote = quote
    (url, parts) = g.opts.urlparser.parse(urllist[0], g.opts)

    if 1:
        self.assertEquals(url, urllist[1])
        self.assertEquals(parts, urllist[2])
    else:
        if url == urllist[1] and parts == urllist[2]:
            print('OK: %s' % urllist[0])
        else:
            print('ERROR: %s' % urllist[0])
            print(' ' + urllist[1])
            print(' ' + url)
            print(' ' + urllist[2])
            print(' ' + parts)
def setUp(self):
    # start the server
    self.exit = False

    def server():
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(LOCALPORT)
        s.listen(1)
        while 1:
            c, a = s.accept()
            if self.exit:
                c.close()
                break
            ending_compat = '\r\n\r\n' if not six.PY3 else b'\r\n\r\n'
            while not c.recv(4096).endswith(ending_compat):
                pass
            http_compat = 'HTTP/1.1 %d %s\r\n' % self.reply
            c.sendall(http_compat if not six.PY3 else http_compat.encode('utf-8'))
            if self.content is not None:
                cont_length_compat = 'Content-Length: %d\r\n\r\n' % len(self.content)
                c.sendall(cont_length_compat if not six.PY3
                          else cont_length_compat.encode('utf-8'))
                c.sendall(self.content if not six.PY3
                          else self.content.encode('utf-8'))
            c.close()
        s.close()
        self.exit = False

    thread.start_new_thread(server, ())

    # create grabber and mirror group objects
    def failure(obj):
        self.code = getattr(obj.exception, 'code', None)
        return {}

    self.g = URLGrabber()
    self.mg = MirrorGroup(self.g, ['http://%s:%d' % LOCALPORT],
                          failure_callback=failure)
def setUp(self):
    # start the server
    self.exit = False
    self.process = lambda data: None

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind(('localhost', 0))
    s.listen(1)
    self.port = s.getsockname()[1]

    def server():
        while True:
            c, a = s.accept()
            if self.exit:
                c.close()
                break
            data = b''
            while not data.endswith(b'\r\n\r\n'):
                data = c.recv(4096)
            self.process(data)
            c.sendall(b'HTTP/1.1 %d %s\r\n' % self.reply)
            if self.content is not None:
                c.sendall(b'Content-Length: %d\r\n\r\n' % len(self.content))
                c.sendall(self.content)
            c.close()
        s.close()
        self.exit = False

    self.thread = threading.Thread(target=server)
    self.thread.start()

    # create grabber and mirror group objects
    def failure(obj):
        self.code = getattr(obj.exception, 'code', None)
        return {}

    self.g = URLGrabber()
    self.mg = MirrorGroup(self.g, ['http://localhost:%d' % self.port],
                          failure_callback=failure)
def _test_url(self, urllist):
    g = URLGrabber()
    try:
        quote = urllist[3]
    except IndexError:
        quote = None
    g.opts.quote = quote

    url = urllist[0].encode('utf8')
    expected_url = urllist[1].encode('utf8')
    expected_parts = tuple(part.encode('utf8') for part in urllist[2])
    (url, parts) = g.opts.urlparser.parse(url, g.opts)

    if 1:
        self.assertEqual(url, expected_url)
        self.assertEqual(parts, expected_parts)
    else:
        if url == urllist[1] and parts == urllist[2]:
            print('OK: %s' % urllist[0])
        else:
            print('ERROR: %s' % urllist[0])
            print(' ' + urllist[1])
            print(' ' + url)
            print(' ' + urllist[2])
            print(' ' + parts)
def __init__(self, progress_obj=None):
    # we cannot use super() because we still have to support
    # older urlgrabber versions where URLGrabber is an old-style class
    URLGrabber.__init__(self)
    self.progress_obj = progress_obj
    help='Extract files after downloading (to subdirectories) [default: no]',
    default=False
)
parser.add_option(
    '-k', '--keep-after-extract',
    action='store_true',
    dest='keep_after_extract',
    help='Keep files after extracting [default: no]',
    default=False
)
(options, args) = parser.parse_args()

if len(args) == 0:
    parser.error('One or more bundle keys are required')

progress_printer = ProgressPrint()
grabber = URLGrabber(prefix=options.gmb_url, progress_obj=progress_printer)

# Download the albums for each key
for key in args:
    # Get download page and grab all download URLs
    download_page_url = urljoin(options.gmb_url, '/download?key=%s' % key)
    download_page = urlopen(download_page_url)
    html = download_page.read()
    soup = BeautifulSoup(html, 'lxml')
    download_page.close()

    # Find all download links
    regex_download_link = re.compile(r'/download\?.*')
    download_links = [x['href'] for x in soup.find_all('a', href=regex_download_link)]
    album_urls = merge_album_links(download_links)
def get_file_if_size_diff(url, d):
    fn = url.split('/')[-1]
    out_fnp = os.path.join(d, fn)
    g = URLGrabber(reget="simple")
    locFnp = g.urlgrab(url, out_fnp)
    return locFnp
def setUp(self):
    self.url = ref_http
    if not self.have_proxy():
        self.skip()
    self.g = URLGrabber()
        if config.has_key('proxy'):
            writeInfo("URLGrabberWithProxy : %s" % config['proxy'])
            self.g = URLGrabber(proxies={'http': config['proxy']})
        else:
            writeInfo("URLGrabber sans proxy")
            self.g = URLGrabber()

    def getWebFile(self, url, dest):
        if not self.gotLibUrlGrabber:
            # Fall back to plain urllib when urlgrabber is not installed.
            import urllib
            fd = open(dest, "wb")
            fd.write(urllib.urlopen(url).read())
            fd.close()
        else:
            # urllib.urlretrieve("http://www.example.com/songs/mp3.mp3", "mp3.mp3")  # leftover sample call, disabled
            self.g.urlgrab(url, filename=dest)


if __name__ == '__main__':
    g = URLGrabber(proxies={'http': 'http://proxy.free.fr:3128'})
    url = 'http://www.advanscene.com/offline/datas/ADVANsCEne_NDS.zip'
    g.urlgrab(url, filename='moncul.zip')

    g1 = WebGrabber(config={'proxy': 'http://proxy.free.fr:3128'})
    g2 = WebGrabber()
    print "g1 is g2 %s" % (g1 is g2)
    g1.getWebFile('http://www.advanscene.com/offline/datas/ADVANsCEne_NDS.zip', 'moncul.zip')
    print "Done."
            else:
                self.totalRead = totalRead
                self.lastData = time()
                oldCount = self.count
                self.count = int(totalRead // self.QUANTUM) + 1
                self.progress(("=" if self.started else "+") * max(0, self.count - oldCount), suffix)
                self.started = True

        def end(self, totalRead):
            self.update(totalRead, "OK")

    progressIndicator = ProgressIndicator()

    grabber = URLGrabber(
        reget="simple",
        timeout=self.timeout,
        progress_obj=progressIndicator,
        user_agent=userAgent,
        http_headers=tuple((str(cookie["name"]), str(cookie["value"])) for cookie in cookies),
    )

    try:
        grabber.urlgrab(link, filename=targetFileName)
        downloadOK = True
    except URLGrabError, e:
        self.errors += 1
        self.logger.error("Download failed: %s", e)
    except KeyboardInterrupt:
        self.errors += 1
        self.logger.error("Download interrupted")

    if downloadOK:
        localSize = getFileSize(targetFileName)
        if not localSize:
def __init__(self, remote):
    self.remote = remote
    self.g = URLGrabber(prefix=self.remote)
def preInstall(self, *args, **kwargs):
    """ Download image and loopback mount it.

        This is called after partitioning is setup, we now have space to
        grab the image. Download it to ROOT_PATH and provide feedback
        during the download (using urlgrabber callback).
    """
    # Setup urlgrabber and call back to download image to ROOT_PATH
    progress = URLGrabberProgress()
    ugopts = {"ssl_verify_peer": not self.data.method.noverifyssl,
              "ssl_verify_host": not self.data.method.noverifyssl,
              "proxies" : self._proxies,
              "progress_obj" : progress,
              "copy_local" : True}

    error = None
    try:
        ug = URLGrabber()
        ug.urlgrab(self.data.method.url, self.image_path, **ugopts)
    except URLGrabError as e:
        log.error("Error downloading liveimg: %s", e)
        error = e
    else:
        if not os.path.exists(self.image_path):
            error = "Failed to download %s, file doesn't exist" % self.data.method.url
            log.error(error)

    if error:
        exn = PayloadInstallError(str(error))
        if errorHandler.cb(exn) == ERROR_RAISE:
            raise exn

    # Used to make install progress % look correct
    self._adj_size = os.stat(self.image_path)[stat.ST_SIZE]

    if self.data.method.checksum:
        progressQ.send_message(_("Checking image checksum"))
        sha256 = hashlib.sha256()
        with open(self.image_path, "rb") as f:
            while True:
                data = f.read(1024*1024)
                if not data:
                    break
                sha256.update(data)
        filesum = sha256.hexdigest()
        log.debug("sha256 of %s is %s", self.data.method.url, filesum)

        if lowerASCII(self.data.method.checksum) != filesum:
            log.error("%s does not match checksum.", self.data.method.checksum)
            exn = PayloadInstallError("Checksum of image does not match")
            if errorHandler.cb(exn) == ERROR_RAISE:
                raise exn

    # Mount the image and check to see if it is a LiveOS/*.img
    # style squashfs image. If so, move it to IMAGE_DIR and mount the real
    # root image on INSTALL_TREE
    blivet.util.mount(self.image_path, INSTALL_TREE, fstype="auto", options="ro")
    if os.path.exists(INSTALL_TREE + "/LiveOS"):
        # Find the first .img in the directory and mount that on INSTALL_TREE
        img_files = glob.glob(INSTALL_TREE + "/LiveOS/*.img")
        if img_files:
            img_file = os.path.basename(sorted(img_files)[0])

            # move the mount to IMAGE_DIR
            os.makedirs(IMAGE_DIR, 0755)
            # work around inability to move shared filesystems
            iutil.execWithRedirect("mount", ["--make-rprivate", "/"])
            iutil.execWithRedirect("mount", ["--move", INSTALL_TREE, IMAGE_DIR])
            blivet.util.mount(IMAGE_DIR + "/LiveOS/" + img_file, INSTALL_TREE,
                              fstype="auto", options="ro")
def __init__(self, maxthreads=5, **kwargs):
    self.maxthreads = maxthreads
    self.grabber = URLGrabber(**kwargs)
    self.queue = []
    self.threads = []
    self.sem = Semaphore()