def fetchPackages(self, destination=None):
    """Downloads packages to destination directory"""
    from urlgrabber.grabber import URLGrabber
    from urlgrabber.progress import TextMeter
    from os import path, chdir

    if destination:
        chdir(destination)
    else:
        chdir(self.dest_dir)

    ### URLGrabber objects ###
    t = TextMeter()
    g = URLGrabber(progress_obj=t)

    ### Start iteration over the list of package URIs ###
    for uri in self.getPackageList():
        pisifile = uri.split("/")[-1]
        if path.exists(pisifile):
            print pisifile, "--- No Update! Skipping..."
            continue
        try:
            g.urlgrab(uri)
        except:
            print "Error while downloading file %s" % pisifile
            break
    print "Finished."
class Fetcher(object):
    def __init__(self, remote):
        self.remote = remote
        self.g = URLGrabber(prefix=self.remote)

    def fetch_to_file(self, src, dest):
        tmp = dest + '.part'
        try:
            self.g.urlgrab(src, filename=tmp, copy_local=1, user_agent='lsd-fetch/1.0')
        except URLGrabError as e:
            raise IOError(str(e))
        os.rename(tmp, dest)

    def fetch(self, src='/'):
        try:
            contents = self.g.urlread(src).strip()
        except URLGrabError as e:
            raise IOError(str(e))
        return contents

    def listdir(self, dir='/'):
        lfn = os.path.join(dir, '.listing')
        contents = self.fetch(lfn)
        return [s.strip() for s in contents.split() if s.strip() != '']

    # Pickling support -- only pickle the remote URL
    def __getstate__(self):
        return self.remote

    def __setstate__(self, remote):
        self.__init__(remote)
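# A minimal usage sketch for the Fetcher class above -- not part of the original
# snippet. The remote base URL, file names, and local paths are placeholders; it
# assumes the server publishes a '.listing' file, which listdir() relies on.
import os

fetcher = Fetcher('http://example.com/catalog/')

# Mirror every listed entry locally; fetch_to_file() writes to '<dest>.part'
# first and renames on success, so an interrupted download never leaves a
# partial file under the final name.
try:
    for name in fetcher.listdir('/'):
        fetcher.fetch_to_file(name, os.path.join('/tmp', name))
except IOError as e:
    print('fetch failed: %s' % e)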
def update_categories(username, subscriptions):
    g = URLGrabber()
    folder = BASE + '/' + username
    if not os.path.exists(folder):
        os.mkdir(folder)
    cats = get_categories(username)
    visited = set()
    for sub in subscriptions:
        if sub.name in visited:
            continue
        elif sub.name in cats:
            del cats[sub.name]
            visited.add(sub.name)
            continue
        else:
            print 'Downloading thumbnail for %s/%s' % (sub.name, sub.dname)
            ft = sub.thumbnail[-3:]
            nf = '%s/%s%s%s.%s' % (folder, sub.name, SPLITTER, sub.dname, ft)
            g.urlgrab(sub.thumbnail, filename=nf)
    for sub in cats:
        print 'Removing thumbnail for %s' % sub
        if cats[sub] is None:
            old_fn = '%s/%s*' % (folder, sub)
        else:
            old_fn = '%s/%s/%s*' % (folder, cats[sub], sub)
        for fl in glob.glob(old_fn):
            print '\t', fl
            os.remove(fl)
def downloadFile(url, filename, subdir):
    BongEnvironment.logger.info("starting download of {!s} to {!s}/{!s}".format(url, subdir, filename))

    maxBytesPerSecond = 0   # 2**19 ==> 0.5 MByte/s
                            #     0 ==> not restricted
    grabber = URLGrabber(progress_obj=None,
                         throttle=maxBytesPerSecond,
                         reget='simple',
                         retry=5,
                         retrycodes=[-1, 4, 5, 6, 7, 12, 14],
                         timeout=30,
                         user_agent='bong download manager/1.0')

    statinfo = os.stat(BongEnvironment.settings['recdir'])
    targetdir = os.path.join(BongEnvironment.settings['recdir'], subdir)
    if not os.path.isdir(targetdir):
        os.mkdir(targetdir)
        if os.name == 'posix':
            os.chmod(targetdir, 0777)
            os.chown(targetdir, statinfo.st_uid, statinfo.st_gid)

    targetfile = os.path.join(targetdir, filename)

    t1 = time.time()
    try:
        local_filename = grabber.urlgrab(url, targetfile)
    except URLGrabError, e:
        BongEnvironment.logger.warning('exception {!s} trying to download {!s} to {!s}'.format(e, url, targetfile))
        return False
def _retrievePublicKey(self, keyurl, repo=None):
    """
    Retrieve a key file
    @param keyurl: url to the key to retrieve
    Returns a list of dicts with all the keyinfo
    """
    key_installed = False

    # Go get the GPG key from the given URL
    try:
        url = yum.misc.to_utf8(keyurl)
        if repo is None:
            rawkey = urlgrabber.urlread(url, limit=9999)
        else:
            # If we have a repo, use the proxy etc. configuration for it.
            # In theory we have a global proxy config too, but meh...
            # external callers should just update.
            ug = URLGrabber(bandwidth=repo.bandwidth,
                            retry=repo.retries,
                            throttle=repo.throttle,
                            progress_obj=repo.callback,
                            proxies=repo.proxy_dict)
            ug.opts.user_agent = default_grabber.opts.user_agent
            rawkey = ug.urlread(url, text=repo.id + "/gpgkey")
    except urlgrabber.grabber.URLGrabError, e:
        raise ChannelException('GPG key retrieval failed: ' +
                               yum.i18n.to_unicode(str(e)))
def __init__(self, awsAccessKey, awsSecretKey, baseurl):
    self.logger = logging.getLogger("yum.verbose.main")
    self.logger.log(logginglevels.DEBUG_4, "s3: creating empty URLGrabber instance")
    URLGrabber.__init__(self)
    self.logger.log(logginglevels.DEBUG_4, "s3: BotoGrabber init BASE_URL=%s" % baseurl)
    if not baseurl:
        raise Exception("s3: BotoGrabberInit got blank baseurl")
    try:
        baseurl = baseurl[0]
    except:
        pass
    self.s3 = boto.connect_s3(awsAccessKey, awsSecretKey)
    self.baseurl = urlparse(baseurl)
    if hasattr(self.baseurl, 'netloc'):
        self.bucket_name = self.baseurl.netloc
        self.key_prefix = self.baseurl.path[1:]
    else:
        self.bucket_name = self.baseurl[1]
        self.key_prefix = self.baseurl[2]
    if self.key_prefix.startswith("/"):
        self.key_prefix = self.key_prefix[1:]
    m = re.match(r'(.*)\.s3.*\.amazonaws\.com', self.bucket_name)
    if m:
        self.bucket_name = m.group(1)
    if sys.stdout.isatty():
        print "%s - %s" % (self.bucket_name, self.key_prefix)
def moosWeb2dict(vehicle_host, vehicle_port):
    def moosHTML2dict(data):
        soup = BeautifulSoup(data)
        istrtd = lambda tag: tag.name == "tr" and len(tag.findAll("td")) > 0
        ret = {}
        for tr in soup.table.table.findAll(istrtd):
            tds = tr.findAll("td")
            vartag = tds[0].a
            if 0 < len(vartag) and "pending" != tds[2].contents[0]:
                key = vartag.contents[0]
                val = tds[6].contents[0]
                ret[str(key)] = str(val)
        return ret

    UG = URLGrabber()

    # fetch new page
    data = UG.urlread("http://" + vehicle_host + ":" + str(vehicle_port))

    # the served HTML has unquoted href attributes; quote them so
    # BeautifulSoup can parse the page
    p = re.compile('<A href = ([^>]*)>')
    fixed_data = p.sub(r'<A href="\1">', data)

    return moosHTML2dict(fixed_data)
class WebGrabber(Singleton):
    g = None

    def __init__(self, config={}):
        self.gotLibUrlGrabber = False
        try:
            from urlgrabber.grabber import URLGrabber
            self.gotLibUrlGrabber = True
        except ImportError:
            writeError('This script is better with URLGrabber.')
            writeError('See http://linux.duke.edu/projects/urlgrabber/')
        if not self.gotLibUrlGrabber:
            return

        if config.has_key('proxy'):
            writeInfo("URLGrabberWithProxy : %s" % config['proxy'])
            self.g = URLGrabber(proxies={'http': config['proxy']})
        else:
            writeInfo("URLGrabber sans proxy")
            self.g = URLGrabber()

    def getWebFile(self, url, dest):
        if not self.gotLibUrlGrabber:
            # Fall back to plain urllib when urlgrabber is not installed.
            import urllib
            fd = open(dest, "wb")
            fd.write(urllib.urlopen(url).read())
            fd.close()
        else:
            # urllib.urlretrieve("http://www.example.com/songs/mp3.mp3", "mp3.mp3")  # leftover sample call, disabled
            self.g.urlgrab(url, filename=dest)
def test_make_callback(self):
    """grabber.URLGrabber._make_callback() tests"""
    def cb(e):
        pass
    tup_cb = (cb, ('stuff'), {'some': 'dict'})
    g = URLGrabber()
    self.assertEquals(g._make_callback(cb), (cb, (), {}))
    self.assertEquals(g._make_callback(tup_cb), tup_cb)
def threaded_download(single_download, logfile=None):
    """
    This function is meant to run as a thread from a thread pool, one URL per
    call. It is not thread-safe on its own; synchronization is the caller's
    responsibility.

    Download location: <current directory>
    single_download --> complete download link
    logfile         --> use the default logfile if not supplied
    """
    # registering CTRL+C as UserInterrupt
    # signal.signal(signal.SIGINT, signal.SIG_IGN)
    response = "Not Downloaded"
    try:
        download_size = int((u2.urlopen(single_download)).info().getheaders("Content-Length")[0])
        print "Starting: " + str(single_download) + " :: Download target's size: %s KB" % (download_size / 1024)
        g = URLGrabber(reget='simple', retry=default_retry,
                       timeout=default_timeout, proxies=default_proxy)
        response = g.urlgrab(single_download)
        print "Completed: " + response
    except URLGrabError as ue:
        print str(ue) + "\nskipping: " + single_download
    else:
        return response  # response --> downloaded file's name, if download is successful
def __init__(self, pakfire, *args, **kwargs):
    kwargs.update({
        "quote"           : 0,
        "user_agent"      : "pakfire/%s" % PAKFIRE_VERSION,
        "ssl_verify_host" : False,
        "ssl_verify_peer" : False,
    })

    if isinstance(pakfire, _Config):
        config = pakfire
    else:
        config = pakfire.config
    self.config = config

    # Set throttle setting.
    bandwidth_throttle = config.get("downloader", "bandwidth_throttle")
    if bandwidth_throttle:
        try:
            bandwidth_throttle = int(bandwidth_throttle)
        except ValueError:
            log.error("Configuration value for bandwidth_throttle is invalid.")
            bandwidth_throttle = 0

        kwargs.update({ "throttle" : bandwidth_throttle })

    # Configure HTTP proxy.
    http_proxy = config.get("downloader", "http_proxy")
    if http_proxy:
        kwargs.update({ "proxies" : { "http" : http_proxy, "https" : http_proxy }})

    URLGrabber.__init__(self, *args, **kwargs)
def __init__(self, awsAccessKey, awsSecretKey, baseurl):
    self.logger.debug("BotoGrabber init BASE_URL=%s" % baseurl)
    URLGrabber.__init__(self)
    self._handle_baseurl(baseurl)
    self._handle_s3(awsAccessKey, awsSecretKey)
    self._dump_attributes()
    interactive_notify("%s - %s" % (self.bucket_name, self.key_prefix))
def mediaHandler(self, *args, **kwargs):
    relative = kwargs["relative"]

    ug = URLGrabber(checkfunc=kwargs["checkfunc"])
    ug.urlgrab("%s/%s" % (self.tree, kwargs["relative"]),
               kwargs["local"], text=kwargs["text"],
               range=kwargs["range"], copy_local=1)
    return kwargs["local"]
def _retrievePublicKey(self, keyurl, repo=None):
    """
    Retrieve a key file
    @param keyurl: url to the key to retrieve
    Returns a list of dicts with all the keyinfo
    """
    key_installed = False

    # Go get the GPG key from the given URL
    try:
        url = yum.misc.to_utf8(keyurl)
        if repo is None:
            rawkey = urlgrabber.urlread(url, limit=9999)
        else:
            # If we have a repo, use the proxy etc. configuration for it.
            # In theory we have a global proxy config too, but meh...
            # external callers should just update.
            ug = URLGrabber(bandwidth=repo.bandwidth,
                            retry=repo.retries,
                            throttle=repo.throttle,
                            progress_obj=repo.callback,
                            proxies=repo.proxy_dict)
            ug.opts.user_agent = default_grabber.opts.user_agent
            rawkey = ug.urlread(url, text=repo.id + "/gpgkey")
    except urlgrabber.grabber.URLGrabError as e:
        raise ChannelException('GPG key retrieval failed: ' +
                               yum.i18n.to_unicode(str(e)))

    # Parse the key
    try:
        keys_info = yum.misc.getgpgkeyinfo(rawkey, multiple=True)
    except ValueError as err:
        raise ChannelException(
            'GPG key information retrieval failed: {}'.format(err))
    except Exception as err:
        raise ChannelException(
            'Unhandled GPG key failure occurred: {}'.format(err))

    keys = []
    for keyinfo in keys_info:
        thiskey = {}
        for info in ('keyid', 'timestamp', 'userid', 'fingerprint', 'raw_key'):
            if not keyinfo.has_key(info):
                raise ChannelException(
                    'GPG key parsing failed: key does not have value %s' % info)
            thiskey[info] = keyinfo[info]
        thiskey['keyid'] = str(
            "%016x" % (thiskey['keyid'] & 0xffffffffffffffff)).upper()
        thiskey['hexkeyid'] = yum.misc.keyIdToRPMVer(keyinfo['keyid']).upper()
        keys.append(thiskey)

    return keys
class ProxyHTTPAuthTests(BaseProxyTests):
    def setUp(self):
        self.url = ref_http
        if not self.have_proxy():
            self.skip()
        self.g = URLGrabber()

    def test_good_password(self):
        self.g.urlopen(self.url, proxies=self.good_proxies)

    def test_bad_password(self):
        self.assertRaises(URLGrabError, self.g.urlopen,
                          self.url, proxies=self.bad_proxies)
def _getTreeInfo(self, url, proxy_url, sslverify):
    """ Retrieve treeinfo and return the path to the local file.

        :param url: url of the repo
        :type url: string
        :param proxy_url: optional full proxy URL, or ""
        :type proxy_url: string
        :param sslverify: True if the SSL certificate should be verified
        :type sslverify: bool
        :returns: Path to the retrieved .treeinfo file or None
        :rtype: string or None
    """
    if not url:
        return None

    log.debug("retrieving treeinfo from %s (proxy: %s ; sslverify: %s)",
              url, proxy_url, sslverify)

    ugopts = {"ssl_verify_peer": sslverify,
              "ssl_verify_host": sslverify}

    proxies = {}
    if proxy_url:
        try:
            proxy = ProxyString(proxy_url)
            proxies = {"http": proxy.url,
                       "https": proxy.url}
        except ProxyStringError as e:
            log.info("Failed to parse proxy for _getTreeInfo %s: %s",
                     proxy_url, e)

    ug = URLGrabber()
    try:
        treeinfo = ug.urlgrab("%s/.treeinfo" % url, "/tmp/.treeinfo",
                              copy_local=True, proxies=proxies, **ugopts)
    except URLGrabError as e:
        try:
            treeinfo = ug.urlgrab("%s/treeinfo" % url, "/tmp/.treeinfo",
                                  copy_local=True, proxies=proxies, **ugopts)
        except URLGrabError as e:
            log.info("Error downloading treeinfo: %s", e)
            treeinfo = None

    return treeinfo
def urlgrab(self, url, *args, **kwargs):
    self.check_offline_mode()

    # This is for older versions of urlgrabber which are packaged in Debian
    # and Ubuntu and cannot handle filenames as a normal Python string but
    # need a unicode string.
    return URLGrabber.urlgrab(self, url.encode("utf-8"), *args, **kwargs)
def setUp(self):
    def server():
        import socket
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(('localhost', 2000))
        s.listen(1)
        while 1:
            c, a = s.accept()
            while not c.recv(4096).endswith('\r\n\r\n'):
                pass
            c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
            c.close()

    import thread
    self.reply = 503, "Busy"
    thread.start_new_thread(server, ())

    def failure(obj):
        self.code = getattr(obj.exception, 'code', None)
        return {}

    self.g = URLGrabber()
    self.mg = MirrorGroup(self.g, ['http://localhost:2000/'],
                          failure_callback=failure)
def download(url, filename=None, associated_task=None, web_proxy=None):
    if associated_task:
        associated_task.description = _("Downloading %s") % os.path.basename(url)
        associated_task.unit = "KB"
    log.debug("downloading %s > %s" % (url, filename))
    progress_obj = DownloadProgress(associated_task)
    if web_proxy:
        web_proxy = {'http': web_proxy}
    urlgrabber = URLGrabber(
        reget='simple',
        proxies=web_proxy,
        progress_obj=progress_obj)
    if os.path.isdir(filename):
        basename = os.path.basename(url)
        filename = os.path.join(filename, basename)
    filename = urlgrabber.urlgrab(url, filename=filename)
    return filename
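# Hedged example call for download() above -- not part of the original snippet.
# The URL, proxy, and target directory are placeholders; because '/tmp' is a
# directory, the function derives the local basename from the URL itself.
iso_path = download('http://example.com/images/boot.iso',
                    filename='/tmp',
                    web_proxy='http://proxy.example.com:3128')
print(iso_path)  # e.g. /tmp/boot.iso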
def __init__(self, config={}):
    self.gotLibUrlGrabber = False
    try:
        from urlgrabber.grabber import URLGrabber
        self.gotLibUrlGrabber = True
    except ImportError:
        writeError('This script is better with URLGrabber.')
        writeError('See http://linux.duke.edu/projects/urlgrabber/')
    if not self.gotLibUrlGrabber:
        return

    if config.has_key('proxy'):
        writeInfo("URLGrabberWithProxy : %s" % config['proxy'])
        self.g = URLGrabber(proxies={'http': config['proxy']})
    else:
        writeInfo("URLGrabber sans proxy")
        self.g = URLGrabber()
def setUp(self):
    self.g = URLGrabber()
    fullmirrors = [base_mirror_url + m + '/' for m in (bad_mirrors + good_mirrors)]
    if hasattr(urlgrabber.grabber, '_TH'):
        # test assumes mirrors are not re-ordered
        urlgrabber.grabber._TH.hosts.clear()
    self.mg = MirrorGroup(self.g, fullmirrors)
def setUp(self):
    self.url = ref_ftp
    if not self.have_proxy():
        self.skip()
    try:
        fo = urllib.request.urlopen(self.url).close()
    except IOError:
        self.skip()
    self.g = URLGrabber()
def validConnection(szURL, szVersion, bsupgrade):
    try:
        upgrade_tarball = "nsg-upgrade.tar.gz"
        baseURL = re.sub(r'/[^/]+$', '', szURL)
        bootstrap_url = baseURL + "/nsg-upgrade/" + upgrade_tarball
        grabber = URLGrabber(timeout=30.0)
        bsupgrade = grabber.urlgrab(bootstrap_url, "/tmp/" + upgrade_tarball)
    except URLGrabError, e:
        if e[0] == 4:
            aszHost = szURL.split("/")
            return "ERROR Connection check failed: Host %s is not responding" % (aszHost[2])
        elif e[0] == 14:
            return "ERROR Connection check failed: nsg-upgrade directory was not found in url %s" % szURL
        else:
            return "ERROR Checking Connection: %d %s" % (e[0], e[1])
        return "ERROR " + e.strerror
def download_file(url, dirname):
    """
    Download @url and save to @dirname.
    @return - filename of saved file
    """
    # pycurl is picky about Unicode URLs, see rhbz #515797
    url = url.encode('ascii', 'ignore')

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    basename = os.path.basename(url)
    filename = "%s/%s" % (dirname, basename)

    if os.path.exists(filename):
        raise Exception("File %s already exists! Not downloading!" % filename)

    g = URLGrabber(reget=None)
    local_filename = g.urlgrab(url, filename)
    return local_filename
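# Hedged example call for download_file() above -- not part of the original
# snippet; the URL and directory are placeholders. With reget=None an already
# existing target file is never resumed: the function raises instead.
try:
    path = download_file(u"http://example.com/pub/some-package.rpm", "/tmp/downloads")
    print("saved to %s" % path)
except Exception as e:
    print("not downloaded: %s" % e)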
def test_parse_url_with_prefix(self):
    """grabber.URLParser.parse() with opts.prefix"""
    base = 'http://foo.com/dir'
    bases = [base, base + '/']
    filename = 'bar/baz'
    target = base + '/' + filename

    for b in bases:
        g = URLGrabber(prefix=b)
        (url, parts) = g.opts.urlparser.parse(filename, g.opts)
        self.assertEquals(url, target)
def run(self):
    # Check if file exists
    if os.path.isfile(self.file):
        os.chmod(self.file, stat.S_IWUSR)
        os.remove(self.file)

    ## Init url/path pointers
    #response = urllib2.urlopen(self.url)
    #total_size = response.info().getheader('Content-Length').strip()
    #self.total_size = int(total_size)

    # freespace
    #freespace = get_free_space(self.app, path)

    # check if enough freespace
    #if self.freespace < total_size and self.freespace != 0:
    #    self.app.gui.ShowDialogNotification('Not enough freespace to download the item')
    #    self.active = False
    #    return

    self.app.gui.SetVisible(4000, True)
    progress = TextMeter(self.app)

    try:
        Log(self.app, 'Download started')
        g = URLGrabber(reget='simple')
        g.urlgrab(self.url, filename=self.file, reget='simple',
                  progress_obj=progress, text=self.filename)

        # Create info file as json
        json_dumps(self.infodata, self.infopath)
        self.app.gui.ShowDialogNotification('Download Complete')
    except:
        Log(self.app, traceback.format_exc())
        self.app.gui.ShowDialogNotification('Error during download')

    self.app.gui.SetVisible(4000, False)
    self.active = False
    Log(self.app, 'Download finished')
def chunk_get(process_no, dest_dir, file_url, file_size):
    file_name = file_url.split('/')[-1]
    url = "ftp://localhost:2121/" + file_url
    file_path = dest_dir + file_name + ".part" + str(process_no)
    file_dir = file_url.rsplit('/', 1)[0]
    try:
        if not os.path.isfile(file_path):
            # no partial chunk on disk yet -- handled by the except branch below
            raise Exception('')
        else:
            # a partial chunk exists: fetch more bytes into a temp file and
            # append them to the existing .part file
            g = URLGrabber(reget="simple")
            start_byte = os.stat(file_path).st_size
            if start_byte < process_no * file_size / 5:
                if process_no == 4:
                    end_byte = file_size
                else:
                    end_byte = process_no * file_size / 5
                file_temp_path = file_path + ".tmp"
                local_file = g.urlgrab(url, filename=file_temp_path,
                                       range=(start_byte, end_byte), retry=0)
                file(file_path, 'ab').write(file(file_temp_path, 'rb').read())
                os.remove(file_temp_path)
    except:
        # fresh download of this chunk: one fifth of the file,
        # with the last chunk (process_no == 4) taking the remainder
        g = URLGrabber(reget="simple")
        start_byte = process_no * file_size / 5
        if process_no == 4:
            end_byte = file_size
        else:
            end_byte = start_byte + file_size / 5
        local_file = g.urlgrab(url, filename=file_path,
                               range=(start_byte, end_byte), retry=0)
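# Hypothetical driver for chunk_get() above -- an assumption, not part of the
# original snippet. It launches one process per fifth of the file and then
# concatenates the resulting .part0..part4 pieces; names and paths are placeholders.
from multiprocessing import Process

def parallel_get(dest_dir, file_url, file_size):
    workers = [Process(target=chunk_get, args=(i, dest_dir, file_url, file_size))
               for i in range(5)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()

    # Stitch the chunks back together in order.
    file_name = file_url.split('/')[-1]
    with open(dest_dir + file_name, 'wb') as out:
        for i in range(5):
            with open(dest_dir + file_name + ".part" + str(i), 'rb') as part:
                out.write(part.read())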
def __init__(self, awsAccessKey, awsSecretKey, baseurl):
    if self.DEBUG:
        print "creating empty URLGrabber instance"
    URLGrabber.__init__(self)
    if self.DEBUG:
        print "BotoGrabber init BASE_URL=%s" % baseurl
    if not baseurl:
        raise Exception("BotoGrabberInit got blank baseurl")
    try:
        baseurl = baseurl[0]
    except:
        pass
    self.s3 = boto.connect_s3(awsAccessKey, awsSecretKey)
    self.baseurl = urlparse(baseurl)
    if hasattr(self.baseurl, 'netloc'):
        self.bucket_name = self.baseurl.netloc
        self.key_prefix = self.baseurl.path[1:]
    else:
        self.bucket_name = self.baseurl[1]
        self.key_prefix = self.baseurl[2]
    m = re.match(r'(.*)\.s3.*\.amazonaws\.com', self.bucket_name)
    if m:
        self.bucket_name = m.group(1)
    if sys.stdout.isatty():
        print "%s - %s" % (self.bucket_name, self.key_prefix)
def _preInstall_url_image(self):
    """ Download the image using urlgrabber """
    # Setup urlgrabber and call back to download image to sysroot
    progress = URLGrabberProgress()
    ugopts = {"ssl_verify_peer": not self.data.method.noverifyssl,
              "ssl_verify_host": not self.data.method.noverifyssl,
              "proxies": self._proxies,
              "progress_obj": progress,
              "copy_local": True}

    error = None
    try:
        ug = URLGrabber()
        ug.urlgrab(self.data.method.url, self.image_path, **ugopts)
    except URLGrabError as e:
        log.error("Error downloading liveimg: %s", e)
        error = e
    else:
        if not os.path.exists(self.image_path):
            error = "Failed to download %s, file doesn't exist" % self.data.method.url
            log.error(error)
def testKeywordArgs(self):
    """grabber.URLGrabber.__init__() **kwargs handling.

    This is a simple test that just passes some arbitrary values into the
    URLGrabber constructor and checks that they've been set properly.
    """
    opener = urllib2.OpenerDirector()
    g = URLGrabber(progress_obj=self.meter,
                   throttle=0.9,
                   bandwidth=20,
                   retry=20,
                   retrycodes=[5, 6, 7],
                   copy_local=1,
                   close_connection=1,
                   user_agent='test ua/1.0',
                   proxies={'http': 'http://www.proxy.com:9090'},
                   opener=opener)
    opts = g.opts
    self.assertEquals(opts.progress_obj, self.meter)
    self.assertEquals(opts.throttle, 0.9)
    self.assertEquals(opts.bandwidth, 20)
    self.assertEquals(opts.retry, 20)
    self.assertEquals(opts.retrycodes, [5, 6, 7])
    self.assertEquals(opts.copy_local, 1)
    self.assertEquals(opts.close_connection, 1)
    self.assertEquals(opts.user_agent, 'test ua/1.0')
    self.assertEquals(opts.proxies, {'http': 'http://www.proxy.com:9090'})
    self.assertEquals(opts.opener, opener)

    nopts = grabber.URLGrabberOptions(delegate=opts,
                                      throttle=0.5,
                                      copy_local=0)
    self.assertEquals(nopts.progress_obj, self.meter)
    self.assertEquals(nopts.throttle, 0.5)
    self.assertEquals(nopts.bandwidth, 20)
    self.assertEquals(nopts.retry, 20)
    self.assertEquals(nopts.retrycodes, [5, 6, 7])
    self.assertEquals(nopts.copy_local, 0)
    self.assertEquals(nopts.close_connection, 1)
    self.assertEquals(nopts.user_agent, 'test ua/1.0')
    self.assertEquals(nopts.proxies, {'http': 'http://www.proxy.com:9090'})
    nopts.opener = None
    self.assertEquals(nopts.opener, None)
def _test_url(self, urllist):
    g = URLGrabber()
    try:
        quote = urllist[3]
    except IndexError:
        quote = None
    g.opts.quote = quote
    (url, parts) = g.opts.urlparser.parse(urllist[0], g.opts)

    if 1:
        self.assertEquals(url, urllist[1])
        self.assertEquals(parts, urllist[2])
    else:
        if url == urllist[1] and parts == urllist[2]:
            print('OK: %s' % urllist[0])
        else:
            print('ERROR: %s' % urllist[0])
            print(' ' + urllist[1])
            print(' ' + url)
            print(' ' + urllist[2])
            print(' ' + parts)
def setUp(self):
    # start the server
    self.exit = False

    def server():
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(LOCALPORT)
        s.listen(1)
        while 1:
            c, a = s.accept()
            if self.exit:
                c.close()
                break
            ending_compat = '\r\n\r\n' if not six.PY3 else b'\r\n\r\n'
            while not c.recv(4096).endswith(ending_compat):
                pass
            http_compat = 'HTTP/1.1 %d %s\r\n' % self.reply
            c.sendall(http_compat if not six.PY3 else http_compat.encode('utf-8'))
            if self.content is not None:
                cont_length_compat = 'Content-Length: %d\r\n\r\n' % len(self.content)
                c.sendall(cont_length_compat if not six.PY3
                          else cont_length_compat.encode('utf-8'))
                c.sendall(self.content if not six.PY3
                          else self.content.encode('utf-8'))
            c.close()
        s.close()
        self.exit = False

    thread.start_new_thread(server, ())

    # create grabber and mirror group objects
    def failure(obj):
        self.code = getattr(obj.exception, 'code', None)
        return {}

    self.g = URLGrabber()
    self.mg = MirrorGroup(self.g, ['http://%s:%d' % LOCALPORT],
                          failure_callback=failure)
def setUp(self):
    # start the server
    self.exit = False
    self.process = lambda data: None

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind(('localhost', 0))
    s.listen(1)
    self.port = s.getsockname()[1]

    def server():
        while True:
            c, a = s.accept()
            if self.exit:
                c.close()
                break
            data = b''
            while not data.endswith(b'\r\n\r\n'):
                data = c.recv(4096)
            self.process(data)
            c.sendall(b'HTTP/1.1 %d %s\r\n' % self.reply)
            if self.content is not None:
                c.sendall(b'Content-Length: %d\r\n\r\n' % len(self.content))
                c.sendall(self.content)
            c.close()
        s.close()
        self.exit = False

    self.thread = threading.Thread(target=server)
    self.thread.start()

    # create grabber and mirror group objects
    def failure(obj):
        self.code = getattr(obj.exception, 'code', None)
        return {}

    self.g = URLGrabber()
    self.mg = MirrorGroup(self.g, ['http://localhost:%d' % self.port],
                          failure_callback=failure)
def _test_url(self, urllist):
    g = URLGrabber()
    try:
        quote = urllist[3]
    except IndexError:
        quote = None
    g.opts.quote = quote

    url = urllist[0].encode('utf8')
    expected_url = urllist[1].encode('utf8')
    expected_parts = tuple(part.encode('utf8') for part in urllist[2])
    (url, parts) = g.opts.urlparser.parse(url, g.opts)

    if 1:
        self.assertEqual(url, expected_url)
        self.assertEqual(parts, expected_parts)
    else:
        if url == urllist[1] and parts == urllist[2]:
            print('OK: %s' % urllist[0])
        else:
            print('ERROR: %s' % urllist[0])
            print(' ' + urllist[1])
            print(' ' + url)
            print(' ' + urllist[2])
            print(' ' + parts)
def __init__(self, progress_obj=None):
    # we cannot use super() because we still have to support
    # older urlgrabber versions where URLGrabber is an old-style class
    URLGrabber.__init__(self)
    self.progress_obj = progress_obj
    help='Extract files after downloading (to subdirectories) [default: no]',
    default=False
)
parser.add_option(
    '-k', '--keep-after-extract',
    action='store_true',
    dest='keep_after_extract',
    help='Keep files after extracting [default: no]',
    default=False
)
(options, args) = parser.parse_args()

if len(args) == 0:
    parser.error('One or more bundle keys are required')

progress_printer = ProgressPrint()
grabber = URLGrabber(prefix=options.gmb_url, progress_obj=progress_printer)

# Download the albums for each key
for key in args:
    # Get download page and grab all download URLs
    download_page_url = urljoin(options.gmb_url, '/download?key=%s' % key)
    download_page = urlopen(download_page_url)
    html = download_page.read()
    soup = BeautifulSoup(html, 'lxml')
    download_page.close()

    # Find all download links
    regex_download_link = re.compile(r'/download\?.*')
    download_links = [x['href'] for x in soup.find_all('a', href=regex_download_link)]
    album_urls = merge_album_links(download_links)
def get_file_if_size_diff(url, d):
    fn = url.split('/')[-1]
    out_fnp = os.path.join(d, fn)
    g = URLGrabber(reget="simple")
    locFnp = g.urlgrab(url, out_fnp)
    return locFnp
def setUp(self):
    self.url = ref_http
    if not self.have_proxy():
        self.skip()
    self.g = URLGrabber()
        if config.has_key('proxy'):
            writeInfo("URLGrabberWithProxy : %s" % config['proxy'])
            self.g = URLGrabber(proxies={'http': config['proxy']})
        else:
            writeInfo("URLGrabber sans proxy")
            self.g = URLGrabber()

    def getWebFile(self, url, dest):
        if not self.gotLibUrlGrabber:
            # Fall back to plain urllib when urlgrabber is not installed.
            import urllib
            fd = open(dest, "wb")
            fd.write(urllib.urlopen(url).read())
            fd.close()
        else:
            # urllib.urlretrieve("http://www.example.com/songs/mp3.mp3", "mp3.mp3")  # leftover sample call, disabled
            self.g.urlgrab(url, filename=dest)


if __name__ == '__main__':
    g = URLGrabber(proxies={'http': 'http://proxy.free.fr:3128'})
    url = 'http://www.advanscene.com/offline/datas/ADVANsCEne_NDS.zip'
    g.urlgrab(url, filename='moncul.zip')

    g1 = WebGrabber(config={'proxy': 'http://proxy.free.fr:3128'})
    g2 = WebGrabber()
    print "g1 is g2 %s" % (g1 is g2)
    g1.getWebFile('http://www.advanscene.com/offline/datas/ADVANsCEne_NDS.zip', 'moncul.zip')
    print "Done."
            else:
                self.totalRead = totalRead
                self.lastData = time()
                oldCount = self.count
                self.count = int(totalRead // self.QUANTUM) + 1
                self.progress(("=" if self.started else "+") * max(0, self.count - oldCount), suffix)
                self.started = True

        def end(self, totalRead):
            self.update(totalRead, "OK")

    progressIndicator = ProgressIndicator()

    grabber = URLGrabber(
        reget="simple",
        timeout=self.timeout,
        progress_obj=progressIndicator,
        user_agent=userAgent,
        http_headers=tuple((str(cookie["name"]), str(cookie["value"])) for cookie in cookies),
    )

    try:
        grabber.urlgrab(link, filename=targetFileName)
        downloadOK = True
    except URLGrabError, e:
        self.errors += 1
        self.logger.error("Download failed: %s", e)
    except KeyboardInterrupt:
        self.errors += 1
        self.logger.error("Download interrupted")

    if downloadOK:
        localSize = getFileSize(targetFileName)
        if not localSize:
def __init__(self, remote):
    self.remote = remote
    self.g = URLGrabber(prefix=self.remote)
def preInstall(self, *args, **kwargs):
    """ Download image and loopback mount it.

        This is called after partitioning is setup, we now have space to
        grab the image. Download it to ROOT_PATH and provide feedback
        during the download (using urlgrabber callback).
    """
    # Setup urlgrabber and call back to download image to ROOT_PATH
    progress = URLGrabberProgress()
    ugopts = {"ssl_verify_peer": not self.data.method.noverifyssl,
              "ssl_verify_host": not self.data.method.noverifyssl,
              "proxies" : self._proxies,
              "progress_obj" : progress,
              "copy_local" : True}

    error = None
    try:
        ug = URLGrabber()
        ug.urlgrab(self.data.method.url, self.image_path, **ugopts)
    except URLGrabError as e:
        log.error("Error downloading liveimg: %s", e)
        error = e
    else:
        if not os.path.exists(self.image_path):
            error = "Failed to download %s, file doesn't exist" % self.data.method.url
            log.error(error)

    if error:
        exn = PayloadInstallError(str(error))
        if errorHandler.cb(exn) == ERROR_RAISE:
            raise exn

    # Used to make install progress % look correct
    self._adj_size = os.stat(self.image_path)[stat.ST_SIZE]

    if self.data.method.checksum:
        progressQ.send_message(_("Checking image checksum"))
        sha256 = hashlib.sha256()
        with open(self.image_path, "rb") as f:
            while True:
                data = f.read(1024*1024)
                if not data:
                    break
                sha256.update(data)
        filesum = sha256.hexdigest()
        log.debug("sha256 of %s is %s", self.data.method.url, filesum)

        if lowerASCII(self.data.method.checksum) != filesum:
            log.error("%s does not match checksum.", self.data.method.checksum)
            exn = PayloadInstallError("Checksum of image does not match")
            if errorHandler.cb(exn) == ERROR_RAISE:
                raise exn

    # Mount the image and check to see if it is a LiveOS/*.img
    # style squashfs image. If so, move it to IMAGE_DIR and mount the real
    # root image on INSTALL_TREE
    blivet.util.mount(self.image_path, INSTALL_TREE, fstype="auto", options="ro")
    if os.path.exists(INSTALL_TREE + "/LiveOS"):
        # Find the first .img in the directory and mount that on INSTALL_TREE
        img_files = glob.glob(INSTALL_TREE + "/LiveOS/*.img")
        if img_files:
            img_file = os.path.basename(sorted(img_files)[0])

            # move the mount to IMAGE_DIR
            os.makedirs(IMAGE_DIR, 0755)
            # work around inability to move shared filesystems
            iutil.execWithRedirect("mount", ["--make-rprivate", "/"])
            iutil.execWithRedirect("mount", ["--move", INSTALL_TREE, IMAGE_DIR])
            blivet.util.mount(IMAGE_DIR + "/LiveOS/" + img_file, INSTALL_TREE,
                              fstype="auto", options="ro")
def __init__(self, maxthreads=5, **kwargs):
    self.maxthreads = maxthreads
    self.grabber = URLGrabber(**kwargs)
    self.queue = []
    self.threads = []
    self.sem = Semaphore()