def download(self, download_dir): dir_util.mkpath(download_dir) url = self.installer_url() print 'Downloading:', url web = FancyURLopener() web.retrieve(url, path.join(download_dir, path.basename(url)), display_progress)
def download(self, download_dir): result = path.join(download_dir, self.package_basename) if path.exists(result): print 'Found install', self.package_basename else: dir_util.mkpath(download_dir) url = "http://www.eiffel-loop.com/download/" + self.package_basename print 'Downloading:', url web = FancyURLopener() web.retrieve(url, result, display_progress) return result
def download (self, download_dir): result = path.join (download_dir, self.package_basename) if path.exists (result): print 'Found install', self.package_basename else: dir_util.mkpath (download_dir) url = "http://www.eiffel-loop.com/download/" + self.package_basename print 'Downloading:', url web = FancyURLopener () web.retrieve (url, result, display_progress) return result
def ensureFileLocal(self, inFilePathOrURL):
    '''
    Takes a file path or URL. Sets self.localFilePath to the same path
    if the file is local, or if the file is remote but uncompressed.
    If the file is remote and compressed, retrieves it into a local tmp
    file and records that name. In that case self.deleteTempFile is set
    to True so the caller knows to clean up.

    :param inFilePathOrURL: file path or URL to file
    :type inFilePathOrURL: String
    '''
    self.localFilePath = inFilePathOrURL
    self.deleteTempFile = False

    # Uncompressed input can be consumed wherever it lives.
    if self.compression == COMPRESSION_TYPE.NO_COMPRESSION:
        return

    # Compressed input: a 'file://' URL is already local.
    parsed = urlparse(inFilePathOrURL)
    if parsed.scheme == 'file':
        self.localFilePath = parsed.path
        return

    # Compressed and remote: download to a temp file.
    opener = FancyURLopener()
    # retrieve() raises IOError if the URL does not exist.
    self.localFilePath = opener.retrieve(inFilePathOrURL)[0]
    self.deleteTempFile = True
def download_package(pkg_name, pkg_version):
    """Download *pkg_name*==*pkg_version* from the PyPI mirror and verify it.

    Returns True when a file with a matching digest is available locally
    (pre-existing or freshly downloaded), False when package info cannot
    be resolved or the digest check fails.
    """
    file_name, pkg_path, hash_algorithm, expected_digest = get_package_info(pkg_name, pkg_version)
    if not file_name:
        # Unknown package/version.
        return False

    # Short-circuit: an already-verified local copy needs no download.
    if os.path.isfile(file_name) and check_digest(file_name, hash_algorithm, expected_digest):
        print('File with matching digest already exists, skipping {0}'.format(file_name))
        return True

    pkg_url = '{0}/packages/{1}'.format(PYPI_MIRROR, pkg_path)
    print('Downloading {0} from {1}'.format(file_name, pkg_url))
    FancyURLopener().retrieve(pkg_url, file_name)

    # Guard style: verify, bail out loudly on mismatch.
    if check_digest(file_name, hash_algorithm, expected_digest):
        return True
    print('Hash digest check failed in file {0}.'.format(file_name))
    return False
def __install_grinder(self, grinder_path):
    """
    Installs Grinder.
    Grinder version and download link may be set in config:
    "download-link":"http://domain/resource-{version}.zip"
    "version":"1.2.3"
    """
    # Install root is two levels above the configured jar path (../..).
    install_dir = os.path.dirname(os.path.dirname(os.path.expanduser(grinder_path)))
    if not install_dir:
        install_dir = os.path.expanduser("~/grinder-taurus")
    install_dir = os.path.abspath(install_dir)
    grinder_full_path = os.path.join(install_dir, "lib", "grinder.jar")

    try:
        # Probe an existing installation before downloading anything.
        self.__grinder(grinder_full_path)
        return grinder_full_path
    except CalledProcessError:
        self.log.info("Will try to install grinder into %s", install_dir)

    opener = FancyURLopener()
    zip_path = self.engine.create_artifact("grinder-dist", ".zip")
    version = self.settings.get("version", GrinderExecutor.VERSION)
    download_link = self.settings.get("download-link", GrinderExecutor.DOWNLOAD_LINK).format(version=version)
    self.log.info("Downloading %s", download_link)
    try:
        opener.retrieve(download_link, zip_path, download_progress_hook)
    except BaseException as exc:
        self.log.error("Error while downloading %s", download_link)
        raise exc

    self.log.info("Unzipping %s", zip_path)
    unzip(zip_path, install_dir, 'grinder-' + version)
    os.remove(zip_path)
    self.log.info("Installed grinder successfully")
    return grinder_full_path
def __install_gatling(self, gatling_path):
    """
    Installs Gatling.
    Gatling version and download link may be set in config:
    "download-link":"http://domain/resource-{version}.zip"
    "version":"1.2.3"
    """
    # Install root is two levels above the configured binary path (../..).
    install_dir = os.path.abspath(
        os.path.dirname(os.path.dirname(os.path.expanduser(gatling_path))))

    try:
        # Probe an existing installation first.
        self.__gatling(gatling_path)
        return gatling_path
    except OSError:
        self.log.info("Will try to install Gatling into %s", install_dir)

    # download gatling
    opener = FancyURLopener()
    zip_path = self.engine.create_artifact("gatling-dist", ".zip")
    version = self.settings.get("version", GatlingExecutor.VERSION)
    download_link = self.settings.get("download-link", GatlingExecutor.DOWNLOAD_LINK).format(version=version)
    self.log.info("Downloading %s", download_link)
    # TODO: check archive checksum/hash before unzip and run
    try:
        opener.retrieve(download_link, zip_path, download_progress_hook)
    except BaseException as exc:
        self.log.error("Error while downloading %s", download_link)
        raise exc

    self.log.info("Unzipping %s", zip_path)
    unzip(zip_path, install_dir, 'gatling-charts-highcharts-bundle-' + version)
    os.remove(zip_path)
    # Make the launcher executable.
    os.chmod(os.path.expanduser(gatling_path), 0o755)
    self.log.info("Installed Gatling successfully")
def __install_grinder(self, grinder_path):
    """
    Installs Grinder.
    Grinder version and download link may be set in config:
    "download-link":"http://domain/resource-{version}.zip"
    "version":"1.2.3"
    """
    dest = os.path.dirname(os.path.dirname(os.path.expanduser(grinder_path)))
    # Fall back to a home-directory default when the path collapses to ''.
    dest = os.path.abspath(dest if dest else os.path.expanduser("~/grinder-taurus"))
    grinder_full_path = os.path.join(dest, "lib", "grinder.jar")

    try:
        # If this succeeds, Grinder is already installed.
        self.__grinder(grinder_full_path)
        return grinder_full_path
    except CalledProcessError:
        self.log.info("Will try to install grinder into %s", dest)

        downloader = FancyURLopener()
        grinder_zip_path = self.engine.create_artifact("grinder-dist", ".zip")
        version = self.settings.get("version", GrinderExecutor.VERSION)
        link_template = self.settings.get("download-link", GrinderExecutor.DOWNLOAD_LINK)
        download_link = link_template.format(version=version)
        self.log.info("Downloading %s", download_link)
        try:
            downloader.retrieve(download_link, grinder_zip_path, download_progress_hook)
        except BaseException as e:
            self.log.error("Error while downloading %s", download_link)
            raise e

        self.log.info("Unzipping %s", grinder_zip_path)
        unzip(grinder_zip_path, dest, 'grinder-' + version)
        os.remove(grinder_zip_path)
        self.log.info("Installed grinder successfully")
        return grinder_full_path
def __install_gatling(self, gatling_path):
    """
    Installs Gatling.
    Gatling version and download link may be set in config:
    "download-link":"http://domain/resource-{version}.zip"
    "version":"1.2.3"
    """
    dest = os.path.dirname(os.path.dirname(os.path.expanduser(gatling_path)))  # ../..
    dest = os.path.abspath(dest)

    try:
        # If this succeeds, Gatling is already installed.
        self.__gatling(gatling_path)
        return gatling_path
    except OSError:
        self.log.info("Will try to install Gatling into %s", dest)

        # download gatling
        downloader = FancyURLopener()
        gatling_zip_path = self.engine.create_artifact("gatling-dist", ".zip")
        version = self.settings.get("version", GatlingExecutor.VERSION)
        link_template = self.settings.get("download-link", GatlingExecutor.DOWNLOAD_LINK)
        download_link = link_template.format(version=version)
        self.log.info("Downloading %s", download_link)
        # TODO: check archive checksum/hash before unzip and run
        try:
            downloader.retrieve(download_link, gatling_zip_path, download_progress_hook)
        except BaseException as e:
            self.log.error("Error while downloading %s", download_link)
            raise e

        self.log.info("Unzipping %s", gatling_zip_path)
        unzip(gatling_zip_path, dest, 'gatling-charts-highcharts-bundle-' + version)
        os.remove(gatling_zip_path)
        os.chmod(os.path.expanduser(gatling_path), 0o755)
        self.log.info("Installed Gatling successfully")
def fetchURL(url, file='', params=None, headers={}, isBinary=False, encodeURL=True): log("> bbbLib.fetchURL() %s isBinary=%s encodeURL=%s" % (url, isBinary, encodeURL)) if encodeURL: safe_url = quote_plus(url,'/:&?=+#@') else: safe_url = url success = False data = None if not file: # create temp file if needed file = xbmc.translatePath(os.path.join(os.getcwd(), "temp.html")) # remove destination file if exists already deleteFile(file) # fetch from url try: opener = FancyURLopener() # add headers if supplied # if headers: if not headers.has_key('User-Agent') and not headers.has_key('User-agent'): headers['User-Agent'] = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' for name, value in headers.items(): opener.addheader(name, value) fn, resp = opener.retrieve(safe_url, file, data=params) # print fn, resp content_type = resp.get("Content-Type",'').lower() # fail if expecting an image but not corrent type returned if isBinary and (find(content_type,"text") != -1): raise "Not Binary" opener.close() del opener urlcleanup() except IOError, errobj: ErrorCode(errobj)
def fetchURL(url, file='', params=None, headers={}, isBinary=False, encodeURL=True): log("> bbbLib.fetchURL() %s isBinary=%s encodeURL=%s" % (url, isBinary, encodeURL)) if encodeURL: safe_url = quote_plus(url,'/:&?=+#@') else: safe_url = url success = False data = None if not file: # create temp file file = xbmc.translatePath( "special://temp/temp.html" ) # remove destination file if exists already deleteFile(file) # fetch from url try: opener = FancyURLopener() # add headers if supplied if not headers.has_key('User-Agent') and not headers.has_key('User-agent'): headers['User-Agent'] = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' for name, value in headers.items(): opener.addheader(name, value) fn, resp = opener.retrieve(safe_url, file, data=params) # print fn, resp content_type = resp.get("Content-Type",'').lower() # fail if expecting an image but not correct type returned if isBinary and (find(content_type,"text") != -1): raise "Not Binary" opener.close() del opener urlcleanup() except IOError, errobj: ErrorCode(errobj)
class FetchThread(Thread):
    """Worker thread that scrapes qwantz.com comics and their title text.

    Pulls comic numbers from master.toget, records titles in master.titles
    and downloads the PNGs.  Stops after 7 consecutive non-200 responses.
    """

    def __init__(self, master):
        Thread.__init__(self)
        self.master = master
        # Persistent HTTP connection, re-created on failure in fetch().
        self.ht = httplib.HTTPConnection('www.qwantz.com')
        self.ht.connect()
        self.getter = FancyURLopener()

    def run(self):
        notdone = True
        while notdone:
            r = self.fetch()
            # Track consecutive failures; None (already-done) resets nothing
            # but does not count as a failure either.
            with self.master.fourLock:
                if r != 200 and r is not None:
                    self.master.fourofours += 1
                else:
                    self.master.fourofours = 0
                notdone = self.master.fourofours < 7
            if r == 200:
                with self.master.writeLock:
                    self.master.writeTitles()

    def fetch(self):
        # Grab the next comic number under the queue lock.
        with self.master.numLock:
            n = self.master.toget.pop(0)
        justTitle = False
        if os.path.isfile(pjoin(basedir, "dinocomics%06i.png" % (n))):
            if n in self.master.titles.iterkeys():
                # Image and title both present: nothing to do.
                return None
            else:
                # Image present, title missing: only scrape the title.
                justTitle = True
        ht = self.ht
        ht.request("GET", "/index.php?comic=%i" % n)
        try:
            r = ht.getresponse()
        except:
            # Connection died: rebuild it and retry the response once.
            ht = httplib.HTTPConnection('www.qwantz.com')
            ht.connect()
            self.ht = ht
            time.sleep(1)
            r = ht.getresponse()
        if r.status != 200:
            if r.status in (404, 302):
                print >> stderr, n, "No Comic"
            else:
                print >> stderr, n, "FAILED: %i" % r.status
                # Unexpected failure: start from a fresh connection next time.
                ht = httplib.HTTPConnection('www.qwantz.com')
                ht.connect()
                self.ht = ht
            return r.status
        s = r.read()
        # Locate the comic <img> tag, then its src and title attributes.
        m = re.search('\<img *src *= *"http://www.qwantz.com/comics.*?\>', s, re.S)
        if m is None:
            print >> stderr, n, "no match!1"
            return r.status
        img = m.group()
        m = re.search('src *= *".*?"', img, re.S)
        if m is None:
            print >> stderr, n, "no match!2"
            return r.status
        href = re.search('".*?"', m.group(), re.S).group()[1:-1]
        m = re.search('title *= *".*?"', img, re.S)
        if m is None:
            print >> stderr, n, "no match!3"
            return r.status
        title = re.search('".*"', m.group(), re.S).group()[1:-1].strip()
        title = unescape(title)
        # Normalise line endings to literal "\n" and collapse repeats.
        title = title.replace("\r\n", "\\n")
        title = title.replace("\n", "\\n")
        while True:
            prevtitle = title
            title = title.replace("\\n\\n", "\\n")
            if prevtitle == title:
                break
        with self.master.titleLock:
            self.master.titles[n] = title
        print >> stderr, n, title
        if not justTitle:
            self.getter.retrieve(href, "%s/dinocomics%06i.png" % (basedir, n))
        return r.status
from rovin.belex.BelexParser import BelexParser from urllib import FancyURLopener from os import path url = "http://www.ejustice.just.fgov.be/cgi_loi/loi_a1.pl?language=nl&table_name=wet&la=N&cn=1994021730&&caller=list&N&fromtab=wet" filename = "constitution-nl.html" if not path.isfile(filename): print "Downloading", url downloader = FancyURLopener() downloader.retrieve(url, filename) f = open(filename, 'r') html = f.read() parser = BelexParser() parser.feed(html) for article in parser.all_articles(): print " <<<< " print article.number print "BODY:" + article.body print " >>>> "
import urllib from urllib import FancyURLopener storePath = "/home/richard/media/Share/GameVideo" projectPathBase = "/home/richard/workspace/VideoSpider" with open("%s/pyspider/aipaiImg.list" % projectPathBase, "r") as listFile: i = 0 for imgsrc in listFile: sp = imgsrc.split(":h") opener = FancyURLopener({}) opener.version = 'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11' opener.retrieve("h"+sp[1], "%s/%s" % (storePath, sp[0])) i = i+1 print i, print sp[0]