def download(self, download_dir): dir_util.mkpath(download_dir) url = self.installer_url() print 'Downloading:', url web = FancyURLopener() web.retrieve(url, path.join(download_dir, path.basename(url)), display_progress)
def download(self, download_dir): result = path.join(download_dir, self.package_basename) if path.exists(result): print 'Found install', self.package_basename else: dir_util.mkpath(download_dir) url = "http://www.eiffel-loop.com/download/" + self.package_basename print 'Downloading:', url web = FancyURLopener() web.retrieve(url, result, display_progress) return result
def download (self, download_dir): result = path.join (download_dir, self.package_basename) if path.exists (result): print 'Found install', self.package_basename else: dir_util.mkpath (download_dir) url = "http://www.eiffel-loop.com/download/" + self.package_basename print 'Downloading:', url web = FancyURLopener () web.retrieve (url, result, display_progress) return result
def ensureFileLocal(self, inFilePathOrURL):
    '''
    Takes a file path or URL. Sets self.localFilePath to the same path
    if the file is local, or if the file is remote but uncompressed.
    If the file is remote and compressed, retrieves it into a local tmp
    file and records that name. In that case self.deleteTempFile is set
    to True so the caller knows to clean up.

    :param inFilePathOrURL: file path or URL to file
    :type inFilePathOrURL: String
    '''
    self.localFilePath = inFilePathOrURL
    self.deleteTempFile = False

    # Uncompressed input can be consumed wherever it lives.
    if self.compression == COMPRESSION_TYPE.NO_COMPRESSION:
        return

    # Compressed input: a 'file://' URL is already local.
    parsed = urlparse(inFilePathOrURL)
    if parsed.scheme == 'file':
        self.localFilePath = parsed.path
        return

    # Compressed and remote: download to a temp file.
    opener = FancyURLopener()
    # retrieve() raises IOError if the URL does not exist.
    self.localFilePath = opener.retrieve(inFilePathOrURL)[0]
    self.deleteTempFile = True
def download_package(pkg_name, pkg_version):
    """Download *pkg_name*==*pkg_version* from the PyPI mirror and verify it.

    Returns True when a file with a matching digest is available locally
    (pre-existing or freshly downloaded), False when package info cannot
    be resolved or the digest check fails.
    """
    file_name, pkg_path, hash_algorithm, expected_digest = get_package_info(pkg_name, pkg_version)
    if not file_name:
        # Unknown package/version.
        return False

    # Short-circuit: an already-verified local copy needs no download.
    if os.path.isfile(file_name) and check_digest(file_name, hash_algorithm, expected_digest):
        print('File with matching digest already exists, skipping {0}'.format(file_name))
        return True

    pkg_url = '{0}/packages/{1}'.format(PYPI_MIRROR, pkg_path)
    print('Downloading {0} from {1}'.format(file_name, pkg_url))
    FancyURLopener().retrieve(pkg_url, file_name)

    # Guard style: verify, bail out loudly on mismatch.
    if check_digest(file_name, hash_algorithm, expected_digest):
        return True
    print('Hash digest check failed in file {0}.'.format(file_name))
    return False
def __install_grinder(self, grinder_path):
    """
    Installs Grinder.
    Grinder version and download link may be set in config:
    "download-link":"http://domain/resource-{version}.zip"
    "version":"1.2.3"
    """
    # Install root is two levels above the configured jar path (../..).
    install_dir = os.path.dirname(os.path.dirname(os.path.expanduser(grinder_path)))
    if not install_dir:
        install_dir = os.path.expanduser("~/grinder-taurus")
    install_dir = os.path.abspath(install_dir)
    grinder_full_path = os.path.join(install_dir, "lib", "grinder.jar")

    try:
        # Probe an existing installation before downloading anything.
        self.__grinder(grinder_full_path)
        return grinder_full_path
    except CalledProcessError:
        self.log.info("Will try to install grinder into %s", install_dir)

    opener = FancyURLopener()
    zip_path = self.engine.create_artifact("grinder-dist", ".zip")
    version = self.settings.get("version", GrinderExecutor.VERSION)
    download_link = self.settings.get("download-link", GrinderExecutor.DOWNLOAD_LINK).format(version=version)
    self.log.info("Downloading %s", download_link)
    try:
        opener.retrieve(download_link, zip_path, download_progress_hook)
    except BaseException as exc:
        self.log.error("Error while downloading %s", download_link)
        raise exc

    self.log.info("Unzipping %s", zip_path)
    unzip(zip_path, install_dir, 'grinder-' + version)
    os.remove(zip_path)
    self.log.info("Installed grinder successfully")
    return grinder_full_path
def __install_gatling(self, gatling_path):
    """
    Installs Gatling.
    Gatling version and download link may be set in config:
    "download-link":"http://domain/resource-{version}.zip"
    "version":"1.2.3"
    """
    # Install root is two levels above the configured binary path (../..).
    install_dir = os.path.abspath(
        os.path.dirname(os.path.dirname(os.path.expanduser(gatling_path))))

    try:
        # Probe an existing installation first.
        self.__gatling(gatling_path)
        return gatling_path
    except OSError:
        self.log.info("Will try to install Gatling into %s", install_dir)

    # download gatling
    opener = FancyURLopener()
    zip_path = self.engine.create_artifact("gatling-dist", ".zip")
    version = self.settings.get("version", GatlingExecutor.VERSION)
    download_link = self.settings.get("download-link", GatlingExecutor.DOWNLOAD_LINK).format(version=version)
    self.log.info("Downloading %s", download_link)
    # TODO: check archive checksum/hash before unzip and run
    try:
        opener.retrieve(download_link, zip_path, download_progress_hook)
    except BaseException as exc:
        self.log.error("Error while downloading %s", download_link)
        raise exc

    self.log.info("Unzipping %s", zip_path)
    unzip(zip_path, install_dir, 'gatling-charts-highcharts-bundle-' + version)
    os.remove(zip_path)
    # Make the launcher executable.
    os.chmod(os.path.expanduser(gatling_path), 0o755)
    self.log.info("Installed Gatling successfully")
def __install_grinder(self, grinder_path):
    """
    Installs Grinder.
    Grinder version and download link may be set in config:
    "download-link":"http://domain/resource-{version}.zip"
    "version":"1.2.3"
    """
    dest = os.path.dirname(os.path.dirname(os.path.expanduser(grinder_path)))
    # Fall back to a home-directory default when the path collapses to ''.
    dest = os.path.abspath(dest if dest else os.path.expanduser("~/grinder-taurus"))
    grinder_full_path = os.path.join(dest, "lib", "grinder.jar")

    try:
        # If this succeeds, Grinder is already installed.
        self.__grinder(grinder_full_path)
        return grinder_full_path
    except CalledProcessError:
        self.log.info("Will try to install grinder into %s", dest)

        downloader = FancyURLopener()
        grinder_zip_path = self.engine.create_artifact("grinder-dist", ".zip")
        version = self.settings.get("version", GrinderExecutor.VERSION)
        link_template = self.settings.get("download-link", GrinderExecutor.DOWNLOAD_LINK)
        download_link = link_template.format(version=version)
        self.log.info("Downloading %s", download_link)
        try:
            downloader.retrieve(download_link, grinder_zip_path, download_progress_hook)
        except BaseException as e:
            self.log.error("Error while downloading %s", download_link)
            raise e

        self.log.info("Unzipping %s", grinder_zip_path)
        unzip(grinder_zip_path, dest, 'grinder-' + version)
        os.remove(grinder_zip_path)
        self.log.info("Installed grinder successfully")
        return grinder_full_path
def __install_gatling(self, gatling_path):
    """
    Installs Gatling.
    Gatling version and download link may be set in config:
    "download-link":"http://domain/resource-{version}.zip"
    "version":"1.2.3"
    """
    dest = os.path.dirname(os.path.dirname(os.path.expanduser(gatling_path)))  # ../..
    dest = os.path.abspath(dest)

    try:
        # If this succeeds, Gatling is already installed.
        self.__gatling(gatling_path)
        return gatling_path
    except OSError:
        self.log.info("Will try to install Gatling into %s", dest)

        # download gatling
        downloader = FancyURLopener()
        gatling_zip_path = self.engine.create_artifact("gatling-dist", ".zip")
        version = self.settings.get("version", GatlingExecutor.VERSION)
        link_template = self.settings.get("download-link", GatlingExecutor.DOWNLOAD_LINK)
        download_link = link_template.format(version=version)
        self.log.info("Downloading %s", download_link)
        # TODO: check archive checksum/hash before unzip and run
        try:
            downloader.retrieve(download_link, gatling_zip_path, download_progress_hook)
        except BaseException as e:
            self.log.error("Error while downloading %s", download_link)
            raise e

        self.log.info("Unzipping %s", gatling_zip_path)
        unzip(gatling_zip_path, dest, 'gatling-charts-highcharts-bundle-' + version)
        os.remove(gatling_zip_path)
        os.chmod(os.path.expanduser(gatling_path), 0o755)
        self.log.info("Installed Gatling successfully")
def fetchURL(url, file='', params=None, headers={}, isBinary=False, encodeURL=True): log("> bbbLib.fetchURL() %s isBinary=%s encodeURL=%s" % (url, isBinary, encodeURL)) if encodeURL: safe_url = quote_plus(url,'/:&?=+#@') else: safe_url = url success = False data = None if not file: # create temp file if needed file = xbmc.translatePath(os.path.join(os.getcwd(), "temp.html")) # remove destination file if exists already deleteFile(file) # fetch from url try: opener = FancyURLopener() # add headers if supplied # if headers: if not headers.has_key('User-Agent') and not headers.has_key('User-agent'): headers['User-Agent'] = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' for name, value in headers.items(): opener.addheader(name, value) fn, resp = opener.retrieve(safe_url, file, data=params) # print fn, resp content_type = resp.get("Content-Type",'').lower() # fail if expecting an image but not corrent type returned if isBinary and (find(content_type,"text") != -1): raise "Not Binary" opener.close() del opener urlcleanup() except IOError, errobj: ErrorCode(errobj)
def fetchURL(url, file='', params=None, headers={}, isBinary=False, encodeURL=True): log("> bbbLib.fetchURL() %s isBinary=%s encodeURL=%s" % (url, isBinary, encodeURL)) if encodeURL: safe_url = quote_plus(url,'/:&?=+#@') else: safe_url = url success = False data = None if not file: # create temp file file = xbmc.translatePath( "special://temp/temp.html" ) # remove destination file if exists already deleteFile(file) # fetch from url try: opener = FancyURLopener() # add headers if supplied if not headers.has_key('User-Agent') and not headers.has_key('User-agent'): headers['User-Agent'] = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' for name, value in headers.items(): opener.addheader(name, value) fn, resp = opener.retrieve(safe_url, file, data=params) # print fn, resp content_type = resp.get("Content-Type",'').lower() # fail if expecting an image but not correct type returned if isBinary and (find(content_type,"text") != -1): raise "Not Binary" opener.close() del opener urlcleanup() except IOError, errobj: ErrorCode(errobj)
class FetchThread(Thread):
    """Worker thread that scrapes qwantz.com comics and their title text.

    Pulls comic numbers from master.toget, records titles in master.titles
    and downloads the PNGs.  Stops after 7 consecutive non-200 responses.
    """

    def __init__(self, master):
        Thread.__init__(self)
        self.master = master
        # Persistent HTTP connection, re-created on failure in fetch().
        self.ht = httplib.HTTPConnection('www.qwantz.com')
        self.ht.connect()
        self.getter = FancyURLopener()

    def run(self):
        notdone = True
        while notdone:
            r = self.fetch()
            # Track consecutive failures; None (already-done) resets nothing
            # but does not count as a failure either.
            with self.master.fourLock:
                if r != 200 and r is not None:
                    self.master.fourofours += 1
                else:
                    self.master.fourofours = 0
                notdone = self.master.fourofours < 7
            if r == 200:
                with self.master.writeLock:
                    self.master.writeTitles()

    def fetch(self):
        # Grab the next comic number under the queue lock.
        with self.master.numLock:
            n = self.master.toget.pop(0)
        justTitle = False
        if os.path.isfile(pjoin(basedir, "dinocomics%06i.png" % (n))):
            if n in self.master.titles.iterkeys():
                # Image and title both present: nothing to do.
                return None
            else:
                # Image present, title missing: only scrape the title.
                justTitle = True
        ht = self.ht
        ht.request("GET", "/index.php?comic=%i" % n)
        try:
            r = ht.getresponse()
        except:
            # Connection died: rebuild it and retry the response once.
            ht = httplib.HTTPConnection('www.qwantz.com')
            ht.connect()
            self.ht = ht
            time.sleep(1)
            r = ht.getresponse()
        if r.status != 200:
            if r.status in (404, 302):
                print >> stderr, n, "No Comic"
            else:
                print >> stderr, n, "FAILED: %i" % r.status
                # Unexpected failure: start from a fresh connection next time.
                ht = httplib.HTTPConnection('www.qwantz.com')
                ht.connect()
                self.ht = ht
            return r.status
        s = r.read()
        # Locate the comic <img> tag, then its src and title attributes.
        m = re.search('\<img *src *= *"http://www.qwantz.com/comics.*?\>', s, re.S)
        if m is None:
            print >> stderr, n, "no match!1"
            return r.status
        img = m.group()
        m = re.search('src *= *".*?"', img, re.S)
        if m is None:
            print >> stderr, n, "no match!2"
            return r.status
        href = re.search('".*?"', m.group(), re.S).group()[1:-1]
        m = re.search('title *= *".*?"', img, re.S)
        if m is None:
            print >> stderr, n, "no match!3"
            return r.status
        title = re.search('".*"', m.group(), re.S).group()[1:-1].strip()
        title = unescape(title)
        # Normalise line endings to literal "\n" and collapse repeats.
        title = title.replace("\r\n", "\\n")
        title = title.replace("\n", "\\n")
        while True:
            prevtitle = title
            title = title.replace("\\n\\n", "\\n")
            if prevtitle == title:
                break
        with self.master.titleLock:
            self.master.titles[n] = title
        print >> stderr, n, title
        if not justTitle:
            self.getter.retrieve(href, "%s/dinocomics%06i.png" % (basedir, n))
        return r.status
from rovin.belex.BelexParser import BelexParser from urllib import FancyURLopener from os import path url = "http://www.ejustice.just.fgov.be/cgi_loi/loi_a1.pl?language=nl&table_name=wet&la=N&cn=1994021730&&caller=list&N&fromtab=wet" filename = "constitution-nl.html" if not path.isfile(filename): print "Downloading", url downloader = FancyURLopener() downloader.retrieve(url, filename) f = open(filename, 'r') html = f.read() parser = BelexParser() parser.feed(html) for article in parser.all_articles(): print " <<<< " print article.number print "BODY:" + article.body print " >>>> "
import urllib from urllib import FancyURLopener storePath = "/home/richard/media/Share/GameVideo" projectPathBase = "/home/richard/workspace/VideoSpider" with open("%s/pyspider/aipaiImg.list" % projectPathBase, "r") as listFile: i = 0 for imgsrc in listFile: sp = imgsrc.split(":h") opener = FancyURLopener({}) opener.version = 'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11' opener.retrieve("h"+sp[1], "%s/%s" % (storePath, sp[0])) i = i+1 print i, print sp[0]