def DownloadUpdate(self, file):
    self.log('Downloading: %s' % file)
    dirfile = os.path.join(self.UpdateTempDir, file)
    dirname, filename = os.path.split(dirfile)
    if not os.path.isdir(dirname):
        try:
            os.makedirs(dirname)
        except:
            self.log('Error creating directory: ' + dirname)
    url = self.SVNPathAddress + urllib.quote(file)
    try:
        if re.findall(".xbt", url):
            self.totalsize = int(re.findall("File length: ([0-9]*)", urllib2.urlopen(url + "?view=log").read())[0])
        urllib.urlretrieve(url.decode("utf-8"), dirfile.decode("utf-8"))
        self.DownloadedFiles.append(urllib.unquote(url))
        return 1
    except:
        # Retry once after a short pause before giving up.
        try:
            time.sleep(2)
            if re.findall(".xbt", url):
                self.totalsize = int(re.findall("File length: ([0-9]*)", urllib2.urlopen(url + "?view=log").read())[0])
            urllib.urlretrieve(url.decode("utf-8"), dirfile.decode("utf-8"))
            self.DownloadedFiles.append(urllib.unquote(url))
            return 1
        except:
            self.log("Download failed: %s" % url)
            self.DownloadFailedFiles.append(urllib.unquote(url))
            return 0
def setup_client(domain, title):
    """ Download tar ball from github, unpack it and set it up. """
    if os.path.exists(CLIENT_DIR):
        rmtree(CLIENT_DIR)
    os.makedirs(CLIENT_DIR)
    urllib.urlretrieve(
        "https://github.com/emory-libraries-ecds/OpenTourBuilder-Client/releases/download/1.0.1/OpenTourBuilder-Client.tar.gz",
        "%sOpenTourBuilder-Client.tar.gz" % CLIENT_DIR)
    tar = tarfile.open('%sOpenTourBuilder-Client.tar.gz' % CLIENT_DIR)
    tar.extractall(CLIENT_DIR)
    tar.close()

    # Point the Ember app at the API host for this domain.
    js_file = '%sassets/open-tour-builder-ember.js' % CLIENT_DIR
    tmp_js_file = 'tmp.js'
    move(js_file, tmp_js_file)
    new_js = open(js_file, 'w+')
    for line in open(tmp_js_file, 'r'):
        line = line.replace('$api-host', 'api.%s' % domain)
        new_js.write(line)
    new_js.close()
    os.remove(tmp_js_file)

    # Fill in the tour title and API host in the landing page.
    index_file = '%sindex.html' % CLIENT_DIR
    tmp_index_file = 'tmp.html'
    move(index_file, tmp_index_file)
    new_index = open(index_file, 'w+')
    for line in open(tmp_index_file, 'r'):
        line = line.replace('$tour-title', title)
        line = line.replace('%24api-host', 'http://api.%s' % domain)
        new_index.write(line)
    new_index.close()
    os.remove(tmp_index_file)
def main():
    # set up loop
    start_date = datetime.date(2003, 01, 01)
    end_date = datetime.date(2008, 12, 31)
    d = start_date
    delta = datetime.timedelta(days=1)
    while d <= end_date:
        # set up url
        url = 'http://www.cloud-net.org/quicklooks/data/chilbolton/products/classification/' \
            + str(d.strftime("%Y")) \
            + '/' + str(d.strftime("%Y%m%d")) \
            + '_chilbolton_classification.png'
        # check if the file exists on the server
        code = urllib.urlopen(url).code
        if (code / 100 >= 4):
            print "No data for ", str(d.strftime("%Y%m%d")), ' continuing...'
            d += delta
            continue
        # retrieve the image
        urllib.urlretrieve(url, str(d.strftime("%Y%m%d")) + '.png')
        # increment date
        print 'Successfully retrieved ', str(d.strftime("%Y%m%d")), ' moving on...'
        d += delta
def ftp_download():
    ftp_host = 'ftp.uniprot.org'
    ftp_user = '******'
    ftp_pass = ''
    ftp_path = '/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes'
    ftp = FTP(ftp_host)
    ftp.login(ftp_user, ftp_pass)
    ftp.getwelcome()
    ftp.cwd(ftp_path)
    dirs = ftp.nlst()
    # print(dirs)
    p = 0
    # Navigate to the required directory and thereby download data.
    for dir in dirs:
        if re.search(species, dir):
            path = ftp_path + '/' + str(species)
            # print(path)
            ftp.cwd(path)
            types = ftp.nlst()
            for x in types:
                if not re.search('DNA.fasta.gz', x) and re.search('fasta.gz', x):
                    final = path + '/' + str(x)
                    # print(final)
                    fullfilename = os.path.join(store + str(x))
                    urllib.urlretrieve('ftp://' + ftp_host + str(final), fullfilename)
                    p += 1
                else:
                    pass
    print("Number of viruses: " + str(p))
    print(ftp.pwd())
def getAndUncompress(libraries):
    for libname, url, inNewDirectory in libraries:
        print '_' * 80
        print '--', libname
        parts = url.split('/')
        filename = [p for p in parts if len(getKnowExtensions(p))]
        #if len(filename) == 0:
        #    print '-'*40
        #    print 'No filename with a recognized extension in "'+libname+'" url="'+url+'"'
        #    print '-'*40
        #    continue
        filename = filename[0]
        print url, ' -> ', filename
        ext = getKnowExtensions(filename)[0]
        current_file = filename
        try:
            if os.path.isfile(filename):  # if not already downloaded
                print 'Already downloaded: ', filename
            else:
                urllib.urlretrieve(url, filename, dlProgress)
            dirname = filename[:-len(ext) - 1]
            #if os.path.isdir(libname) or os.path.islink(libname):  # if not already uncompressed
            if os.path.isdir(filename[:-len(ext) - 1]):
                print 'Already uncompressed : ', dirname
            else:
                uncompress(filename, ext, inNewDirectory, libname)
        except Exception, e:
            print 'uncompress error (', str(e), ')'
def wget(url, saveto):
    print "get", url, "to", saveto
    import urllib
    if url.endswith(".gif"):
        prefix = saveto.rsplit(".", 1)[0]
        saveto = prefix + ".png"
        txt_name = prefix + ".txt"
        import gif2strip
        if not (os.path.exists(txt_name) or os.path.exists(saveto)):
            try:
                gif2strip.go(url, saveto)
            except (urllib2.HTTPError, urllib2.URLError):
                pass
    elif url.endswith(".mp3"):
        prefix = saveto.rsplit(".", 1)[0]
        saveto = prefix + ".ogg"
        if not os.path.exists(saveto):
            urllib.urlretrieve(url.replace(" ", "%20"), "mp3ogg/input.mp3")
            subprocess.call(["mp3ogg\mpg123.exe", "-w", "mp3ogg\output.wav", "mp3ogg\input.mp3"])
            subprocess.call(["mp3ogg\oggenc2.exe", "mp3ogg\output.wav", "mp3ogg\output.ogg", "--resample=44100"])
            f = open("mp3ogg/output.ogg", "rb")
            o = f.read()
            f.close()
            f = open(saveto, "wb")
            f.write(o)
            f.close()
    elif not os.path.exists(saveto):
        print "retrieving"
        urllib.urlretrieve(url.replace(" ", "%20"), saveto)
    return saveto
def FetchRevision(context, rev, filename, quit_event=None, progress_event=None):
    """Downloads and unzips revision |rev|.

    @param context A PathContext instance.
    @param rev The Chromium revision number/tag to download.
    @param filename The destination for the downloaded file.
    @param quit_event A threading.Event which will be set by the master thread to
                      indicate that the download should be aborted.
    @param progress_event A threading.Event which will be set by the master thread to
                      indicate that the progress of the download should be displayed.
    """
    def ReportHook(blocknum, blocksize, totalsize):
        if quit_event and quit_event.isSet():
            raise RuntimeError('Aborting download of revision %s' % str(rev))
        if progress_event and progress_event.isSet():
            size = blocknum * blocksize
            if totalsize == -1:  # Total size not known.
                progress = 'Received %d bytes' % size
            else:
                size = min(totalsize, size)
                progress = 'Received %d of %d bytes, %.2f%%' % (
                    size, totalsize, 100.0 * size / totalsize)
            # Send a \r to let all progress messages use just one line of output.
            sys.stdout.write('\r' + progress)
            sys.stdout.flush()

    download_url = context.GetDownloadURL(rev)
    try:
        urllib.urlretrieve(download_url, filename, ReportHook)
        if progress_event and progress_event.isSet():
            print
    except RuntimeError:
        pass
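# Illustrative usage sketch (not from the original source): FetchRevision is
# normally driven from a coordinating thread; the revision number, file name
# and PathContext below are placeholders, not values used by the real tool.
#
# import threading
# quit_event = threading.Event()
# progress_event = threading.Event()
# progress_event.set()  # ask ReportHook to print progress
# FetchRevision(context, 12345, 'chrome-rev12345.zip', quit_event, progress_event)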
def test_CalibrationDifference1(self):
    """ Ideally you should have several levels of tests.  At the lowest level
    tests should exercise the functionality of the logic with different inputs
    (both valid and invalid).  At higher levels your tests should emulate the
    way the user would interact with your code and confirm that it still works
    the way you intended.
    One of the most important features of the tests is that it should alert
    other developers when their changes will have an impact on the behavior of
    your module.  For example, if a developer removes a feature that you depend
    on, your test should break so they know that the feature is needed.
    """
    self.delayDisplay("Starting the test")
    #
    # first, get some data
    #
    import urllib
    downloads = (
        ('http://slicer.kitware.com/midas3/download?items=5767', 'FA.nrrd', slicer.util.loadVolume),
        )

    for url, name, loader in downloads:
        filePath = slicer.app.temporaryPath + '/' + name
        if not os.path.exists(filePath) or os.stat(filePath).st_size == 0:
            print('Requesting download %s from %s...\n' % (name, url))
            urllib.urlretrieve(url, filePath)
        if loader:
            print('Loading %s...\n' % (name,))
            loader(filePath)
    self.delayDisplay('Finished with download and loading\n')

    volumeNode = slicer.util.getNode(pattern="FA")
    logic = CalibrationDifferenceLogic()
    self.assertTrue(logic.hasImageData(volumeNode))
    self.delayDisplay('Test passed!')
def download(origin, destination):
    '''
    download the corresponding atis file from
    http://www-etud.iro.umontreal.ca/~mesnilgr/atis/
    '''
    print('Downloading data from %s' % origin)
    urllib.urlretrieve(origin, destination)
def downloadFile(url, dest):
    try:
        urllib.urlretrieve(url, dest)
    except Exception, e:
        dialog = xbmcgui.Dialog()
        main.ErrorReport(e)
        dialog.ok("Mash Up", "Report the error below at " + main.supportsite,
                  str(e), "We will try our best to help you")
def downloader_is(url, name, showProgress=True):
    import downloader, extract
    addonsDir = xbmc.translatePath(os.path.join('special://home', 'addons')).decode("utf-8")
    packageFile = os.path.join(addonsDir, 'packages', 'isr.zip')
    '''
    try:
        os.remove(packageFile)
    except:
        pass
    '''
    if showProgress:
        dp = xbmcgui.DialogProgress()
        dp.create(AddonName, "Downloading", name, "Please Wait")
        downloader.download(url, packageFile, dp)
        dp.update(0, "", "Extracting Zip Please Wait")
        extract.all(packageFile, addonsDir, dp)
    else:
        urllib.urlretrieve(url, packageFile)
        extract.all(packageFile, addonsDir)
    try:
        os.remove(packageFile)
    except:
        pass
    xbmc.executebuiltin("UpdateLocalAddons")
    xbmc.executebuiltin("UpdateAddonRepos")
def genericRetrieve(self, url, root, name):
    '''Fetch the gzipped tarfile indicated by url and expand it into root
       - All the logic for removing old versions, updating etc. must move'''
    # get the tarball file name from the URL
    filename = os.path.basename(urlparse.urlparse(url)[2])
    localFile = os.path.join(root, '_d_' + filename)
    ext = os.path.splitext(localFile)[1]
    if ext not in ['.bz2', '.tbz', '.gz', '.tgz', '.zip', '.ZIP']:
        raise RuntimeError('Unknown compression type in URL: ' + url)
    self.logPrint('Downloading ' + url + ' to ' + localFile)
    if os.path.exists(localFile):
        os.unlink(localFile)
    try:
        urllib.urlretrieve(url, localFile)
    except Exception, e:
        failureMessage = '''\
Unable to download package %s from: %s
* If URL specified manually - perhaps there is a typo?
* If your network is disconnected - please reconnect and rerun ./configure
* Or perhaps you have a firewall blocking the download
* Alternatively, you can download the above URL manually, to /yourselectedlocation/%s
  and use the configure option:
  --download-%s=/yourselectedlocation/%s
''' % (name, url, filename, name.lower(), filename)
        raise RuntimeError(failureMessage)
def updateServer():
    stopServer()
    time.sleep(2)
    # The remote archive is a .rar; save it locally as .rar, then rename it to
    # .zip before extracting, matching the rename below.
    if actionfile == "core":
        urllib.urlretrieve("http://www.armafiles.info/" + "dayz/dayz_v" + codever + ".rar",
                           "..\\@dayz\\Addons\\" + actionfile + ".rar")
    else:
        urllib.urlretrieve("http://www.armafiles.info/" + "dayz/dayz_" + actionfile + "_v" + codever + ".rar",
                           "..\\@dayz\\Addons\\" + actionfile + ".rar")
    time.sleep(1)
    os.rename("..\\@dayz\\Addons\\" + actionfile + ".rar", "..\\@dayz\\Addons\\" + actionfile + ".zip")
    zip = zipfile.ZipFile("..\\@dayz\\Addons\\" + actionfile + ".zip")
    zip.extractall(path="..\\@dayz\\Addons\\")
    print "Unzipped!"
    if actionfile == "code":
        filename = "..\\" + "cfgdayz" + server + "\\server.cfg"
        writestring = 'hostname = "DayZ Zombie RPG - NZ (v' + codever + ') www.dayzmod.com";\n'
        f = open(filename, 'r')
        lines = f.readlines()
        f.close()
        f = open(filename, 'w')
        f.write(writestring)
        f.write(''.join(lines[1:]))
        f.close()
    startServer()
def showInfo(self):
    if self.check == "true" and self.menulist:
        m_title = self["menulist"].getCurrent()[0][0]
        m_url = self["menulist"].getCurrent()[0][1]
        if m_url:
            #m_url = re.findall('(.*?)\.', m_url)
            #extra_imdb_convert = "._V1_SX320.jpg"
            #m_url = "http://ia.media-imdb.com/images/%s%s" % (m_url[0], extra_imdb_convert)
            print "EMC iMDB: Download Poster - %s" % m_url
            urllib._urlopener = AppURLopener()
            urllib.urlretrieve(m_url, self.path)
            urllib.urlcleanup()
            if os.path.exists(self.path):
                self.poster_resize(self.path, m_title)
                #ptr = LoadPixmap(self.path)
                #if ptr is None:
                #    ptr = LoadPixmap("/usr/lib/enigma2/python/Plugins/Extensions/EnhancedMovieCenter/img/no_poster.png")
                #    print "EMC iMDB: Load default NO Poster."
                #if ptr is not None:
                #    self["poster"].instance.setPixmap(ptr)
                #    print "EMC iMDB: Load Poster - %s" % m_title
            else:
                print "EMC iMDB: No url found for - %s" % m_title
        else:
            print "EMC iMDB: No url found for - %s" % m_title
def load(self):
    ids_filename = os.path.join(os.getenv("DATA_PATH"), "openml", "ids.pkl")
    if not os.path.exists(ids_filename):
        url = "http://www.openml.org/api_query/?{0}".format(urllib.urlencode({"q": self.query}))
        result = get_result_as_dict(url)
        f = open(ids_filename, "w")
        pickle.dump(result, f)
    else:
        result = pickle.load(open(ids_filename, "r"))
    ds_ids = [int(r[0]) for r in result["data"]]
    for ds_id in ds_ids:
        if self.verbose:
            print("retrieving {0}...".format(ds_id))
        url_desc = "http://www.openml.org/d/{0}/json".format(ds_id)
        u = urllib.urlopen(url_desc)
        desc = json.load(u)
        u.close()
        if "arff" not in desc["url"]:
            if self.verbose:
                print("skipping {0}...".format(ds_id))
            continue
        filename = os.path.join(os.getenv("DATA_PATH"), "openml", "{0}.arff".format(ds_id))
        if not os.path.exists(filename):
            urllib.urlretrieve(desc["url"], filename)
def download_file_no_requests(url, path, overwrite=False):
    """
    This function ...
    :param url:
    :param path:
    :param overwrite:
    :return:
    """
    # Get the name of the file
    filename = fs.name(url)

    # Determine the local path to the file
    filepath = fs.join(path, filename) if fs.is_directory(path) else path

    # Check filepath
    if fs.is_file(filepath):
        if overwrite:
            fs.remove_file(filepath)
        else:
            raise IOError("File is already present: " + filepath)

    # Debugging
    log.debug("Downloading '" + filename + "' to '" + path + "' ...")
    log.debug("URL: " + url)

    # Download
    urllib.urlretrieve(url, filepath)

    # Return the file path
    return filepath
def retrieve_jquery_ui(self):
    if not os.path.exists(self.relative_path(self.JQUERY_UI_FILENAME)):
        urllib.urlretrieve(self.JQUERY_UI_LOCATION,
                           self.relative_path(self.JQUERY_UI_FILENAME))
    if os.path.exists(self.relative_path('jquery-ui')):
        shutil.rmtree(self.relative_path('jquery-ui'))
    zip = zipfile.ZipFile(self.relative_path(self.JQUERY_UI_FILENAME))
    zip.extractall(self.relative_path('jquery-ui'))
    for path in ('js', 'development-bundle/external', 'development-bundle/ui/minified'):
        shutil.rmtree(self.relative_path('jquery-ui', *path.split('/')))
    for path in ('ui', 'themes'):
        os.rename(self.relative_path('jquery-ui', 'development-bundle', path),
                  self.relative_path('jquery-ui', path))
    shutil.rmtree(self.relative_path('jquery-ui', 'development-bundle'))
    os.unlink(self.relative_path('jquery-ui', 'index.html'))
    for path in os.listdir(self.relative_path('jquery-ui', 'ui')):
        if path.startswith('jquery-ui-') and path.endswith('.custom.js'):
            os.symlink(os.path.join('ui', path),
                       self.relative_path('jquery-ui', 'jquery-ui.js'))
    for path in os.listdir(self.relative_path('jquery-ui', 'css', 'smoothness')):
        if path.startswith('jquery-ui-') and path.endswith('.custom.css'):
            os.symlink(os.path.join(path),
                       self.relative_path('jquery-ui', 'css', 'smoothness', 'jquery-ui.css'))
    minify = [self.relative_path('jquery-ui', 'jquery-ui.js'),
              self.relative_path('jquery-ui', 'css', 'smoothness', 'jquery-ui.css')]
    for path in minify:
        with open(path + '.min', 'w') as stdout:
            subprocess.call(['java', '-jar',
                             self.relative_path('jquery', 'build', 'yuicompressor-2.4.2.jar'),
                             path], stdout=stdout)
def asLegend(self):
    """ Return a legend graphic image for this model from the cache directory.
    If it is empty it makes a wms request and populates the directory. """
    myFileName = str(self.id) + "." + str(self.image_format)
    myLocalPath = os.path.join(settings.LEGEND_IMAGE_ROOT, myFileName)
    myWebPath = os.path.join(settings.LEGEND_IMAGE_URL, myFileName)
    if not os.path.exists(myLocalPath):
        logging.info('Get Legend Graphic not cached...fetching....')
        # the image is not in the expected dir, get it from a WMS request..
        '''
        if (not self.legend_style is None) and (self.legend_style != ''):
            myFetchUrl = """%s?request=GetLegendGraphic&style=%s&version=1.0.0&format=image/png&width=20&height=20&layer=%s""" % (
                self.url, self.legend_style, self.layers)
        else:
            myFetchUrl = """%s?request=GetLegendGraphic&version=1.0.0&format=image/png&width=20&height=20&layer=%s""" % (
                self.url, self.layers)
        '''
        if re.search('\?', self.url):
            question_mark = '&'
        else:
            question_mark = '?'
        myFetchUrl = """%s%srequest=GetLegendGraphic&version=1.0.0&format=image/png&width=20&height=20&layer=%s""" % (
            self.url, question_mark, self.layers)
        if (not self.legend_style is None) and (self.legend_style != ''):
            myFetchUrl += r'&style=' + self.legend_style
        # .. and store into filesystem
        logging.info('GET %s -> %s' % (myFetchUrl, myLocalPath))
        urllib.urlretrieve(myFetchUrl, myLocalPath)
    else:
        logging.info('Using cached LegendGraphic %s' % myLocalPath)
    myUrl = """<img src="%s" id="legend-image-%s" class="legend-image"/>""" % (myWebPath, self.id)
    return myUrl
def download_planck():
    from urllib import urlretrieve
    basepath = 'http://irsa.ipac.caltech.edu/data/Planck/release_1/all-sky-maps/maps/'
    file = 'HFI_SkyMap_217_2048_R1.10_nominal.fits'
    url = basepath + file
    savename = datadir + file
    urlretrieve(url, savename)
def getimg(aaa):
    reg = re.compile(r'<img.*?src=".*?" data-lazyload-src="(.*?)".*?>')
    l = re.findall(reg, aaa)
    temp = 101
    for i in l:
        temp += 1
        urllib.urlretrieve(i, '/home/guoweikuang/图片/%s.jpg' % temp)
def download_images(img_urls, dest_dir):
    """Given the urls already in the correct order, downloads
    each image into the given directory.
    Gives the images local filenames img0, img1, and so on.
    Creates an index.html in the directory
    with an img tag to show each local image file.
    Creates the directory if necessary.
    """
    # Build directory if it does not currently exist.
    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir)
    index = file(os.path.join(dest_dir, 'index.html'), 'w')
    index.write('<html><body>\n')
    for x, img_url in enumerate(img_urls):
        dest_file = os.path.join(dest_dir, "img%d.jpg" % x)  # rename
        try:
            print "Retrieving.....%s" % img_url
            urllib.urlretrieve(img_url, dest_file)  # downloads the url data to the given file path
        except IOError:
            print "Problem reading url: %s" % img_url
        else:
            index.write('<img src="%s">' % os.path.abspath(dest_file))
    index.write('\n</body></html>\n')
    index.close()
    return
def fetch(url):
    fetchFail = True
    failCount = 0
    while fetchFail:
        try:
            urlretrieve(url, 'myfile')
            data = gzip.open('myfile', 'rb').read()
            try:
                data_decoded = data.decode('gbk')
            except Exception as e:
                data_decoded = data
                print("error decoding")
            fetchFail = False
        except Exception as e:
            failCount += 1
            print "attempt " + str(failCount) + " failed"
            print "sleeping for 1 seconds before retrying..."
            if failCount >= 20:
                return ""
            time.sleep(1)
    return data_decoded
def imagetitan_save_image(self, imggrp, imgmiddle, imgname):
    # generate just the filename of the image to be locally saved
    savefile = join(self.basedir, imgname)
    # generate the url of the image
    download_url = 'http://' + imggrp + '.imagetitan.com/' + imggrp + imgmiddle + imgname
    # finally save the image in the desired directory
    urlretrieve(download_url, savefile)
def update(params):
    # Download the ZIP
    xbmc.output("[updater.py] update")
    xbmc.output("[updater.py] cwd=" + os.getcwd())
    remotefilename = REMOTE_FILE + params.get("version") + ".zip"
    localfilename = LOCAL_FILE + params.get("version") + ".zip"
    xbmc.output("[updater.py] remotefilename=%s" % remotefilename)
    xbmc.output("[updater.py] localfilename=%s" % localfilename)
    xbmc.output("[updater.py] descarga fichero...")
    inicio = time.clock()
    urllib.urlretrieve(remotefilename, localfilename)
    fin = time.clock()
    xbmc.output("[updater.py] Descargado en %d segundos " % (fin - inicio + 1))

    # Unzip it
    xbmc.output("[updater.py] descomprime fichero...")
    import ziptools
    unzipper = ziptools.ziptools()
    destpathname = DESTINATION_FOLDER
    xbmc.output("[updater.py] destpathname=%s" % destpathname)
    unzipper.extract(localfilename, destpathname)

    # Delete the downloaded zip
    xbmc.output("[updater.py] borra fichero...")
    os.remove(localfilename)
def cacheSong(self, songNumber):
    info = self.songinfo[songNumber]
    if not info["caching"]:
        print "Caching song %s" % info['title']
        info["caching"] = True
        urllib.urlretrieve(str(info['url']),
                           os.path.join(CachePath, "%s.mp3" % info['title']))
        info["localpath"] = os.path.join(CachePath, "%s.mp3" % info['title'])
def download_images(img_urls, dest_dir):
    """Given the urls already in the correct order, downloads
    each image into the given directory.
    Gives the images local filenames img0, img1, and so on.
    Creates an index.html in the directory
    with an img tag to show each local image file.
    Creates the directory if necessary.
    """
    # prepare output directory - if it is not there already, create it
    if not (os.path.exists(dest_dir)):
        os.mkdir(dest_dir)
    i = 0
    imgstr = ''
    for slice in img_urls:
        # use a simple naming scheme for new image files and make sure they end up in the given directory
        outfile = 'img' + str(i) + '.png'
        imgstr += '<img src="' + outfile + '">'
        outfile = os.path.join(dest_dir, outfile)
        print "Fetching " + outfile + ' ' + os.path.basename(slice)
        urllib.urlretrieve('http://code.google.com/' + slice, outfile)
        i += 1
    print "All image strips downloaded"
    f = open(os.path.join(dest_dir, 'index.html'), 'w')
    f.write('<html><body>')
    f.write(imgstr)
    f.write('</body></html>')
    f.close()
def download_images(img_urls, dest_dir):
    """Given the urls already in the correct order, downloads
    each image into the given directory.
    Gives the images local filenames img0, img1, and so on.
    Creates an index.html in the directory
    with an img tag to show each local image file.
    Creates the directory if necessary.
    """
    # Verify Directory Exists. If not create it.
    print 'Verifying Directory: ' + dest_dir
    if os.path.exists(dest_dir) == False:
        print 'Creating Directory: ' + dest_dir
        os.makedirs(dest_dir)

    # Download Image Files with incremented name and create IMG html line
    f = open(dest_dir + '/index.html', 'a')
    img_html = []
    img_count = len(img_urls)
    i = 0
    for img in img_urls:
        i = i + 1
        print 'Retrieving File ' + str(i) + ' of ' + str(img_count) + '.'
        urllib.urlretrieve(img, dest_dir + '/img' + str(i))
        img_html.append('<img src=\"img' + str(i) + '\">')

    # Create index.html file
    print 'Writing index.html'
    f.write('<verbatim>')
    f.write('<html>')
    f.write('<body>')
    f.write(''.join(img_html))
    f.write('</body>')
    f.write('</html>')
    f.close()
def reload_indexp(working_dir, cycle):

    def execute_file(filename):
        contents = " ".join([line for line in open(filename, 'r') if line[0:2] != '--'])
        statements = contents.split(';')[:-1]  # split on semi-colon. Last element will be trailing whitespace
        for statement in statements:
            log.info("Executing %s" % statement)
            c.execute(statement)

    try:
        working_dir = os.path.expanduser(working_dir)
        if not os.path.isdir(working_dir):
            os.makedirs(working_dir)

        log = set_up_logger('indexp_importer', working_dir, 'IndExp Importer Fail')

        local_file_path = os.path.join(working_dir, LOCAL_FILE)
        log.info("downloading %s to %s..." % (DOWNLOAD_URL.format(cycle), local_file_path))
        urllib.urlretrieve(DOWNLOAD_URL.format(cycle), local_file_path)

        log.info("uploading to table %s..." % TABLE_NAME)
        c = connection.cursor()
        c.execute("insert into fec_indexp_out_of_date_cycles (cycle) values ({})".format(cycle))
        c.execute("DELETE FROM %s" % TABLE_NAME)
        c.copy_expert("COPY %s (candidate_id, candidate_name, spender_id, spender_name, election_type, candidate_state, candidate_district, candidate_office, candidate_party, amount, date, aggregate_amount, support_oppose, purpose, payee, filing_number, amendment, transaction_id, image_number, received_date, prev_file_num) FROM STDIN CSV HEADER" % TABLE_NAME, open(local_file_path, 'r'))
        c.execute("update {} set cycle = {}".format(TABLE_NAME, cycle))
        execute_file(SQL_POSTLOAD_FILE)
        c.execute("delete from fec_indexp_out_of_date_cycles")

        log.info("Import Succeeded.")
    except Exception as e:
        log.error(e)
        raise
def searchcode(url, regex):
    code = urlopen(url).read()
    result = ""
    try:
        regexresults = re.search(regex, str(code))
        result = str(regexresults.group(0))
        if result is not None:
            if args.url is True:
                logger.info(" " + str(url))
            if args.verbose is True:
                logger.info(" [+] Found the following results")
                logger.info(" " + str(result))
            if args.write_file:
                if result == "":
                    pass
                else:
                    f = open(args.write_file, "a")
                    f.write(str(result + "\n"))
                    f.close()
            if args.directory:
                filename = args.directory + "/" + url.replace("/", "-")
                if not os.path.exists(args.directory):
                    os.makedirs(args.directory)
                logger.info(" [+] Downloading " + filename)
                urlretrieve(url, filename)
                fp = open(filename, "wb")
                fp.write(code)
                fp.close()
        else:
            pass
    except:
        pass
def get_amalgamation():
    """Download the SQLite amalgamation if it isn't there, already."""
    if os.path.exists(AMALGAMATION_ROOT):
        return
    os.mkdir(AMALGAMATION_ROOT)
    print "Downloading amalgamation."

    # XXX upload the amalgamation file to a somewhat more
    # official place
    amalgamation_url = ("http://futeisha.org/sqlcipher/"
                        "amalgamation-sqlcipher-2.1.0.zip")

    # and download it
    print 'amalgamation url: %s' % (amalgamation_url,)
    urllib.urlretrieve(amalgamation_url, "tmp.zip")

    zf = zipfile.ZipFile("tmp.zip")
    files = ["sqlite3.c", "sqlite3.h"]
    directory = zf.namelist()[0]
    for fn in files:
        print "Extracting", fn
        outf = open(AMALGAMATION_ROOT + os.sep + fn, "wb")
        outf.write(zf.read(directory + fn))
        outf.close()
    zf.close()
    os.unlink("tmp.zip")
try:
    FILE = sys.argv[2]
    PORT = sys.argv[1]
except:
    print "Usage:", sys.argv[0], "[comm port] [hex file or url]"
    sys.exit(-1)

# check if file is url
urlRegex = re.compile(r'^(?:http|ftp)s?://', re.IGNORECASE)
if urlRegex.match(FILE):
    # file is an url
    # download the file
    print "Downloading firmware..."
    FILE, _ = urllib.urlretrieve(FILE)
    print "Firmware downloaded to", FILE


def find_languagepack(filename):
    res = 0
    with open(filename, 'r') as f:
        for l in iter(f.readline, ''):  # need to use this to avoid buffering which screws up f.tell()
            l = l.strip()
            if l == HEX_TERMINATOR:
                if res == 0:
                    res = f.tell()
                else:
                    return res  # make sure to return res only if another one had been found before
    return None


def escapeFilename(filename):
def download(url, filename):
    urllib.urlretrieve(url, filename, reporthook)
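# The reporthook referenced above is not defined in this snippet. A minimal
# sketch of one, assuming the standard urllib.urlretrieve callback signature
# (block count so far, block size, total size in bytes, -1 when unknown):
import sys

def reporthook(blocknum, blocksize, totalsize):
    # Report cumulative progress on a single console line.
    received = blocknum * blocksize
    if totalsize > 0:
        percent = min(100.0, 100.0 * received / totalsize)
        sys.stdout.write('\rReceived %d of %d bytes (%.1f%%)'
                         % (min(received, totalsize), totalsize, percent))
    else:
        sys.stdout.write('\rReceived %d bytes' % received)
    sys.stdout.flush()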
print(card["name"] + " exists in master.csv. Skipping...") else: print(card["name"] + " is not in master.csv. Adding...") with open('master.csv', 'rU') as csvfile: masterreader = csv.reader(csvfile) ID = sum(1 for row in masterreader) mc = open('master.csv', 'a') writer=csv.writer(mc) myCsvRow = [ID, unicodedata.normalize('NFKD', card["name"]).encode('ascii', 'ignore'), card["multiverseid"]] if card['layout'] == 'split': myCsvRow = [ID, unicodedata.normalize('NFKD', card['names'][0] + '//' + card['names'][1]).encode('ascii', 'ignore'), card["multiverseid"]] writer.writerow(myCsvRow) mc.close() download_mtg_json() with open('AllSets-x.json') as data_file: data = json.load(data_file) #Build master.csv: ID/Name/MultiverseID for set in sets: for card in data[set]["cards"]: Add_card(card) #Download images from gatherer for all cards listed in master.csv: with open('master.csv', 'rU') as csvfile: masterreader = csv.reader(csvfile) for row in masterreader: mvid = row[2] localid = row[0] urllib.urlretrieve("http://gatherer.wizards.com/Handlers/Image.ashx?multiverseid=" + mvid + "&type=card", "pics/" + localid + ".jpg")
# Starter Code
import os
import urllib
import csv

url = 'https://raw.githubusercontent.com/fivethirtyeight/data/master/fandango/fandango_scrape.csv'
filename = 'fandango_scrape.csv'

if not os.path.isfile(filename):
    urllib.urlretrieve(url, filename)  # use requests if you have pip on your machine
else:
    pass

# raw_data = []
with open(filename, 'r') as f:
    reader = csv.reader(f, delimiter=',')
    raw_data = list(reader)
    # for row in reader:
    #     raw_data.append(row)
docgdata = docg.getfile().read()
doclinks = re.compile('"r"><a href="' + '(.*?)"' + ' ')
doclinksres = doclinks.findall(docgdata)
i = 0
y = 0
em = 0
print "[*]-Start downloading DOC(s) for domain:", domain_name
for p in doclinksres:
    if DocRes.count(p) == 0:
        DocRes.append(p)
        dlURL = str(p)
        dlDOC = domain_name + str(i) + ".doc"
        urllib.urlretrieve(dlURL, dlDOC)
        i = i + 1
        cmd = command + ' -author ' + dlDOC
        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        for line in p.stdout.readlines():
            if Users.count(line) == 0:
                Users.append(line)
                u = line.replace("Author :", "")
                print " |-" + dlURL + " (Author: " + u.rstrip('\n') + ")"
                y = y + 1
        doccmd2 = command2 + ' ' + dlDOC + ' ' + '|egrep "\w+([._-]\w)*@"' + domain_name
    The timestamp of the last probe, or 'undef' if the mirror was never probed.
    """
    line = line.strip()
    (url, age, status, probe_time, probe_history,
     state_history, last_probe) = line.split()
    if state_history in ("f", "z", "x"):
        bad_mirror_urls.append(url)
    else:
        good_mirror_urls.append(url)
    return good_mirror_urls, bad_mirror_urls


if __name__ == '__main__':
    (sites_path, headers) = urlretrieve(SITES_URL)
    with open(sites_path, "rb") as sites_file:
        state = 0
        state |= JUNK
        saved_line = None
        continents = []
        current_mirror = None
        for line in sites_file:
            line = line.strip()
            # skip empty lines
def property_list(request): address, city, state = "", "", "" if request.method == "POST": address = request.POST.get("address", "").replace(", United States", "").strip() city = request.POST.get("city", "").strip() state = request.POST.get("stateSelection", "").strip() if address and city: search_address = address + ", " + city + ", " + state elif not address and city: search_address = city + ", " + state if address and city: try: property = Property.objects.filter(address__icontains=address, city__icontains=city, state__icontains=state) if not property: zillow_property = zillow_offline_api(address, city, state) if not "Error" in zillow_property: for z_p in zillow_property: zpid = z_p.pop('zpid', None) u = User.objects.get_or_create( username="******")[0] u.set_password('zilLow@321') u.save() z_p['user'] = u property, created = Property.objects.get_or_create( **z_p) if created: zpid_data = zillow_zpid_detail(zpid) if not "Error" in zpid_data: # TODO save detail and image try: property.description = zpid_data[ 'description'] except: pass try: photo = Images( ) # set any other fields, but don't commit to DB (ie. don't save()) name = urlparse( zpid_data['image']).path.split( '/')[-1] content = urllib.urlretrieve( zpid_data['image'], settings.MEDIA_ROOT) photo.image.save(name, File(open( content[0])), save=True) PropertyImages.objects.create( property=property, image=photo) except: pass property.search_index = property.address.strip() + ", " + property.city.strip() \ + ", " + property.state.strip() + " " \ + property.zip_code.strip() property.save() if len(zillow_property) == 1: return HttpResponseRedirect( "/property/preview/" + str(property.id)) except: return render( request, 'home_page.html', { 'error': "please use proper address or use suggestion in dropdown as help." }) return render(request, 'property/property_list.html', { "address": address, "city": city, "state": state })
def obtain(dir_path):
    """
    Downloads the dataset to ``dir_path``.
    """

    dir_path = os.path.expanduser(dir_path)
    print 'Downloading the dataset'
    import urllib
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_u.mat', os.path.join(dir_path, 'mnist_corrupted_u.mat'))
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_v.mat', os.path.join(dir_path, 'mnist_corrupted_v.mat'))
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_valid_u.mat', os.path.join(dir_path, 'mnist_corrupted_valid_u.mat'))
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_valid_v.mat', os.path.join(dir_path, 'mnist_corrupted_valid_v.mat'))
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_test_u.mat', os.path.join(dir_path, 'mnist_corrupted_test_u.mat'))
    urllib.urlretrieve('http://www.cs.toronto.edu/~larocheh/public/datasets/corrupted_mnist/mnist_corrupted_test_v.mat', os.path.join(dir_path, 'mnist_corrupted_test_v.mat'))

    # Writing everything into text files, to allow for not loading the data into memory
    def write_to_txt_file(u, v, filename):
        f = open(filename, 'w')
        for u_t, v_t in zip(u, v):
            for i in range(len(u_t)):
                f.write(str(int(u_t[i] > 127)) + ' ')
            for i in range(len(v_t) - 1):
                f.write(str(int(v_t[i] > 127)) + ' ')
            f.write(str(int(v_t[-1] > 127)) + '\n')
        f.close()

    import scipy.io
    u = scipy.io.loadmat(os.path.join(dir_path, 'mnist_corrupted_u.mat'))['dat']
    v = scipy.io.loadmat(os.path.join(dir_path, 'mnist_corrupted_v.mat'))['dat']
    write_to_txt_file(u, v, os.path.join(dir_path, 'corrupted_mnist_train.txt'))

    u = scipy.io.loadmat(os.path.join(dir_path, 'mnist_corrupted_valid_u.mat'))['dat']
    v = scipy.io.loadmat(os.path.join(dir_path, 'mnist_corrupted_valid_v.mat'))['dat']
    write_to_txt_file(u, v, os.path.join(dir_path, 'corrupted_mnist_valid.txt'))

    u = scipy.io.loadmat(os.path.join(dir_path, 'mnist_corrupted_test_u.mat'))['dat']
    v = scipy.io.loadmat(os.path.join(dir_path, 'mnist_corrupted_test_v.mat'))['dat']
    write_to_txt_file(u, v, os.path.join(dir_path, 'corrupted_mnist_test.txt'))
    print 'Done '
def Get_Lesson(src, num):
    content = urllib2.urlopen(src).read()
    l_pattern = re.compile(r"http.*\.m3u8")
    lesson = re.findall(l_pattern, content)
    lesson_src = lesson[0].replace("m3u8", "mp4")
    urllib.urlretrieve(lesson_src, "Obama%s.mp4" % (num))
def download_links_from_page(page, extension="", output_dir="", ltf=False, base_url=""):
    """ Downloads files from some html page. You can specify that you only want
        files with a particular extension, and where to save them.

        NOTE: Currently it does not support downloading of files that are listed
        as relative links.

    Args:
        page:       (str) either a url, or the contents of an html page.
        extension:  (str) extension of file you want to download, eg "pdf".
                    If left blank, it will download ALL links on the page.
        output_dir: (str) what directory to save files to
        ltf:        (bool)(default=False) Link Text as Filename
                    If True, use the link text as the file name,
                    replacing any spaces with underscores.
                    If False, it uses the original remote file name.
        base_url:   (str)(default="") This is only used if `page` is not a url.
                    If page is a string containing the HTML content, then it is
                    a good idea to specify the base directory that the page came
                    from. Without it, any relative file paths to files in links
                    will fail to download.
    """
    # Creates the directory if it does not already exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Automatically detect if the content is a HTTP URL, if so, download the
    # html page content. Otherwise treat 'page' as an html page.
    url_was_provided = True if re.search("^(https://|http://)", page) else False
    if url_was_provided:
        html = urllib2.urlopen(page)
        base_url = os.path.split(page)[0]
    else:
        html = page
    soup = BeautifulSoup(html)

    # Returns all links
    if extension != "":
        links = soup.findAll('a', attrs={'href': re.compile(".{}$".format(extension))})
    else:
        links = soup.findAll('a')

    # --------------------------------------------------------------------------
    # DOWNLOAD EACH FILE AT A TIME
    # --------------------------------------------------------------------------
    unprocessed_links = []  # keep a list of files that could not be downloaded
    for link in links:
        link_url = link["href"]

        # Determine if it's an absolute path to a file using http or https
        full_url = True if re.search("^(https://|http://)", link_url) else False

        # Use base_url to convert to a full path if link_url is a relative path
        link_url = link_url if full_url else os.path.join(base_url, link_url)

        # ----------------------------------------------------------------------
        # Extract the link text to use as the file name, otherwise use the
        # remote filename
        # ----------------------------------------------------------------------
        if ltf:
            filename = (link.contents[0]).replace(" ", "_") + "." + extension
        else:
            filename = os.path.split(link_url)[1]

        # ----------------------------------------------------------------------
        # Download the file
        # ----------------------------------------------------------------------
        try:
            print("Downloading " + link_url)
            urllib.urlretrieve(link_url, os.path.join(output_dir, filename))
            print("--done")
        except:
            print("Could NOT download " + link_url)
            unprocessed_links.append(link_url)

    print("Done Downloading Files")
    if len(unprocessed_links) > 0:
        print("The following could NOT be downloaded" + "\n ".join([""] + unprocessed_links))

# download_links_from_page(url, extension="jpg", output_dir="/tmp/mydir", ltf=False)
opencommand = "gnome-open" if sys.platform == 'darwin': opencommand = "open" # download full PDF pdfpath = os.path.join('db', idstr, 'paper.pdf') urls = pub['FullVersionURL'] pdfurls = [u for u in urls if u.endswith('.pdf')] gotit = False print "All paper links:" for u in urls: print u for u in pdfurls: print "trying to retrieve: ", u try: urllib.urlretrieve(u, pdfpath) print "saved pdf at ", pdfpath try: print "opening the pdf using %s (%s) for your convenience to verify the download..." %(opencommand, sys.platform) os.system(opencommand + " " + pdfpath) except Error, e: print "%s failed. Make sure the downloaded %s pdf is correct." % (opencommand, pdfpath, ) isok = raw_input("download good? y/n: ") if isok=="y": gotit = True break except Exception, e: print "ERROR retrieving: ", e if not gotit: print "Couldn't get the paper pdf. Please download manually and save as %s." % (pdfpath, )
Page = urllib2.urlopen(DataURL).read()
Links = re.findall('<a href=(.*?)>.*?</a>', Page)
fast5Links = []
for Link in Links:
    if "fast5" in Link:
        fast5Links.append(Link)
print "Totally %d fast5 files" % len(fast5Links)

#################################### Download the files
fileNo = 100  # len(fast5Links)  # Max number of files to be downloaded
TryMax = 5    # Max download attempts
Try = 0       # Current attempt count
fileCount = 1
for fileName in fast5Links[0:fileNo]:
    fileName = fileName[1:-1]  # Get rid of the double quotation
    Try = 0
    while Try < TryMax:
        try:
            urllib.urlretrieve(DataURL + fileName, FileSaveLoc + fileName)
            fileSize = os.path.getsize(FileSaveLoc + fileName)
            print "Download %s (%d/%d) %s" % (fileName, fileCount, fileNo, convert_bytes(fileSize))
            break
        except IOError:
            print "Download data files,try again (%d/%d)" % (Try, TryMax)
            Try += 1
        except:
            print "Unexpected Error when downloading(%d/%d):" % (fileCount, fileNo), sys.exc_info()[0]
            Try += 1
    fileCount += 1
def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
    print("Downloading %s" % filename)
    urlretrieve(source + filename, filename)
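# A usage sketch, assuming the standard MNIST archive names served from the
# default source above (adjust the list if your mirror differs):
# for fname in ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz',
#               't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']:
#     download(fname)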
print "Downloaded %d of %d bytes (%3.1f%%)\r" \ % (sizeSoFar, totalSize, 100.0*float(sizeSoFar)/totalSize), if sizeSoFar == totalSize: sys.stdout.write("\n") sys.stdout.flush() for url, dirName in data.iteritems(): (baseUrl, fileName) = url.rsplit('/', 1) target = os.path.join(installPath, dirName) if not os.path.exists(target): os.makedirs(target) target = os.path.join(target, fileName) if not os.path.exists(target): print url + " -> " + target urllib.urlretrieve(baseUrl + "/" + urllib.quote(fileName), target, progress) if fileName.endswith(".tar.gz") or fileName.endswith(".tar.bz2") \ or fileName.endswith(".tar"): raw = tarfile.open(target) for m in raw.getmembers(): raw.extract(m, os.path.dirname(target)) #os.unlink(target) elif fileName.endswith(".gz"): raw = gzip.open(target, 'rb').read() open(os.path.splitext(target)[0], 'wb').write(raw) #os.unlink(target) elif fileName.endswith(".zip"): raw = zipfile.ZipFile(target, 'r') raw.extractall(os.path.dirname(target)) #os.unlink(target)
def cache_distro(build): """Cache the OpenDaylight distribution to package as RPM/Deb. :param build: Description of an RPM build :type build: dict :return str distro_tar_path: Path to cached distribution tarball """ # Specialize templates for the given build distro = distro_template.substitute(build) # Append file extensions to get ODL distro zip/tarball templates distro_tar = distro + ".tar.gz" distro_zip = distro + ".zip" # Prepend cache dir path to get template of full path to cached zip/tarball distro_tar_path = os.path.join(cache_dir, distro_tar) distro_zip_path = os.path.join(cache_dir, distro_zip) # Cache OpenDaylight tarball to be packaged if not os.path.isfile(distro_tar_path): if build["download_url"].endswith(".tar.gz"): print("Downloading: {}".format(build["download_url"])) urllib.urlretrieve(build["download_url"], distro_tar_path) print("Cached: {}".format(distro_tar)) # If download_url points at a zip, repackage as a tarball elif build["download_url"].endswith(".zip"): if not os.path.isfile(distro_zip): print("URL is to a zip, will download and convert to tar.gz") print("Downloading: {}".format(build["download_url"])) urllib.urlretrieve(build["download_url"], distro_zip_path) print("Downloaded {}".format(distro_zip_path)) else: print("Already cached: {}".format(distro_zip_path)) # Extract zip archive # NB: zipfile.ZipFile.extractall doesn't preserve permissions # https://bugs.python.org/issue15795 subprocess.call(["unzip", "-oq", distro_zip_path, "-d", cache_dir]) # Get files in cache dir cache_dir_ls_all = glob.glob(os.path.join(cache_dir, "*")) # Remove pyc files that may be newer than just-extracted zip cache_dir_ls = filter(lambda f: '.pyc' not in f, cache_dir_ls_all) # Get the most recent file in cache dir, hopefully unzipped archive unzipped_distro_path = max(cache_dir_ls, key=os.path.getctime) print("Extracted: {}".format(unzipped_distro_path)) # Remove path from unzipped distro filename, as will cd to dir below unzipped_distro = os.path.basename(unzipped_distro_path) # Using the full paths here creates those paths in the tarball, which # breaks the build. There's a way to change the working dir during a # single tar command using the system tar binary, but I don't see a # way to do that with Python. # TODO: Is there a good way to do this without changing directories? # TODO: Try https://goo.gl/XMx5gb cwd = os.getcwd() os.chdir(cache_dir) with tarfile.open(distro_tar, "w:gz") as tb: tb.add(unzipped_distro) print("Taring {} into {}".format(unzipped_distro, distro_tar)) os.chdir(cwd) print("Cached: {}".format(distro_tar)) else: print("Already cached: {}".format(distro_tar)) return distro_tar_path
import re
import requests
import json
import fileinput

# getting string in form of
# getting real string input
lines = split('&', input)
realinput = split(' ', realstring)
autism_arr = []
real_arr = []
fingerprintsarr = []
for line in lines:
    fields = split('|', line)
    autism = fields[0]
    real = fields[2]
    fingerprints = int(split(',', real))
    autism_arr.append(autism)
    real_arr.append(real)
    fingerprintsarr.append(fingerprints)

for realstring in realinput:
    if realstring not in autism_arr:
        filename = realstring + ".wav"
        urlretrieve("http://api.voicerss.org/?key=04f49802d32d442ca997d4d2ea76d3d5"
                    "&hl=en-us&c=wav&src=" + realstring, filename)
        rate, data = wav.read(filename)
def download(url, path=None):
    """Download a file over HTTP"""
    log.debug("Downloading %s.", url)
    name, headers = urllib.urlretrieve(url, path)
    log.debug("Downloaded to %s.", name)
    return name
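# Usage sketch: when path is None, urllib.urlretrieve saves to a temporary
# location and the function returns that name. The URLs below are placeholders.
# temp_copy = download('http://example.com/archive.tar.gz')
# named_copy = download('http://example.com/archive.tar.gz', '/tmp/archive.tar.gz')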
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA  02111-1307, USA.
##

import os
import urllib2
import urllib

from BeautifulSoup import BeautifulSoup

from django.core.files import File

from montanha.models import PoliticalParty

for pp in PoliticalParty.objects.all():
    if not pp.wikipedia:
        continue

    req = urllib2.Request(pp.wikipedia, headers={"User-Agent": "Mozilla/5.0"})
    html = urllib2.urlopen(req)
    doc = BeautifulSoup(html)

    table = doc.find("table", {"class": "infobox_v2"})
    if table:
        img = table.find("a", {"class": "image"})
        if img:
            logo_url = img.find("img")["src"]
            if "http:" not in logo_url:
                logo_url = "http:%s" % logo_url
            print logo_url
            result = urllib.urlretrieve(logo_url)
            pp.logo.save(os.path.basename(logo_url), File(open(result[0])))
            pp.save()
    146087462 + 19233765
)  # 146,087,462 yellow taxi trips for January - June 2015
GreenCab_NYC_Proportion = 1 - YellowCab_NYC_Proportion  # 19,233,765 green taxi trips for 2015
Chicago_Proportion = 2720546 / (8550405 + 2720546)

# COMMAND ----------

# MAGIC %md ## Read in CSV file

# COMMAND ----------

import urllib
urllib.urlretrieve(
    "https://s3-us-west-2.amazonaws.com/nyctlc/yellow_tripdata_2015-01-06.csv.gz"
)

# COMMAND ----------

dbutils.fs.mv("file:/tmp/tmpy1fBlQ.gz", "dbfs:/tmp/sample_zip/yellow_tripdata_zip.csv.gz")

# COMMAND ----------

display(dbutils.fs.ls("dbfs:/tmp/sample_zip"))

# COMMAND ----------

Yellow2015DF = spark.read.load(
    "dbfs:/tmp/sample_zip/yellow_tripdata_zip.csv.gz",
def load_umontreal_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    #############
    # LOAD DATA #
    #############

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        import urllib
        origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        print 'Downloading data from %s' % origin
        urllib.urlretrieve(origin, dataset)

    print '... loading data'

    # Load the dataset
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is an np.ndarray of 2 dimensions (a matrix)
    # whose rows correspond to an example. target is a
    # np.ndarray of 1 dimension (a vector) that has the same length as
    # the number of rows in the input. It should give the target
    # to the example with the same index in the input.

    def _shared_dataset(data_xy):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch everytime
        is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX))
        shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX))
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = _shared_dataset(test_set)
    valid_set_x, valid_set_y = _shared_dataset(valid_set)
    train_set_x, train_set_y = _shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
def FromFiles(self): self.name2cid_map = {} self.uid2compound_map = {} self.uid2reaction_map = {} self.uid2pathway_map = {} self.enzrxn2regulation_map = {} logging.info("Retrieving COMPOUNDS file and parsing it") if (not os.path.exists(self.COMPOUND_FILE)): urllib.urlretrieve(self.TAR_URL, self.TAR_FILE) os.chdir(self.base_dir) os.system('tar xvfz ' + self.org + '.tar.gz') entry2fields_map = parse_metacyc_file(self.COMPOUND_FILE) for uid in sorted(entry2fields_map.keys()): field_map = entry2fields_map[uid] comp = Compound(uid) if ("COMMON-NAME" in field_map): comp.name = re.sub('<.+?>', '', field_map["COMMON-NAME"].strip()) if ("SYNONYMS" in field_map): all_names = field_map["SYNONYMS"].split('\t') for name in all_names: name = re.sub('<.+?>', '', name.strip()) self.name2cid_map[name] = uid comp.all_names = all_names if ("MOLECULAR-WEIGHT" in field_map): comp.mass = float(field_map["MOLECULAR-WEIGHT"]) if ("CHEMICAL-FORMULA" in field_map): comp.formula = field_map["CHEMICAL-FORMULA"] if ("INCHI" in field_map): comp.inchi = field_map["INCHI"] if ("SMILES" in field_map): comp.smiles = field_map["SMILES"] if ("DBLINKS" in field_map): for sid in re.findall("PUBCHEM \"(\d+)\"", field_map["DBLINKS"]): comp.pubchem_id = int(sid) for cas in re.findall("CAS \"([\d\-]+)\"", field_map["DBLINKS"]): comp.cas = cas if ("REGULATES" in field_map): comp.regulates = field_map["REGULATES"].split('\t') if ("TYPES" in field_map): comp.types = field_map["TYPES"].split('\t') self.uid2compound_map[uid] = comp logging.info("Retrieving REGULATION file and parsing it") if (not os.path.exists(self.REGULATION_FILE)): urllib.urlretrieve(self.TAR_URL, self.TAR_FILE) os.chdir(self.base_dir) os.system('tar xvfz ' + self.org + '.tar.gz') entry2fields_map = parse_metacyc_file(self.REGULATION_FILE) for uid in sorted(entry2fields_map.keys()): field_map = entry2fields_map[uid] reg = Regulation(uid) if ("MODE" in field_map): reg.mode = field_map["MODE"] if ("REGULATED-ENTITY" in field_map): reg.regulated = field_map["REGULATED-ENTITY"] if ("REGULATOR" in field_map): reg.regulator = field_map["REGULATOR"] if reg.regulated != None: self.enzrxn2regulation_map[reg.regulated] = reg logging.info("Retrieving REACTIONS file and parsing it") if (not os.path.exists(self.REACTION_FILE)): urllib.urlretrieve(self.TAR_URL, self.TAR_FILE) os.chdir(self.base_dir) os.system('tar xvfz ' + self.org + '.tar.gz') entry2fields_map = parse_rxns_metacyc_file(self.REACTION_FILE) for uid in sorted(entry2fields_map.keys()): field_map = entry2fields_map[uid] direction = '<=>' if ("REACTION-DIRECTION" in field_map): if (re.search('LEFT-TO-RIGHT', field_map['REACTION-DIRECTION'])): direction = '=>' elif (re.search('RIGHT-TO-LEFT', field_map['REACTION-DIRECTION'])): direction = '<=' rxn = Reaction(uid, sparse_reaction=field_map['SPARSE'], direction=direction) if ("COMMON-NAME" in field_map): rxn.name = field_map["COMMON-NAME"].strip() if ("TYPES" in field_map): rxn.types = field_map["TYPES"].split('\t') if ("EC-NUMBER" in field_map): rxn.ec_number = field_map["EC-NUMBER"] if ("ENZYMATIC-REACTION" in field_map): rxn.enzrxns_list = field_map["ENZYMATIC-REACTION"].split('\t') self.uid2reaction_map[uid] = rxn os.remove('../res/metacyc_pathways_stats.txt') logging.info("Retrieving PATHWAYS file and parsing it") if (not os.path.exists(self.PATHWAY_FILE)): urllib.urlretrieve(self.TAR_URL, self.TAR_FILE) os.chdir(self.base_dir) os.system('tar xvfz ' + self.org + '.tar.gz') entry2fields_map = parse_metacyc_file(self.PATHWAY_FILE) n_super = 0 n_rxns_dict_prob = 0 
rxn_parse_error = 0 dup_rxn_layout = 0 unknown_dir = 0 no_pred_info = 0 no_start = 0 mul_start = 0 for uid in sorted(entry2fields_map.keys()): field_map = entry2fields_map[uid] rxn_direction_map = {} if ('Super-Pathways' in field_map['TYPES']): n_super += 1 continue pw = Pathway(uid) if ("COMMON-NAME" in field_map): pw.name = field_map["COMMON-NAME"].strip() if ("TYPES" in field_map): pw.types = field_map["TYPES"].split('\t') if ("PREDECESSORS" in field_map): pw.preds = field_map["PREDECESSORS"].split('\t') try: pw.UpdateRxnsDict() if pw.preds == None: no_pred_info += 1 except MetaCycPathwayWithoutStartException, e: no_start += 1 logging.debug(str(e)) continue except MetaCycManyPathwayStartException, e: mul_start += 1 logging.debug(str(e)) continue
        print('Alternatively, you can upload the dac_sample.tar.gz file to your Jupyter root ' +
              'directory')
        return False

    if os.path.isfile(fileName):
        print 'File is already available. Nothing to do.'
    elif extractTar(check=True):
        print 'tar.gz file was already available.'
    elif not url.endswith('dac_sample.tar.gz'):
        print 'Check your download url. Are you downloading the Sample dataset?'
    else:
        # Download the file and store it in the same directory as this notebook
        try:
            urllib.urlretrieve(url, os.path.basename(urlparse.urlsplit(url).path))
        except IOError:
            print 'Unable to download and store: {0}'.format(url)

        extractTar()


# In[69]:

import os.path
baseDir = os.path.join('data')
inputPath = os.path.join('cs190', 'dac_sample.txt')
fileName = os.path.join(baseDir, inputPath)

if os.path.isfile(fileName):
    rawData = (sc.textFile(fileName, 2)
               .map(lambda x: x.replace('\t', ',')))  # work with either ',' or '\t' separated data
def download_mtg_json():
    urllib.urlretrieve("http://www.mtgjson.com/json/AllSets-x.json.zip", "AllSets-x.json.zip")
    zip_ref = zipfile.ZipFile("AllSets-x.json.zip", 'r')
    zip_ref.extractall()
    zip_ref.close()
    os.remove("AllSets-x.json.zip")
def build(self, release=False, dev=False, jobs=None, params=None, no_package=False, verbose=False, very_verbose=False, target=None, android=False, magicleap=False, libsimpleservo=False, uwp=False, features=None, win_arm64=False, **kwargs): opts = params or [] features = features or [] target, android = self.pick_target_triple(target, android, magicleap) target_path = base_path = self.get_target_dir() if android: target_path = path.join(target_path, "android") base_path = path.join(target_path, target) elif magicleap: target_path = path.join(target_path, "magicleap") base_path = path.join(target_path, target) release_path = path.join(base_path, "release", "servo") dev_path = path.join(base_path, "debug", "servo") release_exists = path.exists(release_path) dev_exists = path.exists(dev_path) if not (release or dev): if self.config["build"]["mode"] == "dev": dev = True elif self.config["build"]["mode"] == "release": release = True elif release_exists and not dev_exists: release = True elif dev_exists and not release_exists: dev = True else: print("Please specify either --dev (-d) for a development") print(" build, or --release (-r) for an optimized build.") sys.exit(1) if release and dev: print("Please specify either --dev or --release.") sys.exit(1) if release: opts += ["--release"] servo_path = release_path else: servo_path = dev_path if jobs is not None: opts += ["-j", jobs] if verbose: opts += ["-v"] if very_verbose: opts += ["-vv"] if win_arm64: if target: print("Can't specify explicit --target value with --win-arm64.") sys.exit(1) target = "aarch64-pc-windows-msvc" if target: if self.config["tools"]["use-rustup"]: # 'rustup target add' fails if the toolchain is not installed at all. self.call_rustup_run(["rustc", "--version"]) check_call(["rustup" + BIN_SUFFIX, "target", "add", "--toolchain", self.toolchain(), target]) env = self.build_env(target=target, is_build=True) self.ensure_bootstrapped(target=target) self.ensure_clobbered() build_start = time() env["CARGO_TARGET_DIR"] = target_path host = host_triple() target_triple = target or host_triple() if 'apple-darwin' in host and target_triple == host: if 'CXXFLAGS' not in env: env['CXXFLAGS'] = '' env["CXXFLAGS"] += "-mmacosx-version-min=10.10" if 'windows' in host: vs_dirs = self.vs_dirs() if host != target_triple and 'windows' in target_triple: if os.environ.get('VisualStudioVersion'): print("Can't cross-compile for Windows inside of a Visual Studio shell.\n" "Please run `python mach build [arguments]` to bypass automatic " "Visual Studio shell.") sys.exit(1) vcinstalldir = vs_dirs['vcdir'] if not os.path.exists(vcinstalldir): print("Can't find Visual C++ %s installation at %s." % (vs_dirs['vs_version'], vcinstalldir)) sys.exit(1) env['PKG_CONFIG_ALLOW_CROSS'] = "1" if uwp: # Don't try and build a desktop port. libsimpleservo = True arches = { "aarch64": { "angle": "arm64", "gst": "ARM64", "gst_root": "arm64", }, "x86_64": { "angle": "x64", "gst": "X86_64", "gst_root": "x64", }, } arch = arches.get(target_triple.split('-')[0]) if not arch: print("Unsupported UWP target.") sys.exit(1) # Ensure that the NuGet ANGLE package containing libEGL is accessible # to the Rust linker. append_to_path_env(angle_root(target_triple, env), env, "LIB") # Don't want to mix non-UWP libraries with vendored UWP libraries. if "gstreamer" in env['LIB']: print("Found existing GStreamer library path in LIB. Please remove it.") sys.exit(1) # Override any existing GStreamer installation with the vendored libraries. 
env["GSTREAMER_1_0_ROOT_" + arch['gst']] = path.join( self.msvc_package_dir("gstreamer-uwp"), arch['gst_root'] ) # Ensure that GStreamer libraries are accessible when linking. if 'windows' in target_triple: gst_root = gstreamer_root(target_triple, env) if gst_root: append_to_path_env(os.path.join(gst_root, "lib"), env, "LIB") if android: if "ANDROID_NDK" not in env: print("Please set the ANDROID_NDK environment variable.") sys.exit(1) if "ANDROID_SDK" not in env: print("Please set the ANDROID_SDK environment variable.") sys.exit(1) android_platform = self.config["android"]["platform"] android_toolchain_name = self.config["android"]["toolchain_name"] android_toolchain_prefix = self.config["android"]["toolchain_prefix"] android_lib = self.config["android"]["lib"] android_arch = self.config["android"]["arch"] # Build OpenSSL for android env["OPENSSL_VERSION"] = "1.0.2k" make_cmd = ["make"] if jobs is not None: make_cmd += ["-j" + jobs] openssl_dir = path.join(target_path, target, "native", "openssl") if not path.exists(openssl_dir): os.makedirs(openssl_dir) shutil.copy(path.join(self.android_support_dir(), "openssl.makefile"), openssl_dir) shutil.copy(path.join(self.android_support_dir(), "openssl.sh"), openssl_dir) # Check if the NDK version is 15 if not os.path.isfile(path.join(env["ANDROID_NDK"], 'source.properties')): print("ANDROID_NDK should have file `source.properties`.") print("The environment variable ANDROID_NDK may be set at a wrong path.") sys.exit(1) with open(path.join(env["ANDROID_NDK"], 'source.properties')) as ndk_properties: lines = ndk_properties.readlines() if lines[1].split(' = ')[1].split('.')[0] != '15': print("Currently only support NDK 15. Please re-run `./mach bootstrap-android`.") sys.exit(1) env["RUST_TARGET"] = target with cd(openssl_dir): status = call( make_cmd + ["-f", "openssl.makefile"], env=env, verbose=verbose) if status: return status openssl_dir = path.join(openssl_dir, "openssl-{}".format(env["OPENSSL_VERSION"])) env['OPENSSL_LIB_DIR'] = openssl_dir env['OPENSSL_INCLUDE_DIR'] = path.join(openssl_dir, "include") env['OPENSSL_STATIC'] = 'TRUE' # Android builds also require having the gcc bits on the PATH and various INCLUDE # path munging if you do not want to install a standalone NDK. 
See: # https://dxr.mozilla.org/mozilla-central/source/build/autoconf/android.m4#139-161 os_type = platform.system().lower() if os_type not in ["linux", "darwin"]: raise Exception("Android cross builds are only supported on Linux and macOS.") cpu_type = platform.machine().lower() host_suffix = "unknown" if cpu_type in ["i386", "i486", "i686", "i768", "x86"]: host_suffix = "x86" elif cpu_type in ["x86_64", "x86-64", "x64", "amd64"]: host_suffix = "x86_64" host = os_type + "-" + host_suffix host_cc = env.get('HOST_CC') or _get_exec_path(["clang"]) or _get_exec_path(["gcc"]) host_cxx = env.get('HOST_CXX') or _get_exec_path(["clang++"]) or _get_exec_path(["g++"]) llvm_toolchain = path.join(env['ANDROID_NDK'], "toolchains", "llvm", "prebuilt", host) gcc_toolchain = path.join(env['ANDROID_NDK'], "toolchains", android_toolchain_prefix + "-4.9", "prebuilt", host) gcc_libs = path.join(gcc_toolchain, "lib", "gcc", android_toolchain_name, "4.9.x") env['PATH'] = (path.join(llvm_toolchain, "bin") + ':' + env['PATH']) env['ANDROID_SYSROOT'] = path.join(env['ANDROID_NDK'], "sysroot") support_include = path.join(env['ANDROID_NDK'], "sources", "android", "support", "include") cpufeatures_include = path.join(env['ANDROID_NDK'], "sources", "android", "cpufeatures") cxx_include = path.join(env['ANDROID_NDK'], "sources", "cxx-stl", "llvm-libc++", "include") clang_include = path.join(llvm_toolchain, "lib64", "clang", "3.8", "include") cxxabi_include = path.join(env['ANDROID_NDK'], "sources", "cxx-stl", "llvm-libc++abi", "include") sysroot_include = path.join(env['ANDROID_SYSROOT'], "usr", "include") arch_include = path.join(sysroot_include, android_toolchain_name) android_platform_dir = path.join(env['ANDROID_NDK'], "platforms", android_platform, "arch-" + android_arch) arch_libs = path.join(android_platform_dir, "usr", "lib") clang_include = path.join(llvm_toolchain, "lib64", "clang", "5.0", "include") android_api = android_platform.replace('android-', '') env['HOST_CC'] = host_cc env['HOST_CXX'] = host_cxx env['HOST_CFLAGS'] = '' env['HOST_CXXFLAGS'] = '' env['CC'] = path.join(llvm_toolchain, "bin", "clang") env['CPP'] = path.join(llvm_toolchain, "bin", "clang") + " -E" env['CXX'] = path.join(llvm_toolchain, "bin", "clang++") env['ANDROID_TOOLCHAIN'] = gcc_toolchain env['ANDROID_TOOLCHAIN_DIR'] = gcc_toolchain env['ANDROID_VERSION'] = android_api env['ANDROID_PLATFORM_DIR'] = android_platform_dir env['GCC_TOOLCHAIN'] = gcc_toolchain gcc_toolchain_bin = path.join(gcc_toolchain, android_toolchain_name, "bin") env['AR'] = path.join(gcc_toolchain_bin, "ar") env['RANLIB'] = path.join(gcc_toolchain_bin, "ranlib") env['OBJCOPY'] = path.join(gcc_toolchain_bin, "objcopy") env['YASM'] = path.join(env['ANDROID_NDK'], 'prebuilt', host, 'bin', 'yasm') # A cheat-sheet for some of the build errors caused by getting the search path wrong... # # fatal error: 'limits' file not found # -- add -I cxx_include # unknown type name '__locale_t' (when running bindgen in mozjs_sys) # -- add -isystem sysroot_include # error: use of undeclared identifier 'UINTMAX_C' # -- add -D__STDC_CONSTANT_MACROS # # Also worth remembering: autoconf uses C for its configuration, # even for C++ builds, so the C flags need to line up with the C++ flags. 
env['CFLAGS'] = ' '.join([ "--target=" + target, "--sysroot=" + env['ANDROID_SYSROOT'], "--gcc-toolchain=" + gcc_toolchain, "-isystem", sysroot_include, "-I" + arch_include, "-B" + arch_libs, "-L" + arch_libs, "-D__ANDROID_API__=" + android_api, ]) env['CXXFLAGS'] = ' '.join([ "--target=" + target, "--sysroot=" + env['ANDROID_SYSROOT'], "--gcc-toolchain=" + gcc_toolchain, "-I" + cpufeatures_include, "-I" + cxx_include, "-I" + clang_include, "-isystem", sysroot_include, "-I" + cxxabi_include, "-I" + clang_include, "-I" + arch_include, "-I" + support_include, "-L" + gcc_libs, "-B" + arch_libs, "-L" + arch_libs, "-D__ANDROID_API__=" + android_api, "-D__STDC_CONSTANT_MACROS", "-D__NDK_FPABI__=", ]) env['CPPFLAGS'] = ' '.join([ "--target=" + target, "--sysroot=" + env['ANDROID_SYSROOT'], "-I" + arch_include, ]) env["NDK_ANDROID_VERSION"] = android_api env["ANDROID_ABI"] = android_lib env["ANDROID_PLATFORM"] = android_platform env["NDK_CMAKE_TOOLCHAIN_FILE"] = path.join(env['ANDROID_NDK'], "build", "cmake", "android.toolchain.cmake") env["CMAKE_TOOLCHAIN_FILE"] = path.join(self.android_support_dir(), "toolchain.cmake") # Set output dir for gradle aar files aar_out_dir = self.android_aar_dir() if not os.path.exists(aar_out_dir): os.makedirs(aar_out_dir) env["AAR_OUT_DIR"] = aar_out_dir # GStreamer and its dependencies use pkg-config and this flag is required # to make it work in a cross-compilation context. env["PKG_CONFIG_ALLOW_CROSS"] = '1' # Build the name of the package containing all GStreamer dependencies # according to the build target. gst_lib = "gst-build-{}".format(self.config["android"]["lib"]) gst_lib_zip = "gstreamer-{}-1.16.0-20190517-095630.zip".format(self.config["android"]["lib"]) gst_dir = os.path.join(target_path, "gstreamer") gst_lib_path = os.path.join(gst_dir, gst_lib) pkg_config_path = os.path.join(gst_lib_path, "pkgconfig") env["PKG_CONFIG_PATH"] = pkg_config_path if not os.path.exists(gst_lib_path): # Download GStreamer dependencies if they have not already been downloaded # This bundle is generated with `libgstreamer_android_gen` # Follow these instructions to build and deploy new binaries # https://github.com/servo/libgstreamer_android_gen#build print("Downloading GStreamer dependencies") gst_url = "https://servo-deps.s3.amazonaws.com/gstreamer/%s" % gst_lib_zip print(gst_url) urllib.urlretrieve(gst_url, gst_lib_zip) zip_ref = zipfile.ZipFile(gst_lib_zip, "r") zip_ref.extractall(gst_dir) os.remove(gst_lib_zip) # Change pkgconfig info to make all GStreamer dependencies point # to the libgstreamer_android.so bundle. for each in os.listdir(pkg_config_path): if each.endswith('.pc'): print("Setting pkgconfig info for %s" % each) pc = os.path.join(pkg_config_path, each) expr = "s#libdir=.*#libdir=%s#g" % gst_lib_path subprocess.call(["perl", "-i", "-pe", expr, pc]) if magicleap: if platform.system() not in ["Darwin"]: raise Exception("Magic Leap builds are only supported on macOS. 
" "If you only wish to test if your code builds, " "run ./mach build -p libmlservo.") ml_sdk = env.get("MAGICLEAP_SDK") if not ml_sdk: raise Exception("Magic Leap builds need the MAGICLEAP_SDK environment variable") if not os.path.exists(ml_sdk): raise Exception("Path specified by MAGICLEAP_SDK does not exist.") ml_support = path.join(self.get_top_dir(), "support", "magicleap") # We pretend to be an Android build env.setdefault("ANDROID_VERSION", "21") env.setdefault("ANDROID_NDK", env["MAGICLEAP_SDK"]) env.setdefault("ANDROID_NDK_VERSION", "16.0.0") env.setdefault("ANDROID_PLATFORM_DIR", path.join(env["MAGICLEAP_SDK"], "lumin")) env.setdefault("ANDROID_TOOLCHAIN_DIR", path.join(env["MAGICLEAP_SDK"], "tools", "toolchains")) env.setdefault("ANDROID_CLANG", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "clang")) # A random collection of search paths env.setdefault("STLPORT_LIBS", " ".join([ "-L" + path.join(env["MAGICLEAP_SDK"], "lumin", "stl", "libc++-lumin", "lib"), "-lc++" ])) env.setdefault("STLPORT_CPPFLAGS", " ".join([ "-I" + path.join(env["MAGICLEAP_SDK"], "lumin", "stl", "libc++-lumin", "include") ])) env.setdefault("CPPFLAGS", " ".join([ "--no-standard-includes", "--sysroot=" + env["ANDROID_PLATFORM_DIR"], "-I" + path.join(env["ANDROID_PLATFORM_DIR"], "usr", "include"), "-isystem" + path.join(env["ANDROID_TOOLCHAIN_DIR"], "lib64", "clang", "3.8", "include"), ])) env.setdefault("CFLAGS", " ".join([ env["CPPFLAGS"], "-L" + path.join(env["ANDROID_TOOLCHAIN_DIR"], "lib", "gcc", target, "4.9.x"), ])) env.setdefault("CXXFLAGS", " ".join([ # Sigh, Angle gets confused if there's another EGL around "-I./gfx/angle/checkout/include", env["STLPORT_CPPFLAGS"], env["CFLAGS"] ])) # The toolchain commands env.setdefault("AR", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-ar")) env.setdefault("AS", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-clang")) env.setdefault("CC", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-clang")) env.setdefault("CPP", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-clang -E")) env.setdefault("CXX", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-clang++")) env.setdefault("LD", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-ld")) env.setdefault("OBJCOPY", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-objcopy")) env.setdefault("OBJDUMP", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-objdump")) env.setdefault("RANLIB", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-ranlib")) env.setdefault("STRIP", path.join(env["ANDROID_TOOLCHAIN_DIR"], "bin", "aarch64-linux-android-strip")) # Undo all of that when compiling build tools for the host env.setdefault("HOST_CFLAGS", "") env.setdefault("HOST_CXXFLAGS", "") env.setdefault("HOST_CC", "/usr/local/opt/llvm/bin/clang") env.setdefault("HOST_CXX", "/usr/local/opt/llvm/bin/clang++") env.setdefault("HOST_LD", "ld") # Some random build configurations env.setdefault("HARFBUZZ_SYS_NO_PKG_CONFIG", "1") env.setdefault("PKG_CONFIG_ALLOW_CROSS", "1") env.setdefault("CMAKE_TOOLCHAIN_FILE", path.join(ml_support, "toolchain.cmake")) env.setdefault("_LIBCPP_INLINE_VISIBILITY", "__attribute__((__always_inline__))") # The Open SSL configuration env.setdefault("OPENSSL_DIR", path.join(target_path, target, "native", "openssl")) env.setdefault("OPENSSL_VERSION", "1.0.2k") env.setdefault("OPENSSL_STATIC", "1") # GStreamer configuration 
env.setdefault("GSTREAMER_DIR", path.join(target_path, target, "native", "gstreamer-1.16.0")) env.setdefault("GSTREAMER_URL", "https://servo-deps.s3.amazonaws.com/gstreamer/gstreamer-magicleap-1.16.0-20190823-104505.tgz") env.setdefault("PKG_CONFIG_PATH", path.join(env["GSTREAMER_DIR"], "system", "lib64", "pkgconfig")) # Override the linker set in .cargo/config env.setdefault("CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER", path.join(ml_support, "fake-ld.sh")) # Only build libmlservo opts += ["--package", "libmlservo"] # Download and build OpenSSL if necessary status = call(path.join(ml_support, "openssl.sh"), env=env, verbose=verbose) if status: return status # Download prebuilt Gstreamer if necessary if not os.path.exists(path.join(env["GSTREAMER_DIR"], "system")): if not os.path.exists(env["GSTREAMER_DIR"] + ".tgz"): check_call([ 'curl', '-L', '-f', '-o', env["GSTREAMER_DIR"] + ".tgz", env["GSTREAMER_URL"], ]) check_call([ 'mkdir', '-p', env["GSTREAMER_DIR"], ]) check_call([ 'tar', 'xzf', env["GSTREAMER_DIR"] + ".tgz", '-C', env["GSTREAMER_DIR"], ]) if very_verbose: print (["Calling", "cargo", "build"] + opts) for key in env: print((key, env[key])) if sys.platform == "win32": env.setdefault("CC", "clang-cl.exe") env.setdefault("CXX", "clang-cl.exe") if uwp: env.setdefault("CFLAGS", "") env.setdefault("CXXFLAGS", "") env["CFLAGS"] += " -DWINAPI_FAMILY=WINAPI_FAMILY_APP" env["CXXFLAGS"] += " -DWINAPI_FAMILY=WINAPI_FAMILY_APP" else: env.setdefault("CC", "clang") env.setdefault("CXX", "clang++") status = self.run_cargo_build_like_command( "build", opts, env=env, verbose=verbose, target=target, android=android, magicleap=magicleap, libsimpleservo=libsimpleservo, uwp=uwp, features=features, **kwargs ) elapsed = time() - build_start # Do some additional things if the build succeeded if status == 0: if android and not no_package: flavor = None if "googlevr" in features: flavor = "googlevr" elif "oculusvr" in features: flavor = "oculusvr" rv = Registrar.dispatch("package", context=self.context, release=release, dev=dev, target=target, flavor=flavor) if rv: return rv if sys.platform == "win32": servo_exe_dir = os.path.dirname( self.get_binary_path(release, dev, target=target, simpleservo=libsimpleservo) ) assert os.path.exists(servo_exe_dir) # on msvc builds, use editbin to change the subsystem to windows, but only # on release builds -- on debug builds, it hides log output if not dev and not libsimpleservo: call(["editbin", "/nologo", "/subsystem:windows", path.join(servo_exe_dir, "servo.exe")], verbose=verbose) # on msvc, we need to copy in some DLLs in to the servo.exe dir for ssl_lib in ["libssl.dll", "libcrypto.dll"]: shutil.copy(path.join(env['OPENSSL_LIB_DIR'], "../bin", ssl_lib), servo_exe_dir) # Search for the generated nspr4.dll build_path = path.join(servo_exe_dir, "build") assert os.path.exists(build_path) def package_generated_shared_libraries(libs, build_path, servo_exe_dir): for root, dirs, files in os.walk(build_path): remaining_libs = list(libs) for lib in libs: if lib in files: shutil.copy(path.join(root, lib), servo_exe_dir) remaining_libs.remove(lib) continue libs = remaining_libs if not libs: return for lib in libs: print("WARNING: could not find " + lib) # UWP build has its own ANGLE library that it packages. 
if not uwp: package_generated_shared_libraries(["libEGL.dll", "libGLESv2.dll"], build_path, servo_exe_dir) # copy needed gstreamer DLLs in to servo.exe dir print("Packaging gstreamer DLLs") if not package_gstreamer_dlls(env, servo_exe_dir, target_triple, uwp): status = 1 # UWP app packaging already bundles all required DLLs for us. print("Packaging MSVC DLLs") if not package_msvc_dlls(servo_exe_dir, target_triple, vs_dirs['vcdir'], vs_dirs['vs_version']): status = 1 elif sys.platform == "darwin": # On the Mac, set a lovely icon. This makes it easier to pick out the Servo binary in tools # like Instruments.app. try: import Cocoa icon_path = path.join(self.get_top_dir(), "resources", "servo.png") icon = Cocoa.NSImage.alloc().initWithContentsOfFile_(icon_path) if icon is not None: Cocoa.NSWorkspace.sharedWorkspace().setIcon_forFile_options_(icon, servo_path, 0) except ImportError: pass # Generate Desktop Notification if elapsed-time > some threshold value notify_build_done(self.config, elapsed, status == 0) print("Build %s in %s" % ("Completed" if status == 0 else "FAILED", format_duration(elapsed))) return status
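# The GStreamer step in the build function above rewrites the libdir= entry of
# every vendored .pc file with a perl one-liner (perl -i -pe "s#libdir=.*#...#g").
# A rough pure-Python sketch of the same rewrite, assuming the same
# pkg_config_path and gst_lib_path values; rewrite_pc_libdir is a hypothetical
# helper, not part of mach itself.
import os
import re

def rewrite_pc_libdir(pkg_config_path, gst_lib_path):
    for each in os.listdir(pkg_config_path):
        if not each.endswith('.pc'):
            continue
        pc = os.path.join(pkg_config_path, each)
        with open(pc) as f:
            contents = f.read()
        # Replace everything after "libdir=" on each line; using a lambda keeps
        # backslashes in Windows paths from being read as regex group references.
        contents = re.sub(r'libdir=.*', lambda m: 'libdir=' + gst_lib_path, contents)
        with open(pc, 'w') as f:
            f.write(contents)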
# Return the JSON triples
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Open the file for writing urls (this is for image magick)
listImages = open('bmimagesResized/files.txt', 'w')

# Iterate over the results
for result in results["results"]["bindings"]:
    image = result["image"]["value"]
    if os.path.isfile(os.path.join('bmimages', os.path.basename(image))):
        print "File already exists"
    else:
        path = os.path.join('bmimages', os.path.basename(image))
        urllib.urlretrieve(image, path)
        print "Image " + os.path.basename(image) + " downloaded"

for file in os.listdir('bmimages'):
    if not file.startswith('.'):
        listImages.write(os.path.join("bmimagesResized", os.path.basename(file)) + "\n")

# Iterate through files and crop as required
for file in os.listdir('bmimages'):
    # Make sure file is not a hidden one etc
    if not file.startswith('.') and os.path.isfile(os.path.join('bmimages', file)):
        # Open the file checking if it is valid or not. It fails otherwise :-(
        try:
            if not os.path.exists(os.path.join('bmimagesResized', file)):
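# The truncated tail of the snippet above opens each downloaded image
# "checking if it is valid or not" before resizing. A minimal sketch of such a
# check, assuming Pillow/PIL is installed; is_valid_image is a hypothetical
# helper, not part of the original script.
from PIL import Image

def is_valid_image(path):
    # verify() raises on truncated or otherwise broken image data
    try:
        img = Image.open(path)
        img.verify()
        return True
    except (IOError, SyntaxError):
        return False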
def download_images(urls):
    global downloaded
    for url in urls:
        urllib.urlretrieve(url, "../../datasets/Delhi_imgs/" + url[36:42] + '_' + url[45:51] + ".jpg")
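# The fixed slices url[36:42] and url[45:51] above assume one specific URL
# layout. A slightly more defensive sketch, assuming the last path component
# of each URL is a usable file name; download_images_safe is a hypothetical
# variant, with the output directory kept from the original.
import os
import urllib

def download_images_safe(urls, out_dir="../../datasets/Delhi_imgs"):
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    for url in urls:
        filename = os.path.basename(url.split('?')[0]) or "image.jpg"
        urllib.urlretrieve(url, os.path.join(out_dir, filename))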
def size_for_node(self, node, client):
    '''Given a docutils image node, returns the size the image should have
    in the PDF document, and what 'kind' of size that is.
    That involves lots of guesswork'''
    uri = str(node.get("uri"))
    if uri.split("://")[0].lower() not in ('http', 'ftp', 'https'):
        uri = os.path.join(client.basedir, uri)
    else:
        uri, _ = urllib.urlretrieve(uri)
        client.to_unlink.append(uri)
    srcinfo = client, uri
    # Extract all the information from the URI
    imgname, extension, options = self.split_uri(uri)

    if not os.path.isfile(imgname):
        imgname = missing

    scale = float(node.get('scale', 100)) / 100
    size_known = False

    # Figuring out the size to display of an image is ... annoying.
    # If the user provides a size with a unit, it's simple, adjustUnits
    # will return it in points and we're done.
    # However, often the unit will be "%" (especially if it's meant for
    # HTML originally), in which case we will use a percentage of
    # the containing frame.

    # Find the image size in pixels:
    kind = 'direct'
    xdpi, ydpi = client.styles.def_dpi, client.styles.def_dpi
    extension = imgname.split('.')[-1].lower()
    if extension in ['svg', 'svgz']:
        iw, ih = SVGImage(imgname, srcinfo=srcinfo).wrap(0, 0)
        # These are in pt, so convert to px
        iw = iw * xdpi / 72
        ih = ih * ydpi / 72
    elif extension == 'pdf':
        if VectorPdf is not None:
            xobj = VectorPdf.load_xobj(srcinfo)
            iw, ih = xobj.w, xobj.h
        else:
            reader = pdfinfo.PdfFileReader(open(imgname, 'rb'))
            box = [float(x) for x in reader.getPage(0)['/MediaBox']]
            x1, y1, x2, y2 = box
            iw, ih = x2 - x1, y2 - y1
        # These are in pt, so convert to px
        iw = iw * xdpi / 72.0
        ih = ih * ydpi / 72.0
        size_known = True  # Assume size from original PDF is OK
    else:
        keeptrying = True
        if PILImage:
            try:
                img = PILImage.open(imgname)
                img.load()
                iw, ih = img.size
                xdpi, ydpi = img.info.get('dpi', (xdpi, ydpi))
                keeptrying = False
            except IOError:
                # PIL throws this when it's a broken/unknown image
                pass
        if keeptrying:
            if extension not in ['jpg', 'jpeg']:
                log.error(
                    "The image (%s, %s) is broken or in an unknown format",
                    imgname, nodeid(node))
                raise ValueError
            else:
                # Can be handled by reportlab
                log.warning(
                    "Can't figure out size of the image (%s, %s). Install PIL for better results.",
                    imgname, nodeid(node))
                iw = 1000
                ih = 1000

    # Try to get the print resolution from the image itself via PIL.
    # If it fails, assume a DPI of 300, which is pretty much made up,
    # and then a 100% size would be iw*inch/300, so we pass
    # that as the second parameter to adjustUnits
    #
    # Some say the default DPI should be 72. That would mean
    # the largest printable image in A4 paper would be something
    # like 480x640. That would be awful.
    #
    w = node.get('width')
    h = node.get('height')
    if h is None and w is None:  # Nothing specified
        # Guess from iw, ih
        log.debug("Using image %s without specifying size."
                  "Calculating based on image size at %ddpi [%s]",
                  imgname, xdpi, nodeid(node))
        w = iw * inch / xdpi
        h = ih * inch / ydpi
    elif w is not None:  # Node specifies only w
        # In this particular case, we want the default unit
        # to be pixels so we work like rst2html
        if w[-1] == '%':
            kind = 'percentage_of_container'
            w = int(w[:-1])
        else:
            # This uses default DPI setting because we
            # are not using the image's "natural size"
            # this is what LaTeX does, according to the
            # docutils mailing list discussion
            w = client.styles.adjustUnits(w, client.styles.tw, default_unit='px')
        if h is None:
            # h is set from w with right aspect ratio
            h = w * ih / iw
        else:
            h = client.styles.adjustUnits(h, ih * inch / ydpi, default_unit='px')
    elif h is not None and w is None:
        if h[-1] != '%':
            h = client.styles.adjustUnits(h, ih * inch / ydpi, default_unit='px')
            # w is set from h with right aspect ratio
            w = h * iw / ih
        else:
            log.error('Setting height as a percentage does **not** work. '
                      'ignoring height parameter [%s]', nodeid(node))
            # Set both from image data
            w = iw * inch / xdpi
            h = ih * inch / ydpi

    # Apply scale factor
    w = w * scale
    h = h * scale

    # And now we have this probably completely bogus size!
    log.info("Image %s size calculated: %fcm by %fcm [%s]",
             imgname, w / cm, h / cm, nodeid(node))

    return w, h, kind
def getPage(url):
    urllib.urlretrieve(url, "test.html")
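# urllib.urlretrieve returns a (filename, headers) tuple, so the saved path
# can be handed back to the caller rather than hard-coding "test.html".
# A small variant sketch; getPageTo is a hypothetical name.
import urllib

def getPageTo(url, filename="test.html"):
    saved_path, headers = urllib.urlretrieve(url, filename)
    return saved_path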
from bs4 import BeautifulSoup
import urllib2
import os
import urllib
import getxml

# create connection with main page
url = 'http://www.heart.org/HEARTORG/General/State-by-State-NIH-Allocations_UCM_440585_Article.jsp'
page = urllib2.urlopen(url)
soup = BeautifulSoup(page.read(), 'lxml')

# get all the links to states
div = soup.find('div', class_='content')
table = div.find('table', width=400)
for row in table.findChildren('tr'):
    for cell in row.findChildren('td'):
        link = cell.find('a').get('href')
        url = 'http://www.heart.org/' + link
        state = cell.text
        urllib.urlretrieve(url, 'pdf/' + state + '.pdf')
        getxml.getxml('pdf/' + state)
        print state + ' done'
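# cell.text goes straight into a file name above; stray spaces or slashes in a
# state name would break the 'pdf/...' path. A small sanitising sketch under
# that assumption; safe_name is a hypothetical helper, not part of the
# original script.
import re

def safe_name(text):
    # keep letters, digits, dashes and underscores; collapse everything else
    return re.sub(r'[^A-Za-z0-9_-]+', '_', text.strip())

# usage: urllib.urlretrieve(url, 'pdf/' + safe_name(state) + '.pdf')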
soup2 = BeautifulSoup(response2.text, 'html.parser')
pp = soup2.find_all("a")

# To download the mcat (galex catalogue).
catalogue_link = []
for link in pp:
    somel = link.get('href')
    try:
        if somel[-12:] == 'mcat.fits.gz':
            catalogue_link.append(somel)
    except TypeError:
        # link.get('href') returns None for anchors without an href
        pass

if len(catalogue_link) != 0:
    catalogue = catalogue_link[0].split('/')[-1]
    urllib.urlretrieve(catalogue_link[0], catalogue)
else:
    sys.exit(1)

# Reading coordinates from catalogue.
hdu = fits.open(catalogue)
alpha = hdu[1].data['alpha_j2000_merged']
delta = hdu[1].data['delta_j2000_merged']

# NUV
nuv_mag = hdu[1].data['nuv_mag']
refined_set = [(al, de, nm)
               for al, de, nm in zip(alpha, delta, nuv_mag)
               if int(nm) != -999 and nm <= 22.]
nalpha, ndelta, nuv_mag = zip(*refined_set)
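# As an alternative to the list comprehension above: since astropy returns the
# FITS columns as numpy arrays, the same cut (drop the -999 sentinel, keep
# nuv_mag <= 22) can be written as a boolean mask. This sketch would replace
# the last two lines; nnuv_mag is a new name so the original column array
# stays untouched.
good = (nuv_mag != -999) & (nuv_mag <= 22.)
nalpha, ndelta, nnuv_mag = alpha[good], delta[good], nuv_mag[good]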