def pullHydro(hytype, downloaddir):
    # see https://www.dropbox.com/sh/hmpwobbz9qixxpe/AAAI_jasMJPZl_6wX6d3vEOla for the 'root' of the hydrosheds data
    hysource = {
        "hybas_af": "https://www.dropbox.com/sh/hmpwobbz9qixxpe/AADoPLdVZNd2JG-KaJNY0zT1a/HydroBASINS/standard/af/hybas_af_lev01-06_v1c.zip",
        "hybas_eu": "https://www.dropbox.com/sh/hmpwobbz9qixxpe/AABz1Pym5esD6GUJcnzaaqpEa/HydroBASINS/standard/eu/hybas_eu_lev01-06_v1c.zip",
        "af_riv_30s": "https://www.dropbox.com/sh/hmpwobbz9qixxpe/AAC9imuUajl_1bS0tKWqPE8Ya/HydroSHEDS_RIV/RIV_30s/af_riv_30s.zip",
        "eu_riv_30s": "https://www.dropbox.com/sh/hmpwobbz9qixxpe/AAD68vqkhRNJd5qK3NVvM7TSa/HydroSHEDS_RIV/RIV_30s/eu_riv_30s.zip"
    }
    # newest version which is supported by this plugin
    httpserv = http(hysource[hytype], lastmod=datetime(2021, 2, 8))
    uri, upd = httpserv.download(downloaddir, check=True)
    if upd:
        # unzip all the goodies
        zipd = os.path.join(downloaddir, 'extract')
        with ZipFile(uri.url, 'r') as zp:
            zp.extractall(zipd)
    else:
        slurplogger().info("This component of hydrosheds is already downloaded")
    return upd
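# Usage sketch (illustrative, not part of the original module): refresh the
# African HydroBASINS extract in a scratch directory. The directory path is
# hypothetical; pullHydro and the geoslurp imports it relies on (http,
# slurplogger) are assumed to be available.
#
#   scratch = "/tmp/hydrosheds"
#   if pullHydro("hybas_af", scratch):
#       print("new extract available under", os.path.join(scratch, "extract"))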
def purgeentry(self, filter):
    """Delete pgfunction entry in the database"""
    slurplogger().info("Deleting %s entry" % (self.name))
    self._ses.delete(self._dbinvent)
    self._ses.commit()
    # note: an identifier cannot be passed as a bound parameter (drivers bind
    # values, not object names), so the function name is interpolated directly
    dropexec = text("DROP FUNCTION IF EXISTS %s;" % (self.name))
    self.db.dbeng.execute(dropexec)
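# The bound-parameter pitfall above is generic: SQL drivers bind *values*, not
# identifiers, so a function name must be quoted as an identifier instead of
# interpolated when it may contain untrusted characters. A minimal sketch with
# psycopg2 (an assumption -- geoslurp may use a different driver; `conn` is a
# hypothetical open connection):
#
#   from psycopg2 import sql
#   with conn.cursor() as cur:
#       cur.execute(sql.SQL("DROP FUNCTION IF EXISTS {};")
#                   .format(sql.Identifier("myschema", "myfunction")))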
def entryNeedsUpdate(self, likestr, lastmod, col=None):
    """Query a column of the table for entries matching a like-string and
    delete the entries which are older than lastmod"""
    needsupdate = True
    try:
        if not col:
            col = self.table.uri
        qResults = self._ses.query(self.table).filter(
            col.like('%' + likestr + '%'))
        if qResults.count() == 0:
            return True
        needsupdate = False
        # check if at least one entry needs updating
        for qres in qResults:
            if qres.lastupdate < lastmod:
                needsupdate = True
                break
        if needsupdate:
            # delete the entries which need updating
            for qres in qResults:
                self._ses.delete(qres)
            self._ses.commit()
        else:
            slurplogger().info("No update needed, skipping %s" % (likestr))
    except Exception:
        # no entries found, which is fine
        pass
    return needsupdate
def pull(self, pattern='.*'):
    """Pulls the EasyCORA files from the Copernicus FTP server and unpacks them

    :param pattern: (string) only download data whose filenames obey this
        regular expression (e.g. 20[0-9][0-9] to download from 2000 onward)
    """
    ftproot = "ftp://my.cmems-du.eu/Core/INSITU_GLO_TS_REP_OBSERVATIONS_013_001_b/CORIOLIS-GLOBAL-EasyCORA-OBS/global"
    # get cmems authentication details from the database
    cred = self.conf.authCred("cmems")
    ftpcr = ftpCrawler(ftproot, auth=cred, pattern=pattern)
    updated = ftpcr.parallelDownload(self.cacheDir(), check=True, maxconn=10,
                                     continueonError=True)
    # unpack the downloaded files in the data directory
    datadir = self.dataDir()
    for tarf in [UriFile(f) for f in findFiles(self.cacheDir(), ".*tgz$")]:
        successfile = os.path.join(datadir, os.path.basename(tarf.url) + ".isextracted")
        # only unpack when needed: the marker file indicates that this archive
        # was already successfully extracted
        if os.path.exists(successfile):
            slurplogger().info(f"{tarf.url} is already extracted, skipping")
        else:
            with tarfile.open(tarf.url, "r:gz") as tf:
                slurplogger().info(f"Extracting trajectory files from {tarf.url}")
                tf.extractall(datadir)
            # touch the marker file to indicate this archive has been
            # successfully extracted
            Path(successfile).touch()
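# Usage sketch (illustrative): restrict the FTP crawl to files from 2010
# onward. The pattern is matched against the remote filenames as a regular
# expression, so any valid regex works; `dataset` is a hypothetical instance
# of the class this method belongs to.
#
#   dataset.pull(pattern="201[0-9]|202[0-9]")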
def fillCSVTable(filename, tablename, lookup, scheme, hskip=0):
    """Update/populate a database table from a CSV file

    This function reads all rows from a CSV file. The first line (after hskip
    skipped header lines) is expected to hold the column names, which are
    mapped to types in the lookup dictionary.
    """
    ses = scheme.db.Session()
    # currently we can only cope with updating the entire table as a whole
    scheme.dropTable(tablename)
    slurplogger().info("Filling CSV table %s:%s" % (scheme._schema, tablename))
    with open(filename, 'r') as fid:
        for i in range(hskip):
            next(fid)
        names, cols = columnsFromCSV(fid.readline(), lookup)
        table = Table(tablename, scheme.db.mdata, *cols, schema=scheme._schema)
        table.create(checkfirst=True)
        tableMap = tableMapFactory(tablename, table)
        for ln in fid:
            values = valuesFromCSV(ln, names)
            ses.add(tableMap(**values))
    ses.commit()
    ses.close()
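# A minimal sketch of how fillCSVTable might be invoked (the lookup format is
# an assumption here: a dict mapping column names to sqlalchemy column types;
# the file and table names are hypothetical):
#
#   from sqlalchemy import Integer, String, Float
#   lookup = {"id": Integer, "station": String, "elevation": Float}
#   fillCSVTable("stations.csv", "stations", lookup, scheme, hskip=0)
#
# with stations.csv starting as:
#
#   id,station,elevation
#   1,Delft,-2.0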
def pull(self, missionRegex=None):
    """Pulls the necessary tables and data from the rads server

    :param missionRegex: only register the missions obeying this regular expression
    """
    # pull and register the radsCycles table if it needs updating
    radsCycles = geoslurpCatalogue.getDatasets(
        self.conf, f"{scheme}.RadsCycles")[0](self.db)
    if radsCycles.isExpired():
        radsCycles.pull()
        radsCycles.register()
    # determine the reference cycles and download rads 1hz data files to get the orbit
    self.registerRefCycles()
    # the query takes the first cycle which has the maximum amount of passes
    # for each mission/phase combination
    # download the appropriate cycles
    for mission, entry in self._dbinvent.data["missions"].items():
        if missionRegex:
            if not re.search(missionRegex, mission):
                slurplogger().info(f"Skipping mission {mission}")
                continue
        # download rads data for this specific cycle
        sat = mission[0:2]
        ph = mission[2:3]
        alttbl = f"{scheme}.rads_{sat}_{ph}"
        slurplogger().info(
            f"Getting reference cycle {entry['refcycle']} for {alttbl}")
        radsOrbit = geoslurpCatalogue.getDatasets(self.conf, f"{alttbl}")[0](self.db)
        radsOrbit.pull(cycle=entry["refcycle"])
        radsOrbit.register()
def register(self):
    """Update/populate a database table (creates one if it doesn't exist)

    This function reads an ogr dataset (e.g. a shapefile) and puts its
    features in a single table, setting the internal sqlalchemy table.
    """
    # currently we can only cope with updating the entire table as a whole
    self.db.dropTable(self.name, self.scheme)
    slurplogger().info("Filling POSTGIS table %s.%s with data from %s" %
                       (self.scheme, self.name, self.ogrfile))
    # open shapefile directory or ogr file
    if self.ogrfile.endswith('.kmz') and not gdal.GetDriverByName('LIBKML'):
        # unzip the kmz file
        cache = self.cacheDir()
        with ZipFile(self.ogrfile, 'r') as zp:
            kmlf = zp.namelist()[0]  # take the first file in the archive only
            zp.extract(kmlf, cache)
        kmlfile = os.path.join(cache, kmlf)
        shpf = gdal.OpenEx(kmlfile, 0)
    else:
        shpf = gdal.OpenEx(self.ogrfile, 0)
    count = 0
    for ithlayer in range(shpf.GetLayerCount()):
        shpflayer = shpf.GetLayer(ithlayer)
        if self.layerregex:
            if not re.search(self.layerregex, shpflayer.GetName()):
                continue
        sourceprj = shpflayer.GetSpatialRef()
        if sourceprj.IsSame(self.targetprj):
            transform = None
        else:
            transform = osr.CoordinateTransformation(sourceprj, self.targetprj)
        for feat in shpflayer:
            count += 1
            if self.table is None:
                cols = self.columnsFromOgrFeat(feat)
                self.createTable(cols)
            values = self.valuesFromOgrFeat(feat, transform)
            try:
                self.addEntry(values)
            except Exception:
                # skip features which cannot be added
                pass
    # also update the entry in the inventory table
    self.updateInvent()
def cachedGithubCatalogue(reponame, cachedir=".", commitsha=None,
                          gfilter=GithubFilter(),
                          gfollowfilter=GithubFilter({"type": "tree"}),
                          depth=2, ghtoken=None):
    """Caches the result of a github crawl for later reuse"""
    cachedCatalog = os.path.join(cachedir, reponame.replace("/", "_") + ".yaml")
    catalog = {}
    if os.path.exists(cachedCatalog):
        # check whether the commit sha agrees when explicitly specified
        if commitsha:
            # read the catalog from the yaml file
            with open(cachedCatalog, 'r') as fid:
                catalog = yaml.safe_load(fid)
            if catalog["commitsha"] != commitsha:
                # trigger a new download
                catalog = {}
        else:
            # always download a newer version
            catalog = {}
    if catalog:
        slurplogger().info("using cached github catalogue %s" % (cachedCatalog))
    else:
        slurplogger().info("downloading github catalogue to cache %s" % (cachedCatalog))
        # retrieve from github and store for later use
        crwl = Crawler(reponame, commitsha=commitsha, filter=gfilter,
                       followfilt=gfollowfilter, oauthtoken=ghtoken)
        catalog = {
            "Description": "Cached github crawler results",
            "rooturl": crwl.rooturl,
            "commitsha": commitsha,
            "datasets": []
        }
        for item in crwl.treeitems(depth=depth):
            catalog["datasets"].append({
                "path": os.path.join(item["dirpath"], item["path"]),
                "url": item["url"]
            })
        # save the results to a cached file
        with open(cachedCatalog, 'w') as fid:
            yaml.dump(catalog, fid, default_flow_style=False)
    return catalog
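# Usage sketch (illustrative): cache the file tree of a repository and list
# the discovered items; the repository name is hypothetical and the token is
# optional.
#
#   cat = cachedGithubCatalogue("someorg/somerepo", cachedir="/tmp")
#   for ds in cat["datasets"]:
#       print(ds["path"], ds["url"])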
def lloveMetaExtractor(uri):
    """Extract some meta info from a load Love number file"""
    # heuristically derive the load type from the filename
    if re.search("body", uri.url):
        ltype = "body"
    else:
        ltype = "surface"
    reentry = re.compile('^ *[0-9]')
    hn = []
    ln = []
    kn = []
    deg = []
    slurplogger().info(f"Processing {uri.url}")
    descr = ""
    ref = None
    with gzip.open(uri.url, 'rt') as fid:
        for line in fid:
            if reentry.search(line):
                linespl = line.split()
                n = int(linespl[0])
                if n == 1:
                    # look for CF degree 1 coefficients only
                    ref = "CF"
                    if linespl[4] != ref:
                        # only use the degree 1 numbers of the chosen reference system
                        continue
                deg.append(n)
                hln = [float(el.replace('D', 'E')) for el in linespl[1:4]]
                # replace infinite values with None
                hln = [None if np.isinf(el) else el for el in hln]
                hn.append(hln[0])
                ln.append(hln[1])
                kn.append(hln[2])
            else:
                # append the comment line to the description
                descr += line
    # create an xarray dataset
    dslove = xr.Dataset(data_vars=dict(kn=(["degree"], kn),
                                       hn=(["degree"], hn),
                                       ln=(["degree"], ln)),
                        coords=dict(degree=(["degree"], deg)))
    # extract the maximum degree
    nmax = dslove.degree.max().data.item()
    meta = {
        "name": os.path.basename(uri.url).replace(".love.gz", ""),
        "lastupdate": uri.lastmod,
        "descr": descr,
        "loadtype": ltype,
        "nmax": nmax,
        "ref": ref,
        "data": dslove
    }
    return meta
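# Love-number files often use Fortran-style 'D' exponents, which Python's
# float() does not accept; the .replace('D', 'E') above converts them first.
# For example (the value is made up):
#
#   >>> float("-0.30750D+00".replace('D', 'E'))
#   -0.3075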
def register(self):
    # currently deletes all entries in the table
    self.truncateTable()
    # open the main index file and read it
    zipdir = self.cacheDir() + "/" + self.typ + "_" + self.freq
    with open(os.path.join(zipdir, 'filelist.txt'), 'r') as fid:
        for ln in fid:
            lnspl = ln.split(";")
            lat = float(lnspl[1])
            lon = float(lnspl[2])
            id = int(lnspl[0])
            slurplogger().info("Indexing %s" % (lnspl[3]))
            geoLoc = ogr.Geometry(ogr.wkbPoint)
            geoLoc.AddPoint(lon, lat)
            meta = {
                "id": id,
                "statname": lnspl[3],
                "countrycode": lnspl[4],
                "formerid": lnspl[5],
                "geom": geoLoc.ExportToWkt(),
            }
            # also open the data file
            data = {"time": [], "sl": []}
            tmin = datetime.max
            tmax = datetime.min
            with open(os.path.join(zipdir, 'data', "%d.%sdata" % (id, self.typ))) as dfid:
                for dln in dfid:
                    tyear, valmm, dum1, dum2 = dln.split(";")
                    dt = decyear2dt(float(tyear))
                    if self.freq == 'monthly':
                        dstart, dend = dt2monthlyinterval(dt)
                    else:
                        # yearly
                        dstart, dend = dt2yearlyinterval(dt)
                    tmin = min(dt, tmin)
                    tmax = max(dt, tmax)
                    data["time"].append(dt.isoformat())
                    data["sl"].append(1e3 * int(valmm))
            # open the documentation file
            with open(os.path.join(zipdir, 'docu', "%d.txt" % (id))) as docid:
                data["doc"] = docid.readlines()
            # open the auth file
            with open(os.path.join(zipdir, 'docu', "%d_auth.txt" % (id))) as docid:
                data["auth"] = docid.readlines()
            meta['tstart'] = tmin
            meta["tend"] = tmax
            meta["data"] = data
            self.addEntry(meta)
    self.updateInvent()
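# decyear2dt is used above to turn decimal years (e.g. 1992.5417) into
# datetimes. A minimal, self-contained sketch of such a conversion, in case
# the helper is unavailable (this is an assumption, not necessarily
# geoslurp's implementation):

from datetime import datetime, timedelta

def decyear2dt_sketch(decyear):
    """Convert a decimal year such as 1992.5417 to a datetime"""
    year = int(decyear)
    start = datetime(year, 1, 1)
    # length of this (possibly leap) year in seconds
    yearlen = (datetime(year + 1, 1, 1) - start).total_seconds()
    return start + timedelta(seconds=(decyear - year) * yearlen)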
def purgeentry(self):
    """Delete pgfunction entry in the database"""
    slurplogger().info("Deleting %s function entry" % (self.name))
    self._ses.delete(self._dbinvent)
    self._ses.commit()
    dropexec = text("DROP FUNCTION IF EXISTS %s.%s;" % (self.scheme, self.name))
    self.db.dbeng.execute(dropexec)
def register(self):
    # truncate the table
    self.truncateTable()
    for cyclefile in glob(self.dataDir() + '/*.cyc'):
        slurplogger().info("extracting cycle catalogue from %s" % (cyclefile))
        cycleinfo = extractCycleInfo(cyclefile)
        self.bulkInsert(cycleinfo)
    self.updateInvent()
def register(self):
    """Register the drainage divides"""
    slurplogger().info("Registering %s" % self.name)
    # possibly empty the table
    self.truncateTable()
    fname = os.path.join(self.cacheDir(), self.fbase + ".gz")
    # loop over the polygon entries
    for dicentry in IceSatPolygons(fname):
        self.addEntry(dicentry)
    self.updateInvent()
def addUser(self, name, passw, readonly=False):
    """Adds a user to the database (note: executing this function requires
    appropriate database rights)"""
    slurplogger().info("Adding new user: %s" % (name))
    if readonly:
        self.dbeng.execute(
            "CREATE USER %s WITH ENCRYPTED PASSWORD '%s' IN ROLE geobrowse;"
            % (name, passw))
    else:
        self.dbeng.execute(
            "CREATE USER %s WITH ENCRYPTED PASSWORD '%s' IN ROLE geoslurp,geobrowse;"
            % (name, passw))
def requestInfo(self):
    """Request info (modification time, size, data coverage) on this specific
    query from the server"""
    if self.info:
        # quick return when already done
        return
    self.opts.describe = True
    oldd = self.opts.out_dir
    oldnm = self.opts.out_name
    self.opts.out_dir = self.opts.cache
    self.opts.out_name = self.opts.out_name.replace('.nc', '_descr.xml')
    try:
        execute_request(self.opts)
    except Exception:
        slurplogger().error("failed to request info on query")
        raise
    self.opts.describe = False
    self.opts.out_dir = oldd
    # read and parse the xml
    xml = XMLTree.parse(
        os.path.join(self.opts.cache, self.opts.out_name.replace('.nc', '.xml')))
    trange = xml.find('timeCoverage')
    self.lastmod = isoParser(trange.attrib['end']).replace(tzinfo=None)
    # also retrieve the data coverage
    covdict = {}
    for axis in xml.iterfind('dataGeospatialCoverage/axis'):
        if axis.attrib['axisType'] == 'Lat':
            covdict['s'] = float(axis.attrib['lower'])
            covdict['n'] = float(axis.attrib['upper'])
        if axis.attrib['axisType'] == 'Lon':
            covdict['w'] = float(axis.attrib['lower'])
            covdict['e'] = float(axis.attrib['upper'])
        if axis.attrib['axisType'] == 'Time':
            covdict['ts'] = num2date(float(axis.attrib['lower']), axis.attrib['units'])
            covdict['te'] = num2date(float(axis.attrib['upper']), axis.attrib['units'])
    self.maxbtdbox = BtdBox(**covdict)
    # crop/synchronize the requested bounding box with what is available
    self.opts.btdbox.crop(self.maxbtdbox)
    self.opts.syncbtdbox()
    # change the output name back to the nc suffix
    self.opts.out_name = oldnm
    self.info = True
def xmlitems(self, xmlcatalog=None, url=None, depth=10):
    """Generator which returns xml nodes obeying a certain filter.

    Nodes which obey the follow-filter will be searched recursively."""
    if depth == 0:
        # signal a stopiteration
        return
    else:
        depth -= 1
    if xmlcatalog is None:
        xmlcatalog = self._rootxml
    if url is None:
        url = self._catalogurl
    for xelem in xmlcatalog:
        if self._filt.isValid(xelem):
            # special check whether we're resuming here
            if self.resuming:
                # continue with the element after the registered resume point
                self.unsetResumePoint()
                continue
            # this entry can be yielded straight away
            yield xelem
            # continue with the loop after yielding
            continue
        if not self._followFilt:
            # continue with the next element if no element should be followed
            continue
        if self._followFilt.isValid(xelem):
            # we may need a recursive search, in either a linked subcatalog
            # or the children of the current element
            if xelem.tag.endswith("catalogRef"):
                # CatalogRefs are treated specially by retrieving the
                # subcatalog from the thredds server
                suburl = os.path.dirname(url) + "/" + gethref(xelem)
                try:
                    subxml = self.getCatalog(suburl)
                except Exception:
                    # just ignore this catalog entry upon exceptions
                    slurplogger().warning("Ignoring failed CatalogRef %s" % (suburl))
                    continue
            else:
                # otherwise just look in the children of the current element
                suburl = url
                subxml = xelem
            yield from self.xmlitems(subxml, suburl, depth)
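# Usage sketch (illustrative): iterate over the dataset nodes of a thredds
# catalog, limiting the recursion to three catalog levels; `crwl` is a
# hypothetical instance of the crawler class this method belongs to, and the
# 'name' attribute lookup assumes standard thredds catalog xml.
#
#   for node in crwl.xmlitems(depth=3):
#       print(node.attrib.get("name"))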
def register(self):
    """Update/populate a database table (creates one if it doesn't exist)

    This function reads an ogr dataset (e.g. a shapefile) and puts its
    features in a single table, setting the internal sqlalchemy table.
    """
    # currently we can only cope with updating the entire table as a whole
    self.db.dropTable(self.name, self.scheme)
    slurplogger().info("Filling POSTGIS table %s.%s with data from %s" %
                       (self.scheme, self.name, self.ogrfile))
    # open shapefile directory or ogr file
    shpf = gdal.OpenEx(self.ogrfile, 0)
    count = 0
    for ithlayer in range(shpf.GetLayerCount()):
        shpflayer = shpf.GetLayer(ithlayer)
        if self.layerregex:
            if not re.search(self.layerregex, shpflayer.GetName()):
                continue
        sourceprj = shpflayer.GetSpatialRef()
        if sourceprj.IsSame(self.targetprj):
            transform = None
        else:
            transform = osr.CoordinateTransformation(sourceprj, self.targetprj)
        for feat in shpflayer:
            count += 1
            if self.table is None:
                cols = columnsFromOgrFeat(feat, forceGType=self.gtype,
                                          targetsrid=self.targetsrid)
                self.createTable(cols)
            values = valuesFromOgrFeat(feat, self.encoding, transform,
                                       self.targetsrid, self.swapxy)
            try:
                self.addEntry(values)
            except Exception:
                # skip features which cannot be added
                pass
    # also update the entry in the inventory table
    self.updateInvent()
def download(self):
    """Download the file, recursively splitting up the request when it
    exceeds the server's size limit"""
    muri = Uri(self.mopts)
    # check whether a download is needed
    muri.requestInfo()
    uristacked = UriFile(self.mopts.fullname())
    if uristacked.lastmod:
        if muri.lastmod <= uristacked.lastmod:
            slurplogger().info("Already downloaded %s" % (uristacked.url))
            # quick return when there is no need to merge/download
            return uristacked, False
    # check whether the download is allowed
    kb, maxkb = muri.updateSize()
    if kb > maxkb:
        # split up the request into 2 bounding boxes divided in time and try again
        Abbox, Bbbox = muri.opts.btdbox.timeSplit()
        AmotuRec = MotuRecursive(copy.deepcopy(self.mopts))
        AmotuRec.mopts.syncbtdbox(Abbox)
        AmotuRec.mopts.out_name = self.mopts.out_name.replace('.nc', '_A.nc')
        AmotuRec.mopts.out_dir = AmotuRec.mopts.cache
        BmotuRec = MotuRecursive(copy.deepcopy(self.mopts))
        BmotuRec.mopts.syncbtdbox(Bbbox)
        BmotuRec.mopts.out_name = self.mopts.out_name.replace('.nc', '_B.nc')
        BmotuRec.mopts.out_dir = BmotuRec.mopts.cache
        Auri, Aupd = AmotuRec.download()
        Buri, Bupd = BmotuRec.download()
        # possible improvement: split the dataset at an unlimited dimension
        # and append the second part to the first one
        # patch the files together (if updated)
        if Aupd or Bupd or not os.path.exists(self.mopts.fullname()):
            uristacked, upd = stackNcFiles(self.mopts.fullname(),
                                           Auri.url, Buri.url, 'time')
            if not self.keepfiles:
                # remove the partial files
                os.remove(AmotuRec.mopts.fullname())
                os.remove(BmotuRec.mopts.fullname())
        else:
            uristacked = UriFile(self.mopts.fullname())
            upd = False
        # return the computed update flag (returning True unconditionally
        # would make the upd = False branch above dead code)
        return uristacked, upd
    else:
        return muri.download(self.mopts.out_dir, check=True)
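# The recursion above relies on btdbox.timeSplit() to bisect the requested
# time span until each half fits under the server's size limit. A minimal
# sketch of such a bisection on plain datetimes (an assumption -- BtdBox is
# part of geoslurp and its actual implementation is not reproduced here):

from datetime import datetime

def time_split_sketch(ts, te):
    """Split the interval [ts, te] into two halves sharing the midpoint"""
    tmid = ts + (te - ts) / 2
    return (ts, tmid), (tmid, te)

# e.g. time_split_sketch(datetime(2000, 1, 1), datetime(2001, 1, 1))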
def queueRequest(self, fout, requestDict):
    if os.path.exists(fout):
        slurplogger().info(f"Already downloaded file {fout}, skipping request")
        return
    req_id = None
    # possibly get the request id from a previously queued job
    if fout in self.jobqueue:
        req_id = self.jobqueue[fout]
    if req_id:
        # try to retrieve an existing job
        slurplogger().info(f"Trying to retrieve previously queued job for {fout}")
        try:
            req = cdsapi.api.Result(self.client, dict(request_id=req_id))
            req.update()
        except Exception:
            # the job cannot be found anymore
            slurplogger().info(f"Job cannot be found anymore for {fout}, requeuing")
            req_id = None
            del self.jobqueue[fout]
    if not req_id:
        # start a new request
        slurplogger().info(f"Queuing new CDS request for {fout}")
        req = self.client.retrieve(self.resource, requestDict)
        req.update()
        req_id = req.reply["request_id"]
        # add an entry to the inventory
        self.jobqueue[fout] = req_id
    self.requests.append((req, fout, req.reply["state"]))
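# Usage sketch (illustrative): queue an ERA5 request. The resource name and
# request dictionary follow the CDS API conventions, but the exact keys depend
# on the dataset and are assumptions here; the output filename is hypothetical.
#
#   self.resource = "reanalysis-era5-single-levels"
#   self.queueRequest("era5_2m_temperature_2000-01-01.nc", {
#       "product_type": "reanalysis",
#       "variable": "2m_temperature",
#       "year": "2000", "month": "01", "day": "01", "time": "00:00",
#       "format": "netcdf",
#   })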
def rastExtract(self, uri):
    """How things are extracted from the raster file (this may be overloaded
    in derived classes for more granular access)"""
    slurplogger().info("Extracting info from raster: %s" % (uri.url))
    # check the file type
    if uri.url.endswith(".nc"):
        raw = False
    else:
        raw = True
    if self.preview or not raw:
        meta = self.rastFromRio(uri)
    else:
        meta = self.rastFromGDAL(uri)
    return meta
def register(self): slurplogger().info("Building file list..") files=[UriFile(file) for file in findFiles(self.dataDir(),'.*gz',self._dbinvent.lastupdate)] # import pdb;pdb.set_trace() filesnew=self.retainnewUris(files) if len(filesnew) == 0: slurplogger().info("GRDC: No database update needed") return # filesnew=[UriFile(os.path.join(self.dataDir(),"4208270_Q_Month.txt.gz"))] #loop over files for uri in filesnew: meta=GRDCmetaExtractor(uri) self.addEntry(meta) self.updateInvent()
def encryptAuth(self):
    """Encrypt the authentication credentials for storage in the database"""
    salt = os.urandom(16)
    cyph = self.genCypher(salt, self.db.passw.encode('utf-8'))
    conf = json.dumps(self.auth).encode('utf-8')
    if self.authver == "ENCRV1":
        slurplogger().warning(
            "Replacing the authentication details with a safer encryption "
            "(not compatible with older geoslurp versions)")
        self.authver = "ENCRV2"
    self.userentry.auth = self.authver.encode('utf-8') + salt + cyph.encrypt(conf)
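# A possible shape for genCypher (an assumption -- the actual implementation
# lives elsewhere in the class): derive a symmetric Fernet key from the
# database password and the random salt, so that `cyph.encrypt(conf)` works
# as used above.

from base64 import urlsafe_b64encode
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

def genCypherSketch(salt, passw):
    """Derive a Fernet cipher from a password and a 16-byte salt"""
    kdf = PBKDF2HMAC(algorithm=hashes.SHA256(), length=32, salt=salt,
                     iterations=480000)
    return Fernet(urlsafe_b64encode(kdf.derive(passw)))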
def register(self):
    """Register the entries extracted from each of the available data sources"""
    for gsource in self.dsources:
        try:
            src = gsource(self.cacheDir())
            metadicts = src.extract()
            slurplogger().info("registering %s" % (src.meta["name"]))
            for meta in metadicts:
                if self.entryNeedsUpdate(meta['name'],
                                         lastmod=src.meta['lastupdate'],
                                         col=self.table.name):
                    self.addEntry(meta)
        except Exception:
            # the source is possibly not downloaded, which is ok
            continue
    self.updateInvent()
def icgemMetaExtractor(uri):
    """Extract meta information from a gzipped icgem file"""
    # first extract the icgem header (key/value pairs up to end_of_head)
    hdr = {}
    with gz.open(uri.url, 'rt') as fid:
        slurplogger().info("Extracting info from %s" % (uri.url))
        for ln in fid:
            if 'end_of_head' in ln:
                break
            spl = ln.split()
            if len(spl) == 2:
                hdr[spl[0]] = spl[1]
    try:
        meta = {
            "nmax": int(hdr["max_degree"]),
            "lastupdate": uri.lastmod,
            "format": "icgem",
            "gm": float(hdr["earth_gravity_constant"].replace('D', 'E')),
            "re": float(hdr["radius"].replace('D', 'E')),
            "uri": uri.url,
            "type": "GSM",
            "data": {"name": hdr["modelname"]}
        }
    except KeyError:
        # a mandatory header field is missing
        slurplogger().error("Could not extract a required header field from %s"
                            % (uri.url))
        raise
    # add the tide system
    try:
        tmp = hdr["tide_system"]
        if re.search('zero_tide', tmp):
            meta["tidesystem"] = "zero-tide"
        elif re.search('tide_free', tmp):
            meta["tidesystem"] = "tide-free"
    except KeyError:
        # the tide system entry is optional
        pass
    return meta
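# For reference, the header of an ICGEM .gfc file is a series of whitespace
# separated key/value pairs terminated by an end_of_head marker, e.g. (the
# values shown are illustrative):
#
#   product_type             gravity_field
#   modelname                SOMEMODEL
#   earth_gravity_constant   0.3986004415E+15
#   radius                   0.6378136300E+07
#   max_degree               360
#   tide_system              zero_tide
#   end_of_head =====================================
#
# so the loop above collects exactly the two-token lines into hdr.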
def register(self): slurplogger().info("Building file list..") files = [ UriFile(file) for file in findFiles(self.cacheDir(), '.*love', self._dbinvent.lastupdate) ] if len(files) == 0: slurplogger().info("LLove: No new files found since last update") return self.truncateTable() #loop over files for uri in files: self.addEntry(lloveMetaExtractor(uri)) self.updateInvent()
def pull(self):
    """Pulls the shapefile layers from the server"""
    zipf = http("http://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/indexes/"
                + self.filebase + ".zip", lastmod=datetime(2018, 9, 26))
    # download the zipped shapefiles
    downloaddir = self.cacheDir()
    uri, upd = zipf.download(downloaddir, check=True)
    zipd = os.path.join(downloaddir, 'extract')
    if not os.path.exists(zipd):
        # unzip the goodies
        with ZipFile(uri.url, 'r') as zp:
            slurplogger().info("Unzipping %s" % (uri.url))
            zp.extractall(zipd)
def pull(self):
    try:
        cred = self.conf.authCred("grdcgis")
    except Exception:
        raise RuntimeError(
            "No authentication data found. The GRDC data is unfortunately "
            "only available after agreeing with the GRDC user policy, please "
            "visit https://www.bafg.de/GRDC/EN/04_spcldtbss/43_GRfN/refDataset_node.html")
    # pull the data but rezip it with gzip to save space
    pullGRDC(self.cacheDir(), cred, pattern=self.zipname, unzip=False)
    datadir = self.dataDir()
    # rezip the data into the data directory
    with ZipFile(os.path.join(self.cacheDir(), self.zipname), 'r') as zp:
        for member in zp.namelist():
            # open the file and gzip it into the datadir
            with zp.open(member) as fid:
                slurplogger().info("re-gzipping file %s" % member)
                with gzip.open(os.path.join(datadir, member + ".gz"), 'wb') as gzid:
                    gzid.write(fid.read())
def register(self):
    # create a list of files which need to be (re)registered
    if self.updated:
        files = self.updated
    else:
        files = [UriFile(file) for file in
                 findFiles(self._dbinvent.datadir, '.*gfc.gz',
                           since=self._dbinvent.lastupdate)]
    newfiles = self.retainnewUris(files)
    # loop over the new files
    for uri in newfiles:
        slurplogger().info("extracting meta info from %s" % (uri.url))
        meta = icgemMetaExtractor(uri)
        meta = enhanceMeta(meta)
        self.addEntry(meta)
    self.updateInvent()
def register(self):
    if not self.table:
        # create a new table on the fly
        self.createTable(self.columns)
    # create a list of files which need to be (re)registered; the raw f-string
    # keeps the backslash which escapes the extension's leading dot
    newfiles = self.retainnewUris(
        [UriFile(file) for file in findFiles(self.dataDir(), rf".*\{self.app}$")])
    for uri in newfiles:
        meta = self.metaExtractor(uri)
        if not meta:
            # don't register empty entries
            continue
        slurplogger().info(f"Adding metadata from {uri.url}")
        self.addEntry(meta)
    self._dbinvent.data["Description"] = self.description
    self.updateInvent()
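# A minimal sketch of a dataset class that could drive the generic register()
# above (all names are illustrative; the DataSet base class and the sqlalchemy
# column helpers are part of geoslurp and assumed here):
#
#   class MyNcDataset(DataSet):
#       app = ".nc"          # file extension to scan for
#       columns = [Column("id", Integer, primary_key=True),
#                  Column("uri", String),
#                  Column("lastupdate", TIMESTAMP)]
#       description = "Metadata of my netcdf files"
#
#       def metaExtractor(self, uri):
#           return {"uri": uri.url, "lastupdate": uri.lastmod}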
def updateSize(self):
    """Request information about the size of the query"""
    self.opts.size = True
    oldd = self.opts.out_dir
    self.opts.out_dir = self.opts.cache
    try:
        execute_request(self.opts)
    except Exception as e:
        slurplogger().error("failed to request size: %s", e)
        raise
    self.opts.size = False
    self.opts.out_dir = oldd
    xml = XMLTree.parse(os.path.join(self.opts.cache, self.opts.out_name))
    self.kbsize = float(xml.getroot().attrib['size'])
    self.maxkbsize = float(xml.getroot().attrib['maxAllowedSize'])
    self.opts.out_name = self.opts.out_name.replace('.xml', '.nc')
    return self.kbsize, self.maxkbsize