def show_status(request): try: logs.info(" datacenter name: " + request.dcname) rqstat = request.status() except ArclinkError, e: logs.error(str(e)) return
def __getDecryptor(self, buf, password):
    try:
        try:
            SSL = None
            status = False
            if buf is None or len(buf) < 8:
                raise Exception("supplied Buffer smaller than 8, cannot find out encryption.")
            if buf[0:8] == "Salted__":
                status = True
                if password is None or password == "":
                    raise Exception('file is encrypted but no password supplied.')
                SSL = SSLWrapper(password)
        except Exception, e:
            logs.info(str(e))
    finally:
        return (SSL, status)
def parse_breqfast(req, input_file):
    parser = BreqParser()
    parser.parse_email(input_file)
    req.content = parser.reqlist
    logs.debug("")
    if parser.failstr:
        logs.error(parser.failstr)
    else:
        logs.info("parsed %d lines from breqfast message" % len(req.content))
def parse_breqfast_from_handler(req, fh):
    parser = BreqParser()
    parser.parse_email_from_handler(fh)
    req.content = parser.reqlist
    logs.debug("")
    if parser.failstr:
        logs.error(parser.failstr)
    else:
        logs.info("parsed %d lines from breqfast message" % len(req.content))
def __getDecompressor(self, buf):
    try:
        try:
            status = False
            DEC = None
            if buf is None or len(buf) < 3:
                raise Exception('buffer size too small to perform analyse.')
            if buf[0:3] == "BZh":
                DEC = bz2.BZ2Decompressor()
        except Exception, e:
            logs.info(str(e))
            if status is True:
                logs.info('file will be saved compressed.')
    finally:
        return (DEC, status)
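# Both helpers above decide how to post-process a downloaded volume by sniffing
# its first bytes: OpenSSL-encrypted files start with the eight bytes
# "Salted__", bzip2-compressed files with "BZh". A minimal self-contained
# sketch of the same idea (the function name is illustrative only and is not
# part of the code above):
import bz2

def sniff_volume(head):
    """Return (is_encrypted, decompressor or None) for the first bytes of a volume."""
    is_encrypted = len(head) >= 8 and head[0:8] == "Salted__"
    decompressor = bz2.BZ2Decompressor() if len(head) >= 3 and head[0:3] == "BZh" else None
    return (is_encrypted, decompressor)

# Example: sniff_volume(open("volume.bin", "rb").read(8))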
def run(self):
    fd = self.__volume_factory.open(self.__dcid)
    self.__req.submit(self.__addr, DEFAULT_USER, None)

    if self.__req.error:
        logs.warning("error submitting request to %s: %s" % (self.__dcid, self.__req.error))
        fd.close(Arclink_ERROR(message=self.__req.error))
        return

    try:
        self.__req.download_data(fd, True, False)
        logs.info("%s: request %s ready" % (self.__req.address, self.__req.id))
        rqstat = self.__req.status()

        for vol in rqstat.volume:
            if vol.status != STATUS_OK:
                if vol.message:
                    fd.close(Arclink_WARN(message=vol.message))
                else:
                    fd.close(Arclink_WARN)
                break
        else:
            if vol.message:
                fd.close(Arclink_OK(message=vol.message))
            else:
                fd.close(Arclink_OK)

        self.__req.purge()

    except ArclinkTimeout, e:
        logs.warning("%s: %s" % (self.__req.address, str(e)))
        fd.close(Arclink_RETRY(message="timeout"))
def show_status(rqstat):
    if rqstat.error:
        req_status = "ERROR"
    elif rqstat.ready:
        req_status = "READY"
    else:
        req_status = "PROCESSING"

    logs.info("Request ID: %s, Label: %s, Type: %s, Args: %s" %
              (rqstat.id, rqstat.label, rqstat.type, rqstat.args))
    logs.info("Status: %s, Size: %d, Info: %s" %
              (req_status, rqstat.size, rqstat.message))

    for vol in rqstat.volume:
        logs.info("  Volume ID: %s, Status: %s, Size: %d, Info: %s" %
                  (vol.id, arclink_status_string(vol.status), vol.size, vol.message))
        for rqln in vol.line:
            logs.info("    Request: %s" % (rqln.content,))
            logs.info("    Status: %s, Size: %d, Info: %s" %
                      (arclink_status_string(rqln.status), rqln.size, rqln.message))

    logs.info("")
logs.info(" datacenter name: " + request.dcname) rqstat = request.status() except ArclinkError, e: logs.error(str(e)) return if rqstat.error: req_status = "ERROR" elif rqstat.ready: req_status = "READY" else: req_status = "PROCESSING" IrequestCompressed = request.args.get("compression") logs.info(" request ID: %s, Label: %s, Type: %s, Encrypted: %s, Args: %s" % \ (rqstat.id, rqstat.label, rqstat.type, rqstat.encrypted, rqstat.args)) logs.info(" status: %s, Size: %d, Info: %s" % \ (req_status, rqstat.size, rqstat.message)) for vol in rqstat.volume: logs.info(" volume ID: %s, dcid: %s, Status: %s, Size: %d, Encrypted: %s, Info: %s" % \ (vol.id, vol.dcid, arclink_status_string(vol.status), vol.size, vol.encrypted, vol.message)) for rqln in vol.line: logs.info(" request: %s" % (rqln.content,)) logs.info(" status: %s, Size: %d, Info: %s" % \ (arclink_status_string(rqln.status), rqln.size, rqln.message)) logs.info("") def show_lines(request):
                args[pv[0]] = ""
            else:
                args[pv[0]] = urllib.unquote(pv[1])

        action(obj, args)

    except ArclinkAuthFailed, e:
        obj.err_headers_out['WWW-Authenticate'] = 'Basic realm="%s"' % (e.dcname,)
        logs.debug("unauthorized")
        return apache.HTTP_UNAUTHORIZED

    except (ArclinkError, socket.error), e:
        #
        # PLE 2013-03-13: This is where that message
        # "Error: missing user ID (email address)"
        # escapes from, but how does it get raised?
        #
        logs.error(fname + ":" + str(e))
        obj.content_type = "text/plain"
        obj.write("Error: " + "(%s) " % fname + str(e))

    except apache.SERVER_RETURN:
        raise

    except Exception, e:
        logs.error(str(e))
        raise

    return apache.OK


syslog.openlog("webinterface", syslog.LOG_PID, syslog.LOG_LOCAL0)
logs.info("ArcLink webinterface v" + VERSION + " started")
def __load_file(self, func, file):
    if file:
        logs.info("loading " + file)
        func(file)
def run(self):
    try:
        logs.info("ArcLink request handler v" + VERSION + " started")
        logs.info("Configuration: ")
        logs.info("Request handler for %s (organization) at %s (Datacenter Id)" %
                  (self.organization, self.dcid))
        logs.info("Request Dir: %s" % self.reqdir)
        logs.info("Max request size: %s" % self.maxsize)
        logs.info("Archive Dir: %s" % self.archdir)
        logs.info("ISO Dir: %s" % self.isodir)
        logs.info("NRT Dir: %s" % self.nrtdir)
        logs.info("Trackdb is %s @ %s" % (self.trackdb, self.trackdir))
        logs.info("GFAurl: %s" % self.gfaurl)
        logs.info("Subnodelist: %s" % self.subnodelist)
        logs.info("File Database: %s" % self.filedb)

        subnode_addr = {}
        dcid_override = {}

        if self.subnodelist is not None:
            fd = open(self.subnodelist)
            line = fd.readline()
            while line:
                try:
                    (dcid, addr) = line.split()
                    subnode_addr[dcid] = addr
                except ValueError:
                    (dcid, addr, net, sta) = line.split()
                    subnode_addr[dcid] = addr
                    dcid_override[(net, sta)] = dcid
                line = fd.readline()
            fd.close()

        sc3wrap.dbQuery = self.query()
        handler.sc3App = self
        inv = Inventory(self.query().loadInventory())
        rtn = Routing(self.query().loadRouting())
        wf = WiggleFetcher(self.nrtdir, self.archdir, self.isodir, self.filedb,
                           1024 * 1024 * self.maxsize, self.dcid,
                           subnode_addr, dcid_override)

        if self.gfaurl:
            gfa = IO.GFArchive.Open(self.gfaurl)
            if gfa is None:
                logs.error("Invalid GFArchive URL: " + self.gfaurl)
        else:
            gfa = None

        rh = RequestHandler(inv, rtn, wf, gfa, self.reqdir,
                            (self.trackdir, self.trackdb), 5,
                            self.organization, DEFAULT_LABEL)

        mt = MessageThread(self.connection(), rh)
        mt.start()
        rh.start()

    except Exception:
        logs.print_exc()
        return False

    return True
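# Judging from the parsing loop above, the subnode list is a plain-text file
# with either two whitespace-separated columns (dcid address) or four
# (dcid address net sta), the longer form additionally overriding the DCID for
# one specific station. A self-contained sketch of the same parsing logic; the
# helper name is ours, not part of the request handler:
def parse_subnode_list(path):
    """Return (subnode_addr, dcid_override) dictionaries from a subnode list file."""
    subnode_addr = {}
    dcid_override = {}
    with open(path) as fd:
        for line in fd:
            if not line.strip():
                continue  # the sketch tolerates blank lines
            fields = line.split()
            if len(fields) == 2:
                dcid, addr = fields
            else:
                dcid, addr, net, sta = fields
                dcid_override[(net, sta)] = dcid
            subnode_addr[dcid] = addr
    return subnode_addr, dcid_override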
def process_options():
    parser = OptionParser(usage="usage: %prog [-h|--help] [OPTIONS] -u USER -o OUTPUTFILE [REQUEST]",
                          version="%prog v" + VERSION,
                          add_help_option=False)

    parser.set_defaults(address="eida.gfz-potsdam.de:18001",
                        request_format="native",
                        data_format="mseed",
                        spfr=None,
                        label=None,
                        no_resp_dict=False,
                        rebuild_volume=False,
                        proxymode=False,
                        timeout=300,
                        retries=5,
                        SSLpasswordFile="dcidpasswords.txt")

    parser.add_option("-h", "--help", action="store_true", dest="showhelp", default=False)
    parser.add_option("-l", "--longhelp", action="store_true", dest="showlonghelp", default=False)
    parser.add_option("-w", "--password-file", type="string", dest="SSLpasswordFile",
                      help="file containing passwords used for decryption of encrypted data (default %default)")
    parser.add_option("-a", "--address", type="string", dest="address",
                      help="address of primary ArcLink node (default %default)")

    foptions = ("native", "breqfast")
    parser.add_option("-f", "--request-format", type="choice", dest="request_format", choices=foptions,
                      help="request format: breqfast, native (default %default)")

    koptions = ("mseed", "mseed4k", "fseed", "dseed", "inv", "inventory")
    parser.add_option("-k", "--data-format", type="choice", dest="data_format", choices=koptions,
                      help="data format: mseed, mseed4k, fseed, dseed, inv[entory] (default %default)")

    parser.add_option("-s", "--preferred-sample-rate", type="float", dest="spfr",
                      help="preferred sample rate")
    parser.add_option("-L", "--label", type="string", dest="label",
                      help="label of SEED volume")
    parser.add_option("-n", "--no-resp-dict", action="store_true", dest="no_resp_dict",
                      help="avoid using response dictionary (default %default)")
    parser.add_option("-g", "--rebuild-volume", action="store_true", dest="rebuild_volume",
                      help="rebuild SEED volume (default %default)")
    parser.add_option("-p", "--proxy", action="store_true", dest="proxymode",
                      help="proxy mode, no routing (default %default)")
    parser.add_option("-t", "--timeout", type="int", dest="timeout",
                      help="timeout in seconds (default %default)")
    parser.add_option("-x", "--retries", type="int", dest="retries",
                      help="download retries (default %default)")
    parser.add_option("-v", action="callback", callback=add_verbosity,
                      help="increase verbosity level")
    parser.add_option("-q", action="callback", callback=add_quietness,
                      help="decrease verbosity level")
    parser.add_option("-u", "--user", type="string", dest="user",
                      help="user's e-mail address")
    parser.add_option("-o", "--output-file", type="string", dest="output_file",
                      help="file where downloaded data is written")

    (options, args) = parser.parse_args()

    if options.showhelp or options.showlonghelp:
        parser.print_help()

        if options.showlonghelp:
            print """
About ArcLink Protocol
======================

ArcLink is a protocol used to request seismological data from distributed
archives. Today it gives you access to several European data archives
(European Integrated Data Archive - EIDA) that support the protocol developed
by GEOFON ([email protected]) at the GeoForschungsZentrum, Potsdam, Germany.

You can find more information about it at the SeisComP3 and GEOFON web pages:

 * http://www.seiscomp3.org/
 * http://geofon.gfz-potsdam.de/

ArcLink Password File (for decryption)
======================================

In this file (default: dcidpasswords.txt) you can store your private passwords
given by different data centers. Each data center from which you request
encrypted data will send you a different password.

The format of the file is really simple: just the data center ID followed by
the password that you received, one data center ID and password per line. Any
empty lines or lines starting with # are ignored. Example:

gfz password1
odc password2
ipgp password3

The data center ID and password can be found in the automatically generated
e-mail that you received from each data center. (You will only receive this
e-mail if you have been authorized to download encrypted data and you have
tried to download it.)

Input File Format
=================

The ArcLink Fetch program supports two different input formats for the request
file: the traditional BREQ FAST format and its own native format. Both formats
contain the same information and differ only slightly.

Native Format:
--------------

The native format has the following layout:

YYYY,MM,DD,HH,MM,SS YYYY,MM,DD,HH,MM,SS Network Station Channel [Location]

Channel, Station and Location can contain wildcards (*) and the Location field
is optional. To match all locations please use the '*' symbol. Example:

2010,02,18,12,00,00 2010,02,18,12,10,00 GE WLF BH*
2010,02,18,12,00,00 2010,02,18,12,10,00 GE VSU BH* 00

BREQ FAST Format:
-----------------

The BREQ FAST format is a standard format used in seismology to request data.
Each header line starts with '.' and the request lines have the following
format:

Station Network {Time Start} {Time End} {Number of Channels} N x Channels Location

The time specification should have the following format:

YYYY MM DD HH MM SS.TTTT

Please read more about the BREQ FAST format at:

http://www.iris.edu/manuals/breq_fast.htm
"""
        sys.exit()

    errors = []
    warnings = []

    if options.user == None:
        errors.append("Username required")

    if options.output_file == None:
        errors.append("Output file required")

    if options.data_format.upper() != "FSEED" and options.rebuild_volume:
        errors.append("-g is only applicable to FSEED format")

    if len(args) > 1:
        errors.append("invalid command line options or multiple files supplied")
    elif len(args) == 1:
        if not os.path.exists(args[0]):
            errors.append("request file '%s' not found." % args[0])
        request_file = args[0]
    else:
        request_file = None

    SSLpasswordDict = {}
    if os.path.exists(options.SSLpasswordFile):
        fd = open(options.SSLpasswordFile)
        line = fd.readline()
        while line:
            line = line.strip()
            if line and line[0] != "#":
                try:
                    (dcid, password) = line.split()
                    SSLpasswordDict[dcid] = password
                except ValueError:
                    logs.error(options.SSLpasswordFile + " invalid line: " + line)
                    fd.close()
                    sys.exit()
            line = fd.readline()
    else:
        if options.SSLpasswordFile != parser.defaults['SSLpasswordFile']:
            errors.append("Supplied password file (%s) not found" % options.SSLpasswordFile)
        else:
            warnings.append("Default password file (%s) not found" % options.SSLpasswordFile)

    if len(errors) > 0:
        logs.error("\n** ArcLink Fetch %s **\n" % VERSION)
        parser.print_usage()
        logs.error("Errors detected on the command line:")
        for item in errors:
            logs.error("\t%s" % item)
        print ""

    if len(warnings) > 0:
        logs.info("Warnings detected on the command line:")
        for item in warnings:
            logs.info("\t%s" % item)
        print ""

    if len(errors) > 0:
        sys.exit()

    return (SSLpasswordDict, options.address, options.request_format, options.data_format,
            options.label, not options.no_resp_dict, options.rebuild_volume, options.proxymode,
            options.user, options.timeout, options.retries, options.output_file, request_file,
            options.spfr)
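# The long help text above describes the native request-line format
# ("start end network station channel [location]"). A small, self-contained
# illustration of how such a line can be split into its fields; the helper
# name is ours and not part of arclink_fetch itself:
from datetime import datetime

def parse_native_line(line):
    """Parse 'YYYY,MM,DD,HH,MM,SS YYYY,MM,DD,HH,MM,SS NET STA CHA [LOC]'."""
    parts = line.split()
    start = datetime(*[int(x) for x in parts[0].split(",")])
    end = datetime(*[int(x) for x in parts[1].split(",")])
    net, sta, cha = parts[2], parts[3], parts[4]
    loc = parts[5] if len(parts) > 5 else "*"  # location field is optional
    return (start, end, net, sta, cha, loc)

# parse_native_line("2010,02,18,12,00,00 2010,02,18,12,10,00 GE WLF BH*")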
    for (netCode, restricted) in networkRestricted.iteritems():
        inventory.setNetworkRestricted(netCode, restricted)

    for (netCode, network) in inventory.networks.iteritems():
        if netCode not in existingNetworks:
            logs.notice("deleting network %s from inventory" % (netCode,))
            inventory.obj.remove(network.obj)

    for ((netCode, staCode), station) in inventory.stations.iteritems():
        if netCode in existingNetworks and (netCode, staCode) not in existingStations:
            logs.notice("deleting station %s_%s from inventory" % (netCode, staCode))
            inventory.networks[netCode].obj.remove(station.obj)

    if incompleteResponse:
        logs.info("The following stations are missing full response data")
        logs.info("Use dlsv2inv if needed")

        # for netCode in sorted(incompleteResponse.keys()):
        #     logs.info("%s: %s" % (netCode, " ".join(sorted(list(incompleteResponse[netCode])))))
        tmpDict = sortDictionary(incompleteResponse)
        for netCode in tmpDict.keys():
            tmpSortedList = list(tmpDict[netCode])
            tmpSortedList.sort()
            logs.info("%s: %s" % (netCode, " ".join(tmpSortedList)))

    ar = seiscomp3.IO.XMLArchive()
    if not self.output:
        sys.stderr.write("Writing output to stdout\n")
        if not ar.create("-"):
            sys.stderr.write("Cannot open stdout\n")
logs.info("datacenter name: " + request.dcname) rqstat = request.status() except ArclinkError, e: logs.error(str(e)) return if rqstat.error: req_status = "ERROR" elif rqstat.ready: req_status = "READY" else: req_status = "PROCESSING" IrequestCompressed = request.args.get("compression") logs.info("request ID: %s, Label: %s, Type: %s, Encrypted: %s, Args: %s" % \ (rqstat.id, rqstat.label, rqstat.type, rqstat.encrypted, rqstat.args)) logs.info("status: %s, Size: %d, Info: %s" % \ (req_status, rqstat.size, rqstat.message)) for vol in rqstat.volume: logs.info(" volume ID: %s, dcid: %s, Status: %s, Size: %d, Encrypted: %s, Info: %s" % \ (vol.id, vol.dcid, arclink_status_string(vol.status), vol.size, vol.encrypted, vol.message)) for rqln in vol.line: logs.info(" request: %s" % (rqln.content, )) logs.info(" status: %s, Size: %d, Info: %s" % \ (arclink_status_string(rqln.status), rqln.size, rqln.message)) logs.info("")
def main():
    param0 = ["-y", "station", "-q", "format=text", "-q", "level=network"]
    param1 = ["-y", "station", "-q", "format=text", "-q", "level=channel"]
    param2 = ["-y", "dataselect", "-z"]
    times = {"starttime": datetime.datetime(1900, 1, 1),
             "endtime": datetime.datetime(2100, 1, 1)}
    nets = set()

    def add_param0(option, opt_str, value, parser):
        param0.append(opt_str)
        param0.append(value)

    def add_param1(option, opt_str, value, parser):
        param1.append(opt_str)
        param1.append(value)

    def add_param2(option, opt_str, value, parser):
        param2.append(opt_str)
        param2.append(value)

    def add_param(option, opt_str, value, parser):
        add_param0(option, opt_str, value, parser)
        add_param1(option, opt_str, value, parser)
        add_param2(option, opt_str, value, parser)

    def add_time(option, opt_str, value, parser):
        add_param1(option, opt_str, value, parser)
        try:
            t = dateutil.parser.parse(value)
        except ValueError as e:
            raise optparse.OptionValueError("option '%s': invalid time value: '%s'"
                                            % (opt_str, value))
        if t.tzinfo is not None:
            t = t.astimezone(dateutil.tz.tzutc()).replace(tzinfo=None)
        times[option.dest] = t

    parser = optparse.OptionParser(
        usage="Usage: %prog [-h|--help] [OPTIONS] -o directory",
        version="%prog " + VERSION)

    parser.set_defaults(
        url="http://geofon.gfz-potsdam.de/eidaws/routing/1/",
        timeout=600,
        retries=10,
        retry_wait=60,
        threads=5,
        max_lines=1000,
        max_timespan=1440)

    parser.add_option("-v", "--verbose", action="store_true", default=False,
                      help="verbose mode")
    parser.add_option("-u", "--url", type="string", action="callback",
                      callback=add_param, help="URL of routing service (default %default)")
    parser.add_option("-N", "--network", type="string", action="callback",
                      callback=add_param1, help="network code or pattern")
    parser.add_option("-S", "--station", type="string", action="callback",
                      callback=add_param1, help="station code or pattern")
    parser.add_option("-L", "--location", type="string", action="callback",
                      callback=add_param1, help="location code or pattern")
    parser.add_option("-C", "--channel", type="string", action="callback",
                      callback=add_param1, help="channel code or pattern")
    parser.add_option("-s", "--starttime", type="string", action="callback",
                      callback=add_time, help="start time")
    parser.add_option("-e", "--endtime", type="string", action="callback",
                      callback=add_time, help="end time")
    parser.add_option("-t", "--timeout", type="int", action="callback",
                      callback=add_param, help="request timeout in seconds (default %default)")
    parser.add_option("-r", "--retries", type="int", action="callback",
                      callback=add_param, help="number of retries (default %default)")
    parser.add_option("-w", "--retry-wait", type="int", action="callback",
                      callback=add_param, help="seconds to wait before each retry (default %default)")
    parser.add_option("-n", "--threads", type="int", action="callback",
                      callback=add_param, help="maximum number of download threads (default %default)")
    parser.add_option("-c", "--credentials-file", type="string", action="callback",
                      callback=add_param2, help="URL,user,password file (CSV format) for queryauth")
    parser.add_option("-a", "--auth-file", type="string", action="callback",
                      callback=add_param2, help="file that contains the auth token")
    parser.add_option("-o", "--output-dir", type="string",
                      help="SDS directory where downloaded data is written")
    parser.add_option("-l", "--max-lines", type="int",
                      help="max lines per request (default %default)")
    parser.add_option("-m", "--max-timespan", type="int",
                      help="max timespan per request in minutes (default %default)")
    parser.add_option("-z", "--no-citation", action="store_true", default=False,
                      help="suppress network citation info")
    parser.add_option("-Z", "--no-check", action="store_true", default=False,
                      help="suppress checking received routes and data")

    (options, args) = parser.parse_args()

    if args or not options.output_dir:
        parser.print_usage(sys.stderr)
        return 1

    def log_alert(s):
        if sys.stderr.isatty():
            s = "\033[31m" + s + "\033[m"
        sys.stderr.write(s + '\n')
        sys.stderr.flush()

    def log_notice(s):
        if sys.stderr.isatty():
            s = "\033[32m" + s + "\033[m"
        sys.stderr.write(s + '\n')
        sys.stderr.flush()

    def log_verbose(s):
        sys.stderr.write(s + '\n')
        sys.stderr.flush()

    def log_silent(s):
        pass

    logs.error = log_alert
    logs.warning = log_alert
    logs.notice = log_notice
    logs.info = (log_silent, log_verbose)[options.verbose]
    logs.debug = log_silent

    try:
        try:
            proc = exec_fetch(param1, None, options.verbose, options.no_check)
        except OSError as e:
            logs.error(str(e))
            logs.error("error running fdsnws_fetch")
            return 1

        timespan = {}

        for line in proc.stdout:
            if isinstance(line, bytes):
                line = line.decode('utf-8')

            if not line or line.startswith('#'):
                continue

            starttime = max(dateutil.parser.parse(line.split('|')[15]), times['starttime'])
            endtime = min(dateutil.parser.parse(line.split('|')[16]), times['endtime'])

            if starttime.tzinfo is not None:
                starttime = starttime.astimezone(dateutil.tz.tzutc()).replace(tzinfo=None)

            if endtime.tzinfo is not None:
                endtime = endtime.astimezone(dateutil.tz.tzutc()).replace(tzinfo=None)

            try:
                ts = timespan[tuple(line.split('|')[:4])]

                if ts.start > starttime:
                    ts.start = starttime
                    ts.current = starttime

                if ts.end < endtime:
                    ts.end = endtime

            except KeyError:
                timespan[tuple(line.split('|')[:4])] = Timespan(starttime, endtime)

        proc.stdout.close()
        proc.wait()

        if proc.returncode != 0:
            logs.error("error running fdsnws_fetch")
            return 1

        if os.path.exists(options.output_dir):
            scan_sds(options.output_dir, timespan, nets)

        while len(timespan) > 0:
            postdata = ""
            ts_used = random.sample(timespan.items(), min(len(timespan), options.max_lines))

            for ((net, sta, loc, cha), ts) in ts_used:
                te = min(ts.end, ts.start + datetime.timedelta(minutes=options.max_timespan))

                if loc == '':
                    loc = '--'

                postdata += "%s %s %s %s %sZ %sZ\n" \
                            % (net, sta, loc, cha, ts.start.isoformat(), te.isoformat())

            if not isinstance(postdata, bytes):
                postdata = postdata.encode('utf-8')

            try:
                proc = exec_fetch(param2, postdata, options.verbose, options.no_check)
            except OSError as e:
                logs.error(str(e))
                logs.error("error running fdsnws_fetch")
                return 1

            got_data = False

            try:
                for rec in mseedlite.Input(proc.stdout):
                    try:
                        ts = timespan[(rec.net, rec.sta, rec.loc, rec.cha)]
                    except KeyError:
                        logs.warning("unexpected data: %s.%s.%s.%s"
                                     % (rec.net, rec.sta, rec.loc, rec.cha))
                        continue

                    if rec.end_time <= ts.current:
                        continue

                    sds_dir = "%s/%d/%s/%s/%s.D" \
                              % (options.output_dir, rec.begin_time.year,
                                 rec.net, rec.sta, rec.cha)

                    sds_file = "%s.%s.%s.%s.D.%s" \
                               % (rec.net, rec.sta, rec.loc, rec.cha,
                                  rec.begin_time.strftime('%Y.%j'))

                    if not os.path.exists(sds_dir):
                        os.makedirs(sds_dir)

                    with open(sds_dir + '/' + sds_file, 'ab') as fd:
                        fd.write(rec.header + rec.data)

                    ts.current = rec.end_time
                    nets.add((rec.net, rec.begin_time.year))
                    got_data = True

            except mseedlite.MSeedError as e:
                logs.error(str(e))

            proc.stdout.close()
            proc.wait()

            if proc.returncode != 0:
                logs.error("error running fdsnws_fetch")
                return 1

            for ((net, sta, loc, cha), ts) in ts_used:
                if not got_data:
                    # no progress, skip to next segment
                    ts.start += datetime.timedelta(minutes=options.max_timespan)
                else:
                    # continue from current position
                    ts.start = ts.current

                if ts.start >= ts.end:
                    # timespan completed
                    del timespan[(net, sta, loc, cha)]

        if nets and not options.no_citation:
            logs.info("retrieving network citation info")
            get_citation(nets, param0, options.verbose)

    except (IOError, Error) as e:
        logs.error(str(e))
        return 1

    return 0
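# The download loop above writes records into the SDS (SeisComP Data Structure)
# layout: <root>/<year>/<net>/<sta>/<cha>.D/<net>.<sta>.<loc>.<cha>.D.<year>.<julian day>.
# A small illustrative helper (not part of the tool itself) that builds the
# same day-file path for a given record start time:
import os.path

def sds_path(root, net, sta, loc, cha, begin_time):
    """Return the SDS day-file path for a record starting at begin_time."""
    sds_dir = "%s/%d/%s/%s/%s.D" % (root, begin_time.year, net, sta, cha)
    sds_file = "%s.%s.%s.%s.D.%s" % (net, sta, loc, cha, begin_time.strftime('%Y.%j'))
    return os.path.join(sds_dir, sds_file)

# sds_path("/data/sds", "GE", "WLF", "", "BHZ", datetime.datetime(2010, 2, 18))
#   -> '/data/sds/2010/GE/WLF/BHZ.D/GE.WLF..BHZ.D.2010.049'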
def update(self):
    """Read the inventory file in XML format and store it in memory.

    All the information in the inventory is read into lists of networks,
    stations, sensor locations and streams. Only the necessary attributes are
    stored. This relies on the idea that some other agent updates the
    inventory file at a regular interval.

    If the XML file has already been processed by another instance of this
    class, we can look for a temporary file containing a memory dump of the
    generated structures, avoiding the time invested in the construction.
    """
    # Calculate when the next update should take place
    nextUpdate = self.lastUpdated + datetime.timedelta(seconds=self.time2refresh)

    # If the cache is still valid
    if nextUpdate > datetime.datetime.now():
        return

    # Initialize lists
    self.networks = []
    self.stations = []
    self.sensorsLoc = []
    self.streams = []
    self.lastUpdated = datetime.datetime.now()

    # Just to shorten notation
    ptNets = self.networks
    ptStats = self.stations
    ptSens = self.sensorsLoc
    ptStre = self.streams

    start_time = datetime.datetime.now()

    # Look how old the two versions of the inventory are.
    # First version: XML file
    try:
        xml_time = os.path.getmtime(self.inventory)
    except OSError as e:
        logs.error('No inventory file! Bye.')
        return  ### NOT SURE WHAT WE SHOULD DO HERE.

    # Second version: a pickle dump of the processed structures in memory
    try:
        pic_time = os.path.getmtime(self.cachefile)
    except:
        pic_time = 0

    lockfile = self.cachefile + '.lock'

    if pic_time > xml_time:
        try:
            if os.path.exists(lockfile):
                # Go to the whole processing of the XML file because the
                # pickle version is still being built.
                raise Exception

            with open(self.cachefile) as cache:
                (self.networks, self.stations, self.sensorsLoc, self.streams,
                 self.streamidx) = pickle.load(cache)
                logs.info('Inventory loaded from pickle version')
                return
        except:
            pass

    logs.info('Processing XML: %s' % start_time)

    sensors = {}
    dataloggers = {}
    stationsDict = {}

    # Parse the inventory file
    # A two-step parser is defined. In the first step, a dictionary of
    # sensors and dataloggers is constructed. In the second one, the
    # networks/stations/sensors/streams tree structure is built.
    try:
        invfile = open(self.inventory)
    except IOError:
        msg = 'Error: Arclink-inventory.xml could not be opened.'
        logs.error(msg)
        raise wsgicomm.WIInternalError, msg

    for parsetype in ['SENSDAT', 'NET_STA']:
        # Traverse through the networks
        # get an iterable
        try:
            invfile.seek(0)
            context = ET.iterparse(invfile, events=("start", "end"))
        except IOError:
            msg = 'Error while trying to parse Arclink-inventory.xml.'
            logs.error(msg)
            raise wsgicomm.WIInternalError, msg

        # turn it into an iterator
        context = iter(context)

        # get the root element
        event, root = context.next()

        # Check that it is really an inventory
        if root.tag[-len('inventory'):] != 'inventory':
            msg = 'The file parsed seems not to be an inventory (XML).'
            logs.error(msg)
            raise wsgicomm.WIInternalError, msg

        # Extract the namespace from the root node
        namesp = root.tag[:-len('inventory')]

        for event, netw in context:
            # The tag of this node could actually be "network" or
            # "stationGroup". Now it is not being checked because
            # we need all the data, but if we need to filter, this
            # is the place.
            # if event == "end":
            if parsetype == 'NET_STA' and netw.tag == namesp + 'network':
                # Extract the year from start
                try:
                    start_year = netw.get('start')
                    start_year = int(start_year[:4])
                except:
                    start_year = None

                # Extract the year from end
                try:
                    end_year = netw.get('end')
                    end_year = int(end_year[:4])
                except:
                    end_year = None

                # Cast the attribute restricted
                try:
                    if netw.get('restricted').lower() == 'true':
                        restricted = 1
                    elif netw.get('restricted').lower() == 'false':
                        restricted = 2
                    else:
                        restricted = None
                except:
                    restricted = None

                # Append the network to the list of networks
                ptNets.append([netw.get('code'), len(ptStats), None, None,
                               start_year, end_year, netw.get('description'),
                               restricted, netw.get('netClass'),
                               netw.get('archive'), netw.get('institutions')])

                last_child_station = len(ptStats)

                # Traverse through the stations
                for stat in netw.findall(namesp + 'station'):
                    # Extract the year from start
                    try:
                        stat_start_string = stat.get('start')
                        stat_start_date = datetime.datetime.strptime(
                            stat_start_string, '%Y-%m-%dT%H:%M:%S.%fZ')
                    except:
                        stat_start_date = None

                    # Extract the year from end
                    try:
                        stat_end_string = stat.get('end')
                        stat_end_date = datetime.datetime.strptime(
                            stat_end_string, '%Y-%m-%dT%H:%M:%S.%fZ')
                    except:
                        stat_end_date = None

                    # Extract latitude
                    try:
                        lat = float(stat.get('latitude'))
                    except:
                        lat = None

                    # Extract longitude
                    try:
                        lon = float(stat.get('longitude'))
                    except:
                        lon = None

                    # Extract elevation
                    try:
                        elevation = float(stat.get('elevation'))
                    except:
                        elevation = None

                    stationsDict[stat.get('publicID')] = len(ptStats)

                    # Cast the attribute restricted
                    try:
                        if stat.get('restricted').lower() == 'true':
                            restricted = 1
                        elif stat.get('restricted').lower() == 'false':
                            restricted = 2
                        else:
                            restricted = None
                    except:
                        restricted = None

                    # Only store a reference to the network in the
                    # first column
                    ptStats.append([len(ptNets) - 1, len(ptSens), None, None,
                                    stat.get('code'), lat, lon,
                                    stat.get('description'), stat_start_date,
                                    stat_end_date, elevation, restricted])
                    last_child_station += 1

                    last_child_sensor = len(ptSens)

                    sensXml = namesp + 'sensorLocation'
                    for sensor in stat.findall(sensXml):
                        # A reference to the containing station is
                        # in the first column
                        ptSens.append([len(ptStats) - 1, len(ptStre), None,
                                       None, sensor.get('code')])
                        last_child_sensor += 1

                        last_child_stream = len(ptStre)

                        streXml = namesp + 'stream'
                        for stream in sensor.findall(streXml):
                            sens_type = sensors.get(stream.get('sensor'))

                            try:
                                d = stream.get('sampleRateDenominator')
                                n = stream.get('sampleRateNumerator')
                                denom = float(d)
                                numer = float(n)
                            except:
                                denom = None
                                numer = None

                            try:
                                startString = stream.get('start')
                                startDate = datetime.datetime.strptime(
                                    startString, '%Y-%m-%dT%H:%M:%S.%fZ')
                            except:
                                startDate = None

                            try:
                                endString = stream.get('end')
                                endDate = datetime.datetime.strptime(
                                    endString, '%Y-%m-%dT%H:%M:%S.%fZ')
                            except:
                                endDate = None

                            # Cast the attribute restricted
                            try:
                                if stream.get('restricted').lower() == 'true':
                                    restricted = 1
                                elif stream.get('restricted').lower() == 'false':
                                    restricted = 2
                                else:
                                    restricted = None
                            except:
                                restricted = None

                            auxCode = stream.get('code')
                            auxDatLog = stream.get('datalogger')
                            ptStre.append((len(ptSens) - 1, auxCode, sens_type,
                                           denom, numer,
                                           dataloggers.get(auxDatLog),
                                           startDate, endDate, restricted))
                            last_child_stream += 1

                            stream.clear()

                        ptSens[-1][2] = last_child_stream

                        sensor.clear()

                        # Check if there is at least one stream.
                        # Otherwise remove the sensor. This case can happen
                        # when there are only auxStreams instead of streams
                        if ptSens[-1][1] == ptSens[-1][2]:
                            del ptSens[-1]
                            last_child_sensor -= 1

                    self.stations[-1][2] = last_child_sensor

                    stat.clear()

                    # Check if there is at least one sensor. Otherwise
                    # remove the station. This case can happen when there
                    # are only auxStreams instead of streams
                    if ptStats[-1][1] == ptStats[-1][2]:
                        del ptStats[-1]
                        last_child_station -= 1

                ptNets[-1][2] = last_child_station

                netw.clear()

            if (parsetype == 'SENSDAT') and (netw.tag == namesp + 'sensor'):
                pubId = netw.get('publicID')
                sensors[pubId] = netw.get('type')
                netw.clear()

            if (parsetype == 'SENSDAT') and (netw.tag == namesp + 'datalogger'):
                pubId = netw.get('publicID')
                dataloggers[pubId] = netw.get('description')
                netw.clear()

            if (parsetype == 'SENSDAT') and (netw.tag == namesp + 'stationGroup'):
                # Extract the year from start
                try:
                    start_year = netw.get('start')
                    start_year = int(start_year[:4])
                except:
                    start_year = None

                # Extract the year from end
                try:
                    end_year = netw.get('end')
                    end_year = int(end_year[:4])
                except:
                    end_year = None

                # Fill a list with station IDs. To be replaced later
                # with the index in self.stations
                virtualStations = []
                statRefXml = namesp + 'stationReference'
                for statRef in netw.findall(statRefXml):
                    virtualStations.append(statRef.get('stationID'))

                # Virtual networks are always permanent
                ptNets.append([netw.get('code'), None, None, virtualStations,
                               start_year, end_year, netw.get('description'),
                               False, 'p', 'GFZ', 'GFZ'])

                netw.clear()

            root.clear()

    invfile.close()

    # Resolving station references in virtual networks
    for netw in self.networks:
        if (netw[1] is None) and (netw[2] is None):
            idxs = []
            for stat in netw[3]:
                idxs.append(stationsDict[stat])

            netw[3] = idxs

    end_time = datetime.datetime.now()
    logs.info('Done with XML: %s' % (end_time))
    # Python 2.7: (end_time - start_time).total_seconds())

    self.__indexStreams()

    if not os.path.exists(lockfile):
        try:
            lck = open(lockfile, 'w')
            os.chmod(lockfile, 0666)
            lck.close()
        except:
            logs.warning(('Error while attempting to create a lockfile' +
                          ' (%s). Check whether the inventory is parsed' +
                          ' every %d seconds. This could potentially' +
                          ' make some requests slower.') %
                         (lockfile, self.time2refresh))
            return

        with open(self.cachefile, 'wb') as cache:
            os.chmod(self.cachefile, 0666)
            pickle.dump((ptNets, ptStats, ptSens, ptStre, self.streamidx), cache)

        try:
            os.remove(lockfile)
        except:
            logs.error(('Error removing lockfile (%s). Remove it' +
                        ' manually or the pickle version will always be' +
                        ' skipped.') % lockfile)
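# The caching strategy above boils down to: rebuild the in-memory structures
# only when the XML file is newer than the pickle dump, and use a lock file so
# that concurrent workers do not rebuild (or half-read) the cache at the same
# time. A stripped-down, self-contained sketch of that pattern; names here are
# illustrative and not the actual webinterface API:
import os
import pickle

def load_or_rebuild(xml_path, cache_path, rebuild):
    """Return cached data if the cache is fresh, otherwise rebuild and store it."""
    lock_path = cache_path + '.lock'
    try:
        if os.path.getmtime(cache_path) > os.path.getmtime(xml_path) \
                and not os.path.exists(lock_path):
            with open(cache_path, 'rb') as cache:
                return pickle.load(cache)
    except (OSError, IOError, pickle.PickleError):
        pass  # cache missing or unreadable: fall through and rebuild

    data = rebuild(xml_path)
    if not os.path.exists(lock_path):
        open(lock_path, 'w').close()
        with open(cache_path, 'wb') as cache:
            pickle.dump(data, cache)
        os.remove(lock_path)
    return data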
def submit_request(parser, req_name, breq_id):
    """
    Routes the request and analyses its results.
    Creates the corresponding files in the Breq_fast processing directory.
    Returns an email message containing the processing status of the
    breqfast request.

    @arguments: parser,   a BreqParser object
                req_name, a string defining the request name
                breq_id,  a string specifying the internal Breq_fast request ID
    @return: a string, giving the processing status email message
    """
    emailaddr = EMAIL_ADDR
    try:
        emailaddr = parser.tokendict["email"]
    except KeyError:
        pass

    label = LABEL
    try:
        label = parser.tokendict["label"]
    except KeyError:
        pass

    label = re.sub("[^\w]", "_", str(label))

    arcl = ArclinkManager(DEFAULT_HOST + ":" + str(DEFAULT_PORT), emailaddr)

    # Default format is full SEED, however, we can request MSEED
    # and do the conversion here. In this case, we will end up
    # with a single SEED volume even if data comes from multiple
    # sources.
    wf_req = arcl.new_request("WAVEFORM", {"format": "FSEED"}, label)

    for x in parser.reqlist:
        wf_req.add(*x)

    # List of failed request lines associated to an error message.
    ok_content = []
    failed_content = {}
    emailmsg = ""
    emailmsg_extra = ""
    reqlogmsg = ""

    try:
        global logstream
        logstream = cStringIO.StringIO()

        try:
            (inv, req_sent, req_noroute, req_nodata) = arcl.execute(wf_req, True, True)

            logs.info("the following data requests were sent:")
            for req in req_sent:
                logs.info(req.dcname)
                show_status(req.status())

            if req_noroute:
                tmpstream = cStringIO.StringIO()
                req_noroute.dump(tmpstream)
                logs.info("the following entries could not be routed:")
                logs.info(tmpstream.getvalue())

            if req_nodata:
                tmpstream = cStringIO.StringIO()
                req_nodata.dump(tmpstream)
                logs.info("the following entries returned no data:")
                logs.info(tmpstream.getvalue())

        finally:
            reqlogmsg = logstream.getvalue()
            logstream = None

        if req_noroute:
            failed_content[STATUS_ROUTING] = req_noroute.content

        # This is necessary here because sometimes below we can't catch
        # full empty requests
        if req_nodata:
            failed_content[STATUS_NODATA] = req_nodata.content

        if not os.path.exists("%s/%s" % (FTP_DIR, req_name)):
            os.mkdir("%s/%s" % (FTP_DIR, req_name))

        prefix = "%s/%s_%s" % (req_name, label, breq_id)
        urllist = []

        canJoin = True
        volumecounts = 0
        for req in req_sent:
            for vol in req.status().volume:
                if arclink_status_string(vol.status) == "OK" and vol.size > 0:
                    volumecounts += 1
                if vol.encrypted and vol.size > 0:
                    canJoin = False

        sufix = ""
        addname = ""
        fd_out = None

        logs.warning("Can Join is set to: %s" % canJoin)
        logs.warning("We have %s volumes to download" % volumecounts)

        if canJoin and volumecounts > 0:
            filename = FTP_DIR + "/" + prefix + ".seed"
            fd_out = open(filename, "wb")
            fd_out = SeedOutput(fd_out, inv)

        cset = set()

        # process resent requests before original failed requests
        req_sent.reverse()

        for req in req_sent:
            for vol in req.status().volume:
                if vol.size == 0:
                    continue

                if not canJoin:
                    addname = str(".%s.%s" % (req.id, vol.id))
                    filename = FTP_DIR + "/" + prefix + addname + ".seed"
                    fd_out = open(filename, "wb")

                vol_status = vol.status

                try:
                    req.download_data(fd_out, vol.id, block=True, purge=False)
                except (ArclinkError, socket.error), e:
                    logs.error("error on downloading request: " + str(e))
                    if fd_out is not None:
                        fd_out.close()
                    raise
                except (IOError, OSError, DBError, SEEDError, mseed.MSeedError), e:
                    logs.error("error creating SEED Volume: %s" % str(e))
                    vol_status = STATUS_ERROR
args[pv[0]] = "" else: args[pv[0]] = urllib.unquote(pv[1]) action(obj, args) except ArclinkAuthFailed, e: obj.err_headers_out['WWW-Authenticate'] = 'Basic realm="%s"' % (e.dcname,) logs.debug("unauthorized") return apache.HTTP_UNAUTHORIZED except (ArclinkError, socket.error), e: # # PLE 2013-03-13: This is where that message # "Error: missing user ID (email address)" # escapes from, but how does it get raised? # logs.error(fname+":"+str(e)) obj.content_type = "text/plain" obj.write("Error: " + "(%s) " % fname + str(e)) except apache.SERVER_RETURN: raise except Exception, e: logs.error(str(e)) raise return apache.OK syslog.openlog("webinterface", syslog.LOG_PID, syslog.LOG_LOCAL0) logs.info("ArcLink webinterface v" + VERSION + " started")
def submit_request(parser, req_name, breq_id):
    """
    Routes the request and analyses its results.
    Creates the corresponding files in the Breq_fast processing directory.
    Returns an email message containing the processing status of the
    breqfast request.

    @arguments: parser,   a BreqParser object
                req_name, a string defining the request name
                breq_id,  a string specifying the internal Breq_fast request ID
    @return: a string, giving the processing status email message
    """
    emailaddr = EMAIL_ADDR
    try:
        emailaddr = parser.tokendict["email"]
    except KeyError:
        pass

    label = LABEL
    try:
        label = parser.tokendict["label"]
    except KeyError:
        pass

    label = re.sub("[^\w]", "_", str(label))

    arcl = ArclinkManager(DEFAULT_HOST + ":" + str(DEFAULT_PORT), emailaddr)

    # Default format is full SEED, however, we can request MSEED
    # and do the conversion here. In this case, we will end up
    # with a single SEED volume even if data comes from multiple
    # sources.
    wf_req = arcl.new_request("WAVEFORM", {"format": "FSEED"}, label)

    for x in parser.reqlist:
        wf_req.add(*x)

    # List of failed request lines associated to an error message.
    ok_content = []
    failed_content = {}
    emailmsg = ""
    emailmsg_extra = ""
    reqlogmsg = ""

    try:
        global logstream
        logstream = cStringIO.StringIO()

        try:
            (inv, req_sent, req_noroute, req_nodata) = arcl.execute(wf_req, True, True)

            logs.info("the following data requests were sent:")
            for req in req_sent:
                logs.info(req.dcname)
                show_status(req.status())

            if req_noroute:
                tmpstream = cStringIO.StringIO()
                req_noroute.dump(tmpstream)
                logs.info("the following entries could not be routed:")
                logs.info(tmpstream.getvalue())

            if req_nodata:
                tmpstream = cStringIO.StringIO()
                req_nodata.dump(tmpstream)
                logs.info("the following entries returned no data:")
                logs.info(tmpstream.getvalue())

        finally:
            reqlogmsg = logstream.getvalue()
            logstream = None

        if req_noroute:
            failed_content[STATUS_ROUTING] = req_noroute.content

        # This is necessary here because sometimes below we can't catch
        # full empty requests
        if req_nodata:
            failed_content[STATUS_NODATA] = req_nodata.content

        if not os.path.exists("%s/%s" % (FTP_DIR, req_name)):
            os.mkdir("%s/%s" % (FTP_DIR, req_name))

        prefix = "%s/%s_%s" % (req_name, label, breq_id)
        urllist = []

        canJoin = True
        volumecounts = 0
        for req in req_sent:
            for vol in req.status().volume:
                if arclink_status_string(vol.status) == "OK" and vol.size > 0:
                    volumecounts += 1
                if vol.encrypted and vol.size > 0:
                    canJoin = False

        sufix = ""
        addname = ""
        fd_out = None

        if canJoin:
            filename = FTP_DIR + '/' + prefix + '.seed'
            fd_out = open(filename, "wb")
            fd_out = SeedOutput(fd_out, inv)

        cset = set()

        # process resent requests before original failed requests
        req_sent.reverse()

        for req in req_sent:
            for vol in req.status().volume:
                if vol.size == 0:
                    continue

                if not canJoin:
                    addname = str(".%s.%s" % (req.id, vol.id))
                    filename = FTP_DIR + '/' + prefix + addname + '.seed'
                    fd_out = open(filename, "wb")

                vol_status = vol.status

                try:
                    req.download_data(fd_out, vol.id, block=True, purge=False)
                except (ArclinkError, socket.error), e:
                    logs.error('error on downloading request: ' + str(e))
                    try:
                        failed_content[STATUS_ERROR] += req.content
                    except KeyError:
                        failed_content[STATUS_ERROR] = req.content
                    fd_out.close()
                    break
                except (IOError, OSError, DBError, SEEDError, mseed.MSeedError), e:
                    logs.error("error creating SEED Volume: %s" % str(e))
                    vol_status = STATUS_ERROR
def _main(SSLpasswordDict, addr, request_format, data_format, label, resp_dict,
          rebuild_volume, proxymode, user, timeout, retries, output_file,
          input_file, spfr):
    reblock_mseed = False
    use_inventory = False
    use_routing = not proxymode
    req_args = {"compression": "bzip2"}

    if data_format.upper() == "MSEED":
        req_type = "WAVEFORM"
        req_args["format"] = "MSEED"
    elif data_format.upper() == "MSEED4K":
        req_type = "WAVEFORM"
        req_args["format"] = "MSEED"
        reblock_mseed = True
    elif data_format.upper() == "FSEED":
        req_type = "WAVEFORM"
        if rebuild_volume:
            req_args["format"] = "MSEED"
        else:
            req_args["format"] = "FSEED"
    elif data_format.upper() == "DSEED":
        req_type = "RESPONSE"
        use_routing = False
    elif len(data_format) >= 3 and data_format.upper() == "INVENTORY"[:len(data_format)]:
        req_type = "INVENTORY"
        req_args["instruments"] = "true"
        use_routing = False
    else:
        logs.error("unsupported data format: %s" % (data_format,))
        return 1

    if resp_dict:
        req_args["resp_dict"] = "true"
    else:
        req_args["resp_dict"] = "false"

    mgr = ArclinkManager(addr, user, socket_timeout=timeout, download_retry=retries)
    req = mgr.new_request(req_type, req_args, label)

    if request_format == "native":
        if input_file:
            parse_native(req, input_file)
        else:
            parse_native_from_handler(req, sys.stdin)
    elif request_format == "breqfast":
        if input_file:
            parse_breqfast(req, input_file)
        else:
            parse_breqfast_from_handler(req, sys.stdin)
    else:
        logs.error("unsupported request format: %s" % (request_format,))
        return 1

    if not req.content:
        logs.error("empty request")
        return 1

    wildcards = False
    for i in req.content:
        for j in i[:4]:
            if j.find("*") >= 0 or j.find("?") >= 0:
                wildcards = True
                break

    if (rebuild_volume or wildcards) and req_type != "INVENTORY":
        use_inventory = True

    (inv, req_ok, req_noroute, req_nodata) = mgr.execute(req, use_inventory, use_routing, spfr)

    ## Better report what was going on
    logs.info("\nthe following data requests were sent:")
    for req in req_ok:
        show_status(req)

    if req_nodata:
        logs.warning("\nthe following entries returned no data after trying all routes")
        show_lines(req_nodata)

    if req_noroute:
        logs.warning("\nthe following entries could not be routed:")
        show_lines(req_noroute)

    retry_lines = set()
    for req in req_ok:
        for vol in req.status().volume:
            for line in vol.line:
                if line.status == STATUS_RETRY:
                    retry_lines.add(line.content)
                elif line.content in retry_lines:
                    retry_lines.remove(line.content)

    if retry_lines:
        logs.warning("\nthe following data is temporarily off-line (try again later)")
        for ln in retry_lines:
            logs.warning(ln)

    ## Prepare to download
    canJoin = False
    volumecount = 0

    if req_type == "WAVEFORM" and req_args.get("format") == "MSEED":
        canJoin = True

    for req in req_ok:
        for vol in req.status().volume:
            if (arclink_status_string(vol.status) == "OK" or
                    arclink_status_string(vol.status) == "WARNING") and vol.size > 0:
                volumecount += 1
                if vol.encrypted and (vol.dcid not in SSLpasswordDict):
                    canJoin = False
            if arclink_status_string(vol.status) == "WARNING":
                logs.warning("some requests returned a Warning status")

    if volumecount == 0:
        logs.warning("\nnone of the requests returned data")
        return 1

    if not canJoin and volumecount > 1:
        logs.warning('cannot merge volumes saving volumes as individual files')

    ## Download
    if canJoin:
        filename = output_file
        fd_out = open(filename, "wb")

        if rebuild_volume:
            logs.info("rebuilding SEED volume")
            fd_out = SeedOutput(fd_out, inv, label, resp_dict)
        elif reblock_mseed:
            logs.info("reblocking Mini-SEED data")
            fd_out = MSeed4KOutput(fd_out)

        for req in req_ok:
            for vol in req.status().volume:
                if vol.size == 0 or (arclink_status_string(vol.status) != "OK" and
                                     arclink_status_string(vol.status) != "WARNING"):
                    continue

                try:
                    req.download_data(fd_out, vol.id, block=True, purge=False,
                                      password=SSLpasswordDict.get(vol.dcid))
                except ArclinkError, e:
                    logs.error('error on downloading request: ' + str(e))

            try:
                req.purge()
            except ArclinkError, e:
                logs.error('error on purging request: ' + str(e))
def __load_file(self, func, file):
    if file and os.path.exists(file):
        logs.info("loading " + file)
        func(file)
def run(self): try: seiscompRoot = self.commandline().unrecognizedOptions()[0] sys.stderr.write("root directory: %s\n" % seiscompRoot) try: DCID = self.configGetString("datacenterID") except: logs.error("datacenterID not found in global.cfg") return False networkRestricted = {} incompleteResponse = {} global instdb instdb = Instruments(DCID) self.__load_file(loadGains, os.path.join(seiscompRoot, "config", "gain.dlsv")) # for backwards compatibility self.__load_file( loadGains, os.path.join(seiscompRoot, "config", "gain.tab.out")) self.__load_file(loadGains, os.path.join(seiscompRoot, "config", "gain.tab")) try: self.__load_file(instdb.load_db, os.path.join(seiscompRoot, "resp", "inst.db")) self.__load_file( instdb.load_sensor_attr, os.path.join(seiscompRoot, "resp", "sensor_attr.csv")) self.__load_file( instdb.load_datalogger_attr, os.path.join(seiscompRoot, "resp", "datalogger_attr.csv")) except (IOError, NettabError) as e: logs.error("fatal error: " + str(e)) return False sc3Inv = seiscomp3.DataModel.Inventory() inventory = InventoryWrapper(sc3Inv, DCID) existingNetworks = set() existingStations = set() for f in glob.glob(os.path.join(seiscompRoot, "key", "network_*")): try: logs.debug("processing " + f) netCode = f.split("/network_")[-1] try: kf = Keyfile(f) except IOError as e: logs.error(str(e)) continue existingNetworks.add(netCode) networkRestricted[netCode] = False inventory.updateNetwork(netCode, kf) except ValueError as e: logs.error("%s: %s" % (f, str(e))) for f in glob.glob(os.path.join(seiscompRoot, "key", "station_*")): try: logs.debug("processing " + f) (netCode, staCode) = f.split("/station_")[-1].split('_', 1) try: kf = Keyfile(f) except IOError as e: logs.error(str(e)) continue existingStations.add((netCode, staCode)) if netCode not in existingNetworks: logs.warning( "network %s does not exist, ignoring station %s" % (netCode, staCode)) continue if not hasattr(kf, "latitude") or not kf.latitude: logs.warning("missing latitude for %s %s" % (netCode, staCode)) continue if not hasattr(kf, "longitude") or not kf.longitude: logs.warning("missing longitude for %s %s" % (netCode, staCode)) continue if not hasattr(kf, "elevation") or not kf.elevation: logs.warning("missing elevation for %s %s" % (netCode, staCode)) continue if not hasattr(kf, "depth1") or not kf.depth1: logs.warning( "missing depth of primary sensor for %s %s" % (netCode, staCode)) continue if decimal.Decimal(kf.latitude) == decimal.Decimal("0.0") and \ decimal.Decimal(kf.longitude) == decimal.Decimal("0.0"): logs.warning("missing coordinates for %s %s" % (netCode, staCode)) continue if not hasattr(kf, "orientation1") or not kf.orientation1: logs.warning( "missing orientation of primary sensor for %s %s, using default" % (netCode, staCode)) kf.orientation1 = "Z 0.0 -90.0; N 0.0 0.0; E 90.0 0.0" if not hasattr(kf, "orientation2"): kf.orientation2 = "" if not hasattr(kf, "unit1") or not kf.unit1: logs.warning( "missing unit of primary sensor for %s %s, using M/S" % (netCode, staCode)) kf.unit1 = "M/S" if not hasattr(kf, "unit2"): logs.warning( "missing unit of secondary sensor for %s %s, using M/S**2" % (netCode, staCode)) kf.unit2 = "M/S**2" if not hasattr(kf, "type"): kf.type = "" restricted = False # TODO: Make restricted part of the key file if not inventory.updateStation(netCode, staCode, restricted, kf): try: incNet = incompleteResponse[netCode] except KeyError: incNet = set() incompleteResponse[netCode] = incNet incNet.add(staCode) except ValueError as e: logs.error("%s: %s" % (f, str(e))) for (netCode, 
restricted) in networkRestricted.items(): inventory.setNetworkRestricted(netCode, restricted) for (netCode, network) in inventory.networks.items(): if netCode not in existingNetworks: logs.notice("deleting network %s from inventory" % (netCode, )) inventory.obj.remove(network.obj) for ((netCode, staCode), station) in inventory.stations.items(): if netCode in existingNetworks and ( netCode, staCode) not in existingStations: logs.notice("deleting station %s_%s from inventory" % (netCode, staCode)) inventory.networks[netCode].obj.remove(station.obj) if incompleteResponse: logs.info( "The following stations are missing full response data") logs.info("Use dlsv2inv if needed") # for netCode in sorted(incompleteResponse.keys()): # logs.info("%s: %s" % (netCode, " ".join(sorted(list(incompleteResponse[netCode]))))) tmpDict = sortDictionary(incompleteResponse) for netCode in list(tmpDict.keys()): tmpSortedList = list(tmpDict[netCode]) tmpSortedList.sort() logs.info("%s: %s" % (netCode, " ".join(tmpSortedList))) ar = seiscomp3.IO.XMLArchive() if not self.output: sys.stderr.write("Writing output to stdout\n") if not ar.create("-"): sys.stderr.write("Cannot open stdout\n") return False else: sys.stderr.write("Writing output to %s\n" % self.output) if not ar.create(self.output): sys.stderr.write("Cannot open %s\n" % self.output) return False ar.setFormattedOutput(self.commandline().hasOption("formatted")) ar.writeObject(sc3Inv) except Exception: logs.print_exc() return True
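The run() method above relies on a sortDictionary helper to report stations with incomplete response data in a stable order. The helper itself is not shown in this document; a minimal sketch of what it presumably does, assuming it simply returns the entries ordered by key:

import collections

def sortDictionary(d):
    # Return a new OrderedDict with the items of d ordered by key.
    # Illustrative sketch only; the real helper used above may differ.
    return collections.OrderedDict(sorted(d.items()))

For example, sortDictionary({"GE": set(["APE"]), "CX": set(["PB01"])}) would yield the networks in the order CX, GE, which is what the reporting loop above expects.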
def _main(SSLpasswordDict, addr, request_format, data_format, label, resp_dict, rebuild_volume, proxymode, user, timeout, retries, output_file, input_file, spfr): reblock_mseed = False use_inventory = False use_routing = not proxymode req_args = {"compression": "bzip2"} if data_format.upper() == "MSEED": req_type = "WAVEFORM" req_args["format"] = "MSEED" elif data_format.upper() == "MSEED4K": req_type = "WAVEFORM" req_args["format"] = "MSEED" reblock_mseed = True elif data_format.upper() == "FSEED": req_type = "WAVEFORM" if rebuild_volume: req_args["format"] = "MSEED" else: req_args["format"] = "FSEED" elif data_format.upper() == "DSEED": req_type = "RESPONSE" use_routing = False elif len(data_format) >= 3 and data_format.upper() == "INVENTORY"[:len(data_format)]: req_type = "INVENTORY" req_args["instruments"] = "true" use_routing = False else: logs.error("unsupported data format: %s" % (data_format,)) return 1 if resp_dict: req_args["resp_dict"] = "true" else: req_args["resp_dict"] = "false" mgr = ArclinkManager(addr, user, socket_timeout=timeout, download_retry=retries) req = mgr.new_request(req_type, req_args, label) if request_format == "native": if input_file: parse_native(req, input_file) else: parse_native_from_handler(req, sys.stdin) elif request_format == "breqfast": if input_file: parse_breqfast(req, input_file) else: parse_breqfast_from_handler(req, sys.stdin) else: logs.error("unsupported request format: %s" % (request_format,)) return 1 if not req.content: logs.error("empty request") return 1 wildcards = False for i in req.content: for j in i[:4]: if j.find("*") >= 0 or j.find("?") >= 0: wildcards = True break if (rebuild_volume or wildcards) and req_type != "INVENTORY": use_inventory = True (inv, req_ok, req_noroute, req_nodata) = mgr.execute(req, use_inventory, use_routing, spfr) ## Report what happened with the submitted requests logs.info("\nthe following data requests were sent:") for req in req_ok: show_status(req) if req_nodata: logs.warning("\nthe following entries returned no data after trying all routes:") show_lines(req_nodata) if req_noroute: logs.warning("\nthe following entries could not be routed:") show_lines(req_noroute) retry_lines = set() for req in req_ok: for vol in req.status().volume: for line in vol.line: if line.status == STATUS_RETRY: retry_lines.add(line.content) elif line.content in retry_lines: retry_lines.remove(line.content) if retry_lines: logs.warning("\nthe following data is temporarily off-line (try again later):") for ln in retry_lines: logs.warning(ln) ## Prepare to download canJoin = True volumecount = 0 if req_type == "WAVEFORM" and req_args.get("format") != "MSEED": canJoin = False for req in req_ok: for vol in req.status().volume: if (arclink_status_string(vol.status) == "OK" or arclink_status_string(vol.status) == "WARNING") and vol.size > 0: volumecount += 1 if vol.encrypted and (vol.dcid not in SSLpasswordDict): canJoin = False if arclink_status_string(vol.status) == "WARNING": logs.warning("some requests returned a Warning status") if volumecount == 0: logs.warning("\nnone of the requests returned data") return 1 if not canJoin and volumecount > 1: logs.warning('cannot merge volumes; saving volumes as individual files') ## Download if canJoin: filename = output_file fd_out = open(filename, "wb") if rebuild_volume: logs.info("rebuilding SEED volume") fd_out = SeedOutput(fd_out, inv, label, resp_dict) elif reblock_mseed: logs.info("reblocking Mini-SEED data") fd_out = MSeed4KOutput(fd_out) for req in req_ok: for vol in req.status().volume: if vol.size ==
0 or (arclink_status_string(vol.status) != "OK" and arclink_status_string(vol.status) != "WARNING"): continue try: req.download_data(fd_out, vol.id, block=True, purge=False, password=SSLpasswordDict.get(vol.dcid)) except ArclinkError, e: logs.error('error while downloading request: ' + str(e)) try: req.purge() except ArclinkError, e: logs.error('error while purging request: ' + str(e))
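The download logic in _main above merges all volumes into a single output file only when the data is Mini-SEED (or will be rebuilt from Mini-SEED) and every encrypted volume has a known per-datacenter password. A minimal standalone sketch of that decision, using hypothetical volume objects with status, size, encrypted and dcid attributes (names chosen only for illustration, not the library API):

def can_merge_volumes(volumes, passwords, waveform_format="MSEED"):
    # Volumes can only be joined into one file if the requested waveform
    # format is Mini-SEED and every non-empty encrypted volume can be
    # decrypted with a known per-datacenter password.
    # 'volumes': objects with .status, .size, .encrypted, .dcid (hypothetical)
    # 'passwords': dict mapping datacenter ID -> password
    if waveform_format != "MSEED":
        return False
    for vol in volumes:
        if vol.size > 0 and vol.encrypted and vol.dcid not in passwords:
            return False
    return True

In _main above the same check additionally counts the usable volumes and warns the user when merging is not possible, so each volume is then saved as an individual file.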
def update(self): """Read the inventory file in XML format and store it in memory. All the information of the inventory is read into lists of networks, stations, sensor locations and streams. Only the necessary attributes are stored. This relies on the assumption that some other agent updates the inventory file at regular intervals. If the XML file has already been processed by another instance of this class, we can look for a temporary file containing a memory dump of the generated structures, avoiding the time needed to rebuild them. """ # Calculate when the next update should take place nextUpdate = self.lastUpdated + datetime.timedelta( seconds=self.time2refresh) # If the cache is still valid if nextUpdate > datetime.datetime.now(): return # Initialize lists self.networks = [] self.stations = [] self.sensorsLoc = [] self.streams = [] self.lastUpdated = datetime.datetime.now() # Just to shorten notation ptNets = self.networks ptStats = self.stations ptSens = self.sensorsLoc ptStre = self.streams start_time = datetime.datetime.now() # Check how old the two versions of the inventory are. # First version: XML file try: xml_time = os.path.getmtime(self.inventory) except OSError as e: logs.error('No inventory file! Bye.') return ### NOT SURE WHAT WE SHOULD DO HERE. # Second version: A pickle dump of the processed structures in memory try: pic_time = os.path.getmtime(self.cachefile) except: pic_time = 0 lockfile = self.cachefile + '.lock' if pic_time > xml_time: try: if os.path.exists(lockfile): # Fall back to full processing of the XML file because the # pickle version is still being built. raise Exception with open(self.cachefile) as cache: (self.networks, self.stations, self.sensorsLoc, self.streams, self.streamidx) = pickle.load(cache) logs.info('Inventory loaded from pickle version') return except: pass logs.info('Processing XML: %s' % start_time) sensors = {} dataloggers = {} stationsDict = {} # Parse the inventory file. # There are two steps in parsing. In the first, a dictionary of # sensors and dataloggers is constructed. In the second step, the # networks/stations/sensors/streams tree structure is built. try: invfile = open(self.inventory) except IOError: msg = 'Error: could not open the inventory file ' + self.inventory logs.error(msg) raise wsgicomm.WIInternalError, msg for parsetype in ['SENSDAT', 'NET_STA']: # Traverse through the networks # get an iterable try: invfile.seek(0) context = ET.iterparse(invfile, events=("start", "end")) except IOError: msg = 'Error: could not parse the inventory file ' + self.inventory logs.error(msg) raise wsgicomm.WIInternalError, msg # turn it into an iterator context = iter(context) # get the root element event, root = context.next() # Check that it is really an inventory if root.tag[-len('inventory'):] != 'inventory': msg = 'The parsed file does not seem to be an inventory (XML).' logs.error(msg) raise wsgicomm.WIInternalError, msg # Extract the namespace from the root node namesp = root.tag[:-len('inventory')] for event, netw in context: # The tag of this node could actually be "network" or # "stationGroup". Now it is not being checked because # we need all the data, but if we need to filter, this # is the place.
# if event == "end": if parsetype == 'NET_STA' and \ netw.tag == namesp + 'network': # Extract the year from start try: start_year = netw.get('start') start_year = int(start_year[:4]) except: start_year = None # Extract the year from end try: end_year = netw.get('end') end_year = int(end_year[:4]) except: end_year = None # Cast the attribute restricted try: if netw.get('restricted').lower() == 'true': restricted = 1 elif netw.get('restricted').lower() == 'false': restricted = 2 else: restricted = None except: restricted = None # Append the network to the list of networks ptNets.append([ netw.get('code'), len(ptStats), None, None, start_year, end_year, netw.get('description'), restricted, netw.get('netClass'), netw.get('archive'), netw.get('institutions') ]) last_child_station = len(ptStats) # Traverse through the stations for stat in netw.findall(namesp + 'station'): # Extract the year from start try: stat_start_string = stat.get('start') stat_start_date = datetime.datetime.strptime( stat_start_string, '%Y-%m-%dT%H:%M:%S.%fZ') except: stat_start_date = None # Extract the year from end try: stat_end_string = stat.get('end') stat_end_date = datetime.datetime.strptime( stat_end_string, '%Y-%m-%dT%H:%M:%S.%fZ') except: stat_end_date = None # Extract latitude try: lat = float(stat.get('latitude')) except: lat = None # Extract longitude try: lon = float(stat.get('longitude')) except: lon = None # Extract elevation try: elevation = float(stat.get('elevation')) except: elevation = None stationsDict[stat.get('publicID')] = len(ptStats) # Cast the attribute restricted try: if stat.get('restricted').lower() == 'true': restricted = 1 elif stat.get('restricted').lower() == 'false': restricted = 2 else: restricted = None except: restricted = None # Only store a reference to the network in the # first column ptStats.append([ len(ptNets) - 1, len(ptSens), None, None, stat.get('code'), lat, lon, stat.get('description'), stat_start_date, stat_end_date, elevation, restricted ]) last_child_station += 1 last_child_sensor = len(ptSens) sensXml = namesp + 'sensorLocation' for sensor in stat.findall(sensXml): # A reference to the containing station is # in the first column ptSens.append([ len(ptStats) - 1, len(ptStre), None, None, sensor.get('code') ]) last_child_sensor += 1 last_child_stream = len(ptStre) streXml = namesp + 'stream' for stream in sensor.findall(streXml): sens_type = sensors.get( stream.get('sensor')) try: d = stream.get('sampleRateDenominator') n = stream.get('sampleRateNumerator') denom = float(d) numer = float(n) except: denom = None numer = None try: startString = stream.get('start') startDate = datetime.datetime.strptime( startString, '%Y-%m-%dT%H:%M:%S.%fZ') except: startDate = None try: endString = stream.get('end') endDate = datetime.datetime.strptime( endString, '%Y-%m-%dT%H:%M:%S.%fZ') except: endDate = None # Cast the attribute restricted try: if stream.get('restricted').lower() \ == 'true': restricted = 1 elif stream.get('restricted').lower() \ == 'false': restricted = 2 else: restricted = None except: restricted = None auxCode = stream.get('code') auxDatLog = stream.get('datalogger') ptStre.append( (len(ptSens) - 1, auxCode, sens_type, denom, numer, dataloggers.get(auxDatLog), startDate, endDate, restricted)) last_child_stream += 1 stream.clear() ptSens[-1][2] = last_child_stream sensor.clear() # Check if there is at least one stream. # Otherwise remove sensor. 
This case can happen # when there are only auxStreams instead of # streams if ptSens[-1][1] == ptSens[-1][2]: del ptSens[-1] last_child_sensor -= 1 self.stations[-1][2] = last_child_sensor stat.clear() # Check if there is at least one sensor. Otherwise # remove station. This case can happen when there # are only auxStreams instead of streams if ptStats[-1][1] == ptStats[-1][2]: del ptStats[-1] last_child_station -= 1 ptNets[-1][2] = last_child_station netw.clear() if ((parsetype == 'SENSDAT') and (netw.tag == namesp + 'sensor')): pubId = netw.get('publicID') sensors[pubId] = netw.get('type') netw.clear() if ((parsetype == 'SENSDAT') and (netw.tag == namesp + 'datalogger')): pubId = netw.get('publicID') dataloggers[pubId] = netw.get('description') netw.clear() if ((parsetype == 'SENSDAT') and (netw.tag == namesp + 'stationGroup')): # Extract the year from start try: start_year = netw.get('start') start_year = int(start_year[:4]) except: # March 2016: Quick workaround for virtual network with no start date; seems to break getStations() start_year = 1900 # None # Extract the year from end try: end_year = netw.get('end') end_year = int(end_year[:4]) except: end_year = None # Fill a list with station IDs. To be replaced later # with the index in self.stations virtualStations = [] statRefXml = namesp + 'stationReference' for statRef in netw.findall(statRefXml): virtualStations.append(statRef.get('stationID')) # Virtual networks are always permanent, # and have no archive DCID, since that just leads # to turf battles and much crying. netArchive = '' netInstitutes = netArchive # not used? ptNets.append([ netw.get('code'), None, None, virtualStations, start_year, end_year, netw.get('description'), False, 'p', netArchive, netInstitutes ]) netw.clear() root.clear() invfile.close() # Resolve station references in virtual networks for netw in self.networks: if ((netw[1] is None) and (netw[2] is None)): idxs = [] for stat in netw[3]: idxs.append(stationsDict[stat]) netw[3] = idxs end_time = datetime.datetime.now() logs.info( 'Done with XML: %s' % (end_time)) # Python 2.7: (end_time - start_time).total_seconds()) self.__indexStreams() if not os.path.exists(lockfile): try: lck = open(lockfile, 'w') os.chmod(lockfile, 0664) lck.close() except: logs.warning(('Error while attempting to create a lockfile' + ' (%s). Check whether the inventory is parsed' + ' every %d seconds. This could potentially' + ' make some requests slower.') % (lockfile, self.time2refresh)) return with open(self.cachefile, 'wb') as cache: os.chmod(self.cachefile, 0664) pickle.dump((ptNets, ptStats, ptSens, ptStre, self.streamidx), cache) try: os.remove(lockfile) except: logs.error(('Error while removing lockfile (%s). Remove it' + ' manually or the pickle version will always be' + ' skipped.') % lockfile)
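The update() method above decides between loading the pickled snapshot and reparsing the XML inventory by comparing file modification times and honouring a lockfile left by a concurrent rebuild. A minimal sketch of that freshness check, with hypothetical xml_path and cache_path arguments (the real method additionally falls back to reparsing when unpickling fails):

import os

def cache_is_usable(xml_path, cache_path):
    # The pickled cache is usable only if it exists, is newer than the
    # XML inventory, and no other process is currently rebuilding it
    # (signalled by a '<cache>.lock' file). Illustrative sketch only.
    try:
        xml_time = os.path.getmtime(xml_path)
    except OSError:
        return False
    try:
        cache_time = os.path.getmtime(cache_path)
    except OSError:
        cache_time = 0
    if os.path.exists(cache_path + '.lock'):
        return False
    return cache_time > xml_time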
def run(self): try: logs.info("ArcLink request handler v" + VERSION + " started") logs.info("Configuration: ") logs.info("Request handler for %s (organization) at %s (Datacenter Id)" % (self.organization, self.dcid)) logs.info("Request Dir: %s" % self.reqdir) logs.info("Max request size: %s" % self.maxsize) logs.info("Archive Dir: %s" % self.archdir) logs.info("ISO Dir: %s" % self.isodir) logs.info("NRT Dir: %s" % self.nrtdir) logs.info("Trackdb is %s @ %s" % (self.trackdb, self.trackdir)) logs.info("Subnodelist: %s" % self.subnodelist) logs.info("File Database: %s" % self.filedb) subnode_addr = {} dcid_override = {} if self.subnodelist is not None: fd = open(self.subnodelist) line = fd.readline() while line: try: (dcid, addr) = line.split() subnode_addr[dcid] = addr except ValueError: (dcid, addr, net, sta) = line.split() subnode_addr[dcid] = addr dcid_override[(net,sta)] = dcid line = fd.readline() fd.close() sc3wrap.dbQuery = self.query() handler.sc3App = self inv = Inventory(self.query().loadInventory()) rtn = Routing(self.query().loadRouting()) wf = WiggleFetcher(self.nrtdir, self.archdir, self.isodir, self.filedb, 1024*1024*self.maxsize, self.dcid, subnode_addr, dcid_override) rh = RequestHandler(inv, rtn, wf, self.reqdir, (self.trackdir, self.trackdb), 5, self.organization, DEFAULT_LABEL) mt = MessageThread(self.connection(), rh) mt.start() rh.start() except Exception: logs.print_exc() return False return True
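The run() method above reads an optional subnode list in which each line is either "dcid addr" or "dcid addr net sta", the latter overriding the datacenter ID for a single station. A minimal sketch of a standalone parser for that format, mirroring the loop above (the function name is chosen only for illustration):

def parse_subnode_list(path):
    # Returns (subnode_addr, dcid_override): subnode_addr maps
    # datacenter ID -> ArcLink address, dcid_override maps
    # (net, sta) -> datacenter ID, as built by run() above.
    subnode_addr = {}
    dcid_override = {}
    with open(path) as fd:
        for line in fd:
            fields = line.split()
            if not fields:
                continue
            if len(fields) == 2:
                dcid, addr = fields
            else:
                dcid, addr, net, sta = fields
                dcid_override[(net, sta)] = dcid
            subnode_addr[dcid] = addr
    return subnode_addr, dcid_override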