def __init__(self, db_list=None, subset=False):
    """
    Load the class and get the data from the netperf table into MongoDB.

    Usage:
        netperf = NetPerf(db, subset=False)
        netperf.validate()

        while True:
            if netperf.need_update():
                netperf.update()
                data, error = netperf.data()
            sleep(time)
    """
    self.logging = getLogger(self.__class__.__name__)
    self.logging.debug("NetPerf.init()")

    self.db = {}
    # Avoid a shared mutable default argument
    self.database_list = db_list if db_list is not None else []
    self.db_subset = subset
    self.cache = []
    self.error_cache = []
    self.tables = ['netperf']
def update_collection(mongo_db, name, data, index=None):
    logging = getLogger()
    logging.info("update_collection(%s)" % name)

    # Verify if we need to update MongoDB
    if data:
        temp_name = "%s_temp" % name
        logging.debug("Update temp collection %s with data" % temp_name)

        collection = mongo_db[temp_name]

        for entry in data:
            logging.debug("collection.update(%s)" % entry["id"])
            collection.update({"id": entry["id"]}, {"$set": entry}, upsert=True)

        # Create/update some indexes for the collection
        if index:
            index_db(collection, index)

        logging.debug("Move collection %s => %s" % (temp_name, name))
        collection.rename(name, dropTarget=True)
    else:
        logging.debug("NO DATA FROM OBJECT: %s" % name)
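# A minimal usage sketch for update_collection(). Data lands in a temp
# collection first and is renamed over the live one, so readers see an
# atomic swap. The connection, database name, and collection name below
# are illustrative assumptions, not values this module defines:
#
#   from pymongo import MongoClient
#   mongo_db = MongoClient("localhost", 27017)["db2mongo"]
#   rows = [{"id": "TA_O53A", "time": 1514764800.0}]
#   update_collection(mongo_db, "netperf", rows, index={"id": "simple"})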
def __init__(self, db=False, subset=False):
    """
    Class to query a Datascope dlevent database and cache the values
    in memory before loading them into MongoDB.

    Usage:
        dlevent = Dlevent(db, subset=False)
        dlevent.validate()

        while True:
            if dlevent.need_update():
                dlevent.update()
                data, error = dlevent.data()
            sleep(time)
    """
    self.logging = getLogger(self.__class__.__name__)
    self.logging.debug("Dlevent.init()")

    self.db = False
    self.database = db
    self.db_subset = subset
    self.cache = []
    self.error_cache = []
    self.tables = ['dlevent']
    self.dbs_tables = {}
def test_table(dbname, tbl, verbose=False):
    """
    Verify that we can work with the table.
    Returns the path if the table is valid and we see data.
    """
    logging = getLogger()

    path = False

    try:
        with datascope.closing(datascope.dbopen(dbname, "r")) as db:
            db = db.lookup(table=tbl)

            if not db.query(datascope.dbTABLE_PRESENT):
                logging.warning("No dbTABLE_PRESENT on %s" % dbname)
                return False

            if not db.record_count:
                logging.warning("No %s.record_count" % dbname)

            path = db.query("dbTABLE_FILENAME")

    except Exception as e:
        logging.warning("Problems with db[%s]: %s" % (dbname, e))
        return False

    return path
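# Usage sketch for test_table(); the database path below is an illustrative
# assumption. A False return means the table is missing or the open failed;
# an empty table only logs a warning and still returns the path:
#
#   path = test_table("/opt/antelope/data/db/usarray", "site")
#   if not path:
#       logging.warning("site table not usable")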
def __init__(self, filename=False, start='oldest'):
    self.logging = getLogger('stateFile')
    self.logging.debug("stateFile.init()")

    self.filename = filename
    self.packet = start
    self.time = 0
    self.strtime = 'n/a'
    self.latency = 'n/a'
    self.pid = 'PID %s' % os.getpid()

    if not filename:
        return

    self.directory, self.filename = os.path.split(filename)

    if self.directory and not os.path.isdir(self.directory):
        os.makedirs(self.directory)

    self.file = os.path.join(self.directory, self.filename)

    self.logging.debug('Open file for STATE tracking [%s]' % self.file)

    if os.path.isfile(self.file):
        self.open_file('r+')
        self.read_file()
    else:
        self.open_file('w+')

    if not os.path.isfile(self.file):
        raise pocException('Cannot create STATE file %s' % self.file)
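# A hedged sketch of the stateFile life cycle; the path below is illustrative.
# With a valid filename the file is created (or read back) on init and the
# saved packet id becomes the resume point for the ORB reader:
#
#   state = stateFile("state/poc_orb.state", start="oldest")
#   position = state.last_packet()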
def __init__(self):
    """
    Class to load the information from the dlsensor table and store all
    values in a local dict. The tool returns the name for the provided
    serial. You can search for a sensor or for a digitizer. If nothing
    is found you get the NULL value, which we set to "-".

    Usage:
        cache_object = dlsensor_cache()
        cache_object.add(dlident, dlmodel, snident, snmodel, time, endtime)

        sname = cache_object.sensor(snident, time)
        dname = cache_object.digitizer(dlident, time)
    """
    self.logging = getLogger(self.__class__.__name__)
    self.logging.debug("dlsensor_cache.init()")

    self.defaultTime = 0.0
    self.defaultEndtime = 9999999999.9

    self.sensors = {}
    self.digitizers = {}
def __init__(self, db=False, orbs=None, db_subset=False, orb_select=False):
    """
    Class to load information from multiple Datascope tables that track
    station configuration and metadata values. Some information is
    appended to the objects if an ORB is provided and the station is
    found on it. We track all packets related to the station and we have
    the option to extract some information from the pf/st packets.

    Usage:
        metadata = Metadata(db, orbs, db_subset, orb_select)
        metadata.validate()

        while True:
            if metadata.need_update():
                metadata.update()
                data, error = metadata.data()
            sleep(time)
    """
    self.logging = getLogger(self.__class__.__name__)
    self.logging.debug("Metadata.init()")

    self.orbs = {}
    self.cache = {}
    self.db = False
    self.database = db
    self.dbs_tables = {}
    self.perf_db = False
    self.perf_subset = False
    # Avoid a shared mutable default argument
    self.orbservers = orbs if orbs is not None else {}
    self.timezone = 'UTC'
    self.error_cache = {}
    self.perf_days_back = 30
    self.db_subset = db_subset
    self.orb_select = orb_select
    self.timeformat = '%D (%j) %H:%M:%S %z'
    self.tables = ['site']
    self.seismic_sensors = {}
    self.tags = False
    self.deployment = False
    self.sensor = False
    self.comm = False
    self.digitizer = False
    self.balers = False
    self.windturbine = False
    self.dlsensor_cache = False
def find_status(blob, sta, debug=False):
    """
    Sometimes we don't know if the station is active or offline.
    Look in the object for its status.
    """
    logging = getLogger()

    for status in blob:
        for snet in blob[status]:
            if sta in blob[status][snet]:
                logging.info("find_status(%s) => %s" % (sta, status))
                return status

    logging.info("find_status(%s) => False" % sta)
    return False
def find_snet(blob, sta, debug=False):
    """
    Sometimes we don't know the snet value of a station.
    Look in the object for its snet.
    """
    logging = getLogger()

    for status in blob:
        for snet in blob[status]:
            if sta in blob[status][snet]:
                logging.info("find_snet(%s) => %s" % (sta, snet))
                return snet

    logging.info("find_snet(%s) => False" % sta)
    return False
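# Both find_status() and find_snet() above walk the same nested layout:
# blob[status][snet][sta]. An illustrative example of that shape (station
# and network codes below are made up):
#
#   blob = {
#       "online":  {"TA": {"O53A": {}, "Y22D": {}}},
#       "offline": {"AK": {"CAPN": {}}},
#   }
#   find_status(blob, "O53A")  # => "online"
#   find_snet(blob, "CAPN")    # => "AK"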
def run(cmd, directory="./"):
    logging = getLogger()
    logging.debug("run() - Running: %s" % cmd)

    p = subprocess.Popen([cmd], stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, cwd=directory, shell=True)
    stdout, stderr = p.communicate()

    if stderr:
        raise db2mongoException("STDERR present: %s => \n\t%s" % (cmd, stderr))

    for line in stdout.split("\n"):
        logging.debug("stdout:\t%s" % line)

    if p.returncode != 0:
        raise db2mongoException("Exitcode (%s) on [%s]" % (p.returncode, cmd))

    return stdout
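# Usage sketch for run(); the command is illustrative. Any output on STDERR
# or a non-zero exit code raises db2mongoException:
#
#   listing = run("ls -1", directory="/tmp")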
def __init__(self, collection, orb, orb_select=None, orb_reject=None,
             default_orb_read=0, statefile=False, reap_wait=3,
             timeout_exit=True, reap_timeout=5):
    """
    Class to read an ORB for POC packets and update a MongoDB database
    with the values. Set the serial of the instrument as the main id and
    just update that entry with the latest packet that comes into the
    ORB. We can also run with the clean option and clean the archive
    before we start putting data in it. There is a position flag to force
    the reader to jump to a particular part of the ORB and the usual
    statefile to look for a previous value for the last packet id read.
    """
    self.logging = getLogger('poc_class')
    self.logging.debug("Pocs.init()")

    self.poc = Poc()
    self.cache = {}
    self.orb = False
    self.errors = 0
    self.orbname = orb
    self.lastread = 0
    self.timezone = 'UTC'
    self.position = False
    self.error_cache = {}
    self.timeout_exit = timeout_exit
    self.reap_wait = int(reap_wait)
    self.statefile = statefile
    self.collection = collection
    self.orb_select = orb_select
    self.orb_reject = orb_reject
    self.reap_timeout = int(reap_timeout)
    self.timeformat = '%D (%j) %H:%M:%S %z'
    self.default_orb_read = default_orb_read

    # StateFile
    self.state = stateFile(self.statefile, self.default_orb_read)
    self.position = self.state.last_packet()
    #self.last_time = self.state.last_time()

    if not self.orb_select:
        self.orb_select = None
    if not self.orb_reject:
        self.orb_reject = None
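# Instantiation sketch for the POC reader; the collection handle, ORB name,
# select expression, and statefile path are illustrative assumptions:
#
#   pocs = Pocs(mongo_db["pocs"], "anfexport:status",
#               orb_select=".*/pf/st", statefile="state/poc.state")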
def clean_cache_object(cache, id="dlname"):
    """
    Prepare a memory dictionary for injection of data into a MongoDB
    structure. We have several requirements:
        1) Base key "dlname" on every element, unless "id" is defined.
        2) All data should be convertible by json.load().
        3) Base key "time" should be present. This is the time of the data.

    We will create a new key "id" for our returned object. This will be
    unique and if objects repeat in the cache then the function will
    silently overwrite previous entries. We append a new key "lddate"
    with the time of the object creation. All data returned should be
    strings and could be sent directly to MongoDB.
    """
    logging = getLogger()
    logging.info("clean_cache_object(%s)" % id)

    results = []

    for entry in cache:
        if id not in entry:
            continue

        # Convert to JSON then back to dict to stringify numeric keys
        entry = json.loads(json.dumps(entry))

        try:
            # Try to find object for id
            if id != "id":
                entry["id"] = entry[id]
        except Exception:
            # Generic id for this entry
            entry["id"] = len(results)

        # add entry for autoflush index
        entry["time_obj"] = datetime.fromtimestamp(entry["time"])

        # add entry for last load of entry
        entry["lddate"] = datetime.fromtimestamp(stock.now())

        results.append(entry)

    return results
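# clean_cache_object() in action; the input entry below is illustrative.
# Every element needs the id key ("dlname" by default) and a "time" epoch
# value, and gets "id", "time_obj" and "lddate" added on the way out:
#
#   cache = [{"dlname": "TA_O53A", "time": 1514764800.0}]
#   docs = clean_cache_object(cache)
#   docs[0]["id"]        # => "TA_O53A"
#   docs[0]["time_obj"]  # => datetime built from "time"
#   docs[0]["lddate"]    # => datetime of this conversion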
def get_md5(test_file, debug=False):
    """
    Verify the checksum of a table.
    Raises db2mongoException if the file is missing.
    """
    logging = getLogger()

    logging.debug("get_md5(%s) => test for file" % test_file)

    if os.path.isfile(test_file):
        f = open(test_file)
        md5 = hashlib.md5(f.read()).hexdigest()
        f.close()
        return md5

    raise db2mongoException("get_md5(%s) => FILE MISSING!!!" % test_file)
def index_db(collection, indexlist):
    """
    Set index values on MongoDB
    """
    logging = getLogger()

    re_simple = re.compile(".*simple.*")
    re_text = re.compile(".*text.*")
    re_sparse = re.compile(".*sparse.*")
    re_hashed = re.compile(".*hashed.*")
    re_unique = re.compile(".*unique.*")

    logging.debug(indexlist)

    for field, param in indexlist.iteritems():

        unique = 1 if re_unique.match(param) else 0
        sparse = 1 if re_sparse.match(param) else 0

        style = 1
        if re_text.match(param):
            style = "text"
        elif re_hashed.match(param):
            style = "hashed"
        elif re_simple.match(param):
            style = 1

        try:
            expireAfter = float(param)
        except ValueError:
            expireAfter = False

        logging.debug("ensure_index( [(%s,%s)], expireAfterSeconds=%s, "
                      "unique=%s, sparse=%s)" %
                      (field, style, expireAfter, unique, sparse))

        collection.ensure_index([(field, style)],
                                expireAfterSeconds=expireAfter,
                                unique=unique, sparse=sparse)

    collection.reindex()

    for index in collection.list_indexes():
        logging.debug(index)
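# An illustrative indexlist for index_db(); keys are field names and values
# are style strings matched by the regexes above ("simple", "text", "hashed",
# plus optional "unique"/"sparse"), or a number of seconds for a TTL index.
# This particular mapping is an assumption for illustration:
#
#   indexlist = {
#       "id": "simple unique",  # ascending index with unique constraint
#       "sta": "hashed",        # hashed index
#       "time_obj": "3600",     # TTL index, expire after one hour
#   }
#   index_db(collection, indexlist)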
def extract_from_db(db, steps, fields, subset=""):
    logging = getLogger()

    if subset:
        steps.extend(["dbsubset %s" % subset])

    logging.debug("Extract from db: " + ", ".join(steps))

    results = []

    with datascope.closing(datascope.dbopen(db, "r")) as dbview:
        dbview = dbview.process(steps)
        logging.debug("Records in new view: %s" % dbview.record_count)

        if not dbview.record_count:
            logging.warning("No records after view steps on %s" %
                            dbview.query(datascope.dbDATABASE_NAME))
            return None

        for temp in dbview.iter_record():
            results.append(dict(zip(fields, temp.getv(*fields))))

    return results
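# A sketch of how extract_from_db() is typically driven; the database path,
# view steps, field list, and subset below are illustrative assumptions:
#
#   steps = ["dbopen site", "dbjoin deployment"]
#   fields = ["sta", "lat", "lon", "time", "endtime"]
#   rows = extract_from_db("/opt/antelope/data/db/usarray", steps, fields,
#                          subset="snet=='TA'")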
def verify_db(db):
    logging = getLogger()

    logging.debug("Verify database: [%s]" % db)

    name = False

    if isinstance(db, str):
        with datascope.closing(datascope.dbopen(db, "r")) as pointer:
            if pointer.query(datascope.dbDATABASE_COUNT):
                name = pointer.query(datascope.dbDATABASE_NAME)
                logging.debug(name)
                logging.info("%s => valid" % name)
            else:
                logging.warning("PROBLEMS OPENING DB: %s" % db)
    else:
        logging.error("Not a valid parameter for db: [%s]" % db)

    return name
def __init__(self, db=False, subset=False):
    """
    Class to query a Datascope event database and cache the events in
    memory. The origin table is the main source of information. The
    system will try to join with the event table if present. The netmag
    table will be imported into memory and used to expand the events.

    Usage:
        events = Events(db, subset=False)
        events.validate()

        while True:
            if events.need_update():
                events.update()
                data, error = events.data()
            sleep(time)
    """
    self.logging = getLogger(self.__class__.__name__)
    self.logging.debug("Events.init()")

    self.db = False
    self.database = db
    self.db_subset = subset
    self.cache = []
    self.cache_error = []
    self.mags = {}

    # event table is not tested here.
    self.tables = ['origin', 'netmag']
    self.dbs_tables = {}
    self.timeformat = False
    self.timezone = False