def job(self, linedata):
    """
    @type   linedata: dict
    @param  linedata: One line of BP data dict-ified.

    Handles a static job insert event.
    """
    job = self.linedataToObject(linedata, Job())
    job.wf_id = self.wf_uuid_to_id(job.wf_uuid)
    job.clustered = util.as_bool(job.clustered)

    self.log.debug('job: %s', job)

    if self._batch:
        self._batch_cache['batch_events'].append(job)
    else:
        job.commit_to_db(self.session)
def __init__(self, add_hash="no", _validate=False):
    """Will be overridden by subclasses to take parameters specific
    to their function.
    """
    self.log = logging.getLogger("%s.%s" % (self.__module__,
                                            self.__class__.__name__))
    self._do_preprocess = False  # may get set to True, below
    self.last_flush = time.time()
    self._validate = _validate

    # Parameter: add_hash
    try:
        self._add_hash = util.as_bool(add_hash)
        self._do_preprocess = True
    except ValueError, err:
        self.log.exception(err)
        self.log.error("Parameter error: add_hash = %s", add_hash)
        self._add_hash = False
def __init__(self, add_hash="no", _validate=False, schemata=None):
    """Will be overridden by subclasses to take parameters specific
    to their function.
    """
    DoesLogging.__init__(self)
    self._do_preprocess = False  # may get set to True, below
    self.last_flush = time.time()
    self._validate = _validate

    # Parameter: add_hash
    try:
        self._add_hash = util.as_bool(add_hash)
        self._do_preprocess = True
    except ValueError, err:
        self.log.error("parameter.error", name="add_hash",
                       value=add_hash, msg=err)
        self._add_hash = False
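# Hedged example (not part of the original module): the docstring above says
# subclasses override __init__ with their own parameters, and the loaders below
# invoke this initializer as BaseAnalyzer.__init__. A concrete analyzer would
# typically add its own keywords and forward the rest to the base class.
# 'ExampleAnalyzer' and 'my_flag' are illustrative names, not from the source.
class ExampleAnalyzer(BaseAnalyzer):
    def __init__(self, my_flag="no", **kw):
        BaseAnalyzer.__init__(self, **kw)
        # util.as_bool() accepts yes/no style strings, as add_hash does above
        self.my_flag = util.as_bool(my_flag)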
class Analyzer(BaseAnalyzer, SQLAlchemyInit):
    """Load into the Stampede SQL schema through SQLAlchemy.

    Parameters:
      - connString {string,None*}: SQLAlchemy connection string.
        The general form of this is
        'dialect+driver://username:password@host:port/database'.
        See the SQLAlchemy docs for details.
        For sqlite, use 'sqlite:///foo.db' for a relative path and
        'sqlite:////path/to/foo.db' (four slashes) for an absolute one.
        When using MySQL, the general form will work, but the library
        expects the database to exist (i.e., it will not issue CREATE DB)
        but will populate an empty DB with tables/indexes/etc.
    """

    def __init__(self, connString=None, perf='no', batch='no', props=None,
                 db_type=None, **kw):
        """Init object

        @type   connString: string
        @param  connString: SQLAlchemy connection string - REQUIRED
        """
        BaseAnalyzer.__init__(self, **kw)

        if connString is None:
            raise ValueError("connString is required")

        try:
            SQLAlchemyInit.__init__(self, connString, props=props,
                                    db_type=db_type)
        except (connection.ConnectionError, DBAdminError), e:
            self.log.exception(e)
            self.log.error('Error initializing workflow loader')
            raise RuntimeError

        # "Case" dict to map events to handler methods
        self.eventMap = {
            'stampede.wf.plan': self.workflow,
            'stampede.wf.map.task_job': self.task_map,
            'stampede.static.start': self.noop,  # good
            'stampede.static.end': self.static_end,
            'stampede.xwf.start': self.workflowstate,
            'stampede.xwf.end': self.workflowstate,
            'stampede.xwf.map.subwf_job': self.subwf_map,
            'stampede.task.info': self.task,
            'stampede.task.edge': self.task_edge,
            'stampede.job.info': self.job,
            'stampede.job.edge': self.job_edge,
            'stampede.job_inst.pre.start': self.job_instance,
            'stampede.job_inst.pre.term': self.jobstate,
            'stampede.job_inst.pre.end': self.jobstate,
            'stampede.job_inst.submit.start': self.job_instance,
            'stampede.job_inst.submit.end': self.jobstate,
            'stampede.job_inst.held.start': self.jobstate,
            'stampede.job_inst.held.end': self.jobstate,
            'stampede.job_inst.main.start': self.jobstate,
            'stampede.job_inst.main.term': self.jobstate,
            'stampede.job_inst.main.end': self.job_instance,
            'stampede.job_inst.post.start': self.jobstate,
            'stampede.job_inst.post.term': self.jobstate,
            'stampede.job_inst.post.end': self.job_instance,
            'stampede.job_inst.host.info': self.host,
            'stampede.job_inst.image.info': self.jobstate,
            'stampede.job_inst.abort.info': self.jobstate,
            'stampede.job_inst.grid.submit.start': self.noop,  # good
            'stampede.job_inst.grid.submit.end': self.jobstate,
            'stampede.job_inst.globus.submit.start': self.noop,  # good
            'stampede.job_inst.globus.submit.end': self.jobstate,
            'stampede.inv.start': self.noop,  # good
            'stampede.inv.end': self.invocation,
        }

        # Dicts for caching FK lookups
        self.wf_id_cache = {}
        self.root_wf_id_cache = {}
        self.job_id_cache = {}
        self.job_instance_id_cache = {}
        self.host_cache = {}
        self.hosts_written_cache = None

        # undocumented performance option
        self._perf = util.as_bool(perf)
        if self._perf:
            self._insert_time, self._insert_num = 0, 0
            self._start_time = time.time()

        # flags and state for batching
        self._batch = util.as_bool(batch)
        self._flush_every = 1000
        self._flush_count = 0
        self._last_flush = time.time()

        # caches for batched events
        self._batch_cache = {
            'batch_events': [],
            'update_events': [],
            'host_map_events': []
        }
        self._task_map_flush = {}
        self._task_edge_flush = {}
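# Hedged usage sketch (not part of the original module): feed dict-ified BP
# events to the loader by looking up their handler in eventMap. Only Analyzer,
# eventMap, and noop come from the code above; the 'event' key name and the
# input iterable are assumptions about the surrounding driver code.
def load_events(lines, conn='sqlite:///workflow.db'):
    analyzer = Analyzer(connString=conn, batch='yes')
    for linedata in lines:
        handler = analyzer.eventMap.get(linedata.get('event'), analyzer.noop)
        handler(linedata)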
def read(self, str_or_file):
    """Read and parse the data.

    Args:
      - str_or_file (str|file): A string or file-like object, which must
        implement readline(). If it is a string, attempt to open the file
        with that name.

    Exceptions:
      - IOError: If a file is specified but can't be opened
      - ValueError: Bad type specification
    """
    if hasattr(str_or_file, "readline"):
        fileobj = str_or_file
    else:
        fileobj = open(str(str_or_file), 'r')
    self._parser.readfp(fileobj)
    name_expr = re.compile("^[0-9a-zA-Z._-]+$")
    msg = "must be 1 or more of alphanumeric, dash, underline or dot"
    for sect in self._parser.sections():
        # check that section name is legal
        m = name_expr.match(sect)
        if m is None:
            raise ValueError("Event name [%s]: %s" % (sect, msg))
        type_map, defaults = {}, {}
        # process directives (since they apply to all values in the loop)
        try:
            drop_opt = self._parser.get(sect, '@drop')
            drop = util.as_bool(drop_opt)
        except ConfigParser.NoOptionError:
            drop = False
        for name, value in self._parser.items(sect):
            # skip to next, if name is a directive
            if name[0] == '@':
                continue
            # check that name is legal
            m = name_expr.match(name)
            if m is None:
                raise ValueError("Field name '%s': %s" % (name, msg))
            # extract type and default value, if any
            m = self._SCHEMAVAL.match(value)
            if m is None:
                raise ValueError("Bad value '%s' for field '%s'" % (value, name))
            mgd = m.groupdict()
            value_type, value_default = mgd['type'], mgd['default']
            # set type function
            try:
                fn = self._TYPEFN[value_type]
                # If not dropping, make the 'str' function even cheaper
                # by skipping the type map
                if (not drop) and (fn is str):
                    pass
                # Otherwise, put function into mapping
                else:
                    type_map[name] = fn
            except KeyError:
                raise ValueError("Unknown type '%s' in '%s=%s' in section [%s]" %
                                 (value_type, name, value, sect))
            # set default value
            if value_default is not None:
                s = value_default[1:-1]  # strip { }
                defaults[name] = s
        self._mapping[sect] = type_map
        self._defaults[sect] = defaults
        self._drop[sect] = drop
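# Hedged example (not part of the original module): an INI-style schema snippet
# in the form read() consumes. Sections name events, '@drop' is the directive
# handled above, and each field value names a type with an optional
# brace-wrapped default (the braces are stripped above). The exact value syntax
# is governed by the _SCHEMAVAL regex, which is not shown in this excerpt, so
# the 'int {0}' spelling below is an assumption.
#
#   [stampede.job.info]
#   @drop = no
#   status = int {0}
#   name = str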
class Analyzer(BaseAnalyzer, SQLAlchemyInit):
    """Load into the Stampede Dashboard SQL schema through SQLAlchemy.

    Parameters:
      - connString {string,None*}: SQLAlchemy connection string.
        The general form of this is
        'dialect+driver://username:password@host:port/database'.
        See the SQLAlchemy docs for details.
        For sqlite, use 'sqlite:///foo.db' for a relative path and
        'sqlite:////path/to/foo.db' (four slashes) for an absolute one.
        When using MySQL, the general form will work, but the library
        expects the database to exist (i.e., it will not issue CREATE DB)
        but will populate an empty DB with tables/indexes/etc.
      - mysql_engine {string,None*}: For MySQL, the storage engine.
        Accepted values include 'InnoDB' and 'MyISAM'. See SQLAlchemy/MySQL
        documentation for more details. Ignored if connString does not
        start with 'mysql'.
    """

    def __init__(self, connString=None, perf='no', batch='no',
                 mysql_engine=None, **kw):
        """Init object

        @type   connString: string
        @param  connString: SQLAlchemy connection string - REQUIRED
        """
        BaseAnalyzer.__init__(self, **kw)
        _kw = {}
        if connString is None:
            raise ValueError("connString is required")
        dialect = dsn_dialect(connString)
        _kw[dialect] = {}
        if dialect == 'mysql':
            # MySQL-specific options
            if mysql_engine is not None:
                _kw[dialect]['mysql_engine'] = mysql_engine

        # This mixin adds a class member "self.session" after initialization.
        # This is the session handler that the code logic uses for queries
        # and other DB interaction. The arg "initializeToDashboardDB" is
        # a function from the stampede_schema module.
        try:
            SQLAlchemyInit.__init__(self, connString, initializeToDashboardDB,
                                    **_kw)
        except exceptions.OperationalError, e:
            self.log.error('init', msg='%s' % ErrorStrings.get_init_error(e))
            raise RuntimeError

        # The schema version is deliberately not checked here.
        # s_check = SchemaCheck(self.session)
        # if not s_check.check_schema():
        #     raise SchemaVersionError

        self.log.info('init.start')

        # "Case" dict to map events to handler methods
        self.eventMap = {
            'dashboard.wf.plan': self.workflow,
            # 'dashboard.wf.map.task_job': self.task_map,
            'dashboard.xwf.start': self.workflowstate,
            'dashboard.xwf.end': self.workflowstate,
        }

        # Dicts for caching FK lookups
        self.wf_id_cache = {}
        self.root_wf_id_cache = {}

        # undocumented performance option
        self._perf = util.as_bool(perf)
        if self._perf:
            self._insert_time, self._insert_num = 0, 0
            self._start_time = time.time()

        # flags and state for batching
        self._batch = util.as_bool(batch)
        self._flush_every = 1
        self._flush_count = 0
        self._last_flush = time.time()

        # caches for batched events
        self._batch_cache = {
            'batch_events': [],
            'update_events': [],
            'host_map_events': []
        }

        self.log.info('init.end', msg='Batching: %s' % self._batch)
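# Hedged usage sketch (not part of the original module): constructing the
# dashboard loader against MySQL with an explicit storage engine. The
# connection details are illustrative; per the docstring above, mysql_engine
# is ignored unless connString starts with 'mysql'.
dashboard_loader = Analyzer(
    connString='mysql://user:password@localhost:3306/dashboard',
    mysql_engine='InnoDB',
    batch='no')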
def __init__(self, host="localhost", port=27017, database='application',
             collection='netlogger', indices="", datetime='yes', intvals="",
             floatvals="", event_filter="", user="", password="", batch=0,
             perf=None, **kw):
    BaseAnalyzer.__init__(self, _validate=True, **kw)
    # map for converting values
    self._convert = {}
    # mongo database and collection
    self.db_name, self.coll_name = database, collection
    # connect
    try:
        self.connection = pymongo.Connection(host=host, port=port)
    except ConnectionFailure:
        raise ConnectionException("Couldn't connect to DB "
                                  "at %s:%d" % (host, port))
    # create/use database, by retrieving it
    if self._dbg:
        self.log.debug("init.database_name", value=self.db_name)
    self.database = self.connection[self.db_name]
    # if authentication is on, use it
    if user != "":
        success = self.database.authenticate(user, password)
        if not success:
            raise ConnectionException(
                "Could not authenticate to "
                "database=%s, collection=%s as user '%s'" %
                (self.db_name, self.coll_name, user))
    # create/use collection, by retrieving it
    if self._dbg:
        self.log.debug("init.collection_name", value=self.coll_name)
    self.collection = self.database[self.coll_name]
    # ensure indexes are set
    index_fields = indices.split(",")
    for field in index_fields:
        field = field.strip()
        if not field or field == "^":
            continue
        if self._dbg:
            self.log.debug("init.index", value=field)
        if field[0] == '^':
            unique = True
            field = field[1:]
        else:
            unique = False
        self.collection.ensure_index(field, unique=unique)
    # datetime flag
    self._datetime = util.as_bool(datetime)
    # Add numeric values to conversion map
    if intvals.strip():
        self._convert.update(dict.fromkeys(intvals.split(','), int))
    if floatvals.strip():
        self._convert.update(dict.fromkeys(floatvals.split(','), float))
    # filter, if given
    self._event_re = None
    if event_filter:
        self._event_re = re.compile(event_filter)
    # batch, if requested
    if batch:
        self._batch = int(batch)
        self._curbatch = []
        self._batchlen = 0
    else:
        self._batch = 0
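# Hedged usage sketch (not part of the original module): constructing the
# MongoDB analyzer defined by the __init__ above. The class name 'Analyzer' is
# an assumption (it is not visible in this excerpt) and all values are
# illustrative. A leading '^' in 'indices' requests a unique index, per the
# parsing above; how event_filter is applied is not shown in this excerpt.
mongo_loader = Analyzer(
    host="localhost", port=27017,
    database="application", collection="netlogger",
    indices="^event_id,host",       # unique index on event_id, plain on host
    intvals="status", floatvals="duration",
    event_filter="^stampede\\.",
    batch=100)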