Example #1
    def job(self, linedata):
        """
        @type   linedata: dict
        @param  linedata: One line of BP data dict-ified.

        Handles a static job insert event.
        """
        job = self.linedataToObject(linedata, Job())
        job.wf_id = self.wf_uuid_to_id(job.wf_uuid)
        job.clustered = util.as_bool(job.clustered)
        self.log.debug('job: %s', job)

        if self._batch:
            self._batch_cache['batch_events'].append(job)
        else:
            job.commit_to_db(self.session)
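
As a hedged illustration of how this handler is driven: the loader's eventMap (Example #4 below) routes 'stampede.job.info' events to job(), and the method reads wf_uuid and clustered off the resulting object. The dict below is hypothetical; real BP lines carry many more fields.

# Hypothetical input; field names are inferred from the attributes used above.
linedata = {
    'event': 'stampede.job.info',  # routed to job() via the loader's eventMap
    'wf_uuid': 'deadbeef-0000-0000-0000-000000000000',  # made-up workflow UUID
    'clustered': '1',              # string flag coerced by util.as_bool()
}
analyzer.job(linedata)             # 'analyzer' is an assumed Analyzer instance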
Example #2
 def __init__(self, add_hash="no", _validate=False):
     """Will be overridden by subclasses to take
     parameters specific to their function.
     """
     self.log = logging.getLogger("%s.%s" % (self.__module__, self.__class__.__name__))
     self._do_preprocess = False # may get set to True, below
     self.last_flush = time.time()
     self._validate = _validate
     # Parameter: add_hash
     try:
         self._add_hash = util.as_bool(add_hash)
         self._do_preprocess = True
     except ValueError as err:
         self.log.exception(err)
         self.log.error("Parameter error: add_hash = %s", add_hash)
         self._add_hash = False
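
util.as_bool itself does not appear in this listing. Judging from how it is called (string defaults such as "no", and a ValueError on unrecognized input), it plausibly behaves like the sketch below; the exact set of accepted spellings is an assumption.

def as_bool(value):
    """Coerce a yes/no-style string (or a bool) to a bool.

    Sketch only, not the real util.as_bool. Raises ValueError on input it
    cannot interpret, which is what the except-clause above relies on.
    """
    if isinstance(value, bool):
        return value
    v = str(value).strip().lower()
    if v in ('yes', 'true', 'on', '1'):
        return True
    if v in ('no', 'false', 'off', '0'):
        return False
    raise ValueError("cannot interpret %r as a boolean" % (value,))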
Example #3
 def __init__(self, add_hash="no", _validate=False,
              schemata=None):
     """Will be overridden by subclasses to take
     parameters specific to their function.
     """
     DoesLogging.__init__(self)
     self._do_preprocess = False # may get set to True, below
     self.last_flush = time.time()
     self._validate = _validate
     # Parameter: add_hash
     try:
         self._add_hash = util.as_bool(add_hash)
         self._do_preprocess = True
     except ValueError as err:
         self.log.error("parameter.error",
                        name="add_hash", value=add_hash, msg=err)
         self._add_hash = False
Example #4
class Analyzer(BaseAnalyzer, SQLAlchemyInit):
    """Load into the Stampede SQL schema through SQLAlchemy.

    Parameters:
      - connString {string,None*}: SQLAlchemy connection string.
        The general form of this is
          'dialect+driver://username:password@host:port/database'.
        See the SQLAlchemy docs for details.
        For sqlite, use 'sqlite:///foo.db' for a relative path and
        'sqlite:////path/to/foo.db' (four slashes) for an absolute one.
        When using MySQL, the general form will work, but the library
        expects the database to already exist (i.e., it will not issue
        CREATE DATABASE); it will, however, populate an empty database
        with tables, indexes, etc.
    """
    def __init__(self,
                 connString=None,
                 perf='no',
                 batch='no',
                 props=None,
                 db_type=None,
                 **kw):
        """Init object

        @type   connString: string
        @param  connString: SQLAlchemy connection string - REQUIRED
        """
        BaseAnalyzer.__init__(self, **kw)
        if connString is None:
            raise ValueError("connString is required")

        try:
            SQLAlchemyInit.__init__(self,
                                    connString,
                                    props=props,
                                    db_type=db_type)
        except (connection.ConnectionError, DBAdminError) as e:
            self.log.exception(e)
            self.log.error('Error initializing workflow loader')
            raise RuntimeError

        # "Case" dict to map events to handler methods
        self.eventMap = {
            'stampede.wf.plan': self.workflow,
            'stampede.wf.map.task_job': self.task_map,
            'stampede.static.start': self.noop,  # good
            'stampede.static.end': self.static_end,
            'stampede.xwf.start': self.workflowstate,
            'stampede.xwf.end': self.workflowstate,
            'stampede.xwf.map.subwf_job': self.subwf_map,
            'stampede.task.info': self.task,
            'stampede.task.edge': self.task_edge,
            'stampede.job.info': self.job,
            'stampede.job.edge': self.job_edge,
            'stampede.job_inst.pre.start': self.job_instance,
            'stampede.job_inst.pre.term': self.jobstate,
            'stampede.job_inst.pre.end': self.jobstate,
            'stampede.job_inst.submit.start': self.job_instance,
            'stampede.job_inst.submit.end': self.jobstate,
            'stampede.job_inst.held.start': self.jobstate,
            'stampede.job_inst.held.end': self.jobstate,
            'stampede.job_inst.main.start': self.jobstate,
            'stampede.job_inst.main.term': self.jobstate,
            'stampede.job_inst.main.end': self.job_instance,
            'stampede.job_inst.post.start': self.jobstate,
            'stampede.job_inst.post.term': self.jobstate,
            'stampede.job_inst.post.end': self.job_instance,
            'stampede.job_inst.host.info': self.host,
            'stampede.job_inst.image.info': self.jobstate,
            'stampede.job_inst.abort.info': self.jobstate,
            'stampede.job_inst.grid.submit.start': self.noop,  # good
            'stampede.job_inst.grid.submit.end': self.jobstate,
            'stampede.job_inst.globus.submit.start': self.noop,  # good
            'stampede.job_inst.globus.submit.end': self.jobstate,
            'stampede.inv.start': self.noop,  # good
            'stampede.inv.end': self.invocation,
        }

        # Dicts for caching FK lookups
        self.wf_id_cache = {}
        self.root_wf_id_cache = {}
        self.job_id_cache = {}
        self.job_instance_id_cache = {}
        self.host_cache = {}
        self.hosts_written_cache = None

        # undocumented performance option
        self._perf = util.as_bool(perf)
        if self._perf:
            self._insert_time, self._insert_num = 0, 0
            self._start_time = time.time()

        # flags and state for batching
        self._batch = util.as_bool(batch)
        self._flush_every = 1000
        self._flush_count = 0
        self._last_flush = time.time()

        # caches for batched events
        self._batch_cache = {
            'batch_events': [],
            'update_events': [],
            'host_map_events': []
        }
        self._task_map_flush = {}
        self._task_edge_flush = {}
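
A hypothetical construction call, with the connection-string form taken from the class docstring; everything else here (variable names, the choice of sqlite) is illustrative:

# Hypothetical usage sketch, not taken from the source.
analyzer = Analyzer(connString='sqlite:///stampede.db',  # relative sqlite file
                    batch='yes')  # util.as_bool() accepts 'yes'/'no' strings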
Example #5
    def read(self, str_or_file):
        """Read and parse the data.

        Args:
          - str_or_file (str|file): A string or file-like
              object, which must implement readline(). If it is a
              string, attempt to open the file with that name.

        Exceptions:
          - IOError: If a file is specified but can't be opened
          - ValueError: Bad type specification
        """
        if hasattr(str_or_file, "readline"):
            fileobj = str_or_file
        else:
            fileobj = open(str(str_or_file), 'r')
        self._parser.readfp(fileobj)
        name_expr = re.compile("^[0-9a-zA-Z._-]+$")
        msg = "must be 1 or more of: alphanumeric, dash, underscore, dot"
        for sect in self._parser.sections():
            # check that section name is legal
            m = name_expr.match(sect)
            if m is None:
                raise ValueError("Event name [%s]: %s" % (sect, msg))
            type_map, defaults = { }, { }
            # process directives (since they apply to all values in loop)
            try:
                drop_opt = self._parser.get(sect, '@drop')
                drop = util.as_bool(drop_opt)
            except ConfigParser.NoOptionError:
                drop = False
            for name, value in self._parser.items(sect):
                # skip to next, if name is directive
                if name[0] == '@':
                    continue
                # check that name is legal
                m = name_expr.match(name)
                if m is None:
                    raise ValueError("Field name '%s': %s" % (name, msg))
                # extract type and default value, if any
                m = self._SCHEMAVAL.match(value)
                if m is None:
                    raise ValueError("Bad value '%s' for field '%s'" % (
                        value, name))
                mgd = m.groupdict()
                value_type, value_default = mgd['type'], mgd['default']
                # set type function
                try:
                    fn = self._TYPEFN[value_type]
                    # 'str' is a no-op conversion, so when we are not dropping
                    # fields we can skip the type map for it entirely;
                    # otherwise record the conversion function.
                    if drop or fn is not str:
                        type_map[name] = fn
                except KeyError:
                    raise ValueError("Unknown type '%s' in '%s=%s' "
                                     "in section [%s]" % (
                                         value_type, name, value, sect))
                # set default value
                if value_default is not None:
                    s = value_default[1:-1] # strip { }
                    defaults[name] = s
            self._mapping[sect] = type_map
            self._defaults[sect] = defaults
            self._drop[sect] = drop
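
To make the parsing loop concrete, here is a hypothetical schema that this method could accept. The value grammar (a type name with an optional {default} suffix) is inferred from the groupdict keys and the brace-stripping above; _SCHEMAVAL itself is not shown, so treat the exact syntax as an assumption.

# Hypothetical schema text; the section and field names are illustrative only.
sample = """
[stampede.job.info]
@drop = no
clustered = int{0}
wf_uuid = str
"""
from StringIO import StringIO   # Python 2, matching the era of these examples
parser.read(StringIO(sample))   # 'parser' is an assumed instance of this class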
Example #6
class Analyzer(BaseAnalyzer, SQLAlchemyInit):
    """Load into the Stampede Dashboard SQL schema through SQLAlchemy.
    
    Parameters:
      - connString {string,None*}: SQLAlchemy connection string.
        The general form of this is 
          'dialect+driver://username:password@host:port/database'.
        See the SQLAlchemy docs for details.
        For sqlite, use 'sqlite:///foo.db' for a relative path and
        'sqlite:////path/to/foo.db' (four slashes) for an absolute one.
        When using MySQL, the general form will work, but the library
        expects the database to already exist (i.e., it will not issue
        CREATE DATABASE); it will, however, populate an empty database
        with tables, indexes, etc.
      - mysql_engine {string,None*}: For MySQL, the storage engine. Accepted
        values include 'InnoDB' and 'MyISAM'. See SQLAlchemy/MySQL
        documentation for more details. Ignored if connString does not
        start with 'mysql'.
    """
    def __init__(self,
                 connString=None,
                 perf='no',
                 batch='no',
                 mysql_engine=None,
                 **kw):
        """Init object

        @type   connString: string
        @param  connString: SQLAlchemy connection string - REQUIRED
        """
        BaseAnalyzer.__init__(self, **kw)
        _kw = {}
        if connString is None:
            raise ValueError("connString is required")
        dialect = dsn_dialect(connString)
        _kw[dialect] = {}
        if dialect == 'mysql':
            # mySQL-specific options
            if mysql_engine is not None:
                _kw[dialect]['mysql_engine'] = mysql_engine
        # This mixin adds a class member "self.session" after initialization.
        # This is the session handler that the code logic uses for queries
        # and other DB interaction.  The arg "initializeToDashboardDB" is
        # a function from the stampede_schema module.
        try:
            SQLAlchemyInit.__init__(self, connString, initializeToDashboardDB,
                                    **_kw)
        except exceptions.OperationalError as e:
            self.log.error('init', msg='%s' % ErrorStrings.get_init_error(e))
            raise RuntimeError

        # Check the schema version before proceeding.
        # (Disabled: don't check any schema.)
        # s_check = SchemaCheck(self.session)
        # if not s_check.check_schema():
        #     raise SchemaVersionError
        self.log.info('init.start')

        # "Case" dict to map events to handler methods
        self.eventMap = {
            'dashboard.wf.plan': self.workflow,
            # 'dashboard.wf.map.task_job': self.task_map,
            'dashboard.xwf.start': self.workflowstate,
            'dashboard.xwf.end': self.workflowstate,
        }

        # Dicts for caching FK lookups
        self.wf_id_cache = {}
        self.root_wf_id_cache = {}

        # undocumented performance option
        self._perf = util.as_bool(perf)
        if self._perf:
            self._insert_time, self._insert_num = 0, 0
            self._start_time = time.time()

        # flags and state for batching
        self._batch = util.as_bool(batch)
        self._flush_every = 1
        self._flush_count = 0
        self._last_flush = time.time()

        # caches for batched events
        self._batch_cache = {
            'batch_events': [],
            'update_events': [],
            'host_map_events': []
        }

        self.log.info('init.end', msg='Batching: %s' % self._batch)
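
dsn_dialect is not shown in this excerpt. A minimal stand-in consistent with its use above, and with the 'dialect+driver://...' form in the docstring, might look like this (a sketch, not the original helper):

def dsn_dialect(conn_string):
    """Return the dialect part of an SQLAlchemy URL, e.g.
    'mysql+mysqldb://user:pw@host/db' -> 'mysql'. (Sketch only.)
    """
    scheme = conn_string.split('://', 1)[0]  # everything before '://'
    return scheme.split('+', 1)[0]           # drop any '+driver' suffix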
Example #7
 def __init__(self,
              host="localhost",
              port=27017,
              database='application',
              collection='netlogger',
              indices="",
              datetime='yes',
              intvals="",
              floatvals="",
              event_filter="",
              user="",
              password="",
              batch=0,
              perf=None,
              **kw):
     BaseAnalyzer.__init__(self, _validate=True, **kw)
     # map for converting values
     self._convert = {}
     # mongo database and collection
     self.db_name, self.coll_name = database, collection
     # connect
     try:
         self.connection = pymongo.Connection(host=host, port=port)
     except ConnectionFailure:
         raise ConnectionException("Couldn't connect to DB "
                                   "at %s:%d" % (host, port))
     # create/use database, by retrieving it
     if self._dbg:
         self.log.debug("init.database_name", value=self.db_name)
     self.database = self.connection[self.db_name]
     # if authentication is on, use it
     if user != "":
         success = self.database.authenticate(user, password)
         if not success:
             raise ConnectionException(
                 "Could not authenticate to "
                 "database=%s, collection=%s as user '%s'" %
                 (self.db_name, self.coll_name, user))
     # create/use collection, by retrieving it
     if self._dbg:
         self.log.debug("init.collection_name", value=self.coll_name)
     self.collection = self.database[self.coll_name]
     # ensure indexes are set
     index_fields = indices.split(",")
     for field in index_fields:
         field = field.strip()
         if not field or field == "^":
             continue
         if self._dbg:
             self.log.debug("init.index", value=field)
         if field[0] == '^':
             unique = True
             field = field[1:]
         else:
             unique = False
         self.collection.ensure_index(field, unique=unique)
     # datetime flag
     self._datetime = util.as_bool(datetime)
     # Add numeric values to conversion map
     if intvals.strip():
         self._convert.update(dict.fromkeys(intvals.split(','), int))
     if floatvals.strip():
         self._convert.update(dict.fromkeys(floatvals.split(','), float))
     # filter, if given
     self._event_re = None
     if event_filter:
         self._event_re = re.compile(event_filter)
     # batch, if requested
     if batch:
         self._batch = int(batch)
         self._curbatch = []
         self._batchlen = 0
     else:
         self._batch = 0
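
A hypothetical construction call; the field names passed to indices and intvals are made up. Note that pymongo.Connection and ensure_index place this code in the pymongo 2.x era.

# Hypothetical usage sketch, not taken from the source.
mongo = Analyzer(host='localhost',
                 port=27017,
                 database='application',
                 collection='netlogger',
                 indices='^event.id,ts',  # '^' prefix requests a unique index
                 intvals='status',        # coerce this field's values to int
                 batch=100)               # buffer 100 events per bulk insert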