Example #1
 def __init__(self, name, cleanup=None, reset_running=False,
              initial_submit=False, max_threads=None,
              preserve_order=False, max_faults=None,
              mrt_limit=None):
     self.logger = PrefixLoggerAdapter(logger, name)
     self.name = name
     self.job_classes = {}
     self.collection_name = self.COLLECTION_BASE + self.name
     self.collection = get_db()[self.collection_name]
     self.active_mrt = {}  # ReduceTask -> Job instance
     self.cleanup_callback = cleanup
     self.reset_running = reset_running
     self.ignored = []
     self.initial_submit = initial_submit
     self.initial_submit_next_check = {}  # job class -> timestamp
     self.max_threads = max_threads
     self.preserve_order = preserve_order
     self.max_faults = max_faults
     self.mrt_limit = mrt_limit
     self.mrt_overload = False
     self.running_lock = threading.Lock()
     self.running_count = defaultdict(int)  # Group -> Count
     self.log_jobs = None
     self.metrics = MetricsHub(
         "noc.scheduler.%s" % name,
         "jobs.count",
         "jobs.success",
         "jobs.failed",
         "jobs.dereference.count",
         "jobs.dereference.success",
         "jobs.dereference.failed",
         "jobs.time"
     )
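
Every example in this list follows the same pattern: a module-level logger is wrapped in a PrefixLoggerAdapter that stamps each record with an instance-specific prefix (scheduler name, collection name, task uuid, and so on). A minimal sketch of such an adapter, assuming it behaves like the standard logging.LoggerAdapter (the actual NOC implementation may differ):

import logging

class PrefixLoggerAdapter(logging.LoggerAdapter):
    # Hypothetical sketch: prepend a fixed prefix to every message
    def __init__(self, logger, prefix):
        super(PrefixLoggerAdapter, self).__init__(logger, {})
        self.prefix = prefix

    def process(self, msg, kwargs):
        return "[%s] %s" % (self.prefix, msg), kwargs

logger = logging.getLogger(__name__)
log = PrefixLoggerAdapter(logger, "inv.discovery")
log.info("Starting")  # emitted as "[inv.discovery] Starting"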
Example #2
 def __init__(self, name, local=False):
     self.logger = PrefixLoggerAdapter(logger, name)
     if name not in self.COLLECTIONS:
         self.logger.error("Invalid collection '%s'", name)
         raise ValueError("Invalid collection '%s'" % name)
     m, c = name.split(".", 1)
     self.module = m
     self.cname = name
     self.name = c
     self.local = local
     self.doc = self.COLLECTIONS[name]
     self.items = {}  # uuid -> CollectionItem
     self.changed = False
     self.ref_cache = {}
     self.partial = set()
     if hasattr(self.doc, "name"):
         # Use .name field when present
         self.get_name = attrgetter("name")
     else:
         # Or first unique field otherwise
         uname = None
         for spec in self.doc._meta["index_specs"]:
             if spec["unique"] and len(spec["fields"]) == 1:
                 uname = spec["fields"][0][0]
         if not uname:
             self.logger.error("Cannot find unique index")
             raise ValueError("No unique index")
         self.get_name = attrgetter(uname)
     self.translations = self.TRANSLATIONS.get(name,
                                               self.TRANSLATIONS[None])
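
The get_name fallback relies on operator.attrgetter, which builds a callable that reads the named attribute from any object, so both branches of the code above yield a uniform accessor:

from operator import attrgetter

get_name = attrgetter("name")
# get_name(item) is equivalent to item.name, regardless of
# which unique field was selected above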
Example #3
 def __init__(self, factory, socket=None):
     self.logger = PrefixLoggerAdapter(logger, self.get_label())
     self.factory = factory
     self.socket = socket
     self.start_time = time.time()
     self.last_read = self.start_time + 100  # @todo: Meaningful value
     self.name = None
     self.closing = False  # In closing state
     self.stale = False  # Closed as stale
     self.ttl = self.TTL
     self.set_timeout(self.TTL)
     self.factory.register_socket(self)
     if socket:
         self.set_status(r=True)
Example #4
 def __init__(self, object):
     self.object = object
     self.logger = PrefixLoggerAdapter(logger, self.object.name)
     self.env = None
     self.templates = {}  # fact class -> template
     self.fcls = {}  # template -> Fact class
     self.facts = {}  # Index -> Fact
     self.rn = 0  # Rule number
     self.config = None  # Cached config
     self.interface_ranges = None
     with self.ILOCK:
         self.AC_POLICY_VIOLATION = AlarmClass.objects.filter(
             name="Config | Policy Violation").first()
         if not self.AC_POLICY_VIOLATION:
             logger.error("Alarm class 'Config | Policy Violation' is not found. Alarms cannot be raised")
Example #5
class Engine(object):
    ILOCK = threading.Lock()
    AC_POLICY_VIOLATION = None

    def __init__(self, object):
        self.object = object
        self.logger = PrefixLoggerAdapter(logger, self.object.name)
        self.env = None
        self.templates = {}  # fact class -> template
        self.fcls = {}  # template -> Fact class
        self.facts = {}  # Index -> Fact
        self.rn = 0  # Rule number
        self.config = None  # Cached config
        self.interface_ranges = None
        with self.ILOCK:
            self.AC_POLICY_VIOLATION = AlarmClass.objects.filter(
                name="Config | Policy Violation").first()
            if not self.AC_POLICY_VIOLATION:
                logger.error("Alarm class 'Config | Policy Violation' is not found. Alarms cannot be raised")

    def get_template(self, fact):
        if fact.cls not in self.templates:
            self.logger.debug("Creating template %s", fact.cls)
            self.templates[fact.cls] = self.env.BuildTemplate(
                fact.cls, fact.get_template())
            self.fcls[fact.cls] = fact.__class__
            self.logger.debug("Define template %s",
                              self.templates[fact.cls].PPForm())
        return self.templates[fact.cls]

    def get_rule_number(self):
        return self.rn

    def assert_fact(self, fact):
        f = self.get_template(fact).BuildFact()
        f.AssignSlotDefaults()
        for k, v in fact.iter_factitems():
            if v is None or v == [] or v == tuple():
                continue
            if isinstance(v, basestring):
                v = v.replace("\n", "\\n")
            f.Slots[k] = v
        try:
            f.Assert()
        except clips.ClipsError, why:
            self.logger.error("Could not assert: %s", f.PPForm())
            self.logger.error(
                "CLIPS Error: %s\n%s",
                why,
                clips.ErrorStream.Read()
            )
            return
        self.facts[f.Index] = fact
        self.logger.debug("Assert %s", f.PPForm())
Example #6
 def __init__(self, name, is_superuser, enabled, user, uid, group, gid,
              instance_id, config_path):
     self.logger = PrefixLoggerAdapter(logger,
                                       "%s#%s" % (name, instance_id))
     self.logger.info("Reading config")
     self.instance_id = instance_id
     self.name = name
     self.config_path = config_path
     self.config = ConfigParser.SafeConfigParser()
     self.config.read("etc/%s.defaults" % name)
     self.config.read(config_path)
     self.enabled = enabled
     self.pid = None
     self.pidfile = self.config.get("main", "pidfile")\
         .replace("{{instance}}", self.instance_id)
     self.is_superuser = is_superuser
     self.user = user
     self.uid = uid
     self.group = group
     self.gid = gid
Example #7
 def __init__(self, name="pool", metrics_prefix=None,
              start_threads=1, max_threads=10,
              min_spare=1, max_spare=1, backlog=0):
     if min_spare > max_spare:
         raise ValueError("min_spare (%d) must not be greater"
                          " than max_spare (%d)" % (min_spare,
                                                    max_spare))
     if start_threads > max_threads:
         raise ValueError("start_threads (%d) must not be greater"
                          " than max_threads (%d)" % (start_threads,
                                                      max_threads))
     self.logger = PrefixLoggerAdapter(logger, name)
     self.name = name
     if not metrics_prefix:
         metrics_prefix = "noc"
     metrics_prefix += ".pool.%s" % name
     self.metrics = MetricsHub(
         metrics_prefix,
         "threads.running",
         "threads.idle",
         "queue.len"
     )
     self.start_threads = start_threads
     self.max_threads = max_threads
     self.min_spare = min_spare
     self.max_spare = max_spare
     self.backlog = backlog if backlog else max_threads
     self.t_lock = Lock()
     self.threads = set()
     self.queue = Queue(self.backlog)
     self.stopping = False
     self.stopped = Event()
     self.n_idle = 0
     self.idle_lock = Lock()
     self.logger.info("Running thread pool '%s'", self.name)
     self.set_idle(None)
Example #8
 def __init__(self, scheduler, key=None, data=None, schedule=None):
     self.scheduler = scheduler
     self.key = key
     self.data = data or {}
     self.schedule = schedule or {}
     self.object = None  # Set by dereference()
     self.started = None  # Timestamp
     self._log = []
     self.on_complete = []  # List of (job_name, key)
     # to launch on complete
     self.to_log = scheduler and scheduler.to_log_jobs
     self.job_log = []
     self.logger = PrefixLoggerAdapter(
         logger, "%s][%s][%s" %
         (self.scheduler.name, self.name, self.get_display_key()))
     if scheduler.to_log_jobs:
         self.logger = TeeLoggerAdapter(self.logger, self.job_log)
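
TeeLoggerAdapter itself is not shown in these examples; judging from its use here, it appears to mirror every record into the job_log list while still delegating to the wrapped logger. A hypothetical sketch of that behavior:

import logging

class TeeLoggerAdapter(logging.LoggerAdapter):
    # Hypothetical sketch: copy each (unformatted) message into a sink list;
    # the real adapter may capture the fully rendered record instead
    def __init__(self, logger, sink):
        super(TeeLoggerAdapter, self).__init__(logger, {})
        self.sink = sink  # e.g. Job.job_log

    def process(self, msg, kwargs):
        self.sink.append(msg)
        return msg, kwargs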
Example #9
 def configure(self, uuid, handler, interval, metrics, config,
               managed_object, **kwargs):
     if not self.uuid:
         self.logger = PrefixLoggerAdapter(logger, uuid)
     self.uuid = uuid
     self.handler_name = handler
     nh = probe_registry.get_handler(handler)
     if nh != self.handler:
         self.handler = nh
         self.probe = nh.im_class(self.daemon, self)
     if interval != self.interval:
         # Change offset
         self.offset = interval * random.random()
         self.interval = interval
         self.next_run = self.get_next_run()
         if not self.running:
             self.daemon.reschedule(self)
     self.config = config
     # Apply metrics
     if self.metrics != metrics:
         self.metrics = metrics
         c = set(self.mdata)
         n = set(m["metric_type"] for m in metrics)
         # Remove metrics
         for m in c - n:
             del self.mdata[m]
         # Create metrics
         for m in n - c:
             self.mdata[m] = Metric(self.daemon)
         # Configure metrics
         for m in metrics:
             m["managed_object"] = managed_object
             self.mdata[m["metric_type"]].configure(**m)
         if len(metrics) == 1:
             self.default_metric_type = metrics[0]["metric_type"]
         else:
             self.default_metric_type = None
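
The metric reconciliation above is a plain set difference: metrics present only in the current set are torn down, metrics present only in the new set are created, and every metric in the new list is then (re)configured. A toy illustration of the same pattern:

current = set(["cpu.load", "mem.used"])  # metrics already instantiated
new = set(["cpu.load", "fs.free"])       # metrics in the fresh config
to_remove = current - new                # set(["mem.used"]) -> del self.mdata[m]
to_create = new - current                # set(["fs.free"])  -> Metric(daemon)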
Example #10
class Probe(object):
    __metaclass__ = ProbeBase
    # Form class JS file name

    # Human-readable probe title.
    # Meaningful only for human-configurable probes
    TITLE = None
    # Human-readable description
    # Meaningful only for human-configurable probes
    DESCRIPTION = None
    # Human-readable tags for plugin classification.
    # List of strings
    # Meaningful only for human-configurable probes
    TAGS = []
    # Either list of field configuration or
    # string containing full JS class name
    # Meaningful only for human-configurable probes
    CONFIG_FORM = None

    SNMP_v2c = noc.lib.snmp.consts.SNMP_v2c

    INVALID_OID_TTL = 3600

    def __init__(self, daemon, task):
        self.daemon = daemon
        self.task = task
        self.missed_oids = {}  # oid -> expire time
        self.logger = PrefixLoggerAdapter(logging.getLogger(self.__module__),
                                          self.task.uuid)

    def disable(self):
        raise NotImplementedError()

    def is_missed_oid(self, oid):
        t = self.missed_oids.get(oid)
        if t:
            if t > time.time():
                return True
            else:
                del self.missed_oids[oid]
        return False

    def set_missed_oid(self, oid):
        self.logger.info("Disabling missed oid %s", oid)
        self.missed_oids[oid] = time.time() + self.INVALID_OID_TTL

    def set_convert(self, metric, convert=None, scale=None):
        """
        Change metric conversions
        """
        self.task.set_metric_convert(metric, convert, scale)

    def snmp_get(self,
                 oids,
                 address,
                 port=161,
                 community="public",
                 version=SNMP_v2c):
        """
        Perform SNMP request to one or more OIDs.
        oids can be string or dict.
        When oid is string returns value
        When oid is dict of <metric type> : oid, returns
        dict of <metric type>: value
        """
        if isinstance(oids, basestring):
            if self.is_missed_oid(oids):
                return None  # Missed oid
        elif isinstance(oids, dict):
            oids = dict((k, v) for k, v in oids.iteritems()
                        if not self.is_missed_oid(v))
            if not oids:
                return None  # All oids are missed
        try:
            result = self.daemon.io.snmp_get(oids,
                                             address,
                                             port,
                                             community=community,
                                             version=version)
        except SNMPError, why:
            if why.code == NO_SUCH_NAME:
                # Disable invalid oid
                self.set_missed_oid(why.oid)
            return None
        if isinstance(result, dict):
            for k in result:
                if result[k] is None:
                    self.set_missed_oid(oids[k])
        return result
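
A usage sketch of the two calling conventions described in the snmp_get docstring, assuming probe is a configured Probe instance (the address, community and OIDs below are invented for illustration):

# Single OID -> scalar value, or None if the OID was marked as missed
uptime = probe.snmp_get("1.3.6.1.2.1.1.3.0", "192.0.2.1", community="public")

# Dict of <metric type> -> OID; returns dict of <metric type> -> value
rates = probe.snmp_get({
    "Interface | Load | In": "1.3.6.1.2.1.2.2.1.10.1",
    "Interface | Load | Out": "1.3.6.1.2.1.2.2.1.16.1"
}, "192.0.2.1")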
Example #11
 def __init__(self, daemon, name):
     self.daemon = daemon
     self.name = name
     self.logger = PrefixLoggerAdapter(daemon.logger, name)
     self.logger.info("Starting %s (%s)", name, self.type)
     self.cmd_queue = []
Example #12
class DaemonData(object):
    """
    Daemon wrapper
    """
    def __init__(self, name, is_superuser, enabled, user, uid, group, gid,
                 instance_id, config_path):
        self.logger = PrefixLoggerAdapter(logger,
                                          "%s#%s" % (name, instance_id))
        self.logger.info("Reading config")
        self.instance_id = instance_id
        self.name = name
        self.config_path = config_path
        self.config = ConfigParser.SafeConfigParser()
        self.config.read("etc/%s.defaults" % name)
        self.config.read(config_path)
        self.enabled = enabled
        self.pid = None
        self.pidfile = self.config.get("main", "pidfile")\
            .replace("{{instance}}", self.instance_id)
        self.is_superuser = is_superuser
        self.user = user
        self.uid = uid
        self.group = group
        self.gid = gid

    def __repr__(self):
        return "<DaemonData %s>" % self.name

    def launch(self):
        """
        Launch daemon
        """
        logger.info("Launching")
        try:
            pid = os.fork()
        except OSError, e:
            self.logger.error("Fork failed: %s(%s)", e.strerror, e.errno)
            return
        if pid:
            self.pid = pid
            self.logger.info("Daemon started as PID %d", self.pid)
        else:
            # Run child
            try:
                if self.group:
                    os.setgid(self.gid)
                    os.setegid(self.gid)
                if self.user:
                    os.setuid(self.uid)
                    os.seteuid(self.uid)
                    # Set up EGG Cache to prevent permissions problem in python 2.6
                    os.environ[
                        "PYTHON_EGG_CACHE"] = "/tmp/.egg-cache%d" % self.uid
                    # Adjust HOME and USER environment variables
                    os.environ["USER"] = self.user
                    os.environ["HOME"] = pwd.getpwuid(self.uid).pw_dir
                os.execv(sys.executable, [
                    sys.executable,
                    "./scripts/%s.py" % self.name, "launch", "-c",
                    self.config_path, "-i", self.instance_id
                ])
            except OSError, e:
                self.logger.error("OS Error: %s(%s)", e.strerror, e.errno)
                sys.exit(1)
Example #13
class Scheduler(object):
    COLLECTION_BASE = "noc.schedules."
    ATTR_TS = "ts"
    ATTR_CLASS = "jcls"
    ATTR_STATUS = "s"
    ATTR_TIMEOUT = "timeout"
    ATTR_KEY = "key"
    ATTR_DATA = "data"
    ATTR_SCHEDULE = "schedule"
    ATTR_LAST = "last"  # last run
    ATTR_LAST_STATUS = "ls"  # last completion status
    ATTR_LAST_DURATION = "ldur"  # last job duration
    ATTR_LAST_SUCCESS = "st"  # last success timestamp
    ATTR_RUNS = "runs"  # Number of runs
    ATTR_TRACEBACK = "tb"  # Last error traceback
    ATTR_LOG = "log"  # Job log
    ATTR_FAULTS = "f"  # Number of sequential faults
    # ATTR_STATUS values
    S_WAIT = "W"  # Waiting to run
    S_RUN = "R"   # Running
    S_STOP = "S"  # Stopped by operator
    S_DISABLED = "D"  # Disabled by system

    JobExists = JobExists

    IGNORE_MRT_CODES = set([
        12,  # ERR_OVERLOAD
        15,  # ERR_ACTIVATOR_NOT_AVAILABLE
        16,  # ERR_DOWN
        18,  # ERR_ACTIVATOR_LOST
        24,  # ERR_SHARD_IS_DOWN
    ])

    def __init__(self, name, cleanup=None, reset_running=False,
                 initial_submit=False, max_threads=None,
                 preserve_order=False, max_faults=None,
                 mrt_limit=None):
        self.logger = PrefixLoggerAdapter(logger, name)
        self.name = name
        self.job_classes = {}
        self.collection_name = self.COLLECTION_BASE + self.name
        self.collection = get_db()[self.collection_name]
        self.active_mrt = {}  # ReduceTask -> Job instance
        self.cleanup_callback = cleanup
        self.reset_running = reset_running
        self.ignored = []
        self.initial_submit = initial_submit
        self.initial_submit_next_check = {}  # job class -> timestamp
        self.max_threads = max_threads
        self.preserve_order = preserve_order
        self.max_faults = max_faults
        self.mrt_limit = mrt_limit
        self.mrt_overload = False
        self.running_lock = threading.Lock()
        self.running_count = defaultdict(int)  # Group -> Count
        self.log_jobs = None
        self.metrics = MetricsHub(
            "noc.scheduler.%s" % name,
            "jobs.count",
            "jobs.success",
            "jobs.failed",
            "jobs.dereference.count",
            "jobs.dereference.success",
            "jobs.dereference.failed",
            "jobs.time"
        )

    def ensure_indexes(self):
        if self.preserve_order:
            k = [("ts", 1), ("_id", 1)]
        else:
            k = [("ts", 1)]
        self.logger.debug("Checking indexes: %s", ", ".join(x[0] for x in k))
        self.collection.ensure_index(k)
        self.logger.debug("Checking indexes: jcls, key")
        self.collection.ensure_index([("jcls", 1), ("key", 1)])
        self.logger.debug("Checking indexes: s, ts, jcls")
        self.collection.ensure_index([("s", 1), ("ts", 1), ("jcls", 1)])
        self.logger.debug("Checking indexes: s, key")
        self.collection.ensure_index([("s", 1), ("key", 1)])
        self.logger.debug("Indexes are ready")

    def debug(self, msg):
        warnings.warn("Using deprecated Scheduler.debug() method",
                      DeprecationWarning, stacklevel=2)
        self.logger.debug(msg)

    def info(self, msg):
        warnings.warn("Using deprecated Scheduler.info() method",
                      DeprecationWarning, stacklevel=2)
        self.logger.info(msg)

    def error(self, msg):
        warnings.warn("Using deprecated Scheduler.error() method",
                      DeprecationWarning, stacklevel=2)
        self.logger.error(msg)

    def register_job_class(self, cls):
        if not cls.name:
            return  # Abstract classes
        s = " (ignored)" if cls.ignored else ""
        self.logger.info("Registering job class: %s%s", cls.name, s)
        self.job_classes[cls.name] = cls
        # Set up ignored jobs
        if cls.ignored:
            self.ignored += [cls.name]
        else:
            # Initialize job class
            cls.initialize(self)
            # Register initial submit handlers
            if (self.initial_submit and
                hasattr(cls, "initial_submit") and
                callable(cls.initial_submit) and
                hasattr(cls, "initial_submit_interval")):
                self.initial_submit_next_check[cls] = time.time()

    def register_all(self, path, exclude=None):
        """
        Register all Job classes defined within a directory
        :param path: Directory to scan for job modules
        :param exclude: Optional list of file names to skip
        """
        exclude = exclude or []
        if not os.path.isdir(path):
            raise ValueError("'%s' must be a directory" % path)
        mr = "noc.%s." % ".".join(path.split(os.sep))
        for f in os.listdir(path):
            if f in exclude or not f.endswith(".py"):
                continue
            mn = mr + f[:-3]  # Full module name
            m = __import__(mn, {}, {}, "*")
            for on in dir(m):
                o = getattr(m, on)
                if (inspect.isclass(o) and issubclass(o, Job) and
                    o.__module__.startswith(mn)):
                    self.register_job_class(o)

    def get_job_class(self, name):
        return self.job_classes[name]

    def submit(self, job_name, key=None, data=None,
               schedule=None, ts=None):
        """
        Submit new job
        """
        if ts is None:
            ts = datetime.datetime.now()
        elif type(ts) in (int, long, float):
            ts = (datetime.datetime.now() +
                  datetime.timedelta(seconds=ts))
        # Check that the job does not already exist
        if key is not None:
            if self.collection.find_one({
                self.ATTR_CLASS: job_name,
                self.ATTR_KEY: key
            }):
                raise JobExists()
        # Submit job
        id = self.collection.insert({
            self.ATTR_TS: ts,
            self.ATTR_CLASS: job_name,
            self.ATTR_STATUS: self.S_WAIT,
            self.ATTR_KEY: key,
            self.ATTR_DATA: data,
            self.ATTR_SCHEDULE: schedule
        }, manipulate=True, safe=True)
        self.logger.info("Scheduling job %s(%s) id=%s at %s",
            job_name, key, id, ts)

    def remove_job(self, job_name, key):
        self.logger.info("Removing job %s(%s)", job_name, key)
        self.collection.remove({
            self.ATTR_CLASS: job_name,
            self.ATTR_KEY: key
        }, safe=True)

    def reschedule_job(self, job_name, key, ts, status=None,
                       duration=None, last_status=None, tb=None,
                       log=None, update_runs=False,
                       skip_running=False, faults=None):
        self.logger.info("Rescheduling job %s(%s) to %s%s",
            job_name, key, ts, " status=%s" % status if status else "")
        q = {
            self.ATTR_CLASS: job_name,
            self.ATTR_KEY: key
        }
        if skip_running:
            q[self.ATTR_STATUS] = self.S_WAIT
        s = {
            self.ATTR_TS: ts,
            self.ATTR_TRACEBACK: tb,
            self.ATTR_LOG: log or []
        }
        if status:
            s[self.ATTR_STATUS] = status
        if last_status:
            s[self.ATTR_LAST_STATUS] = last_status
            if last_status == Job.S_SUCCESS:
                s[self.ATTR_LAST_SUCCESS] = datetime.datetime.now()
        if duration is not None:
            s[self.ATTR_LAST_DURATION] = duration
        if faults is not None:
            s[self.ATTR_FAULTS] = faults
        op = {"$set": s}
        if update_runs:
            op["$inc"] = {self.ATTR_RUNS: 1}
        self.collection.update(q, op, safe=True)

    def set_job_status(self, job_name, key, status):
        self.logger.info("Changing %s(%s) status to %s",
            job_name, key, status)
        self.collection.update({
            self.ATTR_CLASS: job_name,
            self.ATTR_KEY: key
        }, {
            "$set": {self.ATTR_STATUS: status}
        }, safe=True)

    def run_job(self, job):
        """
        Begin job execution
        :param job:
        :return:
        """
        # Dereference job
        self.metrics.jobs_dereference_count += 1
        if not job.dereference():
            self.logger.info("Cannot dereference job %s(%s). Removing",
                job.name, job.key)
            self.remove_job(job.name, job.key)
            self.metrics.jobs_dereference_failed += 1
            return
        self.metrics.jobs_dereference_success += 1
        # Check threaded jobs limit
        if job.threaded and self.max_threads:
            if threading.active_count() >= self.max_threads:
                return
        # Check job can be run
        job.started = time.time()
        if not job.can_run():
            job.logger.debug("Deferred")
            self._complete_job(job, job.S_DEFERRED, None)
            return
        # Change status
        s = "threaded " if job.threaded else ""
        job.logger.info("Running %sjob", s)
        self.collection.update({
            self.ATTR_CLASS: job.name,
            self.ATTR_KEY: job.key
        }, {"$set": {
            self.ATTR_STATUS: self.S_RUN,
            self.ATTR_LAST: datetime.datetime.fromtimestamp(job.started)
        }})
        #
        if job.map_task:
            if job.beef and job.key in job.beef:
                # Do not run job, provide beef instead
                self._run_job_handler(
                    job,
                    object=job.get_managed_object(),
                    result=job.beef[job.key])
            else:
                job.logger.info("Running script %s", job.map_task)
                # Run in MRT mode
                t = ReduceTask.create_task(
                    job.get_managed_object(),  # Managed object is in key
                    None, {},
                    job.map_task, job.get_map_task_params()
                )
                self.active_mrt[t] = job
        else:
            self._run_job_handler(job)

    def _run_job_handler(self, job, **kwargs):
        if job.threaded:
            t = threading.Thread(target=self._job_wrapper,
                args=(job,), kwargs=kwargs
            )
            t.daemon = True
            t.start()
        else:
            return self._job_wrapper(job, **kwargs)

    def _job_wrapper(self, job, **kwargs):
        tb = None
        t0 = time.time()
        job.logger.info("Running job handler")
        try:
            r = job.handler(**kwargs)
        except Exception:
            # error_report()
            tb = get_traceback()
            job.error(tb)
            job.on_exception()
            s = job.S_EXCEPTION
        else:
            if r:
                job.logger.info("Job completed successfully (%.2fms)",
                                (time.time() - t0) * 1000)
                job.on_success()
                s = job.S_SUCCESS
            else:
                job.logger.info("Job failed (%fsec)",
                    time.time() - t0
                )
                job.on_failure()
                s = job.S_FAILED
        self._complete_job(job, s, tb)

    def _complete_job(self, job, status, tb):
        self.metrics.jobs_time.timer(self.name, job.name, job.key).log(
            job.started, time.time(), status)
        if self.to_log_jobs:
            path = os.path.join(self.log_jobs, job.name, str(job.key))
            safe_rewrite(path, job.get_job_log())
        group = job.get_group()
        if group is not None:
            with self.running_lock:
                self.running_count[group] -= 1
                if not self.running_count[group]:
                    del self.running_count[group]
        on_complete = job.on_complete
        t = job.get_schedule(status)
        if t is None:
            # Unschedule job
            self.remove_job(job.name, job.key)
        else:
            # Reschedule job
            t1 = time.time()
            if self.max_faults and status in (Job.S_FAILED, Job.S_EXCEPTION):
                code = None
                if type(tb) == dict:
                    code = tb.get("code")
                if code in self.IGNORE_MRT_CODES:
                    fc = None  # Ignore temporary errors
                    next_status = self.S_WAIT
                else:
                    # Get fault count
                    fc = self.get_faults(job.name, job.key) + 1
                    if fc >= self.max_faults:  # Disable job
                        next_status = self.S_DISABLED
                        self.logger.info("Disabling job %s(%s) due to %d sequential faults",
                            job.name, job.key, fc)
                    else:
                        next_status = self.S_WAIT
            else:
                next_status = self.S_WAIT
                fc = 0
            self.reschedule_job(
                job.name, job.key, t,
                status=next_status,
                last_status=status,
                duration=t1 - job.started,  # @todo: maybe error
                tb=tb,
                update_runs=True,
                faults=fc
            )
        # Reschedule jobs that must be executed on completion
        for job_name, key in on_complete:
            ts = datetime.datetime.now()
            self.reschedule_job(job_name, key, ts, skip_running=True)

    def complete_mrt_job(self, t):
        job = self.active_mrt.pop(t)
        for m in t.maptask_set.all():
            if m.status == "C":
                self._run_job_handler(job, object=m.managed_object,
                    result=m.script_result)
            else:
                self.logger.info("Job %s(%s) has failed",
                    job.name, job.get_display_key())
                self._complete_job(job, job.S_FAILED, m.script_result)
        t.delete()

    def iter_pending_jobs(self):
        """
        Iterate pending jobs
        """
        q = {
            self.ATTR_TS: {"$lte": datetime.datetime.now()},
            self.ATTR_STATUS: self.S_WAIT
        }
        if self.ignored:
            q[self.ATTR_CLASS] = {"$nin": self.ignored}
        # Get remaining pending tasks
        qs = self.collection.find(q)
        if self.preserve_order:
            qs = qs.sort([(self.ATTR_TS, 1), ("_id", 1)])
        else:
            qs = qs.sort(self.ATTR_TS)
        try:
            for job in qs.batch_size(100):
                yield job
        except pymongo.errors.CursorNotFound:
            self.logger.info("Server cursor timed out. Waiting for next cycle")
        except pymongo.errors.OperationFailure, why:
            self.logger.error("Operation failure: %s", why)
            self.logger.error("Trying to recover")
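
A usage sketch tying the scheduler pieces together (the scheduler name, jobs directory, job class name and key are hypothetical):

scheduler = Scheduler("inv.discovery", initial_submit=True, max_threads=10)
scheduler.register_all(os.path.join("inv", "discovery", "jobs"))
scheduler.ensure_indexes()
try:
    # Schedule version_discovery for object 42 to run in 60 seconds
    scheduler.submit("version_discovery", key=42, ts=60)
except scheduler.JobExists:
    pass  # A waiting job with the same class and key already exists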
Example #14
class Collection(object):
    TRANSLATIONS = {}
    ALLOW_FUZZY = {}
    COLLECTIONS = {}
    COLLECTION_ORDER = []

    def __init__(self, name, local=False):
        self.logger = PrefixLoggerAdapter(logger, name)
        if name not in self.COLLECTIONS:
            self.logger.error("Invalid collection '%s'", name)
            raise ValueError("Invalid collection '%s'" % name)
        m, c = name.split(".", 1)
        self.module = m
        self.cname = name
        self.name = c
        self.local = local
        self.doc = self.COLLECTIONS[name]
        self.items = {}  # uuid -> CollectionItem
        self.changed = False
        self.ref_cache = {}
        self.partial = set()
        if hasattr(self.doc, "name"):
            # Use .name field when present
            self.get_name = attrgetter("name")
        else:
            # Or first unique field otherwise
            uname = None
            for spec in self.doc._meta["index_specs"]:
                if spec["unique"] and len(spec["fields"]) == 1:
                    uname = spec["fields"][0][0]
            if not uname:
                self.logger.error("Cannot find unique index")
                raise ValueError("No unique index")
            self.get_name = attrgetter(uname)
        self.translations = self.TRANSLATIONS.get(name,
                                                  self.TRANSLATIONS[None])

    def __unicode__(self):
        return self.name

    def die(self, msg):
        raise ValueError(msg)

    def get_collection_path(self):
        if self.local:
            return os.path.join("local", "collections", self.module,
                                self.name + ".csv")
        else:
            return os.path.join(self.module, "collections", self.name,
                                "manifest.csv")

    def get_item_path(self, mi):
        return os.path.join(self.module, "collections", self.name, mi.path)

    def load(self):
        """
        Load collection from CSV file
        """
        path = self.get_collection_path()
        if not os.path.exists(path):
            return
        with open(path) as f:
            reader = csv.reader(f)
            reader.next()  # Skip header
            for name, uuid, path, hash in reader:
                uuid = UUID(uuid)
                mi = CollectionItem(name=name, uuid=uuid, path=path, hash=hash)
                self.items[uuid] = mi

    def save(self):
        self.logger.info("Updating manifest")
        rows = sorted(
            ([r.name, r.uuid, r.path, r.hash] for r in self.items.values()),
            key=lambda x: x[0])
        rows = [["name", "uuid", "path", "hash"]] + rows
        out = StringIO()
        writer = csv.writer(out)
        writer.writerows(rows)
        safe_rewrite(self.get_collection_path(), out.getvalue(), mode=0644)
        # Update collection cache
        self.logger.info("Updating CollectionCache")
        CollectionCache.merge("%s.%s" % (self.module, self.name),
                              set(self.items))

    def load_item(self, mi):
        p = self.get_item_path(mi)
        if not os.path.exists(p):
            self.die("File not found: %s" % p)
        with open(p) as f:
            fdata = f.read()
            try:
                data = json_decode(fdata)
            except ValueError, why:
                self.die("Failed to read JSON file '%s': %s" % (p, why))
        if not isinstance(data, dict):
            self.die("Invalid JSON file: %s" % p)
        if self.get_hash(fdata) != mi.hash:
            self.die("Checksum mismatch for file '%s'" % p)
        return data
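
A usage sketch (the collection name is hypothetical and must be a key of Collection.COLLECTIONS):

c = Collection("fm.alarmseverities")
c.load()                    # read the manifest CSV into c.items
for mi in c.items.values():
    data = c.load_item(mi)  # parse and checksum-verify the item's JSON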
Example #15
class KeyValueStore(object):
    TABLE = "kv"

    def __init__(self, path, indexes=None, fields=None):
        self.logger = PrefixLoggerAdapter(logger, path)
        self.path = path
        self.fields = ["uuid"] + list(fields or [])
        self.indexes = indexes or []
        self.connect = None

    def get_connect(self):
        if not self.connect:
            is_empty = not os.path.exists(self.path)
            self.logger.info("Connecting to database")
            self.connect = sqlite3.connect(self.path)
            self.logger.debug("SQLite version %s", sqlite3.sqlite_version)
            if is_empty:
                self.logger.info("Formatting key-value store")
                c = self.connect.cursor()
                fields = ["%s TEXT" % f for f in self.fields]
                c.execute("CREATE TABLE %s(%s)" %
                          (self.TABLE, ",".join(fields)))
                for i in self.indexes:
                    self.logger.debug("Indexing %s", i)
                    c.execute("CREATE INDEX x_%s_%s ON %s(%s)" %
                              (self.TABLE, i, self.TABLE, i))
                self.connect.commit()
        return self.connect

    def commit(self):
        self.logger.debug("Commit")
        connect = self.get_connect()
        connect.commit()

    def cursor(self):
        connect = self.get_connect()
        return connect.cursor()

    def get(self, **kwargs):
        where = []
        args = []
        for k in kwargs:
            where += ["%s = ?" % k]
            args += [kwargs[k]]
        sql = "SELECT %s FROM %s WHERE %s" % (", ".join(
            self.fields), self.TABLE, " AND ".join(where))
        self.logger.debug("%s %s", sql, args)
        c = self.cursor()
        c.execute(sql, tuple(args))
        r = c.fetchone()
        if not r:
            return None
        return dict(zip(self.fields, r))

    def find(self, **kwargs):
        where = []
        args = []
        for k in kwargs:
            where += ["%s = ?" % k]
            args += [kwargs[k]]
        sql = "SELECT %s FROM %s" % (", ".join(self.fields), self.TABLE)
        if where:
            sql += " WHERE %s" % " AND ".join(where)
        self.logger.debug("%s %s", sql, args)
        c = self.cursor()
        c.execute(sql, tuple(args))
        data = []
        for r in c.fetchall():
            data += [dict(zip(self.fields, r))]
        return data

    def put(self, uuid, **kwargs):
        self.logger.debug("PUT: uuid=%s, %s", uuid, kwargs)
        if self.get(uuid=uuid):
            sop = []
            args = []
            for k in kwargs:
                sop += ["%s = ?" % k]
                args += [kwargs[k]]
            args += [uuid]
            sql = "UPDATE %s SET %s WHERE uuid=?" % (self.TABLE,
                                                     ", ".join(sop))
            self.logger.debug("%s %s", sql, args)
            c = self.cursor()
            c.execute(sql, tuple(args))
        else:
            sf = ["uuid"]
            args = [uuid]
            for k in kwargs:
                sf += [k]
                args += [kwargs[k]]
            c = self.cursor()
            c.execute(
                "INSERT INTO %s(%s) VALUES(%s)" %
                (self.TABLE, ", ".join(sf), ", ".join(["?"] *
                                                      (len(kwargs) + 1))),
                tuple(args))
        self.commit()

    def delete(self, uuid):
        self.logger.debug("DELETE %s", uuid)
        sql = "DELETE FROM %s WHERE uuid=?" % self.TABLE
        self.logger.debug("%s %s", sql, (uuid, ))
        c = self.cursor()
        c.execute(sql, (uuid, ))
        self.commit()
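
A usage sketch (the path and values are invented; note that uuid is always the first field and the primary lookup key):

store = KeyValueStore("/tmp/sync.db", indexes=["name"],
                      fields=["name", "hash"])
store.put("6ba7b810-9dad-11d1-80b4-00c04fd430c8", name="r1", hash="d41d8cd9")
store.get(uuid="6ba7b810-9dad-11d1-80b4-00c04fd430c8")
# -> {"uuid": "6ba7b810-...", "name": "r1", "hash": "d41d8cd9"}
store.find(name="r1")  # -> list of matching rows as dicts
store.delete("6ba7b810-9dad-11d1-80b4-00c04fd430c8")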
Example #16
class Pool(object):
    def __init__(self, name="pool", metrics_prefix=None,
                 start_threads=1, max_threads=10,
                 min_spare=1, max_spare=1, backlog=0):
        if min_spare > max_spare:
            raise ValueError("min_spare (%d) must not be greater"
                             " than max_spare (%d)" % (min_spare,
                                                       max_spare))
        if start_threads > max_threads:
            raise ValueError("start_threads (%d) must not be greater"
                             " than max_threads (%d)" % (start_threads,
                                                         max_threads))
        self.logger = PrefixLoggerAdapter(logger, name)
        self.name = name
        if not metrics_prefix:
            metrics_prefix = "noc"
        metrics_prefix += ".pool.%s" % name
        self.metrics = MetricsHub(
            metrics_prefix,
            "threads.running",
            "threads.idle",
            "queue.len"
        )
        self.start_threads = start_threads
        self.max_threads = max_threads
        self.min_spare = min_spare
        self.max_spare = max_spare
        self.backlog = backlog if backlog else max_threads
        self.t_lock = Lock()
        self.threads = set()
        self.queue = Queue(self.backlog)
        self.stopping = False
        self.stopped = Event()
        self.n_idle = 0
        self.idle_lock = Lock()
        self.logger.info("Running thread pool '%s'", self.name)
        self.set_idle(None)

    def set_idle(self, status):
        with self.idle_lock:
            if status is not None:
                self.n_idle += 1 if status else -1
            n = len(self.threads)
            self.metrics.threads_idle = self.n_idle
            self.metrics.threads_running = n
            self.metrics.queue_len = self.queue.qsize()
            if (not status and self.n_idle < self.min_spare and
                    n < self.max_threads):
                # Run additional thread
                w = Worker(self, self.queue)
                self.threads.add(w)
                w.start()
            elif status and (self.n_idle > self.max_spare or n > self.max_threads):
                # Stop one thread
                self.queue.put(None)

    def thread_done(self, t):
        with self.t_lock:
            if t in self.threads:
                self.threads.remove(t)
            if self.stopping and not len(self.threads):
                self.stopped.set()

    def get_status(self):
        s = []
        t = time.time()
        with self.t_lock:
            for w in self.threads:
                if w.is_idle:
                    s += [{
                        "id": w.ident,
                        "status": "IDLE"
                    }]
                else:
                    s += [{
                        "id": w.ident,
                        "status": "RUN",
                        "title": w.title,
                        "start": w.start_time,
                        "duration": t - w.start_time
                    }]
        return s

    def stop(self, timeout=3):
        self.stopping = True
        with self.t_lock:
            n = len(self.threads)
            if not n:
                return  # Stopped
            for i in range(n):
                self.queue.put(None)  # Send shutdown signals
        # Wait for clean stop
        self.stopped.wait(timeout)
        if self.stopped.is_set():
            return
        # Forcefully cancel
        with self.t_lock:
            for t in self.threads:
                if t.is_alive():
                    t.cancel()
        time.sleep(timeout)

    def run(self, title, target, args=(), kwargs={}):
        if self.stopping:
            return
        self.queue.put((title, target, args, kwargs))

    def configure(self, max_threads=None, min_spare=None,
                  max_spare=None, backlog=None):
        if max_threads is not None:
            self.max_threads = max_threads
        if min_spare is not None:
            self.min_spare = min_spare
        if max_spare is not None:
            self.max_spare = max_spare
        if backlog is not None:
            self.backlog = backlog
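
A usage sketch (the Worker class that consumes the queue is not shown in this excerpt; do_ping is a hypothetical callable):

pool = Pool(name="mrt", start_threads=2, max_threads=8,
            min_spare=1, max_spare=2)
pool.run("ping 192.0.2.1", target=do_ping, args=("192.0.2.1",))
# ... on shutdown:
pool.stop(timeout=5)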
Example #17
class SyncHandler(object):
    type = None  # Set to handler type
    # DictParameter instance used to parse and clean config
    config = {}

    def __init__(self, daemon, name):
        self.daemon = daemon
        self.name = name
        self.logger = PrefixLoggerAdapter(daemon.logger, name)
        self.logger.info("Starting %s (%s)", name, self.type)
        self.cmd_queue = []

    def configure(self, **kwargs):
        pass

    def close(self):
        """
        Called when handler is closed
        """
        pass

    def on_create(self, uuid, data):
        """
        Object first seen
        """
        pass

    def on_delete(self, uuid):
        """
        Object removed
        """
        pass

    def on_change(self, uuid, data):
        """
        Object changed
        """
        pass

    def on_configuration_done(self):
        """
        End of configuration round
        """
        for c in self.cmd_queue:
            self.run_command(c)
        self.cmd_queue = []

    def get_command(self, cmd, **ctx):
        for v in ctx:
            cmd = cmd.replace("{%s}" % v, str(ctx[v]))
        return cmd

    def queue_command(self, cmd, once=False, **ctx):
        if not cmd:
            return
        cmd = self.get_command(cmd, **ctx)
        if not once or cmd not in self.cmd_queue:
            self.logger.debug("Queueing command: %s", cmd)
            self.cmd_queue += [cmd]

    def run_command(self, cmd, **ctx):
        """
        Run shell command with given context
        """
        if not cmd:
            return
        cmd = self.get_command(cmd, **ctx)
        self.logger.info("Running '%s'", cmd)
        p = subprocess.Popen(cmd,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        output = p.communicate()[0]  # Read all output, then wait; avoids pipe deadlock
        if output:
            self.logger.debug("Output:\n%s", output)
        if p.returncode == 0:
            self.logger.debug("Success")
        else:
            self.logger.info("Failed (retcode %s)", p.returncode)
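
get_command() performs plain "{name}" substitution from the keyword context, so handler configurations can carry shell command templates. For example, assuming handler is a SyncHandler subclass instance (the command and zone are invented):

cmd = handler.get_command("rndc reload {zone}", zone="example.com")
# cmd == "rndc reload example.com"
handler.queue_command("rndc reconfig", once=True)  # deduplicated until
                                                   # on_configuration_done() runs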
Example #18
class Socket(object):
    """
    Abstract non-blocking socket wrapper
    """
    TTL = None  # maximum time to live in seconds
    READ_CHUNK = 65536  # @todo: configuration parameter
    CLOSE_ON_ERROR = True  # Call .close() from .on_error()

    def __init__(self, factory, socket=None):
        self.logger = PrefixLoggerAdapter(logger, self.get_label())
        self.factory = factory
        self.socket = socket
        self.start_time = time.time()
        self.last_read = self.start_time + 100  # @todo: Meaningful value
        self.name = None
        self.closing = False  # In closing state
        self.stale = False  # Closed as stale
        self.ttl = self.TTL
        self.set_timeout(self.TTL)
        self.factory.register_socket(self)
        if socket:
            self.set_status(r=True)

    def __repr__(self):
        return "<%s(0x%x, %s)>" % (
            self.__class__.__name__, id(self),
            ", ".join(self.get_flags()))

    def get_label(self):
        return self.__class__.__name__

    def get_flags(self):
        """
        Returns list of flags
        :return:
        """
        if not hasattr(self, "socket"):
            return ["init"]
        f = []
        if self.closing:
            f += ["closing"]
        if self.stale:
            f += ["stale"]
        return f

    def create_socket(self):
        """
        Performs actual socket creation and initialization
        and puts the socket into nonblocking mode.
        """
        if not self.socket_is_ready():  # Socket was not created
            raise SocketNotImplemented()
        self.socket.setblocking(0)
        self.set_status(r=True)
        self.update_status()

    def set_timeout(self, ttl):
        """
        Change socket timeout

        :param ttl: Timeout in seconds
        :type ttl: int
        """
        if ttl and ttl != self.ttl:
            self.logger.debug("Set timeout to %s secs", ttl)
            self.ttl = ttl

    def socket_is_ready(self):
        """
        Check socket is created and ready for operation

        :rtype: bool
        """
        return self.socket is not None

    def fileno(self):
        """
        Get socket system file id

        :return: file id or None
        :rtype: int or None
        """
        return self.socket.fileno() if self.socket else None

    def handle_read(self):
        """
        Read handler. Called every time the socket has data
        available to be read.
        """
        pass

    def handle_write(self):
        """
        Write handler. Called every time the socket is ready
        to accept outgoing data.
        """
        pass

    def on_close(self):
        """
        Close handler. Called on socket close.
        """
        pass

    def on_error(self, exc):
        """
        Error handler. Called on any socket error.
        Default behavior is to emit an error message and close the socket.

        :param exc: SocketException
        """
        self.logger.error(exc.message)
        if self.CLOSE_ON_ERROR:
            self.close()

    def close(self):
        """
        Close socket and unregister from factory
        """
        if self.closing:
            return
        self.logger.debug("Closing socket")
        self.closing = True
        if self.socket:
            self.factory.unregister_socket(self)
            if self.socket:
                try:
                    self.socket.close()
                except socket.error, why:
                    if why[0] not in IGNORABLE_CLOSE_ERRORS:
                        error_report(logger=self.logger)
            self.socket = None
            self.on_close()
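
A minimal sketch of a concrete subclass, assuming the factory's I/O loop calls handle_read() when the descriptor becomes readable (the echo semantics are invented for illustration):

class EchoSocket(Socket):
    def get_label(self):
        return "echo"

    def handle_read(self):
        data = self.socket.recv(self.READ_CHUNK)
        if data:
            self.socket.send(data)  # echo received bytes back
        else:
            self.close()            # peer closed the connection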
Example #19
 def __init__(self, path, indexes=None, fields=None):
     self.logger = PrefixLoggerAdapter(logger, path)
     self.path = path
     self.fields = ["uuid"] + list(fields or [])
     self.indexes = indexes or []
     self.connect = None
Example #20
 def __init__(self, daemon, task):
     self.daemon = daemon
     self.task = task
     self.missed_oids = {}  # oid -> expire time
     self.logger = PrefixLoggerAdapter(logging.getLogger(self.__module__),
                                       self.task.uuid)
Example #21
def wipe(o):
    if o.profile_name.startswith("NOC."):
        return True
    log = PrefixLoggerAdapter(logger, str(o.id))
    # Delete active map tasks
    log.debug("Wiping MAP tasks")
    MapTask.objects.filter(managed_object=o).delete()
    # Wiping discovery tasks
    log.debug("Wiping discovery tasks")
    db = get_db()
    db.noc.schedules.inv.discovery.remove({"key": o.id})
    # Wiping FM events
    log.debug("Wiping events")
    NewEvent.objects.filter(managed_object=o.id).delete()
    FailedEvent.objects.filter(managed_object=o.id).delete()
    ActiveEvent.objects.filter(managed_object=o.id).delete()
    ArchivedEvent.objects.filter(managed_object=o.id).delete()
    # Wiping alarms
    log.debug("Wiping alarms")
    for ac in (ActiveAlarm, ArchivedAlarm):
        for a in ac.objects.filter(managed_object=o.id):
            # Relink root causes
            my_root = a.root
            for iac in (ActiveAlarm, ArchivedAlarm):
                for ia in iac.objects.filter(root=a.id):
                    ia.root = my_root
                    ia.save()
            # Delete alarm
            a.delete()
    # Wiping MAC DB
    log.debug("Wiping MAC DB")
    MACDB._get_collection().remove({"managed_object": o.id})
    # Wiping pending link check
    log.debug("Wiping pending link check")
    PendingLinkCheck._get_collection().remove({"local_object": o.id})
    PendingLinkCheck._get_collection().remove({"remote_object": o.id})
    # Wiping discovery id cache
    log.debug("Wiping discovery id")
    DiscoveryID._get_collection().remove({"object": o.id})
    # Wiping interfaces, subs and links
    # Wipe links
    log.debug("Wiping links")
    for i in Interface.objects.filter(managed_object=o.id):
        # @todo: Remove aggregated links correctly
        Link.objects.filter(interfaces=i.id).delete()
    #
    log.debug("Wiping subinterfaces")
    SubInterface.objects.filter(managed_object=o.id).delete()
    log.debug("Wiping interfaces")
    Interface.objects.filter(managed_object=o.id).delete()
    log.debug("Wiping forwarding instances")
    ForwardingInstance.objects.filter(managed_object=o.id).delete()
    # Unbind from IPAM
    log.debug("Unbind from IPAM")
    for a in Address.objects.filter(managed_object=o):
        a.managed_object = None
        a.save()
    # Wipe object status
    log.debug("Wiping object status")
    ObjectStatus.objects.filter(object=o.id).delete()
    # Wipe outages
    log.debug("Wiping outages")
    Outage.objects.filter(object=o.id).delete()
    # Wipe uptimes
    log.debug("Wiping uptimes")
    Uptime.objects.filter(object=o.id).delete()
    # Wipe reboots
    log.debug("Wiping reboots")
    Reboot.objects.filter(object=o.id).delete()
    # Delete Managed Object's capabilities
    log.debug("Wiping capabilities")
    ObjectCapabilities.objects.filter(object=o.id).delete()
    # Delete Managed Object's facts
    log.debug("Wiping facts")
    ObjectFact.objects.filter(object=o.id).delete()
    # Delete Managed Object's attributes
    log.debug("Wiping attributes")
    ManagedObjectAttribute.objects.filter(managed_object=o).delete()
    # Detach from validation rule
    log.debug("Detaching from validation rules")
    for vr in ValidationRule.objects.filter(objects_list__object=o.id):
        vr.objects_list = [x for x in vr.objects_list if x.object.id != o.id]
        if not vr.objects_list and not vr.selectors_list:
            vr.is_active = False
        vr.save()
    # Finally delete object and config
    log.debug("Finally wiping object")
    o.delete()
    log.debug("Done")