Example #1
class Engine(object):
    ILOCK = threading.Lock()
    AC_POLICY_VIOLATION = None

    def __init__(self, object):
        self.object = object
        self.logger = PrefixLoggerAdapter(logger, self.object.name)
        self.env = None
        self.templates = {}  # fact class -> template
        self.fcls = {}  # template -> Fact class
        self.facts = {}  # Index -> Fact
        self.rn = 0  # Rule number
        self.config = None  # Cached config
        self.interface_ranges = None
        with self.ILOCK:
            self.AC_POLICY_VIOLATION = AlarmClass.objects.filter(
                name="Config | Policy Violation").first()
            if not self.AC_POLICY_VIOLATION:
                logger.error("Alarm class 'Config | Policy Violation' is not found. Alarms cannot be raised")

    def get_template(self, fact):
        if fact.cls not in self.templates:
            self.logger.debug("Creating template %s", fact.cls)
            self.templates[fact.cls] = self.env.BuildTemplate(
                fact.cls, fact.get_template())
            self.fcls[fact.cls] = fact.__class__
            self.logger.debug("Define template %s",
                              self.templates[fact.cls].PPForm())
        return self.templates[fact.cls]

    def get_rule_number(self):
        return self.rn

    def assert_fact(self, fact):
        f = self.get_template(fact).BuildFact()
        f.AssignSlotDefaults()
        for k, v in fact.iter_factitems():
            if v is None or v == [] or v == tuple():
                continue
            if isinstance(v, basestring):
                v = v.replace("\n", "\\n")
            f.Slots[k] = v
        try:
            f.Assert()
        except clips.ClipsError, why:
            self.logger.error("Could not assert: %s", f.PPForm())
            self.logger.error(
                "CLIPS Error: %s\n%s",
                why,
                clips.ErrorStream.Read()
            )
            return
        self.facts[f.Index] = fact
        self.logger.debug("Assert %s", f.PPForm())
Example #2
class Socket(object):
    """
    Abstract non-blocking socket wrapper
    """
    TTL = None  # maximum time to live in seconds
    READ_CHUNK = 65536  # @todo: configuration parameter
    CLOSE_ON_ERROR = True  # Call .close() from .on_error()

    def __init__(self, factory, socket=None):
        self.logger = PrefixLoggerAdapter(logger, self.get_label())
        self.factory = factory
        self.socket = socket
        self.start_time = time.time()
        self.last_read = self.start_time + 100  # @todo: Meaningful value
        self.name = None
        self.closing = False  # In closing state
        self.stale = False  # Closed as stale
        self.ttl = self.TTL
        self.set_timeout(self.TTL)
        self.factory.register_socket(self)
        if socket:
            self.set_status(r=True)

    def __repr__(self):
        return "<%s(0x%x, %s)>" % (
            self.__class__.__name__, id(self),
            ", ".join(self.get_flags()))

    def get_label(self):
        return self.__class__.__name__

    def get_flags(self):
        """
        Returns list of socket state flags

        :return: List of flag names
        """
        if not hasattr(self, "socket"):
            return ["init"]
        f = []
        if self.closing:
            f += ["closing"]
        if self.stale:
            f += ["stale"]
        return f

    def create_socket(self):
        """
        Performs actual socket creation and initialization
        and puts the socket into non-blocking mode.
        """
        if not self.socket_is_ready():  # Socket was not created
            raise SocketNotImplemented()
        self.socket.setblocking(0)
        self.set_status(r=True)
        self.update_status()

    def set_timeout(self, ttl):
        """
        Change socket timeout

        :param ttl: Timeout in seconds
        :type ttl: int
        """
        if ttl and ttl != self.ttl:
            self.logger.debug("Set timeout to %s secs" % ttl)
            self.ttl = ttl

    def socket_is_ready(self):
        """
        Check socket is created and ready for operation

        :rtype: bool
        """
        return self.socket is not None

    def fileno(self):
        """
        Get socket system file id

        :return: file id or None
        :rtype: int or None
        """
        return self.socket.fileno() if self.socket else None

    def handle_read(self):
        """
        Read handler. Called every time the socket has data available
        to be read.
        """
        pass

    def handle_write(self):
        """
        Write handler. Called every time the socket is ready
        to accept data to be written.
        """
        pass

    def on_close(self):
        """
        Close handler. Called on socket close.
        """
        pass

    def on_error(self, exc):
        """
        Error handler. Called on any socket error.
        Default behavior is to emit an error message and close the socket.

        :param exc: SocketException
        """
        self.logger.error(exc.message)
        if self.CLOSE_ON_ERROR:
            self.close()

    def close(self):
        """
        Close socket and unregister from factory
        """
        if self.closing:
            return
        self.logger.debug("Closing socket")
        self.closing = True
        if self.socket:
            self.factory.unregister_socket(self)
            if self.socket:
                try:
                    self.socket.close()
                except socket.error, why:
                    if why[0] not in IGNORABLE_CLOSE_ERRORS:
                        error_report(logger=self.logger)
            self.socket = None
            self.on_close()
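
The wrapper above relies on the factory's poller: create_socket() puts the descriptor into non-blocking mode, and handle_read() is invoked whenever data is reported as readable. A concrete handle_read() therefore has to tolerate EAGAIN/EWOULDBLOCK; a minimal standalone sketch of that read pattern (plain socket/errno, independent of the factory machinery shown above) might be:

import errno
import socket

READ_CHUNK = 65536  # mirrors Socket.READ_CHUNK above


def read_nonblocking(sock):
    """
    Read one chunk from a non-blocking socket.
    Returns the bytes read, "" when the peer has closed the connection,
    or None when no data is available yet.
    """
    try:
        data = sock.recv(READ_CHUNK)
    except socket.error, why:
        if why.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
            return None  # Nothing to read right now, retry on the next poll
        raise  # Real error: let the caller's on_error() decide what to do
    return data
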
Example #3
class DaemonData(object):
    """
    Daemon wrapper
    """
    def __init__(self, name, is_superuser, enabled, user, uid, group, gid,
                 instance_id, config_path):
        self.logger = PrefixLoggerAdapter(logger,
                                          "%s#%s" % (name, instance_id))
        self.logger.info("Reading config")
        self.instance_id = instance_id
        self.name = name
        self.config_path = config_path
        self.config = ConfigParser.SafeConfigParser()
        self.config.read("etc/%s.defaults" % name)
        self.config.read(config_path)
        self.enabled = enabled
        self.pid = None
        self.pidfile = self.config.get("main", "pidfile")\
            .replace("{{instance}}", self.instance_id)
        self.is_superuser = is_superuser
        self.user = user
        self.uid = uid
        self.group = group
        self.gid = gid

    def __repr__(self):
        return "<DaemonData %s>" % self.name

    def launch(self):
        """
        Launch daemon
        """
        self.logger.info("Launching")
        try:
            pid = os.fork()
        except OSError, e:
            self.logger.error("Fork failed: %s(%s)", e.strerror, e.errno)
            return
        if pid:
            self.pid = pid
            self.logger.info("Daemon started as PID %d", self.pid)
        else:
            # Run child
            try:
                if self.group:
                    os.setgid(self.gid)
                    os.setegid(self.gid)
                if self.user:
                    os.setuid(self.uid)
                    os.seteuid(self.uid)
                    # Set up EGG Cache to prevent permissions problem in python 2.6
                    os.environ[
                        "PYTHON_EGG_CACHE"] = "/tmp/.egg-cache%d" % self.uid
                    # Adjust HOME and USER environment variables
                    os.environ["USER"] = self.user
                    os.environ["HOME"] = pwd.getpwuid(self.uid).pw_dir
                os.execv(sys.executable, [
                    sys.executable,
                    "./scripts/%s.py" % self.name, "launch", "-c",
                    self.config_path, "-i", self.instance_id
                ])
            except OSError, e:
                self.logger.error("OS Error: %s(%s)", e.strerror, e.errno)
                sys.exit(1)
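
DaemonData.__init__ reads etc/<name>.defaults first and the instance config second; with ConfigParser the later read() overrides any option present in both files, so the defaults file only needs to carry a complete baseline. A small illustration of that layering (the option values below are invented):

import ConfigParser
from StringIO import StringIO

# Stand-ins for etc/<name>.defaults and the instance-specific config
defaults = StringIO("[main]\npidfile = /var/run/noc/{{instance}}.pid\nloglevel = info\n")
local = StringIO("[main]\nloglevel = debug\n")

config = ConfigParser.SafeConfigParser()
config.readfp(defaults)  # baseline values
config.readfp(local)     # instance config overrides matching options

print config.get("main", "pidfile").replace("{{instance}}", "1")  # /var/run/noc/1.pid
print config.get("main", "loglevel")                              # debug
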
Example #4
class Scheduler(object):
    COLLECTION_BASE = "noc.schedules."
    ATTR_TS = "ts"
    ATTR_CLASS = "jcls"
    ATTR_STATUS = "s"
    ATTR_TIMEOUT = "timeout"
    ATTR_KEY = "key"
    ATTR_DATA = "data"
    ATTR_SCHEDULE = "schedule"
    ATTR_LAST = "last"  # last run
    ATTR_LAST_STATUS = "ls"  # last completion status
    ATTR_LAST_DURATION = "ldur"  # last job duration
    ATTR_LAST_SUCCESS = "st"  # last success timestamp
    ATTR_RUNS = "runs"  # Number of runs
    ATTR_TRACEBACK = "tb"  # Last error traceback
    ATTR_LOG = "log"  # Job log
    ATTR_FAULTS = "f"  # Number of sequential faults
    # ATTR_STATUS values
    S_WAIT = "W"  # Waiting to run
    S_RUN = "R"   # Running
    S_STOP = "S"  # Stopped by operator
    S_DISABLED = "D"  # Disabled by system

    JobExists = JobExists

    IGNORE_MRT_CODES = set([
        12,  # ERR_OVERLOAD
        15,  # ERR_ACTIVATOR_NOT_AVAILABLE
        16,  # ERR_DOWN
        18,  # ERR_ACTIVATOR_LOST
        24,  # ERR_SHARD_IS_DOWN
    ])

    def __init__(self, name, cleanup=None, reset_running=False,
                 initial_submit=False, max_threads=None,
                 preserve_order=False, max_faults=None,
                 mrt_limit=None):
        self.logger = PrefixLoggerAdapter(logger, name)
        self.name = name
        self.job_classes = {}
        self.collection_name = self.COLLECTION_BASE + self.name
        self.collection = get_db()[self.collection_name]
        self.active_mrt = {}  # ReduceTask -> Job instance
        self.cleanup_callback = cleanup
        self.reset_running = reset_running
        self.ignored = []
        self.initial_submit = initial_submit
        self.initial_submit_next_check = {}  # job class -> timestamp
        self.max_threads = max_threads
        self.preserve_order = preserve_order
        self.max_faults = max_faults
        self.mrt_limit = mrt_limit
        self.mrt_overload = False
        self.running_lock = threading.Lock()
        self.running_count = defaultdict(int)  # Group -> Count
        self.log_jobs = None
        self.metrics = MetricsHub(
            "noc.scheduler.%s" % name,
            "jobs.count",
            "jobs.success",
            "jobs.failed",
            "jobs.dereference.count",
            "jobs.dereference.success",
            "jobs.dereference.failed",
            "jobs.time"
        )

    def ensure_indexes(self):
        if self.preserve_order:
            k = [("ts", 1), ("_id", 1)]
        else:
            k = [("ts", 1)]
        self.logger.debug("Checking indexes: %s", ", ".join(x[0] for x in k))
        self.collection.ensure_index(k)
        self.logger.debug("Checking indexes: jcls, key")
        self.collection.ensure_index([("jcls", 1), ("key", 1)])
        self.logger.debug("Checking indexes: s, ts, jcls")
        self.collection.ensure_index([("s", 1), ("ts", 1), ("jcls", 1)])
        self.logger.debug("Checking indexes: key, s")
        self.collection.ensure_index([("s", 1), ("key", 1)])
        self.logger.debug("Indexes are ready")

    def debug(self, msg):
        warnings.warn("Using deprecated Scheduler.debug() method",
                      DeprecationWarning, stacklevel=2)
        self.logger.debug(msg)

    def info(self, msg):
        warnings.warn("Using deprecated Scheduler.info() method",
                      DeprecationWarning, stacklevel=2)
        self.logger.info(msg)

    def error(self, msg):
        warnings.warn("Using deprecated Scheduler.error() method",
                      DeprecationWarning, stacklevel=2)
        self.logger.error(msg)

    def register_job_class(self, cls):
        if not cls.name:
            return  # Abstract classes
        s = " (ignored)" if cls.ignored else ""
        self.logger.info("Registering job class: %s%s", cls.name, s)
        self.job_classes[cls.name] = cls
        # Set up ignored jobs
        if cls.ignored:
            self.ignored += [cls.name]
        else:
            # Initialize job class
            cls.initialize(self)
            # Register initial submit handlers
            if (self.initial_submit and
                hasattr(cls, "initial_submit") and
                callable(cls.initial_submit) and
                hasattr(cls, "initial_submit_interval")):
                self.initial_submit_next_check[cls] = time.time()

    def register_all(self, path, exclude=None):
        """
        Register all Job classes defined within a directory
        :param path: Directory to scan for job modules
        :param exclude: Optional list of file names to skip
        :return:
        """
        exclude = exclude or []
        if not os.path.isdir(path):
            raise ValueError("'%s' must be a directory" % path)
        mr = "noc.%s." % ".".join(path.split(os.sep))
        for f in os.listdir(path):
            if f in exclude or not f.endswith(".py"):
                continue
            mn = mr + f[:-3]  # Full module name
            m = __import__(mn, {}, {}, "*")
            for on in dir(m):
                o = getattr(m, on)
                if (inspect.isclass(o) and issubclass(o, Job) and
                    o.__module__.startswith(mn)):
                    self.register_job_class(o)

    def get_job_class(self, name):
        return self.job_classes[name]

    def submit(self, job_name, key=None, data=None,
               schedule=None, ts=None):
        """
        Submit new job
        """
        if ts is None:
            ts = datetime.datetime.now()
        elif type(ts) in (int, long, float):
            ts = (datetime.datetime.now() +
                  datetime.timedelta(seconds=ts))
        # Check the job does not already exist
        if key is not None:
            if self.collection.find_one({
                self.ATTR_CLASS: job_name,
                self.ATTR_KEY: key
            }):
                raise JobExists()
        # Submit job
        id = self.collection.insert({
            self.ATTR_TS: ts,
            self.ATTR_CLASS: job_name,
            self.ATTR_STATUS: self.S_WAIT,
            self.ATTR_KEY: key,
            self.ATTR_DATA: data,
            self.ATTR_SCHEDULE: schedule
        }, manipulate=True, safe=True)
        self.logger.info("Scheduling job %s(%s) id=%s at %s",
            job_name, key, id, ts)

    def remove_job(self, job_name, key):
        self.logger.info("Removing job %s(%s)", job_name, key)
        self.collection.remove({
            self.ATTR_CLASS: job_name,
            self.ATTR_KEY: key
        }, safe=True)

    def reschedule_job(self, job_name, key, ts, status=None,
                       duration=None, last_status=None, tb=None,
                       log=None, update_runs=False,
                       skip_running=False, faults=None):
        self.logger.info("Rescheduling job %s(%s) to %s%s",
            job_name, key, ts, " status=%s" % status if status else "")
        q = {
            self.ATTR_CLASS: job_name,
            self.ATTR_KEY: key
        }
        if skip_running:
            q[self.ATTR_STATUS] = self.S_WAIT
        s = {
            self.ATTR_TS: ts,
            self.ATTR_TRACEBACK: tb,
            self.ATTR_LOG: log or []
        }
        if status:
            s[self.ATTR_STATUS] = status
        if last_status:
            s[self.ATTR_LAST_STATUS] = last_status
            if last_status == Job.S_SUCCESS:
                s[self.ATTR_LAST_SUCCESS] = datetime.datetime.now()
        if duration is not None:
            s[self.ATTR_LAST_DURATION] = duration
        if faults is not None:
            s[self.ATTR_FAULTS] = faults
        op = {"$set": s}
        if update_runs:
            op["$inc"] = {self.ATTR_RUNS: 1}
        self.collection.update(q, op, safe=True)

    def set_job_status(self, job_name, key, status):
        self.logger.info("Changing %s(%s) status to %s",
            job_name, key, status)
        self.collection.update({
            self.ATTR_CLASS: job_name,
            self.ATTR_KEY: key
        }, {
            "$set": {self.ATTR_STATUS: status}
        }, safe=True)

    def run_job(self, job):
        """
        Begin job execution
        :param job:
        :return:
        """
        # Dereference job
        self.metrics.jobs_dereference_count += 1
        if not job.dereference():
            self.logger.info("Cannot dereference job %s(%s). Removing",
                job.name, job.key)
            self.remove_job(job.name, job.key)
            self.metrics.jobs_dereference_failed += 1
            return
        self.metrics.jobs_dereference_success += 1
        # Check threaded jobs limit
        if job.threaded and self.max_threads:
            if threading.active_count() >= self.max_threads:
                return
        # Check job can be run
        job.started = time.time()
        if not job.can_run():
            job.logger.debug("Deferred")
            self._complete_job(job, job.S_DEFERRED, None)
            return
        # Change status
        s = "threaded " if job.threaded else ""
        job.logger.info("Running job")
        self.collection.update({
            self.ATTR_CLASS: job.name,
            self.ATTR_KEY: job.key
        }, {"$set": {
            self.ATTR_STATUS: self.S_RUN,
            self.ATTR_LAST: datetime.datetime.fromtimestamp(job.started)
        }})
        #
        if job.map_task:
            if job.beef and job.key in job.beef:
                # Do not run job, provide beef instead
                self._run_job_handler(
                    job,
                    object=job.get_managed_object(),
                    result=job.beef[job.key])
            else:
                job.logger.info("Running script %s", job.map_task)
                # Run in MRT mode
                t = ReduceTask.create_task(
                    job.get_managed_object(),  # Managed object is in key
                    None, {},
                    job.map_task, job.get_map_task_params()
                )
                self.active_mrt[t] = job
        else:
            self._run_job_handler(job)

    def _run_job_handler(self, job, **kwargs):
        if job.threaded:
            t = threading.Thread(target=self._job_wrapper,
                args=(job,), kwargs=kwargs
            )
            t.daemon = True
            t.start()
        else:
            return self._job_wrapper(job, **kwargs)

    def _job_wrapper(self, job, **kwargs):
        tb = None
        t0 = time.time()
        job.logger.info("Running job handler")
        try:
            r = job.handler(**kwargs)
        except Exception:
            # error_report()
            tb = get_traceback()
            job.error(tb)
            job.on_exception()
            s = job.S_EXCEPTION
        else:
            if r:
                job.logger.info("Job completed successfully (%.2fms)",
                                (time.time() - t0) * 1000)
                job.on_success()
                s = job.S_SUCCESS
            else:
                job.logger.info("Job failed (%fsec)",
                    time.time() - t0
                )
                job.on_failure()
                s = job.S_FAILED
        self._complete_job(job, s, tb)

    def _complete_job(self, job, status, tb):
        self.metrics.jobs_time.timer(self.name, job.name, job.key).log(
            job.started, time.time(), status)
        if self.log_jobs:
            path = os.path.join(self.log_jobs, job.name, str(job.key))
            safe_rewrite(path, job.get_job_log())
        group = job.get_group()
        if group is not None:
            with self.running_lock:
                self.running_count[group] -= 1
                if not self.running_count[group]:
                    del self.running_count[group]
        on_complete = job.on_complete
        t = job.get_schedule(status)
        if t is None:
            # Unschedule job
            self.remove_job(job.name, job.key)
        else:
            # Reschedule job
            t1 = time.time()
            if self.max_faults and status in (Job.S_FAILED, Job.S_EXCEPTION):
                code = None
                if type(tb) == dict:
                    code = tb.get("code")
                if code in self.IGNORE_MRT_CODES:
                    fc = None  # Ignore temporary errors
                    next_status = self.S_WAIT
                else:
                    # Get fault count
                    fc = self.get_faults(job.name, job.key) + 1
                    if fc >= self.max_faults:  # Disable job
                        next_status = self.S_DISABLED
                        self.logger.info("Disabling job %s(%s) due to %d sequental faults",
                            job.name, job.key, fc)
                    else:
                        next_status = self.S_WAIT
            else:
                next_status = self.S_WAIT
                fc = 0
            self.reschedule_job(
                job.name, job.key, t,
                status=next_status,
                last_status=status,
                duration=t1 - job.started,  # @todo: maybe error
                tb=tb,
                update_runs=True,
                faults=fc
            )
        # Reschedule jobs that must be executed on completion
        for job_name, key in on_complete:
            ts = datetime.datetime.now()
            self.reschedule_job(job_name, key, ts, skip_running=True)

    def complete_mrt_job(self, t):
        job = self.active_mrt.pop(t)
        for m in t.maptask_set.all():
            if m.status == "C":
                self._run_job_handler(job, object=m.managed_object,
                    result=m.script_result)
            else:
                self.logger.info("Job %s(%s) is failed",
                    job.name, job.get_display_key())
                self._complete_job(job, job.S_FAILED, m.script_result)
        t.delete()

    def iter_pending_jobs(self):
        """
        Iterate pending jobs
        """
        q = {
            self.ATTR_TS: {"$lte": datetime.datetime.now()},
            self.ATTR_STATUS: self.S_WAIT
        }
        if self.ignored:
            q[self.ATTR_CLASS] = {"$nin": self.ignored}
        # Get remaining pending tasks
        qs = self.collection.find(q)
        if self.preserve_order:
            qs = qs.sort([(self.ATTR_TS, 1), ("_id", 1)])
        else:
            qs = qs.sort(self.ATTR_TS)
        try:
            for job in qs.batch_size(100):
                yield job
        except pymongo.errors.CursorNotFound:
            self.logger.info("Server cursor timed out. Waiting for next cycle")
        except pymongo.errors.OperationFailure, why:
            self.logger.error("Operation failure: %s", why)
            self.logger.error("Trying to recover")
Example #5
class Collection(object):
    TRANSLATIONS = {}
    ALLOW_FUZZY = {}
    COLLECTIONS = {}
    COLLECTION_ORDER = []

    def __init__(self, name, local=False):
        self.logger = PrefixLoggerAdapter(logger, name)
        if name not in self.COLLECTIONS:
            self.logger.error("Invalid collection '%s'", name)
            raise ValueError("Invalid collection '%s'" % name)
        m, c = name.split(".", 1)
        self.module = m
        self.cname = name
        self.name = c
        self.local = local
        self.doc = self.COLLECTIONS[name]
        self.items = {}  # uuid -> CollectionItem
        self.changed = False
        self.ref_cache = {}
        self.partial = set()
        if hasattr(self.doc, "name"):
            # Use .name field when present
            self.get_name = attrgetter("name")
        else:
            # Otherwise use the first unique field
            uname = None
            for spec in self.doc._meta["index_specs"]:
                if spec["unique"] and len(spec["fields"]) == 1:
                    uname = spec["fields"][0][0]
            if not uname:
                self.logger.error("Cannot find unique index")
                raise ValueError("No unique index")
            self.get_name = attrgetter(uname)
        self.translations = self.TRANSLATIONS.get(name,
                                                  self.TRANSLATIONS[None])

    def __unicode__(self):
        return self.name

    def die(self, msg):
        raise ValueError(msg)

    def get_collection_path(self):
        if self.local:
            return os.path.join("local", "collections", self.module,
                                self.name + ".csv")
        else:
            return os.path.join(self.module, "collections", self.name,
                                "manifest.csv")

    def get_item_path(self, mi):
        return os.path.join(self.module, "collections", self.name, mi.path)

    def load(self):
        """
        Load collection from CSV file
        """
        path = self.get_collection_path()
        if not os.path.exists(path):
            return
        with open(path) as f:
            reader = csv.reader(f)
            reader.next()  # Skip header
            for name, uuid, path, hash in reader:
                uuid = UUID(uuid)
                mi = CollectionItem(name=name, uuid=uuid, path=path, hash=hash)
                self.items[uuid] = mi

    def save(self):
        self.logger.info("Updating manifest")
        rows = sorted(
            ([r.name, r.uuid, r.path, r.hash] for r in self.items.values()),
            key=lambda x: x[0])
        rows = [["name", "uuid", "path", "hash"]] + rows
        out = StringIO()
        writer = csv.writer(out)
        writer.writerows(rows)
        safe_rewrite(self.get_collection_path(), out.getvalue(), mode=0644)
        # Update collection cache
        self.logger.info("Updating CollectionCache")
        CollectionCache.merge("%s.%s" % (self.module, self.name),
                              set(self.items))

    def load_item(self, mi):
        p = self.get_item_path(mi)
        if not os.path.exists(p):
            self.die("File not found: %s" % p)
        with open(p) as f:
            fdata = f.read()
            try:
                data = json_decode(fdata)
            except ValueError, why:
                self.die("Failed to read JSON file '%s': %s" % (p, why))
        if not isinstance(data, dict):
            self.die("Invalid JSON file: %s" % p)
        if self.get_hash(fdata) != mi.hash:
            self.die("Checksum mismatch for file '%s'" % p)
        return data
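
Collection.load() expects the manifest CSV to contain a header row followed by name, uuid, path and hash records, and load_item() re-hashes each JSON file against the hash stored in the manifest, calling die() on any mismatch. A hedged usage sketch (the collection name is hypothetical and must be one of the keys registered in Collection.COLLECTIONS):

c = Collection("fm.alarmclasses")
c.load()  # reads <module>/collections/<name>/manifest.csv into c.items
for uuid, mi in c.items.items():
    data = c.load_item(mi)  # raises ValueError via die() on a missing file or bad checksum
    print mi.name, data.get("uuid")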