Exemplo n.º 1
0
    def run(self):
        r"""Defines main worker process.

        By default, the worker is designed to poll for file tasks from the
        broker, distribute the files to scanners, and write the scan results
        to disk. The worker self-manages its life based on how long it has
        lived and how many files it has scanned; on planned or unplanned
        shutdown, the worker notifies the broker it should no longer receive
        file tasks (status `\x10`). If the worker does not receive a file task
        within the configured delta, then it will send a heartbeat to the
        broker notifying it that the worker is still alive and ready to
        receive tasks (`\x00`).

        A multipart message is treated as a file task only when it has
        exactly four frames; its last frame is handed to the scanners. The
        second frame is compared against this worker's identity and a
        mismatch is logged as a routing error (the task is still processed).
        Any other frame count is logged as an invalid task.

        This method can be overridden to create custom workers.
        """
        logging.info(f"{self.name} ({self.identity.decode()}): starting up")
        signal.signal(signal.SIGUSR1,
                      functools.partial(utils.shutdown_handler, self))
        conf.parse_yaml(path=self.strelka_cfg, section="remote")
        conf.parse_yaml(path=self.strelka_cfg, section="scan")
        self.setup_zmq()

        # Bound before the ``try`` so the shutdown summary at the end of this
        # method can always read them, however early the loop is interrupted.
        counter = 0
        worker_start_time = time.time()
        # The lifetime check below is measured from a point 1-60 seconds
        # after start-up, so each worker lives slightly longer than
        # ``time_to_live`` by a random amount.
        # NOTE(review): presumably this staggers worker restarts -- confirm.
        worker_expire_time = worker_start_time + random.randint(1, 60)

        try:
            self.send_ready_status()
            logging.debug(f"{self.name} ({self.identity.decode()}):"
                          " sent ready status")
            self.set_heartbeat_at()

            while True:
                # Planned retirement: file budget spent or lifetime exceeded.
                if counter >= self.file_max:
                    break
                if (time.time() - worker_expire_time) >= self.time_to_live:
                    break

                tasks = dict(self.task_poller.poll(self.poller_timeout))
                if tasks.get(self.task_socket) == zmq.POLLIN:
                    task = self.task_socket.recv_multipart()

                    # Only inspect the identity frame when it exists; a
                    # truncated message falls through to the "invalid task"
                    # branch below instead of raising IndexError and taking
                    # the whole worker down.
                    if len(task) > 1:
                        worker_identity = task[1]
                        if worker_identity != self.identity:
                            logging.error(f"{self.name}"
                                          f" ({self.identity.decode()}):"
                                          " routing"
                                          " error, received task destined for"
                                          f" {worker_identity.decode()}")

                    if len(task) == 4:
                        file_task = task[-1]
                        scan_result = self.distribute_task(file_task)
                        self.log_to_disk(scan_result)
                        counter += 1
                    else:
                        logging.error(f"{self.name}"
                                      f" ({self.identity.decode()}): received"
                                      " invalid task")

                    # Tell the broker we are free again and push the next
                    # heartbeat deadline out.
                    self.send_ready_status()
                    logging.debug(f"{self.name} ({self.identity.decode()}):"
                                  " sent ready status")
                    self.set_heartbeat_at()

                elif time.time() >= self.heartbeat_at:
                    # Nothing received before the heartbeat deadline:
                    # re-announce readiness.
                    self.send_ready_status()
                    logging.debug(f"{self.name} ({self.identity.decode()}):"
                                  " sent heartbeat")
                    self.set_heartbeat_at()

        except errors.QuitWorker:
            logging.debug(f"{self.name} ({self.identity.decode()}): received"
                          " shutdown signal")
        except Exception:
            # Top-level boundary: log the traceback and fall through to the
            # orderly shutdown below.
            logging.exception(f"{self.name} ({self.identity.decode()}):"
                              " exception in main loop (see traceback below)")

        self.send_shutdown_status()
        logging.debug(f"{self.name} ({self.identity.decode()}): sent"
                      " shutdown status")
        time.sleep(1)
        distribution.close_scanners()
        logging.info(f"{self.name} ({self.identity.decode()}): shutdown"
                     f" after scanning {counter} file(s) and"
                     f" {time.time() - worker_start_time} seconds")
Exemplo n.º 2
0
def main():
    """Stream files from a watched directory to worker processes.

    Parses the command line, configures console logging, locates the
    dirstream configuration (explicit ``-c`` path first, then the system and
    development defaults), and starts ``worker_count`` ``Worker`` processes
    that share one intake queue. Regular, non-hidden files already present in
    the directory are queued once at start-up; afterwards every inotify
    ``CLOSE_WRITE`` event in the directory queues the affected path. Dead
    workers are replaced while the module-level ``run`` flag is truthy.

    SIGTERM or SIGINT clears ``run``. Children are then sent SIGUSR1 and,
    if that does not finish within ``shutdown_timeout``, SIGKILL.

    Exits via ``sys.exit`` when the configuration file cannot be found, when
    no directory is configured, or when the configured directory does not
    exist.
    """
    def shutdown(signum, frame):
        """Signal handler for shutting down main."""
        logging.debug("main: shutdown triggered")
        global run
        run = 0

    signal.signal(signal.SIGTERM, shutdown)
    signal.signal(signal.SIGINT, shutdown)

    parser = argparse.ArgumentParser(prog="strelka_dirstream.py",
                                     description="sends files from a directory"
                                                 " to a Strelka cluster in"
                                                 " near real-time.",
                                     usage="%(prog)s [options]")
    parser.add_argument("-d", "--debug",
                        action="store_true",
                        default=False,
                        dest="debug",
                        help="enable debug messages to the console")
    parser.add_argument("-c", "--dirstream-config",
                        action="store",
                        dest="dirstream_cfg",
                        help="path to dirstream configuration file")
    args = parser.parse_args()

    # Only the level differs between debug and normal operation.
    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S")

    dirstream_cfg = None
    if args.dirstream_cfg:
        if not os.path.exists(args.dirstream_cfg):
            sys.exit(f"main: stream directory config {args.dirstream_cfg}"
                     " does not exist")
        dirstream_cfg = args.dirstream_cfg
    elif os.path.exists(DEFAULT_CONFIGS['sys_dirstream_cfg']):
        dirstream_cfg = DEFAULT_CONFIGS['sys_dirstream_cfg']
    elif os.path.exists(DEFAULT_CONFIGS['dev_dirstream_cfg']):
        dirstream_cfg = DEFAULT_CONFIGS['dev_dirstream_cfg']

    if dirstream_cfg is None:
        sys.exit("main: no dirstream configuration found")
    logging.info(f"main: using dirstream configuration {dirstream_cfg}")

    dirstream_cfg = conf.parse_yaml(path=dirstream_cfg, section="dirstream")
    directory_cfg = dirstream_cfg.get("directory", {})
    network_cfg = dirstream_cfg.get("network", {})
    processes_cfg = dirstream_cfg.get("processes", {})
    directory = directory_cfg.get("directory")
    shutdown_timeout = processes_cfg.get("shutdown_timeout", 10)
    worker_count = processes_cfg.get("worker_count", 1)

    worker_processes = []

    # A missing "directory" key yields None, and os.path.isdir(None) raises
    # TypeError; fail with a readable message instead of a traceback.
    if not directory:
        sys.exit("main: no stream directory configured")
    if not os.path.isdir(directory):
        sys.exit(f"main: directory {directory} does not exist")

    manager = multiprocessing.Manager()
    intake_queue = manager.Queue()
    inotify = inotify_simple.INotify()
    watch_flags = inotify_simple.flags.CLOSE_WRITE
    inotify.add_watch(directory, watch_flags)

    for _ in range(worker_count):
        worker_process = Worker(intake_queue, directory_cfg, network_cfg)
        worker_process.start()
        worker_processes.append(worker_process)

    # Queue whatever is already sitting in the directory (dotfiles skipped).
    with os.scandir(directory) as sd:
        for entry in sd:
            if not entry.name.startswith(".") and entry.is_file():
                file_path = os.path.join(directory, entry.name)
                intake_queue.put(file_path)

    while run:
        # Iterate over a copy: dead workers are removed and replaced in place.
        for process in list(worker_processes):
            if not process.is_alive():
                process.join()
                worker_processes.remove(process)
                worker_process = Worker(intake_queue,
                                        directory_cfg,
                                        network_cfg)
                worker_process.start()
                worker_processes.append(worker_process)

        # The bounded timeout keeps the loop re-checking ``run`` and worker
        # liveness (timeout/read_delay are milliseconds in inotify_simple).
        # NOTE(review): unlike the start-up scan, events for dotfiles are
        # not filtered here -- confirm that is intended.
        for evt in inotify.read(timeout=100, read_delay=500):
            file_path = os.path.join(directory, evt.name)
            intake_queue.put(file_path)

    # No more events will be read; release the inotify file descriptor.
    inotify.close()

    logging.info("main: starting shutdown of running child processes"
                 f" (using timeout value {shutdown_timeout})")
    try:
        with interruptingcow.timeout(shutdown_timeout,
                                     exception=errors.QuitDirStream):
            utils.signal_children(worker_processes, signal.SIGUSR1)
            logging.debug("main: finished shutdown of running"
                          " child processes")
    except errors.QuitDirStream:
        # Graceful shutdown overran the timeout; kill whatever is left.
        logging.debug("main: starting forcible shutdown of running"
                      " child processes")
        utils.signal_children(worker_processes, signal.SIGKILL)
    logging.info("main: finished")
Exemplo n.º 3
0
def main():
    """Supervise one ``DirWorker`` process per configured worker entry.

    Installs SIGTERM/SIGINT handlers that clear the module-level ``run``
    flag, parses the command line, sets up console logging, and resolves the
    dirstream configuration (``-c`` path, else the system default, else the
    development default). One ``DirWorker`` is started for each entry under
    the ``workers`` key; every five seconds dead workers are joined and
    replaced with a new process built from the same entry. Once ``run`` is
    cleared the children receive SIGUSR1, followed by SIGKILL if the graceful
    phase exceeds ``shutdown_timeout``.
    """
    def request_stop(signum, frame):
        """Signal handler for shutting down main."""
        global run
        logging.debug("main: shutdown triggered")
        run = 0

    for stop_signal in (signal.SIGTERM, signal.SIGINT):
        signal.signal(stop_signal, request_stop)

    cli = argparse.ArgumentParser(
        prog="strelka_dirstream.py",
        description="sends files from a directory"
                    " to a Strelka cluster in"
                    " near real-time.",
        usage="%(prog)s [options]",
    )
    cli.add_argument(
        "-d", "--debug",
        action="store_true",
        default=False,
        dest="debug",
        help="enable debug messages to the console",
    )
    cli.add_argument(
        "-c", "--dirstream-config",
        action="store",
        dest="dirstream_cfg",
        help="path to dirstream configuration file",
    )
    options = cli.parse_args()

    # Debug and normal mode share everything but the level.
    console_level = logging.DEBUG if options.debug else logging.INFO
    logging.basicConfig(level=console_level,
                        format="%(asctime)s %(levelname)-8s %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S")

    # An explicit path must exist; otherwise take the first default that does.
    if options.dirstream_cfg:
        if not os.path.exists(options.dirstream_cfg):
            sys.exit(f"main: stream directory config {options.dirstream_cfg}"
                     " does not exist")
        config_path = options.dirstream_cfg
    else:
        config_path = next(
            (DEFAULT_CONFIGS[key]
             for key in ('sys_dirstream_cfg', 'dev_dirstream_cfg')
             if os.path.exists(DEFAULT_CONFIGS[key])),
            None,
        )

    if config_path is None:
        sys.exit("main: no dirstream configuration found")
    logging.info(f"main: using dirstream configuration {config_path}")

    settings = conf.parse_yaml(path=config_path, section="dirstream")
    shutdown_timeout = settings.get("processes", {}).get("shutdown_timeout",
                                                         10)

    # Maps each live process to the config entry it was created from, so a
    # replacement can be built with the same settings.
    children = {}

    def launch(entry):
        """Start a DirWorker for *entry* and register it."""
        child = DirWorker(entry)
        child.start()
        children[child] = entry

    for entry in settings.get("workers", []):
        launch(entry)

    while run:
        # Snapshot the keys: the mapping is mutated while restarting.
        for child in tuple(children):
            if child.is_alive():
                continue
            child.join()
            launch(children.pop(child))
        time.sleep(5)

    logging.info("main: starting shutdown of running child processes"
                 f" (using timeout value {shutdown_timeout})")

    try:
        with interruptingcow.timeout(shutdown_timeout,
                                     exception=errors.QuitDirStream):
            utils.signal_children(list(children), signal.SIGUSR1)
            logging.debug("main: finished shutdown of running"
                          " child processes")
    except errors.QuitDirStream:
        logging.debug("main: starting forcible shutdown of running"
                      " child processes")
        utils.signal_children(list(children), signal.SIGKILL)
    logging.info("main: finished")
Exemplo n.º 4
0
def main():
    """Run and supervise the Strelka daemon processes.

    Installs SIGTERM/SIGINT handlers that clear the module-level ``run``
    flag, parses the command line, then resolves and loads the logging
    configuration followed by the strelka configuration (explicit path, else
    the system default, else the development default; exits if none is
    found). Depending on the ``processes`` settings of the ``daemon``
    section it starts a broker, a log rotation process and ``worker_count``
    workers, and every five seconds replaces any of them that has died.
    Once ``run`` is cleared the children receive SIGUSR1 (broker, workers,
    log rotation, in that order), followed by SIGKILL if the graceful phase
    exceeds ``shutdown_timeout``.
    """
    def request_stop(signum, frame):
        """Signal handler for shutting down main."""
        global run
        logging.debug("shutdown triggered")
        run = 0

    for stop_signal in (signal.SIGTERM, signal.SIGINT):
        signal.signal(stop_signal, request_stop)

    cli = argparse.ArgumentParser(
        prog="strelka.py",
        description="runs Strelka as a"
                    " distributed cluster.",
        usage="%(prog)s [options]",
    )
    cli.add_argument(
        "-d", "--debug",
        action="store_true",
        default=False,
        dest="debug",
        help="enable debug messages to the console",
    )
    cli.add_argument(
        "-c", "--strelka-config",
        action="store",
        dest="strelka_cfg",
        help="path to strelka configuration file",
    )
    cli.add_argument(
        "-l", "--logging-ini",
        action="store",
        dest="logging_ini",
        help="path to python logging configuration file",
    )
    options = cli.parse_args()

    # Logging configuration: explicit path must exist, else first default.
    if options.logging_ini:
        if not os.path.exists(options.logging_ini):
            sys.exit(f"logging configuration {options.logging_ini}"
                     " does not exist")
        logging_ini = options.logging_ini
    else:
        logging_ini = next(
            (DEFAULT_CONFIGS[key]
             for key in ("sys_logging_ini", "dev_logging_ini")
             if os.path.exists(DEFAULT_CONFIGS[key])),
            None,
        )

    if logging_ini is None:
        sys.exit("no logging configuration found")
    logging.config.fileConfig(logging_ini)

    # Strelka configuration: same lookup order as above.
    if options.strelka_cfg:
        if not os.path.exists(options.strelka_cfg):
            sys.exit(f"strelka configuration {options.strelka_cfg}"
                     " does not exist")
        strelka_cfg = options.strelka_cfg
    else:
        strelka_cfg = next(
            (DEFAULT_CONFIGS[key]
             for key in ("sys_strelka_cfg", "dev_strelka_cfg")
             if os.path.exists(DEFAULT_CONFIGS[key])),
            None,
        )

    if strelka_cfg is None:
        sys.exit("no strelka configuration found")
    logging.info(f"using strelka configuration {strelka_cfg}")

    daemon_cfg = conf.parse_yaml(path=strelka_cfg, section="daemon")
    process_settings = daemon_cfg.get("processes", {})
    run_broker = process_settings.get("run_broker", True)
    run_workers = process_settings.get("run_workers", True)
    worker_count = process_settings.get("worker_count", 4)
    run_logrotate = process_settings.get("run_logrotate", True)
    shutdown_timeout = process_settings.get("shutdown_timeout", 45)

    broker = None
    logrotate = None
    workers = []

    def start_broker():
        """Create and start a broker process."""
        child = lib.Broker(daemon_cfg)
        child.start()
        return child

    def start_logrotate():
        """Create and start a log rotation process."""
        child = lib.LogRotate(daemon_cfg)
        child.start()
        return child

    def start_worker():
        """Create and start a worker process and track it."""
        child = lib.Worker(strelka_cfg, daemon_cfg)
        child.start()
        workers.append(child)

    if run_broker:
        broker = start_broker()
    else:
        logging.info("broker disabled")

    if run_logrotate:
        logrotate = start_logrotate()
    else:
        logging.info("log rotation disabled")

    if run_workers:
        for _ in range(worker_count):
            start_worker()
    else:
        logging.info("workers disabled")

    while run:
        if run_broker and not broker.is_alive():
            broker.join()
            broker = start_broker()

        if run_logrotate and not logrotate.is_alive():
            logrotate.join()
            logrotate = start_logrotate()

        if run_workers:
            # Snapshot the list: it is mutated while replacing dead workers.
            for child in tuple(workers):
                if child.is_alive():
                    continue
                child.join()
                workers.remove(child)
                start_worker()
        time.sleep(5)

    def signal_enabled(signum):
        """Send *signum* to every enabled child group, in shutdown order."""
        if run_broker:
            utils.signal_children([broker], signum)
        if run_workers:
            utils.signal_children(workers, signum)
        if run_logrotate:
            utils.signal_children([logrotate], signum)

    logging.info("starting shutdown of running child processes"
                 f" (using timeout value {shutdown_timeout})")
    try:
        with interruptingcow.timeout(shutdown_timeout,
                                     exception=errors.QuitStrelka):
            signal_enabled(signal.SIGUSR1)
            logging.debug("finished shutdown of running"
                          " child processes")
    except errors.QuitStrelka:
        logging.debug("starting forcible shutdown of running"
                      " child processes")
        signal_enabled(signal.SIGKILL)
    logging.info("finished")