def test_import_noconfirm(self, p):
    """Import a legacy Cuckoo setup while declining the second prompt
    (p.side_effect yields True then False) and verify the analyses, log
    files, and database configuration were imported correctly."""
    set_cwd(tempfile.mkdtemp())
    # First confirmation prompt answers "yes", second answers "no".
    p.side_effect = True, False

    dirpath = init_legacy_analyses()
    os.makedirs(os.path.join(dirpath, "lib", "cuckoo", "common"))
    # Use context managers so file handles are closed deterministically
    # rather than leaking until garbage collection.
    with open(os.path.join(
        dirpath, "lib", "cuckoo", "common", "constants.py"
    ), "wb") as f:
        f.write(constants_11_py)
    shutil.copytree(
        "tests/files/conf/110_plain", os.path.join(dirpath, "conf")
    )

    # Point the legacy configuration at the database used by this test.
    filepath = os.path.join(dirpath, "conf", "cuckoo.conf")
    with open(filepath, "rb") as f:
        buf = f.read()
    with open(filepath, "wb") as f:
        f.write(buf.replace(
            "connection =", "connection = %s" % self.URI
        ))

    main.main(
        ("--cwd", cwd(), "import", dirpath), standalone_mode=False
    )

    db = Database()
    db.connect()
    assert db.engine.name == self.ENGINE
    with open(cwd("logs", "a.txt", analysis=1), "rb") as f:
        assert f.read() == "a"
    assert config("cuckoo:database:connection") == self.URI
    assert db.count_tasks() == 2
def test_import_confirm(self, p):
    """Import a legacy Cuckoo setup while confirming all prompts
    (p.return_value is True). On non-Linux platforms the SQL database
    dump step may raise CuckooOperationalError, which is accepted;
    otherwise the imported analyses and configuration are verified."""
    set_cwd(tempfile.mkdtemp())
    p.return_value = True

    dirpath = init_legacy_analyses()
    os.makedirs(os.path.join(dirpath, "lib", "cuckoo", "common"))
    # Use context managers so file handles are closed deterministically
    # rather than leaking until garbage collection.
    with open(os.path.join(
        dirpath, "lib", "cuckoo", "common", "constants.py"
    ), "wb") as f:
        f.write(constants_11_py)
    shutil.copytree(
        "tests/files/conf/110_plain", os.path.join(dirpath, "conf")
    )

    # Point the legacy configuration at the database used by this test.
    filepath = os.path.join(dirpath, "conf", "cuckoo.conf")
    with open(filepath, "rb") as f:
        buf = f.read()
    with open(filepath, "wb") as f:
        f.write(buf.replace(
            "connection =", "connection = %s" % self.URI
        ))

    try:
        main.main(
            ("--cwd", cwd(), "import", dirpath), standalone_mode=False
        )
    except CuckooOperationalError as e:
        # The SQL dump tooling is only expected to be available on Linux.
        assert "SQL database dump as the command" in e.message
        assert not is_linux()
        return

    db = Database()
    db.connect()
    assert db.engine.name == self.ENGINE
    with open(cwd("logs", "a.txt", analysis=1), "rb") as f:
        assert f.read() == "a"
    assert config("cuckoo:database:connection") == self.URI
    assert db.count_tasks() == 2
def test_import_confirm(self, p):
    """Import a legacy Cuckoo setup while confirming all prompts
    (p.return_value is True). On non-Linux platforms the SQL database
    dump step may raise CuckooOperationalError, which is accepted;
    otherwise the imported analyses and configuration are verified."""
    set_cwd(tempfile.mkdtemp())
    p.return_value = True

    dirpath = init_legacy_analyses()
    os.makedirs(os.path.join(dirpath, "lib", "cuckoo", "common"))
    # Use context managers so file handles are closed deterministically
    # rather than leaking until garbage collection.
    with open(os.path.join(
        dirpath, "lib", "cuckoo", "common", "constants.py"
    ), "wb") as f:
        f.write(constants_11_py)
    shutil.copytree(
        "tests/files/conf/110_plain", os.path.join(dirpath, "conf")
    )

    # Point the legacy configuration at the database used by this test.
    filepath = os.path.join(dirpath, "conf", "cuckoo.conf")
    with open(filepath, "rb") as f:
        buf = f.read()
    with open(filepath, "wb") as f:
        f.write(
            buf.replace("connection =", "connection = %s" % self.URI)
        )

    try:
        main.main(
            ("--cwd", cwd(), "import", dirpath), standalone_mode=False
        )
    except CuckooOperationalError as e:
        # The SQL dump tooling is only expected to be available on Linux.
        assert "SQL database dump as the command" in e.message
        assert not is_linux()
        return

    db = Database()
    db.connect()
    assert db.engine.name == self.ENGINE
    with open(cwd("logs", "a.txt", analysis=1), "rb") as f:
        assert f.read() == "a"
    assert config("cuckoo:database:connection") == self.URI
    assert db.count_tasks() == 2
def test_import_noconfirm(self, p):
    """Import a legacy Cuckoo setup while declining the second prompt
    (p.side_effect yields True then False) and verify the analyses, log
    files, and database configuration were imported correctly."""
    set_cwd(tempfile.mkdtemp())
    # First confirmation prompt answers "yes", second answers "no".
    p.side_effect = True, False

    dirpath = init_legacy_analyses()
    os.makedirs(os.path.join(dirpath, "lib", "cuckoo", "common"))
    # Use context managers so file handles are closed deterministically
    # rather than leaking until garbage collection.
    with open(os.path.join(
        dirpath, "lib", "cuckoo", "common", "constants.py"
    ), "wb") as f:
        f.write(constants_11_py)
    shutil.copytree(
        "tests/files/conf/110_plain", os.path.join(dirpath, "conf")
    )

    # Point the legacy configuration at the database used by this test.
    filepath = os.path.join(dirpath, "conf", "cuckoo.conf")
    with open(filepath, "rb") as f:
        buf = f.read()
    with open(filepath, "wb") as f:
        f.write(
            buf.replace("connection =", "connection = %s" % self.URI)
        )

    main.main(("--cwd", cwd(), "import", dirpath), standalone_mode=False)

    db = Database()
    db.connect()
    assert db.engine.name == self.ENGINE
    with open(cwd("logs", "a.txt", analysis=1), "rb") as f:
        assert f.read() == "a"
    assert config("cuckoo:database:connection") == self.URI
    assert db.count_tasks() == 2
class Scheduler(object):
    """Cuckoo task scheduler.

    Polls the database for pending tasks, acquires a matching machine from
    the configured machinery module, hands each task to an analysis manager,
    and tracks the running managers until they finish.
    """

    def __init__(self, maxcount=None):
        # maxcount: optional maximum number of analyses before stopping;
        # a command-line value here overrides cuckoo.conf (see initialize()).
        self.running = True
        self.db = Database()
        self.maxcount = maxcount
        self.total_analysis_count = 0
        self.machinery = None
        self.machine_lock = None
        # Currently tracked (running) analysis managers.
        self.managers = []

    def initialize(self):
        """Initialize the machine manager and machine lock, verify that
        machines are available, and apply configuration overrides."""
        machinery_name = config("cuckoo:cuckoo:machinery")
        max_vmstartup = config("cuckoo:cuckoo:max_vmstartup_count")

        # Initialize a semaphore or lock to prevent too many VMs from
        # starting at the same time.
        self.machine_lock = threading.Semaphore(max_vmstartup)

        log.info(
            "Using '%s' as machine manager", machinery_name,
            extra={
                "action": "init.machinery",
                "status": "success",
                "machinery": machinery_name,
            }
        )

        # Create the machine manager.
        self.machinery = cuckoo.machinery.plugins[machinery_name]()

        # Provide a dictionary with the configuration options to the
        # machine manager instance.
        self.machinery.set_options(Config(machinery_name))

        try:
            self.machinery.initialize(machinery_name)
        except CuckooMachineError as e:
            raise CuckooCriticalError("Error initializing machines: %s" % e)

        # At this point all the available machines should have been identified
        # and added to the list. If none were found, Cuckoo aborts the
        # execution. TODO In the future we'll probably want get rid of this.
        machines = self.machinery.machines()
        if not machines:
            raise CuckooCriticalError("No machines available.")

        log.info(
            "Loaded %s machine/s", len(machines),
            extra={
                "action": "init.machines",
                "status": "success",
                "count": len(machines)
            }
        )

        # SQLite does not cope well with concurrent access from parallel
        # analyses; advise a client/server database instead.
        if len(machines) > 1 and self.db.engine.name == "sqlite":
            log.warning(
                "As you've configured Cuckoo to execute parallel "
                "analyses, we recommend you to switch to a MySQL or "
                "a PostgreSQL database as SQLite might cause some "
                "issues."
            )

        if len(machines) > 4 and config("cuckoo:cuckoo:process_results"):
            log.warning(
                "When running many virtual machines it is recommended to "
                "process the results in separate 'cuckoo process' instances "
                "increase throughput and stability. Please read the "
                "documentation about the `Processing Utility`."
            )

        self.drop_forwarding_rules()

        # Command-line overrides the configuration file.
        if self.maxcount is None:
            self.maxcount = config("cuckoo:cuckoo:max_analysis_count")

    def drop_forwarding_rules(self):
        """Drop all existing packet forwarding rules for each VM. Just in
        case Cuckoo was terminated for some reason and various forwarding
        rules have thus not been dropped yet."""
        for machine in self.machinery.machines():
            if not machine.interface:
                log.info(
                    "Unable to determine the network interface for VM "
                    "with name %s, Cuckoo will not be able to give it "
                    "full internet access or route it through a VPN! "
                    "Please define a default network interface for the "
                    "machinery or define a network interface for each "
                    "VM.", machine.name
                )
                continue

            # Drop forwarding rule to each VPN.
            if config("routing:vpn:enabled"):
                for vpn in config("routing:vpn:vpns"):
                    rooter(
                        "forward_disable", machine.interface,
                        config("routing:%s:interface" % vpn), machine.ip
                    )

            # Drop forwarding rule to the internet / dirty line.
            if config("routing:routing:internet") != "none":
                rooter(
                    "forward_disable", machine.interface,
                    config("routing:routing:internet"), machine.ip
                )

    def stop(self):
        """Stop the Cuckoo task scheduler."""
        self.running = False

        # Shutdown machine manager (used to kill machines that are still
        # alive).
        for manager in self.managers:
            manager.force_cleanup()

        self.machinery.shutdown()

    def ready_for_new_run(self):
        """Perform checks to see if Cuckoo should start a new pending
        task or not. Returns True when a new run may be started."""
        # Wait until the machine lock is not locked. This is only the case
        # when all machines are fully running, rather than about to start
        # or still busy starting. This way we won't have race conditions
        # with finding out there are no available machines in the analysis
        # manager or having two analyses pick the same machine.
        if not self.machine_lock.acquire(False):
            logger(
                "Could not acquire machine lock",
                action="scheduler.machine_lock", status="busy"
            )
            return False

        self.machine_lock.release()

        # Verify if the minimum amount of disk space is available.
        if config("cuckoo:cuckoo:freespace"):
            freespace = get_free_disk(cwd("storage", "analyses"))

            # If freespace is None, the check failed. Continue, since this
            # can happen if the disk check is not supported on others than
            # unix and winxp+. The call might also fail on win32.
            if freespace is None:
                log.error("Error determining free disk space")
            elif freespace <= config("cuckoo:cuckoo:freespace"):
                log.error(
                    "Not enough free disk space! (Only %d MB!)",
                    freespace, extra={
                        "action": "scheduler.diskspace",
                        "status": "error",
                        "available": freespace,
                    }
                )
                return False

        # Respect the configured cap on concurrently running machines.
        max_vm = config("cuckoo:cuckoo:max_machines_count")
        if max_vm and len(self.machinery.running()) >= max_vm:
            log.debug(
                "Maximum amount of machines is running", extra={
                    "action": "scheduler.machines",
                    "status": "maxed"
                }
            )
            return False

        if not self.machinery.availables():
            logger(
                "No available machines",
                action="scheduler.machines", status="none"
            )
            return False

        return True

    def task_limit_hit(self):
        """Stop the scheduler if the maximum amount of tasks has been
        reached. This can be configured by max_analysis_count in cuckoo.conf
        or passed as an argument when starting Cuckoo. Returns True when
        the limit has been hit."""
        if self.maxcount and self.total_analysis_count >= self.maxcount:
            # Only actually stop once all active managers have finished.
            if not self.managers:
                log.debug(
                    "Reached max analysis count, exiting.", extra={
                        "action": "scheduler.max_analysis",
                        "status": "success",
                        "limit": self.total_analysis_count,
                    }
                )
                self.stop()
                return True

            log.debug(
                "Maximum analyses hit, awaiting active analyses to finish. "
                "Still active: %s", len(self.managers), extra={
                    "action": "scheduler.max_analysis",
                    "status": "busy",
                    "active": len(self.managers)
                }
            )
            return True
        return False

    def handle_pending(self):
        """Handle pending tasks. Checks if a new task can be started.
        Eg: not too many machines already running, disk space left etc.
        Selects a machine matching the task requirements and creates
        a matching analysis manager for the type of the selected pending
        task."""
        # Acquire machine lock non-blocking. This is because the scheduler
        # also handles requests made by analysis managers. A blocking lock
        # could cause a deadlock.
        if not self.machine_lock.acquire(False):
            return

        # Select a task that is specifically for one of the available
        # machines, possibly a service machine or reserved machine.
        machine, task, analysis = None, None, False
        for available_machine in self.db.get_available_machines():
            # If the machine has been reserved for a specific task, this
            # task should be processed first, as the machine will only be
            # released when it has finished (Example: longterm task).
            if available_machine.reserved_by:
                task = self.db.fetch(task_id=available_machine.reserved_by)
                if task:
                    machine = self.machinery.acquire(
                        machine_id=available_machine.name
                    )
                    break
                continue

            task = self.db.fetch(machine=available_machine.name)
            if task:
                machine = self.machinery.acquire(
                    machine_id=available_machine.name
                )
                break

            if available_machine.is_analysis():
                analysis = True

        # No task for a specific machine and at least one of the available
        # machines is not a service machine. Fetch a task that is not
        # for a service machine.
        if not task and not machine and analysis:
            # Search for a task, but don't lock it until we are sure a machine
            # for this task is available, since it might have tags or require
            # a specific platform. Ignore a task if we know a machine is not
            # available for it.
            exclude = []
            while not machine:
                task = self.db.fetch(service=False, exclude=exclude)

                if task is None:
                    break

                try:
                    machine = self.machinery.acquire(
                        machine_id=task.machine, platform=task.platform,
                        tags=task.tags
                    )
                except CuckooOperationalError:
                    log.error(
                        "Task #%s cannot be started, no machine with matching "
                        "requirements for this task exists. Requirements: %s",
                        task.id, Task.requirements_str(task)
                    )
                    # No machine with the required tags, name etc exists.
                    # Set the analysis to failed.
                    # TODO Use another status so it might be recovered
                    # on the next Cuckoo startup if the machine exists by then.
                    self.db.set_status(task.id, TASK_FAILED_ANALYSIS)
                    break

                if not machine:
                    exclude.append(task.id)

        if not task or not machine:
            # Nothing startable was found; undo the lock (and a machine
            # acquired without a matching task, if any).
            self.machine_lock.release()
            if machine:
                self.machinery.release(label=machine.label)
            return

        log.info(
            "Task #%d: acquired machine %s (label=%s)",
            task.id, machine.name, machine.label, extra={
                "action": "vm.acquire",
                "status": "success",
                "vmname": machine.name,
            }
        )

        # Task and matching machine found. Find an analysis manager
        # which supports the type of this task. Lock it when found.
        analysis_manager = self.get_analysis_manager(task, machine)

        if not analysis_manager:
            # If no analysis manager is found for this task type, it
            # cannot be started, therefore we release the machine again.
            self.machinery.release(label=machine.label)

            # Release the machine lock as the machine will not be starting.
            self.machine_lock.release()

            # Set the task status to failed as it cannot be analyzed if no
            # matching analysis manager for its type exists.
            self.db.set_status(task.id, TASK_FAILED_ANALYSIS)
            return

        # Only lock the task for running if we are sure we will try to
        # start it.
        self.db.set_status(task.id, TASK_RUNNING)

        # Increment the total amount of analyses.
        self.total_analysis_count += 1

        analysis_manager.daemon = True
        if not analysis_manager.init(self.db):
            self.db.set_status(task.id, TASK_FAILED_ANALYSIS)
            log.error(
                "Failed to initialize analysis manager for task #%s", task.id
            )
            self.machine_lock.release()
            self.machinery.release(label=machine.label)
            return

        # If initialization succeeded, start the analysis manager
        # and store it so we can track it.
        analysis_manager.start()
        self.managers.append(analysis_manager)

    def get_analysis_manager(self, db_task, machine):
        """Search all available analysis managers for one that supports
        the type of the given task.
        Returns an analysis manager. Returns None if no manager supports
        the type."""
        managers = cuckoo.analysis.plugins
        analysis_manager = None
        for manager in managers:
            if db_task.type in manager.supports:
                core_task = Task(db_task)

                analysis_manager = manager(
                    machine, self.machinery, self.machine_lock
                )
                try:
                    analysis_manager.set_task(core_task)
                    analysis_manager.set_target(core_task.targets)
                except Exception as e:
                    # Discard the manager on failure; the caller treats
                    # None as "task cannot be analyzed".
                    analysis_manager = None
                    log.exception(
                        "Failure when setting task and target for analysis"
                        " manager '%s'.", manager
                    )
                break
        return analysis_manager

    def handle_managers(self):
        """Execute actions requested by analysis managers. If an analysis
        manager is finished, execute its finalize actions. Returns
        a list of analysis managers to untrack."""
        remove = []
        for manager in self.managers:
            if manager.action_requested():
                status = manager.get_analysis_status()
                # Dispatch to an on_status_<status> handler if the manager
                # implements one.
                status_action = getattr(manager, "on_status_%s" % status, None)
                if status_action:
                    log.debug(
                        "Executing requested action by task #%s for status"
                        " '%s'", manager.task.id, status
                    )
                    try:
                        status_action(self.db)
                    except Exception as e:
                        log.exception(
                            "Error executing requested action: %s. Error: %s",
                            status_action, e
                        )
                else:
                    log.error(
                        "Analysis manager for task #%s requested action for"
                        " status '%s', but no action is implemented",
                        manager.task.id, status
                    )
                manager.action_lock.release()

            if not manager.isAlive():
                manager.finalize(self.db)
                remove.append(manager)

        return remove

    def keep_running(self):
        """Return whether the scheduler main loop should keep running."""
        return self.running

    def start(self):
        """Start the Cuckoo task scheduler."""
        self.initialize()

        log.info("Waiting for analysis tasks")

        while self.keep_running():
            time.sleep(1)

            # Handle actions requested by analysis managers and perform
            # finalization actions for the managers if they exit.
            for untrack_manager in self.handle_managers():
                self.managers.remove(untrack_manager)

            # Verify if the maximum amount of analyses to process has been
            # hit. Stops the scheduler if no running analysis managers are
            # left.
            if self.task_limit_hit():
                continue

            # Handle pending tasks by finding the matching machine and
            # analysis manager. The manager is started and added to the
            # tracked analysis managers.
            if self.db.count_tasks(status=TASK_PENDING):
                # Check if the max amount of VMs are running, if there is
                # enough disk space, etc.
                if self.ready_for_new_run():
                    # Grab a pending task, find a machine that matches, find
                    # a matching analysis manager and start the analysis.
                    self.handle_pending()

        log.debug("End of analyses.")