Exemplo n.º 1
0
def process_task(task):
    """Run processing/reporting for a single task and record the outcome.

    Per-task logging is started before any work is done and is always
    stopped again in the ``finally`` clause. An exception raised while
    processing marks the task as TASK_FAILED_PROCESSING; any other exception
    is logged and swallowed so that the caller is never interrupted.

    @param task: dict-like task description. The keys "id", "target",
        "category", "package", "options" and "custom" are read; "sample_id"
        is optional and only used for "file" tasks.
    """
    db = Database()

    try:
        task_log_start(task["id"])

        logger(
            "Starting task reporting",
            action="task.report", status="pending",
            target=task["target"], category=task["category"],
            package=task["package"], options=emit_options(task["options"]),
            custom=task["custom"]
        )

        # File tasks that reference a stored sample are processed with the
        # path of the binary kept under storage/binaries/<sha256>.
        if task["category"] == "file" and task.get("sample_id"):
            sample = db.view_sample(task["sample_id"])
            copy_path = cwd("storage", "binaries", sample.sha256)
        else:
            copy_path = None

        try:
            process(task["target"], copy_path, task)
            db.set_status(task["id"], TASK_REPORTED)
        except Exception as e:
            log.exception("Task #%d: error reporting: %s", task["id"], e)
            db.set_status(task["id"], TASK_FAILED_PROCESSING)
        else:
            # Only announce success when processing really succeeded; a
            # failed task must not emit a "status: success" event.
            log.info(
                "Task #%d: reports generation completed", task["id"],
                extra={
                    "action": "task.report", "status": "success",
                }
            )
    except Exception as e:
        log.exception("Caught unknown exception: %s", e)
    finally:
        task_log_stop(task["id"])
Exemplo n.º 2
0
def init_tasks():
    """Clean up tasks left over in the database.

    Tasks still marked as running are either rescheduled (when the
    "cuckoo:cuckoo:reschedule" option is enabled) or marked as failed.
    Pending tasks of category "service" are always marked as failed.
    """
    database = Database()

    log.debug("Checking for locked tasks..")
    for stale in database.list_tasks(status=TASK_RUNNING):
        if not config("cuckoo:cuckoo:reschedule"):
            # Rescheduling is disabled: give up on this task.
            database.set_status(stale.id, TASK_FAILED_ANALYSIS)
            log.info(
                "Updated running task ID %s status to failed_analysis",
                stale.id
            )
            continue

        new_task_id = database.reschedule(stale.id)
        log.info(
            "Rescheduled task with ID %s and target %s: task #%s",
            stale.id, stale.target, new_task_id
        )

    log.debug("Checking for pending service tasks..")
    pending_services = database.list_tasks(
        status=TASK_PENDING, category="service"
    )
    for service_task in pending_services:
        database.set_status(service_task.id, TASK_FAILED_ANALYSIS)
Exemplo n.º 3
0
class DatabaseEngine(object):
    """Tests database stuff.

    The connection string is taken from the class attribute URI, which is
    None here (presumably overridden per database engine - TODO confirm).
    """
    URI = None

    def setup_class(self):
        """Use a fresh temporary working directory and connect to URI."""
        set_cwd(tempfile.mkdtemp())

        self.d = Database()
        self.d.connect(dsn=self.URI)

    def add_url(self, url, priority=1, status="pending"):
        """Add a URL task, force its status and return the new task ID."""
        task_id = self.d.add_url(url, priority=priority)
        self.d.set_status(task_id, status)
        return task_id

    def test_add_tasks(self):
        """Adding a path and a URL each creates exactly one Task row."""
        fd, sample_path = tempfile.mkstemp()
        # os.write() works on bytes; a bytes literal behaves the same on
        # Python 2 and stays valid on Python 3.
        os.write(fd, b"hehe")
        os.close(fd)

        # Add task.
        count = self.d.Session().query(Task).count()
        self.d.add_path(sample_path)
        assert self.d.Session().query(Task).count() == count + 1

        # Add url.
        self.d.add_url("http://foo.bar")
        assert self.d.Session().query(Task).count() == count + 2

    def test_processing_get_task(self):
        """processing_get_task() is expected to hand out completed tasks by
        descending priority and, within a priority, in creation order."""
        # First reset all existing rows so that earlier exceptions don't affect
        # this unit test run.
        null, session = None, self.d.Session()

        session.query(Task).filter(
            Task.status == "completed", Task.processing == null
        ).update({
            "processing": "something",
        })
        session.commit()

        t1 = self.add_url("http://google.com/1", priority=1, status="completed")
        t2 = self.add_url("http://google.com/2", priority=2, status="completed")
        t3 = self.add_url("http://google.com/3", priority=1, status="completed")
        t4 = self.add_url("http://google.com/4", priority=1, status="completed")
        t5 = self.add_url("http://google.com/5", priority=3, status="completed")
        t6 = self.add_url("http://google.com/6", priority=1, status="completed")
        t7 = self.add_url("http://google.com/7", priority=1, status="completed")

        assert self.d.processing_get_task("foo") == t5
        assert self.d.processing_get_task("foo") == t2
        assert self.d.processing_get_task("foo") == t1
        assert self.d.processing_get_task("foo") == t3
        assert self.d.processing_get_task("foo") == t4
        assert self.d.processing_get_task("foo") == t6
        assert self.d.processing_get_task("foo") == t7
        assert self.d.processing_get_task("foo") is None

    def test_error_exists(self):
        """Identical error messages are stored as separate entries."""
        task_id = self.add_url("http://google.com/")
        self.d.add_error("A"*1024, task_id)
        assert len(self.d.view_errors(task_id)) == 1
        self.d.add_error("A"*1024, task_id)
        assert len(self.d.view_errors(task_id)) == 2

    def test_long_error(self):
        """A 1024 character error message is stored without truncation."""
        # Use the ID of the task created here instead of assuming it is 1,
        # so the test does not depend on what is already in the database.
        task_id = self.add_url("http://google.com/")
        self.d.add_error("A"*1024, task_id)
        err = self.d.view_errors(task_id)
        assert err and len(err[0].message) == 1024

    def test_submit(self):
        """A submit entry can be read back with the values it was given."""
        dirpath = tempfile.mkdtemp()
        submit_id = self.d.add_submit(dirpath, "files", {
            "foo": "bar",
        })
        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        assert submit.tmp_path == dirpath
        assert submit.submit_type == "files"
        assert submit.data == {
            "foo": "bar",
        }

    def test_connect_no_create(self):
        """connect(create=False) must not recreate a dropped table, while a
        default connect() does."""
        AlembicVersion.__table__.drop(self.d.engine)
        self.d.connect(dsn=self.URI, create=False)
        assert "alembic_version" not in self.d.engine.table_names()
        self.d.connect(dsn=self.URI)
        assert "alembic_version" in self.d.engine.table_names()

    def test_view_submit_tasks(self):
        """Tasks of a submit are only loaded when tasks=True is passed."""
        submit_id = self.d.add_submit(None, None, None)
        t1 = self.d.add_path(__file__, custom="1", submit_id=submit_id)
        t2 = self.d.add_path(__file__, custom="2", submit_id=submit_id)

        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        with pytest.raises(DetachedInstanceError):
            # Accessing the attribute is what raises; the call form of print
            # behaves the same on Python 2 and is valid on Python 3.
            print(submit.tasks)

        submit = self.d.view_submit(submit_id, tasks=True)
        assert len(submit.tasks) == 2
        tasks = sorted((task.id, task) for task in submit.tasks)
        assert tasks[0][1].id == t1
        assert tasks[0][1].custom == "1"
        assert tasks[1][1].id == t2
        assert tasks[1][1].custom == "2"

    def test_add_reboot(self):
        """A reboot task stores the original task ID in its custom field."""
        t0 = self.d.add_path(__file__)
        s0 = self.d.add_submit(None, None, None)
        t1 = self.d.add_reboot(task_id=t0, submit_id=s0)

        t = self.d.view_task(t1)
        assert t.custom == "%s" % t0
        assert t.submit_id == s0

    def test_task_set_options(self):
        """Options given as dict or as "key=value" string read back equal."""
        t0 = self.d.add_path(__file__, options={"foo": "bar"})
        t1 = self.d.add_path(__file__, options="foo=bar")
        assert self.d.view_task(t0).options == {"foo": "bar"}
        assert self.d.view_task(t1).options == {"foo": "bar"}

    def test_task_tags_str(self):
        """A comma separated tag string yields no tag for the empty part."""
        task = self.d.add_path(__file__, tags="foo,,bar")
        tag0, tag1 = self.d.view_task(task).tags
        assert sorted((tag0.name, tag1.name)) == ["bar", "foo"]

    def test_task_tags_list(self):
        """Of a tag list only the three non-empty string entries are kept."""
        task = self.d.add_path(__file__, tags=["tag1", "tag2", "", 1, "tag3"])
        tag0, tag1, tag2 = self.d.view_task(task).tags
        assert sorted((tag0.name, tag1.name, tag2.name)) == [
            "tag1", "tag2", "tag3"
        ]

    def test_error_action(self):
        """The optional action of an error is stored, defaulting to None."""
        task_id = self.d.add_path(__file__)
        self.d.add_error("message1", task_id)
        self.d.add_error("message2", task_id, "actionhere")
        e1, e2 = self.d.view_errors(task_id)
        assert e1.message == "message1"
        assert e1.action is None
        assert e2.message == "message2"
        assert e2.action == "actionhere"

    def test_view_tasks(self):
        """view_tasks() returns the same data as individual view_task()."""
        t1 = self.d.add_path(__file__)
        t2 = self.d.add_url("http://google.com/")
        tasks = self.d.view_tasks([t1, t2])
        assert tasks[0].to_dict() == self.d.view_task(t1).to_dict()
        assert tasks[1].to_dict() == self.d.view_task(t2).to_dict()

    def test_add_machine(self):
        """Machine options given as None, "", a space separated string or a
        list are all read back as a list."""
        self.d.add_machine(
            "name1", "label", "1.2.3.4", "windows", None,
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name2", "label", "1.2.3.4", "windows", "",
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name3", "label", "1.2.3.4", "windows", "opt1 opt2",
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name4", "label", "1.2.3.4", "windows", ["opt3", "opt4"],
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        m1 = self.d.view_machine("name1")
        m2 = self.d.view_machine("name2")
        m3 = self.d.view_machine("name3")
        m4 = self.d.view_machine("name4")
        assert m1.options == []
        assert m2.options == []
        assert m3.options == ["opt1", "opt2"]
        assert m4.options == ["opt3", "opt4"]

    @mock.patch("cuckoo.common.objects.magic")
    def test_add_sample(self, p):
        """A sample is still added when magic reports an empty file type."""
        p.from_file.return_value = ""
        assert self.d.add_path(Files.temp_put(os.urandom(16))) is not None
Exemplo n.º 4
0
class AnalysisManager(threading.Thread):
    """Analysis Manager.

    This class handles the full analysis process for a given task. It takes
    care of selecting the analysis machine, preparing the configuration and
    interacting with the guest agent and analyzer components to launch and
    complete the analysis and store, process and report its results.
    """

    def __init__(self, task_id, error_queue):
        """Create the manager thread for one task.

        @param task_id: ID of the task, which is loaded from the database.
        @param error_queue: queue kept as self.errors; launch_analysis()
            puts exceptions on it.
        """
        threading.Thread.__init__(self)

        self.errors = error_queue
        self.cfg = Config()
        self.db = Database()
        self.task = self.db.view_task(task_id)

        # Paths, filled in by init().
        self.storage = ""
        self.binary = ""
        self.storage_binary = ""

        # Assigned later by acquire_machine() and launch_analysis().
        self.machine = None
        self.guest_manager = None

        # Routing state, decided by route_network().
        self.route = None
        self.interface = None
        self.rt_table = None

    def init(self):
        """Prepare the analysis storage folder and the sample binary.

        Creates the storage folder of this task, writes the task info into
        it and, for "file" and "archive" tasks, validates the target file,
        stores a copy under storage/binaries and links (or copies) it into
        the storage folder. Per-task logging is only started when all of
        these steps succeeded.

        @return: True on success, False when the analysis has to be aborted
            (the reason is logged).
        """
        self.storage = cwd(analysis=self.task.id)

        # If the analysis storage folder already exists, we need to abort the
        # analysis or previous results will be overwritten and lost.
        if os.path.exists(self.storage):
            log.error("Analysis results folder already exists at path \"%s\", "
                      "analysis aborted", self.storage)
            return False

        # If we're not able to create the analysis storage folder, we have to
        # abort the analysis.
        try:
            Folders.create(self.storage)
        except CuckooOperationalError:
            log.error("Unable to create analysis folder %s", self.storage)
            return False

        self.store_task_info()

        if self.task.category == "file" or self.task.category == "archive":
            # Check if we have permissions to access the file.
            # And fail this analysis if we don't have access to the file.
            if not os.access(self.task.target, os.R_OK):
                log.error(
                    "Unable to access target file, please check if we have "
                    "permissions to access the file: \"%s\"",
                    self.task.target
                )
                return False

            # Check whether the file has been changed for some unknown reason.
            # And fail this analysis if it has been modified.
            # TODO Absorb the file upon submission.
            sample = self.db.view_sample(self.task.sample_id)
            sha256 = File(self.task.target).get_sha256()
            if sha256 != sample.sha256:
                log.error(
                    "Target file has been modified after submission: \"%s\"",
                    self.task.target
                )
                return False

            # Store a copy of the original file if does not exist already.
            # TODO This should be done at submission time.
            self.binary = cwd("storage", "binaries", sha256)
            if not os.path.exists(self.binary):
                try:
                    shutil.copy(self.task.target, self.binary)
                except (IOError, shutil.Error):
                    log.error(
                        "Unable to store file from \"%s\" to \"%s\", "
                        "analysis aborted", self.task.target, self.binary
                    )
                    return False

            # Each analysis directory contains a symlink/copy of the binary.
            try:
                self.storage_binary = os.path.join(self.storage, "binary")

                if hasattr(os, "symlink"):
                    os.symlink(self.binary, self.storage_binary)
                else:
                    shutil.copy(self.binary, self.storage_binary)
            except (AttributeError, IOError, OSError, shutil.Error) as e:
                # The copy fallback fails with IOError/shutil.Error (as
                # handled for the copy above); on Python 2 those are not
                # OSError subclasses, so they are listed explicitly. The
                # logged destination is the actual link/copy path.
                log.error("Unable to create symlink/copy from \"%s\" to "
                          "\"%s\": %s", self.binary, self.storage_binary, e)
                return False

        # Initiates per-task logging.
        task_log_start(self.task.id)
        return True

    def store_task_info(self):
        """grab latest task from db (if available) and update self.task"""
        dbtask = self.db.view_task(self.task.id)
        self.task = dbtask.to_dict()

        task_info_path = os.path.join(self.storage, "task.json")
        open(task_info_path, "w").write(dbtask.to_json())

    def acquire_machine(self):
        """Wait for a suitable analysis machine and store it in self.machine.

        Retries once per second until a machine has been acquired. Note that
        machine_lock is still held when this method returns successfully; it
        is released by launch_analysis().
        """
        while True:
            machine_lock.acquire()

            # We may get here while no machine is available at all; back off
            # instead of letting the analysis task fail completely.
            if not machinery.availables():
                machine_lock.release()
                time.sleep(1)
                continue

            # Honour a requested machine ID, platform or set of machine tags.
            acquired = machinery.acquire(
                machine_id=self.task.machine,
                platform=self.task.platform,
                tags=self.task.tags
            )

            if acquired:
                log.info(
                    "Task #%d: acquired machine %s (label=%s)",
                    self.task.id, acquired.name, acquired.label, extra={
                        "action": "vm.acquire",
                        "status": "success",
                        "vmname": acquired.name,
                    }
                )
                break

            # Nothing matching right now: wait a second and try again.
            machine_lock.release()
            log.debug("Task #%d: no machine available yet", self.task.id)
            time.sleep(1)

        self.machine = acquired

    def build_options(self):
        """Assemble the analysis options from the task, the acquired machine
        and the configuration.

        For "file" tasks this also stores an "apk_entry" value in the task's
        own options.
        @return: options dict.
        """
        task = self.task
        options = {}

        if task.category == "file":
            options["file_name"] = File(task.target).get_name()
            options["file_type"] = File(task.target).get_type()
            exported = File(task.target).get_exported_functions()
            options["pe_exports"] = ",".join(exported)

            apk_entry = File(task.target).get_apk_entry()
            apk_package, apk_activity = apk_entry
            task.options["apk_entry"] = "%s:%s" % (apk_package, apk_activity)
        elif task.category == "archive":
            options["file_name"] = File(task.target).get_name()

        options.update({
            "id": task.id,
            "ip": self.machine.resultserver_ip,
            "port": self.machine.resultserver_port,
            "category": task.category,
            "target": task.target,
            "package": task.package,
            "options": emit_options(task.options),
            "enforce_timeout": task.enforce_timeout,
            "clock": task.clock,
            "terminate_processes": self.cfg.cuckoo.terminate_processes,
        })

        # Fall back to the configured default when the task has no timeout.
        options["timeout"] = task.timeout or self.cfg.timeouts.default

        # copy in other analyzer specific options, TEMPORARY (most likely)
        vm_options = getattr(machinery.options, self.machine.name)
        options.update(
            (key, vm_options[key])
            for key in vm_options if key.startswith("analyzer_")
        )

        return options

    def route_network(self):
        """Enable network routing if desired.

        The route is taken from the task option "route", falling back to the
        "routing:routing:route" configuration value. Depending on the route
        this sets self.interface and self.rt_table, issues the matching
        rooter commands and finally stores the route that was actually taken
        in the database. Whenever the requested route cannot be used, the
        route falls back to "none" (also in the task options).
        """
        # Determine the desired routing strategy (none, internet, VPN).
        self.route = self.task.options.get(
            "route", config("routing:routing:route")
        )

        if self.route == "none" or self.route == "drop":
            # No forwarding interface or routing table for these routes.
            self.interface = None
            self.rt_table = None
        elif self.route == "inetsim":
            # interface/rt_table are left untouched; see "inetsim_enable"
            # further down.
            pass
        elif self.route == "tor":
            # interface/rt_table are left untouched; see "tor_enable"
            # further down.
            pass
        elif self.route == "internet":
            if config("routing:routing:internet") == "none":
                # Internet routing requested but no interface configured.
                log.warning(
                    "Internet network routing has been specified, but not "
                    "configured, ignoring routing for this analysis", extra={
                        "action": "network.route",
                        "status": "error",
                        "route": self.route,
                    }
                )
                self.route = "none"
                self.task.options["route"] = "none"
                self.interface = None
                self.rt_table = None
            else:
                self.interface = config("routing:routing:internet")
                self.rt_table = config("routing:routing:rt_table")
        elif self.route in config("routing:vpn:vpns"):
            # The route names a configured VPN; its interface and routing
            # table come from the configuration section of the same name.
            self.interface = config("routing:%s:interface" % self.route)
            self.rt_table = config("routing:%s:rt_table" % self.route)
        else:
            log.warning(
                "Unknown network routing destination specified, ignoring "
                "routing for this analysis: %r", self.route, extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                }
            )
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # Check if the network interface is still available. If a VPN dies for
        # some reason, its tunX interface will no longer be available.
        if self.interface and not rooter("nic_available", self.interface):
            log.error(
                "The network interface '%s' configured for this analysis is "
                "not available at the moment, switching to route=none mode.",
                self.interface, extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                }
            )
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # For now this doesn't work yet in combination with tor routing.
        if self.route == "drop" or self.route == "internet":
            rooter(
                "drop_enable", self.machine.ip,
                config("cuckoo:resultserver:ip"),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "inetsim":
            # NOTE: this local holds the configured machinery *name* and
            # shadows the `machinery` global used by other methods of this
            # class for the rest of this function.
            machinery = config("cuckoo:cuckoo:machinery")
            rooter(
                "inetsim_enable", self.machine.ip,
                config("routing:inetsim:server"),
                config("%s:%s:interface" % (machinery, machinery)),
                str(config("cuckoo:resultserver:port")),
                config("routing:inetsim:ports") or ""
            )

        if self.route == "tor":
            rooter(
                "tor_enable", self.machine.ip,
                str(config("cuckoo:resultserver:ip")),
                str(config("routing:tor:dnsport")),
                str(config("routing:tor:proxyport"))
            )

        # Forwarding and source routing only apply when an interface / a
        # routing table has been selected above (internet or VPN routes).
        if self.interface:
            rooter(
                "forward_enable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter(
                "srcroute_enable", self.rt_table, self.machine.ip
            )

        # Propagate the taken route to the database.
        self.db.set_route(self.task.id, self.route)

    def unroute_network(self):
        """Disable any enabled network routing."""
        if self.interface:
            rooter(
                "forward_disable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter(
                "srcroute_disable", self.rt_table, self.machine.ip
            )

        if self.route == "drop" or self.route == "internet":
            rooter(
                "drop_disable", self.machine.ip,
                config("cuckoo:resultserver:ip"),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "inetsim":
            machinery = config("cuckoo:cuckoo:machinery")
            rooter(
                "inetsim_disable", self.machine.ip,
                config("routing:inetsim:server"),
                config("%s:%s:interface" % (machinery, machinery)),
                str(config("cuckoo:resultserver:port")),
                config("routing:inetsim:ports") or ""
            )

        if self.route == "tor":
            rooter(
                "tor_disable", self.machine.ip,
                str(config("cuckoo:resultserver:ip")),
                str(config("routing:tor:dnsport")),
                str(config("routing:tor:proxyport"))
            )

    def wait_finish(self):
        """Some VMs don't have an actual agent. Mainly those that are used as
        assistance for an analysis through the services auxiliary module. This
        method just waits until the analysis is finished rather than actively
        trying to engage with the Cuckoo Agent."""
        self.db.guest_set_status(self.task.id, "running")
        while self.db.guest_get_status(self.task.id) == "running":
            time.sleep(1)

    def guest_manage(self, options):
        # Handle a special case where we're creating a baseline report of this
        # particular virtual machine - a report containing all the results
        # that are gathered if no additional samples are ran in the VM. These
        # results, such as loaded drivers and opened sockets in volatility, or
        # DNS requests to hostnames related to Microsoft Windows, etc may be
        # omitted or at the very least given less priority when creating a
        # report for an analysis that ran on this VM later on.
        if self.task.category == "baseline":
            time.sleep(options["timeout"])
        else:
            # Start the analysis.
            self.db.guest_set_status(self.task.id, "starting")
            monitor = self.task.options.get("monitor", "latest")
            self.guest_manager.start_analysis(options, monitor)

            # In case the Agent didn't respond and we force-quit the analysis
            # at some point while it was still starting the analysis the state
            # will be "stop" (or anything but "running", really).
            if self.db.guest_get_status(self.task.id) == "starting":
                self.db.guest_set_status(self.task.id, "running")
                self.guest_manager.wait_for_completion()

            self.db.guest_set_status(self.task.id, "stopping")

    def launch_analysis(self):
        """Start analysis."""
        succeeded = False

        if self.task.category == "file" or self.task.category == "archive":
            target = os.path.basename(self.task.target)
        else:
            target = self.task.target

        log.info(
            "Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
            self.task.category.upper(), target, self.task.id,
            emit_options(self.task.options), extra={
                "action": "task.init",
                "status": "starting",
                "task_id": self.task.id,
                "target": target,
                "category": self.task.category,
                "package": self.task.package,
                "options": emit_options(self.task.options),
                "custom": self.task.custom,
            }
        )

        # Initialize the analysis.
        if not self.init():
            logger("Failed to initialize", action="task.init", status="error")
            return False

        # Acquire analysis machine.
        try:
            self.acquire_machine()
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Cannot acquire machine: %s", e, extra={
                "action": "vm.acquire", "status": "error",
            })
            return False

        # At this point we can tell the ResultServer about it.
        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            self.errors.put(e)

        # Initialize the guest manager.
        self.guest_manager = GuestManager(
            self.machine.name, self.machine.ip,
            self.machine.platform, self.task.id, self
        )

        self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
        self.aux.start()

        # Generate the analysis configuration file.
        options = self.build_options()

        # Check if the current task has remotecontrol
        # enabled before starting the machine.
        control_enabled = (
            config("cuckoo:remotecontrol:enabled") and
            "remotecontrol" in self.task.options
        )
        if control_enabled:
            try:
                machinery.enable_remote_control(self.machine.label)
            except NotImplementedError:
                raise CuckooMachineError(
                    "Remote control support has not been implemented "
                    "for this machinery."
                )

        try:
            unlocked = False
            self.interface = None

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id,
                                            self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            logger(
                "Starting VM",
                action="vm.start", status="pending",
                vmname=self.machine.name
            )

            # Start the machine.
            machinery.start(self.machine.label, self.task)

            logger(
                "Started VM",
                action="vm.start", status="success",
                vmname=self.machine.name
            )

            # retrieve the port used for remote control
            if control_enabled:
                try:
                    params = machinery.get_remote_control_params(
                        self.machine.label
                    )
                    self.db.set_machine_rcparams(self.machine.label, params)
                except NotImplementedError:
                    raise CuckooMachineError(
                        "Remote control support has not been implemented "
                        "for this machinery."
                    )

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            # Run and manage the components inside the guest unless this
            # machine has the "noagent" option specified (please refer to the
            # wait_finish() function for more details on this function).
            if "noagent" not in self.machine.options:
                self.guest_manage(options)
            else:
                self.wait_finish()

            succeeded = True
        except CuckooMachineSnapshotError as e:
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error starting Virtual Machine! VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.start",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooGuestCriticalTimeout as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s",
                self.machine.name,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                }
            )
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error from the Cuckoo Guest: %s", e, extra={
                "action": "guest.handle",
                "status": "error",
                "task_id": self.task.id,
            })
        finally:
            # Stop Auxiliary modules.
            self.aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                logger(
                    "Taking full memory dump",
                    action="vm.memdump", status="pending",
                    vmname=self.machine.name
                )
                try:
                    dump_path = os.path.join(self.storage, "memory.dmp")
                    machinery.dump_memory(self.machine.label, dump_path)

                    logger(
                        "Taken full memory dump",
                        action="vm.memdump", status="success",
                        vmname=self.machine.name
                    )
                except NotImplementedError:
                    log.error(
                        "The memory dump functionality is not available for "
                        "the current machine manager.", extra={
                            "action": "vm.memdump",
                            "status": "error",
                            "vmname": self.machine.name,
                        }
                    )
                except CuckooMachineError as e:
                    log.error("Machinery error: %s", e, extra={
                        "action": "vm.memdump",
                        "status": "error",
                    })

            logger(
                "Stopping VM",
                action="vm.stop", status="pending",
                vmname=self.machine.name
            )

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)
            except CuckooMachineError as e:
                log.warning(
                    "Unable to stop machine %s: %s",
                    self.machine.label, e, extra={
                        "action": "vm.stop",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

            logger(
                "Stopped VM",
                action="vm.stop", status="success",
                vmname=self.machine.name
            )

            # Disable remote control after stopping the machine
            # if it was enabled for the task.
            if control_enabled:
                try:
                    machinery.disable_remote_control(self.machine.label)
                except NotImplementedError:
                    raise CuckooMachineError(
                        "Remote control support has not been implemented "
                        "for this machinery."
                    )

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            self.unroute_network()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)
            except CuckooMachineError as e:
                log.error(
                    "Unable to release machine %s, reason %s. You might need "
                    "to restore it manually.", self.machine.label, e, extra={
                        "action": "vm.release",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

        return succeeded

    def process_results(self):
        """Run processing, signatures and reporting, then clean up files.

        For "file" tasks the submitted file and/or the stored copy of the
        binary are deleted afterwards when the delete_original and
        delete_bin_copy configuration options are enabled.

        @return: always True.
        """
        logger(
            "Starting task reporting",
            action="task.report", status="pending"
        )

        # TODO Refactor this function as currently "cuckoo process" has a 1:1
        # copy of its code. TODO Also remove "archive" files.
        processed = RunProcessing(task=self.task).run()
        RunSignatures(results=processed).run()
        RunReporting(task=self.task, results=processed).run()

        if self.task.category == "file":
            # Optionally delete the file that was originally submitted.
            if self.cfg.cuckoo.delete_original:
                original = self.task.target
                if os.path.exists(original):
                    try:
                        os.remove(original)
                    except OSError as err:
                        log.error(
                            'Unable to delete original file at path '
                            '"%s": %s', original, err
                        )
                else:
                    log.warning(
                        'Original file does not exist anymore: "%s": '
                        'File not found.', original
                    )

            # Optionally delete our own stored copy of the binary.
            if self.cfg.cuckoo.delete_bin_copy:
                stored_copy = self.binary
                if os.path.exists(stored_copy):
                    try:
                        os.remove(stored_copy)
                    except OSError as err:
                        log.error(
                            'Unable to delete the copy of the original file '
                            'at path "%s": %s', stored_copy, err
                        )
                else:
                    log.warning(
                        'Copy of the original file does not exist anymore: '
                        '"%s": File not found', stored_copy
                    )

                # The analysis directory holds a link to the stored copy; if
                # that link now points nowhere, remove it as well.
                link = self.storage_binary
                dangling = os.path.islink(link) and not os.path.exists(link)
                if dangling:
                    try:
                        os.remove(link)
                    except OSError as err:
                        log.error(
                            'Unable to delete symlink to the binary copy at '
                            'path "%s": %s', link, err
                        )

        log.info(
            "Task #%d: reports generation completed",
            self.task.id, extra={
                "action": "task.report",
                "status": "success",
            }
        )

        return True

    def run(self):
        """Thread entry point: analyze the task and optionally report it.

        Any exception raised along the way is logged rather than propagated.
        The final task status is always written and per-task logging is
        always stopped, whatever happened before.
        """
        global active_analysis_count
        active_analysis_count += 1
        try:
            self.launch_analysis()

            log.debug("Released database task #%d", self.task.id)

            if self.cfg.cuckoo.process_results:
                self.store_task_info()
                self.db.set_status(self.task.id, TASK_COMPLETED)
                # TODO If self.process_results() is unified with apps.py's
                # process() method, then ensure that TASK_FAILED_PROCESSING is
                # handled correctly and not overwritten by the db.set_status()
                # at the end of this method.
                self.process_results()

            # Maintain a "latest" symbolic link pointing at the most recent
            # analysis (handy when debugging). Only possible on platforms
            # that offer os.symlink.
            if hasattr(os, "symlink"):
                latest_link = cwd("storage", "analyses", "latest")

                # Replacing the link is a remove-then-create sequence, so it
                # is serialized with a lock to avoid races between threads.
                latest_symlink_lock.acquire()
                try:
                    # lexists() is also True for dead symbolic links.
                    if os.path.lexists(latest_link):
                        os.remove(latest_link)

                    os.symlink(self.storage, latest_link)
                except OSError as err:
                    log.warning(
                        "Error pointing latest analysis symlink: %s" % err
                    )
                finally:
                    latest_symlink_lock.release()

            # Rewrite task.json so that it holds the latest task data.
            self.store_task_info()
            log.info(
                "Task #%d: analysis procedure completed",
                self.task.id, extra={
                    "action": "task.stop",
                    "status": "success",
                }
            )
        except:
            log.exception("Failure in AnalysisManager.run", extra={
                "action": "task.stop",
                "status": "error",
            })
        finally:
            final_status = (
                TASK_REPORTED if self.cfg.cuckoo.process_results
                else TASK_COMPLETED
            )
            self.db.set_status(self.task.id, final_status)
            task_log_stop(self.task.id)
            active_analysis_count -= 1
Exemplo n.º 5
0
class DatabaseEngine(object):
    """Database test cases that run against the DSN stored in URI.

    URI is None here; presumably subclasses override it to select a concrete
    database backend (TODO confirm, no subclass is visible in this section).
    """
    URI = None

    def setup_class(self):
        """Call set_cwd() with a fresh temp directory and connect to URI."""
        set_cwd(tempfile.mkdtemp())

        self.d = Database()
        self.d.connect(dsn=self.URI)

    def add_url(self, url, priority=1, status="pending"):
        """Add a URL task, force its status and return the new task ID.

        @param url: URL to submit.
        @param priority: priority passed on to Database.add_url().
        @param status: status the task is set to after creation.
        @return: ID of the created task.
        """
        task_id = self.d.add_url(url, priority=priority)
        self.d.set_status(task_id, status)
        return task_id

    def test_add_tasks(self):
        """Adding a file path and adding a URL each create one Task row."""
        fd, sample_path = tempfile.mkstemp()
        os.write(fd, "hehe")
        os.close(fd)

        # Add task.
        count = self.d.Session().query(Task).count()
        self.d.add_path(sample_path)
        assert self.d.Session().query(Task).count() == count + 1

        # Add url.
        self.d.add_url("http://foo.bar")
        assert self.d.Session().query(Task).count() == count + 2

    def test_processing_get_task(self):
        """processing_get_task() returns completed tasks by priority.

        Higher priorities come first; tasks of equal priority come back in
        the order they were added, and None is returned once all are taken.
        """
        # First reset all existing rows so that earlier exceptions don't affect
        # this unit test run.
        null, session = None, self.d.Session()

        session.query(Task).filter(Task.status == "completed",
                                   Task.processing == null).update({
                                       "processing":
                                       "something",
                                   })
        session.commit()

        t1 = self.add_url("http://google.com/1",
                          priority=1,
                          status="completed")
        t2 = self.add_url("http://google.com/2",
                          priority=2,
                          status="completed")
        t3 = self.add_url("http://google.com/3",
                          priority=1,
                          status="completed")
        t4 = self.add_url("http://google.com/4",
                          priority=1,
                          status="completed")
        t5 = self.add_url("http://google.com/5",
                          priority=3,
                          status="completed")
        t6 = self.add_url("http://google.com/6",
                          priority=1,
                          status="completed")
        t7 = self.add_url("http://google.com/7",
                          priority=1,
                          status="completed")

        assert self.d.processing_get_task("foo") == t5
        assert self.d.processing_get_task("foo") == t2
        assert self.d.processing_get_task("foo") == t1
        assert self.d.processing_get_task("foo") == t3
        assert self.d.processing_get_task("foo") == t4
        assert self.d.processing_get_task("foo") == t6
        assert self.d.processing_get_task("foo") == t7
        assert self.d.processing_get_task("foo") is None

    def test_error_exists(self):
        """Every add_error() call adds an entry, even for equal messages."""
        task_id = self.add_url("http://google.com/")
        self.d.add_error("A" * 1024, task_id)
        assert len(self.d.view_errors(task_id)) == 1
        self.d.add_error("A" * 1024, task_id)
        assert len(self.d.view_errors(task_id)) == 2

    def test_long_error(self):
        """A 1024 character error message is stored in full."""
        # Attach the error to the task created right here instead of assuming
        # that a task with ID 1 exists and carries no earlier errors.
        task_id = self.add_url("http://google.com/")
        self.d.add_error("A" * 1024, task_id)
        err = self.d.view_errors(task_id)
        assert err and len(err[0].message) == 1024

    def test_submit(self):
        """view_submit() returns what was handed to add_submit()."""
        dirpath = tempfile.mkdtemp()
        submit_id = self.d.add_submit(dirpath, "files", {
            "foo": "bar",
        })
        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        assert submit.tmp_path == dirpath
        assert submit.submit_type == "files"
        assert submit.data == {
            "foo": "bar",
        }

    def test_connect_no_create(self):
        """connect(create=False) does not recreate a dropped table.

        The alembic_version table is dropped first; it stays absent after
        connecting with create=False and is back after a default connect().
        """
        AlembicVersion.__table__.drop(self.d.engine)
        self.d.connect(dsn=self.URI, create=False)
        assert "alembic_version" not in self.d.engine.table_names()
        self.d.connect(dsn=self.URI)
        assert "alembic_version" in self.d.engine.table_names()

    def test_view_submit_tasks(self):
        """view_submit() only makes the tasks usable with tasks=True."""
        submit_id = self.d.add_submit(None, None, None)
        t1 = self.d.add_path(__file__, custom="1", submit_id=submit_id)
        t2 = self.d.add_path(__file__, custom="2", submit_id=submit_id)

        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        # Without tasks=True, touching the relationship must fail.
        with pytest.raises(DetachedInstanceError):
            print(submit.tasks)

        submit = self.d.view_submit(submit_id, tasks=True)
        assert len(submit.tasks) == 2
        tasks = sorted((task.id, task) for task in submit.tasks)
        assert tasks[0][1].id == t1
        assert tasks[0][1].custom == "1"
        assert tasks[1][1].id == t2
        assert tasks[1][1].custom == "2"

    def test_add_reboot(self):
        """A reboot task keeps the submit ID and stores the source task ID.

        The ID of the original task ends up, as a string, in the custom field.
        """
        t0 = self.d.add_path(__file__)
        s0 = self.d.add_submit(None, None, None)
        t1 = self.d.add_reboot(task_id=t0, submit_id=s0)

        t = self.d.view_task(t1)
        assert t.custom == "%s" % t0
        assert t.submit_id == s0

    def test_task_set_options(self):
        """Options given as a dict or as "key=value" yield the same dict."""
        t0 = self.d.add_path(__file__, options={"foo": "bar"})
        t1 = self.d.add_path(__file__, options="foo=bar")
        assert self.d.view_task(t0).options == {"foo": "bar"}
        assert self.d.view_task(t1).options == {"foo": "bar"}

    def test_task_tags_str(self):
        """A comma separated tag string yields one tag per non-empty item."""
        task = self.d.add_path(__file__, tags="foo,,bar")
        tag0, tag1 = self.d.view_task(task).tags
        assert sorted((tag0.name, tag1.name)) == ["bar", "foo"]

    def test_task_tags_list(self):
        """Of a mixed tag list only the three non-empty strings are kept."""
        task = self.d.add_path(__file__, tags=["tag1", "tag2", "", 1, "tag3"])
        tag0, tag1, tag2 = self.d.view_task(task).tags
        assert sorted(
            (tag0.name, tag1.name, tag2.name)) == ["tag1", "tag2", "tag3"]

    def test_error_action(self):
        """The optional action of an error is stored and is None if omitted."""
        task_id = self.d.add_path(__file__)
        self.d.add_error("message1", task_id)
        self.d.add_error("message2", task_id, "actionhere")
        e1, e2 = self.d.view_errors(task_id)
        assert e1.message == "message1"
        assert e1.action is None
        assert e2.message == "message2"
        assert e2.action == "actionhere"

    def test_view_tasks(self):
        """view_tasks() returns the same data as view_task() per task ID."""
        t1 = self.d.add_path(__file__)
        t2 = self.d.add_url("http://google.com/")
        tasks = self.d.view_tasks([t1, t2])
        assert tasks[0].to_dict() == self.d.view_task(t1).to_dict()
        assert tasks[1].to_dict() == self.d.view_task(t2).to_dict()

    def test_add_machine(self):
        """Machine options are normalized to a list.

        None and "" become an empty list, a space separated string is split
        and a list is kept as is.
        """
        self.d.add_machine("name1", "label", "1.2.3.4", "windows", None,
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043)
        self.d.add_machine("name2", "label", "1.2.3.4", "windows", "",
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043)
        self.d.add_machine("name3", "label", "1.2.3.4", "windows", "opt1 opt2",
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043)
        self.d.add_machine("name4", "label", "1.2.3.4", "windows",
                           ["opt3", "opt4"], "tag1 tag2", "int0", "snap0",
                           "5.6.7.8", 2043)
        m1 = self.d.view_machine("name1")
        m2 = self.d.view_machine("name2")
        m3 = self.d.view_machine("name3")
        m4 = self.d.view_machine("name4")
        assert m1.options == []
        assert m2.options == []
        assert m3.options == ["opt1", "opt2"]
        assert m4.options == ["opt3", "opt4"]

    def test_set_machine_rcparams(self):
        """Remote control parameters are read back with string values.

        The port is passed in as the integer 3389 and comes back as "3389".
        """
        self.d.add_machine("name5", "label5", "1.2.3.4", "windows", None,
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043)

        self.d.set_machine_rcparams("label5", {
            "protocol": "rdp",
            "host": "127.0.0.1",
            "port": 3389,
        })

        m = self.d.view_machine("name5")
        assert m.rcparams == {
            "protocol": "rdp",
            "host": "127.0.0.1",
            "port": "3389",
        }

    @mock.patch("cuckoo.common.objects.magic")
    def test_add_sample(self, p):
        """add_path() still works when magic.from_file() returns "".

        @param p: mock replacing cuckoo.common.objects.magic.
        """
        p.from_file.return_value = ""
        assert self.d.add_path(Files.temp_put(os.urandom(16))) is not None
Exemplo n.º 6
0
class AnalysisManager(threading.Thread):
    """Analysis Manager.

    This class handles the full analysis process for a given task. It takes
    care of selecting the analysis machine, preparing the configuration and
    interacting with the guest agent and analyzer components to launch and
    complete the analysis and store, process and report its results.
    """

    def __init__(self, task_id, error_queue):
        """Create the manager thread for one task.

        @param task_id: ID of the task to analyze; the task itself is loaded
            from the database.
        @param error_queue: queue that errors are put on while the analysis
            is being launched.
        """
        threading.Thread.__init__(self)

        self.errors = error_queue
        self.cfg = Config()

        # Paths and machine details are filled in once the analysis starts.
        self.storage = ""
        self.binary = ""
        self.storage_binary = ""
        self.machine = None

        self.db = Database()
        self.task = self.db.view_task(task_id)
        self.guest_manager = None

        # Network routing state, see route_network().
        self.route = None
        self.interface = None
        self.rt_table = None

        # Whether the "vnc" task option was set to a true value.
        self.is_vnc = bool(self.task.options.get("vnc", False))


    def init(self):
        """Prepare the storage folder and the sample for this analysis.

        @return: True when everything is in place, False when the analysis
            has to be aborted (the reason is logged).
        """
        self.storage = cwd(analysis=self.task.id)

        # Never reuse an existing results folder: earlier results would be
        # overwritten and lost.
        if os.path.exists(self.storage):
            log.error(
                'Analysis results folder already exists at path "%s", '
                'analysis aborted', self.storage
            )
            return False

        # Without a results folder there is no point in going on.
        try:
            Folders.create(self.storage)
        except CuckooOperationalError:
            log.error("Unable to create analysis folder %s", self.storage)
            return False

        self.store_task_info()

        if self.task.category in ("file", "archive"):
            # The target must be readable by us.
            readable = os.access(self.task.target, os.R_OK)
            if not readable:
                log.error(
                    'Unable to access target file, please check if we have '
                    'permissions to access the file: "%s"',
                    self.task.target
                )
                return False

            # The target must still be the file that was submitted: compare
            # its current hash with the one recorded for the sample.
            # TODO Absorb the file upon submission.
            db_sample = self.db.view_sample(self.task.sample_id)
            digest = File(self.task.target).get_sha256()
            if digest != db_sample.sha256:
                log.error(
                    'Target file has been modified after submission: "%s"',
                    self.task.target
                )
                return False

            # Keep one copy of every binary, named after its hash.
            # TODO This should be done at submission time.
            self.binary = cwd("storage", "binaries", digest)
            if not os.path.exists(self.binary):
                try:
                    shutil.copy(self.task.target, self.binary)
                except (IOError, shutil.Error):
                    log.error(
                        'Unable to store file from "%s" to "%s", '
                        'analysis aborted', self.task.target, self.binary
                    )
                    return False

            # The analysis folder gets a symlink to that copy, or a real
            # copy on platforms without symlink support.
            try:
                self.storage_binary = os.path.join(self.storage, "binary")

                place_binary = (
                    os.symlink if hasattr(os, "symlink") else shutil.copy
                )
                place_binary(self.binary, self.storage_binary)
            except (AttributeError, OSError) as err:
                log.error(
                    'Unable to create symlink/copy from "%s" to "%s": %s',
                    self.binary, self.storage, err
                )
                return False

        # Initiates per-task logging.
        task_log_start(self.task.id)
        return True

    def store_task_info(self):
        """Reload the task from the database and write it to task.json.

        self.task is replaced by the to_dict() form of the database task and
        its to_json() form is written to "task.json" inside self.storage.
        """
        dbtask = self.db.view_task(self.task.id)
        self.task = dbtask.to_dict()

        task_info_path = os.path.join(self.storage, "task.json")
        # Close the file deterministically instead of relying on the garbage
        # collector to flush and release the handle.
        with open(task_info_path, "w") as task_info:
            task_info.write(dbtask.to_json())

    def acquire_machine(self):
        """Block until a suitable machine is acquired; store it on self.

        machine_lock is still held when this method returns; it is released
        later on by launch_analysis().
        """
        while True:
            machine_lock.acquire()

            # We may get here while no machine is available at all. Back off
            # and retry, otherwise the analysis task would fail completely.
            if not machinery.availables():
                machine_lock.release()
                time.sleep(1)
                continue

            # Honour a specific machine ID, platform or machine tags if the
            # user asked for them.
            candidate = machinery.acquire(
                machine_id=self.task.machine,
                platform=self.task.platform,
                tags=self.task.tags
            )
            if candidate:
                # Leave the loop with the lock still held.
                break

            # Nothing matching right now: retry in a second.
            machine_lock.release()
            log.debug("Task #%d: no machine available yet", self.task.id)
            time.sleep(1)

        log.info(
            "Task #%d: acquired machine %s (label=%s)",
            self.task.id, candidate.name, candidate.label, extra={
                "action": "vm.acquire",
                "status": "success",
                "vmname": candidate.name,
            }
        )

        self.machine = candidate

    def build_options(self):
        """Assemble the options for the analysis of this task.

        For "file" tasks this also stores an "apk_entry" value in the task's
        own options before they are emitted.

        @return: options dict.
        """
        options = {}
        category = self.task.category

        if category == "file":
            target = self.task.target
            options["file_name"] = File(target).get_name()
            options["file_type"] = File(target).get_type()
            exported = File(target).get_exported_functions()
            options["pe_exports"] = ",".join(exported)

            apk_package, apk_activity = File(target).get_apk_entry()
            self.task.options["apk_entry"] = "%s:%s" % (
                apk_package, apk_activity
            )
        elif category == "archive":
            options["file_name"] = File(self.task.target).get_name()

        # Settings taken over from the task, the machine and the config.
        options.update([
            ("id", self.task.id),
            ("ip", self.machine.resultserver_ip),
            ("port", self.machine.resultserver_port),
            ("category", category),
            ("target", self.task.target),
            ("package", self.task.package),
            ("options", emit_options(self.task.options)),
            ("enforce_timeout", self.task.enforce_timeout),
            ("clock", self.task.clock),
            ("vnc", self.task.vnc),
            ("terminate_processes", self.cfg.cuckoo.terminate_processes),
        ])

        # Use the configured default when the task has no timeout of its own.
        options["timeout"] = self.task.timeout or self.cfg.timeouts.default

        # copy in other analyzer specific options, TEMPORARY (most likely)
        vm_options = getattr(machinery.options, self.machine.name)
        options.update(
            (key, vm_options[key])
            for key in vm_options if key.startswith("analyzer_")
        )

        log.info(" [*] build_options() - options built:\n %s", str(options))
        return options

    def route_network(self):
        """Set up the network routing requested for this analysis.

        The route comes from the task's "route" option, falling back to the
        configured default. Unknown, unconfigured or unavailable routes fall
        back to "none". The route finally used is stored in the database.
        """
        def disable_routing():
            """Fall back to route=none, for this manager and for the task."""
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # Determine the desired routing strategy (none, internet, VPN).
        self.route = self.task.options.get(
            "route", config("routing:routing:route")
        )

        if self.route in ("none", "drop"):
            self.interface = None
            self.rt_table = None
        elif self.route in ("inetsim", "tor"):
            # These need no interface or routing table of their own.
            pass
        elif self.route == "internet":
            if config("routing:routing:internet") != "none":
                self.interface = config("routing:routing:internet")
                self.rt_table = config("routing:routing:rt_table")
            else:
                log.warning(
                    "Internet network routing has been specified, but not "
                    "configured, ignoring routing for this analysis", extra={
                        "action": "network.route",
                        "status": "error",
                        "route": self.route,
                    }
                )
                disable_routing()
        elif self.route in config("routing:vpn:vpns"):
            self.interface = config("routing:%s:interface" % self.route)
            self.rt_table = config("routing:%s:rt_table" % self.route)
        else:
            log.warning(
                "Unknown network routing destination specified, ignoring "
                "routing for this analysis: %r", self.route, extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                }
            )
            disable_routing()

        # The interface may have vanished in the meantime; a VPN that died,
        # for instance, takes its tunX interface down with it.
        if self.interface and not rooter("nic_available", self.interface):
            log.error(
                "The network interface '%s' configured for this analysis is "
                "not available at the moment, switching to route=none mode.",
                self.interface, extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                }
            )
            disable_routing()

        # For now this doesn't work yet in combination with tor routing.
        if self.route in ("drop", "internet"):
            rooter(
                "drop_enable", self.machine.ip,
                config("cuckoo:resultserver:ip"),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "inetsim":
            machinery_name = config("cuckoo:cuckoo:machinery")
            rooter(
                "inetsim_enable", self.machine.ip,
                config("routing:inetsim:server"),
                config("%s:%s:interface" % (machinery_name, machinery_name)),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "tor":
            rooter(
                "tor_enable", self.machine.ip,
                str(config("cuckoo:resultserver:ip")),
                str(config("routing:tor:dnsport")),
                str(config("routing:tor:proxyport"))
            )

        if self.interface:
            rooter(
                "forward_enable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter("srcroute_enable", self.rt_table, self.machine.ip)

        # Propagate the taken route to the database.
        self.db.set_route(self.task.id, self.route)

    def unroute_network(self):
        """Undo the routing rules that apply to this analysis.

        Forwarding and source routing are only disabled when an interface or
        routing table is set; the drop rule is disabled for every route other
        than "none".
        """
        if self.interface:
            rooter(
                "forward_disable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter("srcroute_disable", self.rt_table, self.machine.ip)

        if self.route != "none":
            rooter(
                "drop_disable", self.machine.ip,
                config("cuckoo:resultserver:ip"),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "inetsim":
            machinery_name = config("cuckoo:cuckoo:machinery")
            rooter(
                "inetsim_disable", self.machine.ip,
                config("routing:inetsim:server"),
                config("%s:%s:interface" % (machinery_name, machinery_name)),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "tor":
            rooter(
                "tor_disable", self.machine.ip,
                str(config("cuckoo:resultserver:ip")),
                str(config("routing:tor:dnsport")),
                str(config("routing:tor:proxyport"))
            )

    def wait_finish(self):
        """Wait for an agent-less analysis to leave the "running" state.

        Some VMs don't have an actual agent, mainly those that assist an
        analysis through the services auxiliary module. Rather than talking
        to the Cuckoo Agent, this marks the guest as "running" and polls its
        status once per second until it is something else.
        """
        self.db.guest_set_status(self.task.id, "running")
        while True:
            if self.db.guest_get_status(self.task.id) != "running":
                break
            time.sleep(1)

    def guest_manage(self, options):
        """Run the analysis inside the guest and track its status.

        @param options: analysis options; "timeout" is read for baseline
            tasks, and the whole dict is handed to the guest manager.
        """
        # Special case: a baseline report of this particular virtual machine,
        # i.e. everything that is gathered when no sample is ran in the VM at
        # all. Such results (loaded drivers and opened sockets in volatility,
        # DNS requests to hostnames related to Microsoft Windows, etc) may be
        # omitted or given less priority when reporting on a later analysis
        # that ran on this VM. Nothing is started; we only wait.
        if self.task.category == "baseline":
            time.sleep(options["timeout"])
            return

        # Start the analysis.
        self.db.guest_set_status(self.task.id, "starting")
        monitor_version = self.task.options.get("monitor", "latest")
        self.guest_manager.start_analysis(options, monitor_version)

        # If the Agent didn't respond and the analysis was force-quit while
        # still starting, the state is "stop" (or anything but "running",
        # really) and there is nothing to wait for.
        still_starting = self.db.guest_get_status(self.task.id) == "starting"
        if still_starting:
            self.db.guest_set_status(self.task.id, "running")
            self.guest_manager.wait_for_completion()

        if not self.is_vnc:
            self.db.guest_set_status(self.task.id, "stopping")
            return

        self.guest_manager.start_vnc(options, monitor_version)
        self.db.guest_set_status(self.task.id, "vnc")

    def launch_analysis(self):
        """Run one analysis from machine acquisition up to machine release.

        The method initializes the analysis, acquires a machine, registers
        the task with the ResultServer, starts the auxiliary modules and the
        Virtual Machine, and then either drives the guest through
        ``guest_manage()`` or, for machines carrying the "noagent" option,
        calls ``wait_finish()``. The ``finally`` clause performs the
        teardown: auxiliary modules are stopped, an optional memory dump is
        taken, the VM is stopped (or, in VNC mode, results are processed and
        the method polls until the VM is no longer running), the guest entry
        is closed, the ResultServer forgets the task, routing is dropped and
        the machine is released.

        Returns:
            True when the guest phase completed without raising one of the
            Cuckoo exceptions handled below; False when one of them was
            raised, when ``init()`` failed, or when no machine could be
            acquired.
        """
        succeeded = False

        # For file-like targets only the file name is logged, not the path.
        if self.task.category == "file" or self.task.category == "archive":
            target = os.path.basename(self.task.target)
        else:
            target = self.task.target

        log.info(
            "Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
            self.task.category.upper(), target, self.task.id,
            emit_options(self.task.options), extra={
                "action": "task.init",
                "status": "starting",
                "task_id": self.task.id,
                "target": target,
                "category": self.task.category,
                "package": self.task.package,
                "options": emit_options(self.task.options),
                "custom": self.task.custom,
            }
        )

        # Initialize the analysis.
        if not self.init():
            logger("Failed to initialize", action="task.init", status="error")
            return False

        # Acquire analysis machine.
        try:
            self.acquire_machine()
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Cannot acquire machine: %s", e, extra={
                "action": "vm.acquire", "status": "error",
            })
            return False

        # At this point we can tell the ResultServer about it.
        # NOTE(review): on failure the machine is released and the error is
        # queued, but execution continues with the (released) machine below.
        # Confirm this is intended rather than a missing early return.
        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            self.errors.put(e)

        # Initialize the guest manager.
        self.guest_manager = GuestManager(
            self.machine.name, self.machine.ip,
            self.machine.platform, self.task.id, self
        )

        self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
        self.aux.start()

        # Generate the analysis configuration file.
        options = self.build_options()

        try:
            # Tracks whether machine_lock has already been released, so the
            # exception handlers below do not release it a second time.
            unlocked = False
            self.interface = None

            # Mark the selected analysis machine in the database as started.
            # NOTE(review): guest_log is first bound here, inside the try
            # block, but it is read in the finally clause. If guest_start()
            # raises, the finally clause would hit an unbound name.
            guest_log = self.db.guest_start(self.task.id,
                                            self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            logger(
                "Starting VM",
                action="vm.start", status="pending",
                vmname=self.machine.name
            )

            # Start the Virtual Machine.
            machinery.start(self.machine.label, self.task)

            logger(
                "Started VM",
                action="vm.start", status="success",
                vmname=self.machine.name
            )

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            # Run and manage the components inside the guest unless this
            # machine has the "noagent" option specified (please refer to the
            # wait_finish() function for more details on this function).
            if "noagent" not in self.machine.options:
                self.guest_manage(options)
            else:
                self.wait_finish()

            succeeded = True
        except CuckooMachineSnapshotError as e:
            # NOTE(review): unlike the handlers below, this one does not
            # release machine_lock when it is still held. Confirm whether
            # that is deliberate or a lock leak.
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error starting Virtual Machine! VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.start",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooGuestCriticalTimeout as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s",
                self.machine.name,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                }
            )
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error from the Cuckoo Guest: %s", e, extra={
                "action": "guest.handle",
                "status": "error",
                "task_id": self.task.id,
            })
        finally:
            # Teardown runs whether or not the analysis succeeded.

            # Stop Auxiliary modules.
            self.aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                logger(
                    "Taking full memory dump",
                    action="vm.memdump", status="pending",
                    vmname=self.machine.name
                )
                try:
                    dump_path = os.path.join(self.storage, "memory.dmp")
                    machinery.dump_memory(self.machine.label, dump_path)

                    logger(
                        "Taken full memory dump",
                        action="vm.memdump", status="success",
                        vmname=self.machine.name
                    )
                except NotImplementedError:
                    log.error(
                        "The memory dump functionality is not available for "
                        "the current machine manager.", extra={
                            "action": "vm.memdump",
                            "status": "error",
                            "vmname": self.machine.name,
                        }
                    )
                except CuckooMachineError as e:
                    log.error("Machinery error: %s", e, extra={
                        "action": "vm.memdump",
                        "status": "error",
                    })

            logger(
                "Stopping VM",
                action="vm.stop", status="pending",
                vmname=self.machine.name
            )

            try:
                # Stop the analysis machine.
                # NOTE(review): the string literal below is disabled code
                # left behind as a bare expression statement; it has no
                # runtime effect.
                """
                while machinery.is_running(self.machine.label) and self.is_vnc:
                    time.sleep(5)
                    log.info(" [*] machinery.is_running - Post analysis ")
                """
                # This message is logged unconditionally; no is_running()
                # check precedes it.
                log.info(" [*] machinery.is_running = FALSE Done ")
                if self.is_vnc:
                    # VNC mode: the VM is not stopped here. The task is
                    # completed and reported first, then this thread polls
                    # until the machinery reports the VM as not running.
                    self.db.set_status(self.task.id, TASK_COMPLETED)

                    if self.cfg.cuckoo.process_results:
                        # this updates self.task so processing gets the latest and greatest
                        self.store_task_info()

                        self.process_results()
                        self.db.set_status(self.task.id, TASK_REPORTED)

                    # overwrite task.json so we have the latest data inside
                    self.store_task_info()

                    self.db.guest_set_status(self.task.id, "VNC")
                    while machinery.is_running(self.machine.label):
                        log.info(" [*] [%s] VNC mode ", self.machine.name)
                        time.sleep(5)
                    self.db.guest_set_status(self.task.id, "Done")
                    log.info(" [*] [%s] VNC mode - Done ", self.machine.name)
                else:
                    self.db.guest_set_status(self.task.id, "stopping")
                    machinery.stop(self.machine.label)
            except CuckooMachineError as e:
                log.warning(
                    "Unable to stop machine %s: %s",
                    self.machine.label, e, extra={
                        "action": "vm.stop",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

            # Note that "Stopped VM" is reported even when the stop attempt
            # above raised CuckooMachineError.
            logger(
                "Stopped VM",
                action="vm.stop", status="success",
                vmname=self.machine.name
            )

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            self.unroute_network()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)
            except CuckooMachineError as e:
                log.error(
                    "Unable to release machine %s, reason %s. You might need "
                    "to restore it manually.", self.machine.label, e, extra={
                        "action": "vm.release",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

        return succeeded

    def process_results(self):
        """Generate the reports for this task and clean up sample files.

        Runs the processing, signature and reporting stages in that order.
        For tasks of category "file" it then removes the submitted file
        and/or the stored binary copy, depending on the ``delete_original``
        and ``delete_bin_copy`` settings. Deletion problems are logged, never
        raised.

        Returns:
            Always True.
        """
        logger(
            "Starting task reporting",
            action="task.report", status="pending"
        )

        # TODO Refactor this function as currently "cuckoo process" has a 1:1
        # copy of its code. TODO Also remove "archive" files.
        processed = RunProcessing(task=self.task).run()
        RunSignatures(results=processed).run()
        RunReporting(task=self.task, results=processed).run()

        # Optionally remove the file that was originally submitted.
        if self.task.category == "file" and self.cfg.cuckoo.delete_original:
            if os.path.exists(self.task.target):
                try:
                    os.remove(self.task.target)
                except OSError as err:
                    log.error("Unable to delete original file at path "
                              "\"%s\": %s", self.task.target, err)
            else:
                log.warning("Original file does not exist anymore: \"%s\": "
                            "File not found.", self.task.target)

        # Optionally remove the stored copy of the binary as well.
        if self.task.category == "file" and self.cfg.cuckoo.delete_bin_copy:
            if os.path.exists(self.binary):
                try:
                    os.remove(self.binary)
                except OSError as err:
                    log.error(
                        "Unable to delete the copy of the original file at "
                        "path \"%s\": %s", self.binary, err
                    )
            else:
                log.warning(
                    "Copy of the original file does not exist anymore: "
                    "\"%s\": File not found", self.binary
                )

            # A symlink in the analysis directory whose target is gone is
            # dangling; clean it up too.
            dangling = (
                os.path.islink(self.storage_binary) and
                not os.path.exists(self.storage_binary)
            )
            if dangling:
                try:
                    os.remove(self.storage_binary)
                except OSError as err:
                    log.error(
                        "Unable to delete symlink to the binary copy at path "
                        "\"%s\": %s", self.storage_binary, err
                    )

        log.info(
            "Task #%d: reports generation completed",
            self.task.id, extra={
                "action": "task.report",
                "status": "success",
            }
        )

        return True

    def run(self):
        """Run manager thread.

        Launches the analysis. Unless the task runs in VNC mode, it then
        marks the task as completed, optionally processes the results,
        re-points the "latest" analysis symlink and rewrites task.json.

        Exceptions derived from ``Exception`` are logged and swallowed so a
        failing analysis cannot take the thread down silently. The task log
        is stopped and the global active analysis counter is decremented in
        a ``finally`` clause, so that bookkeeping happens on every exit path.
        """
        global active_analysis_count
        active_analysis_count += 1
        try:
            self.launch_analysis()

            if not self.is_vnc:
                self.db.set_status(self.task.id, TASK_COMPLETED)

                log.debug("Released database task #%d", self.task.id)

                if self.cfg.cuckoo.process_results:
                    # this updates self.task so processing gets the latest and greatest
                    self.store_task_info()

                    self.process_results()
                    self.db.set_status(self.task.id, TASK_REPORTED)

                # We make a symbolic link ("latest") which links to the latest
                # analysis - this is useful for debugging purposes. This is only
                # supported under systems that support symbolic links.
                if hasattr(os, "symlink"):
                    latest = cwd("storage", "analyses", "latest")

                    # First we have to remove the existing symbolic link, then
                    # we have to create the new one. The lock serializes this
                    # remove-then-create sequence between analysis threads and
                    # is released automatically when the block is left.
                    with latest_symlink_lock:
                        try:
                            # As per documentation, lexists() returns True for
                            # dead symbolic links.
                            if os.path.lexists(latest):
                                os.remove(latest)

                            os.symlink(self.storage, latest)
                        except OSError as e:
                            log.warning(
                                "Error pointing latest analysis symlink: %s",
                                e
                            )

                # overwrite task.json so we have the latest data inside
                self.store_task_info()
                log.info(
                    "Task #%d: analysis procedure completed",
                    self.task.id, extra={
                        "action": "task.stop",
                        "status": "success",
                    }
                )
        except Exception:
            # Catch Exception rather than using a bare except, so that
            # SystemExit and KeyboardInterrupt are not swallowed here.
            log.exception("Failure in AnalysisManager.run", extra={
                "action": "task.stop",
                "status": "error",
            })
        finally:
            task_log_stop(self.task.id)
            active_analysis_count -= 1
        active_analysis_count -= 1
Exemplo n.º 7
0
class Scheduler(object):

    def __init__(self, maxcount=None):
        """Create an idle scheduler.

        maxcount is the optional cap on the number of analyses to start.
        When it is left as None, initialize() falls back to the configured
        max_analysis_count.
        """
        # Main loop control and bookkeeping.
        self.running = True
        self.maxcount = maxcount
        self.total_analysis_count = 0
        self.managers = []

        # Both are populated by initialize().
        self.machinery = None
        self.machine_lock = None

        self.db = Database()

    def initialize(self):
        """Prepare the machinery and the startup semaphore for scheduling.

        Instantiates the configured machinery plugin, hands it its
        configuration, initializes it and verifies that it reports at least
        one machine. Afterwards stale forwarding rules are dropped and, when
        no maximum analysis count was given to the constructor, the
        configured one is used.

        Raises:
            CuckooCriticalError: when the machinery fails to initialize or
                reports no machines.
        """
        machinery_name = config("cuckoo:cuckoo:machinery")
        startup_limit = config("cuckoo:cuckoo:max_vmstartup_count")

        # The semaphore limits how many VMs may be starting up at the same
        # time.
        self.machine_lock = threading.Semaphore(startup_limit)

        log.info(
            "Using '%s' as machine manager", machinery_name,
            extra={
                "action": "init.machinery",
                "status": "success",
                "machinery": machinery_name,
            }
        )

        # Look up the machinery plugin by name, instantiate it and give it
        # the options from its own configuration file.
        plugin_class = cuckoo.machinery.plugins[machinery_name]
        self.machinery = plugin_class()
        self.machinery.set_options(Config(machinery_name))

        try:
            self.machinery.initialize(machinery_name)
        except CuckooMachineError as e:
            raise CuckooCriticalError("Error initializing machines: %s" % e)

        # By now every usable machine should be known to the machinery.
        # Without any machine there is nothing to schedule on, so abort.
        # TODO In the future we'll probably want get rid of this.
        machines = self.machinery.machines()
        if not machines:
            raise CuckooCriticalError("No machines available.")

        machine_count = len(machines)
        log.info(
            "Loaded %s machine/s", machine_count,
            extra={
                "action": "init.machines",
                "status": "success",
                "count": machine_count
            }
        )

        if machine_count > 1 and self.db.engine.name == "sqlite":
            log.warning(
                "As you've configured Cuckoo to execute parallel "
                "analyses, we recommend you to switch to a MySQL or "
                "a PostgreSQL database as SQLite might cause some "
                "issues."
            )

        if machine_count > 4 and config("cuckoo:cuckoo:process_results"):
            log.warning(
                "When running many virtual machines it is recommended to "
                "process the results in separate 'cuckoo process' instances "
                "increase throughput and stability. Please read the "
                "documentation about the `Processing Utility`."
            )

        self.drop_forwarding_rules()

        # A value passed to the constructor wins over the configuration file.
        if self.maxcount is None:
            self.maxcount = config("cuckoo:cuckoo:max_analysis_count")

    def drop_forwarding_rules(self):
        """Remove leftover packet forwarding rules for every machine.

        Rules may survive when Cuckoo was terminated before it could clean
        up. Machines without a known network interface are skipped with a
        log message.
        """
        for vm in self.machinery.machines():
            if not vm.interface:
                log.info(
                    "Unable to determine the network interface for VM "
                    "with name %s, Cuckoo will not be able to give it "
                    "full internet access or route it through a VPN! "
                    "Please define a default network interface for the "
                    "machinery or define a network interface for each "
                    "VM.", vm.name
                )
                continue

            # Disable forwarding towards every configured VPN.
            if config("routing:vpn:enabled"):
                for vpn_name in config("routing:vpn:vpns"):
                    vpn_interface = config("routing:%s:interface" % vpn_name)
                    rooter(
                        "forward_disable", vm.interface, vpn_interface, vm.ip
                    )

            # Disable forwarding towards the internet / dirty line.
            if config("routing:routing:internet") != "none":
                internet_interface = config("routing:routing:internet")
                rooter(
                    "forward_disable", vm.interface, internet_interface, vm.ip
                )

    def stop(self):
        """Stop the Cuckoo task scheduler."""
        self.running = False
        # Shutdown machine manager (used to kill machines that still alive).
        for manager in self.managers:
            manager.force_cleanup()

        self.machinery.shutdown()

    def ready_for_new_run(self):
        """Decide whether a new pending task may be started right now.

        Returns:
            False when the machine lock is taken, when free disk space is at
            or below the configured minimum, when the configured maximum
            number of machines is already running, or when the machinery has
            no available machine. True otherwise.
        """
        # The machine lock is only free when no machine is about to start or
        # still starting. Probing it without blocking avoids races such as
        # two analyses picking the same machine.
        lock_was_free = self.machine_lock.acquire(False)
        if not lock_was_free:
            logger(
                "Could not acquire machine lock",
                action="scheduler.machine_lock", status="busy"
            )
            return False

        # It was only a probe, give the lock back right away.
        self.machine_lock.release()

        # Enforce the minimum amount of free disk space, if one is set.
        if config("cuckoo:cuckoo:freespace"):
            available_mb = get_free_disk(cwd("storage", "analyses"))

            if available_mb is None:
                # The check itself failed, which can happen on platforms
                # where it is not supported. Carry on regardless.
                log.error("Error determining free disk space")
            elif available_mb <= config("cuckoo:cuckoo:freespace"):
                log.error(
                    "Not enough free disk space! (Only %d MB!)",
                    available_mb, extra={
                        "action": "scheduler.diskspace",
                        "status": "error",
                        "available": available_mb,
                    }
                )
                return False

        machine_cap = config("cuckoo:cuckoo:max_machines_count")
        if machine_cap and len(self.machinery.running()) >= machine_cap:
            log.debug(
                "Maximum amount of machines is running", extra={
                    "action": "scheduler.machines",
                    "status": "maxed"
                }
            )
            return False

        if self.machinery.availables():
            return True

        logger(
            "No available machines",
            action="scheduler.machines", status="none"
        )
        return False

    def task_limit_hit(self):
        """Stops the scheduler is the maximum amount of tasks has been
        reached. This can be configured by max_analysis_count in cuckoo.conf
        or passed as an argument when starting Cuckoo."""
        if self.maxcount and self.total_analysis_count >= self.maxcount:
            if not self.managers:
                log.debug(
                    "Reached max analysis count, exiting.", extra={
                        "action": "scheduler.max_analysis",
                        "status": "success",
                        "limit": self.total_analysis_count,
                    }
                )
                self.stop()
                return True

            log.debug(
                "Maximum analyses hit, awaiting active analyses to finish. "
                "Still active: %s", len(self.managers), extra={
                    "action": "scheduler.max_analysis",
                    "status": "busy",
                    "active": len(self.managers)
                }
            )
            return True
        return False

    def handle_pending(self):
        """Try to start one pending task.

        Takes the machine lock without blocking, picks a task together with
        a machine (first a task tied to a specific available machine, then
        any non-service task), finds an analysis manager supporting the task
        type and starts it. On success the machine lock stays held; it is
        released here only on the paths where no analysis is started.

        Returns:
            None. The outcome is visible through the task status in the
            database and through ``self.managers``.
        """
        # Acquire machine lock non-blocking. This is because the scheduler
        # also handles requests made by analysis manager. A blocking lock
        # could cause a deadlock
        if not self.machine_lock.acquire(False):
            return

        # Select task that is specifically for one of the available machines
        # possibly a service machine or reserved machine
        machine, task, analysis = None, None, False
        for available_machine in self.db.get_available_machines():

            # If the machine has been reserved for a specific task, this
            # task should be processed first, as the machine will only be
            # released it has finished (Example: longterm task).
            if available_machine.reserved_by:
                task = self.db.fetch(task_id=available_machine.reserved_by)
                if task:
                    machine = self.machinery.acquire(
                        machine_id=available_machine.name
                    )
                    break
                # A reserved machine is never offered to other tasks.
                continue

            task = self.db.fetch(machine=available_machine.name)
            if task:
                machine = self.machinery.acquire(
                    machine_id=available_machine.name
                )
                break

            # Remember that at least one machine usable for regular analyses
            # is available.
            if available_machine.is_analysis():
                analysis = True

        # No task for a specific machine and at least one of the available
        # machines is not a service machine. Fetch task that is not
        # for a service machine
        if not task and not machine and analysis:

            # Search for a task, but don't lock it until we are sure a machine
            # for this task is available, since it might have tags or require
            # a specific platform. Ignore a task if we know a machine is not
            # available for it.
            exclude = []
            while not machine:
                task = self.db.fetch(service=False, exclude=exclude)

                if task is None:
                    break

                try:
                    machine = self.machinery.acquire(
                        machine_id=task.machine, platform=task.platform,
                        tags=task.tags
                    )
                except CuckooOperationalError:
                    log.error(
                        "Task #%s cannot be started, no machine with matching "
                        "requirements for this task exists. Requirements: %s",
                        task.id, Task.requirements_str(task)
                    )
                    # No machine with required tags, name etc exists
                    # Set analysis to failed.
                    # TODO Use another status so it might be recovered
                    # on next Cuckoo startup if the machine exists by then
                    self.db.set_status(task.id, TASK_FAILED_ANALYSIS)
                    break

                # A falsy machine means no matching machine could be acquired
                # right now; skip this task on the next fetch.
                if not machine:
                    exclude.append(task.id)

        # Nothing can be started: give back the lock and any machine that was
        # acquired.
        if not task or not machine:
            self.machine_lock.release()
            if machine:
                self.machinery.release(label=machine.label)
            return

        log.info(
            "Task #%d: acquired machine %s (label=%s)",
            task.id, machine.name, machine.label, extra={
                "action": "vm.acquire",
                "status": "success",
                "vmname": machine.name,
            }
        )

        # Task and matching machine found. Find analysis manager
        # which supports the type of this task. Lock it when found
        analysis_manager = self.get_analysis_manager(task, machine)

        if not analysis_manager:
            # If no analysis manager is found for this task type, it
            # cannot be started, therefore we release the machine again
            self.machinery.release(label=machine.label)

            # Release machine lock as the machine will not be starting
            self.machine_lock.release()

            # Set task status to failed as it cannot be analysed if no matching
            # analysis manager for its type exists
            self.db.set_status(task.id, TASK_FAILED_ANALYSIS)
            return

        # Only lock task for running if we are sure we will try to start it
        self.db.set_status(task.id, TASK_RUNNING)

        # Increment the total amount of analyses
        # NOTE(review): the counter is incremented before init() below, so an
        # analysis whose initialization fails still counts towards maxcount.
        self.total_analysis_count += 1

        analysis_manager.daemon = True
        if not analysis_manager.init(self.db):
            self.db.set_status(task.id, TASK_FAILED_ANALYSIS)
            log.error(
                "Failed to initialize analysis manager for task #%s", task.id
            )
            self.machine_lock.release()
            self.machinery.release(label=machine.label)
            return

        # If initialization succeeded, start the analysis manager
        # and store it so we can track it
        analysis_manager.start()
        self.managers.append(analysis_manager)

    def get_analysis_manager(self, db_task, machine):
        """Build an analysis manager for the first plugin supporting the task.

        Only the first plugin whose ``supports`` contains the task type is
        tried. Returns the prepared analysis manager, or None when no plugin
        supports the type or when assigning the task and target to the
        manager fails.
        """
        for plugin in cuckoo.analysis.plugins:
            if db_task.type not in plugin.supports:
                continue

            core_task = Task(db_task)
            candidate = plugin(machine, self.machinery, self.machine_lock)

            try:
                candidate.set_task(core_task)
                candidate.set_target(core_task.targets)
            except Exception:
                log.exception(
                    "Failure when setting task and target for analysis"
                    " manager '%s'.", plugin
                )
                return None

            return candidate

        return None

    def handle_managers(self):
        """Executes actions requested by analysis managers. If an analysis
        manager is finished, executes its finalize actions. Returns a
        list of analysis managers to untrack"""
        remove = []
        for manager in self.managers:

            if manager.action_requested():
                status = manager.get_analysis_status()
                status_action = getattr(manager, "on_status_%s" % status, None)
                if status_action:
                    log.debug(
                        "Executing requested action by task #%s for status"
                        " '%s'", manager.task.id, status
                    )
                    try:
                        status_action(self.db)
                    except Exception as e:
                        log.exception(
                            "Error executing requested action: %s. Error: %s",
                            status_action, e
                        )
                else:
                    log.error(
                        "Analysis manager for task #%s requested action for"
                        " status '%s', but no action is implemented",
                        manager.task.id, status
                    )
                manager.action_lock.release()

            if not manager.isAlive():
                manager.finalize(self.db)
                remove.append(manager)

        return remove

    def keep_running(self):
        return self.running

    def start(self):
        """Start the Cuckoo task scheduler.

        Initializes the scheduler and then loops once per second until
        keep_running() turns false: finished analysis managers are untracked,
        the analysis limit is checked and, when possible, one pending task
        is started.
        """
        self.initialize()

        log.info("Waiting for analysis tasks")

        while self.keep_running():
            time.sleep(1)

            # Serve the requests of the analysis managers and drop those
            # that have exited from the tracked list.
            finished = self.handle_managers()
            for done in finished:
                self.managers.remove(done)

            # Once the maximum amount of analyses is hit no new task is
            # started; the scheduler stops when no manager is left.
            if self.task_limit_hit():
                continue

            # Nothing to do while there is no pending task.
            if not self.db.count_tasks(status=TASK_PENDING):
                continue

            # Only pick up a task when the VM count, disk space, etc. allow
            # it. handle_pending() then matches a task with a machine and an
            # analysis manager and starts the analysis.
            if self.ready_for_new_run():
                self.handle_pending()

        log.debug("End of analyses.")