示例#1
0
    def init(self, db):
        # If for some reason the task dir does not exist, stop the analysis
        # because it should have been created upon submission
        if not self.task.dir_exists():
            log.error("Task directory for task #%s does not exist",
                      self.task.id)
            return False

        if not URLDiaries.init_done:
            URLDiaries.init()

        self.curr_block = self.new_target_block()
        if not self.curr_block:
            log.error("Empty target list, cannot proceed.")
            return False

        self.rt = RealTimeHandler()
        self.ev_client = EventClient()
        self.URL_BLOCKSIZE = int(
            self.task.options.get("urlblocksize", self.URL_BLOCKSIZE))
        self.SECS_PER_BLOCK = int(
            self.task.options.get("blocktime", self.SECS_PER_BLOCK))
        self.aborted = False
        self.completed = False

        # Write task to disk in json file
        self.task.write_task_json()
        self.build_options(
            options={
                "category": "url",
                "target": ",".join(self.curr_block.keys()),
                "enforce_timeout": True,
                "timeout": len(self.task.targets) * self.SECS_PER_BLOCK * 3
            })

        self.guest_manager = GuestManager(
            self.machine.name, self.machine.ip, self.machine.platform,
            self.task, self, self.analysis,
            self.curr_block.get(
                self.curr_block.keys().pop(0)).get("target_obj"))

        self.aux = RunAuxiliary(self.task.task_dict, self.machine,
                                self.guest_manager)

        # The wait/agent status checking etc is run in a separate thread. This
        # allows the analysis manager to perform other actions while the
        # analysis is running
        self.gm_wait_th = threading.Thread(
            target=self.guest_manager.wait_for_completion)
        self.gm_wait_th.daemon = True
        self.detection_events = []
        self.netflow_events = []
        self.js_events = []
        self.realtime_finished = set()
        self.tlskeys_response = None
        self.realtime_error = False
        self.requestfinder = RequestFinder(self.task.id)
        self.all_pids_targets = {}

        return True
示例#2
0
class AnalysisManager(threading.Thread):
    """Analysis Manager.

    This class handles the full analysis process for a given task. It takes
    care of selecting the analysis machine, preparing the configuration and
    interacting with the guest agent and analyzer components to launch and
    complete the analysis and store, process and report its results.
    """
    def __init__(self, task_id, error_queue):
        """@param task: task object containing the details for the analysis."""
        threading.Thread.__init__(self)

        self.errors = error_queue
        self.cfg = Config()
        self.storage = ""
        self.binary = ""
        self.storage_binary = ""
        self.machine = None
        self.db = Database()
        self.task = self.db.view_task(task_id)
        self.guest_manager = None
        self.route = None
        self.interface = None
        self.rt_table = None
        self.unrouted_network = False
        self.stopped_aux = False
        self.rs_port = config("cuckoo:resultserver:port")

    def init(self):
        """Initialize the analysis."""
        self.storage = cwd(analysis=self.task.id)

        # If the analysis storage folder already exists, we need to abort the
        # analysis or previous results will be overwritten and lost.
        if os.path.exists(self.storage):
            log.error(
                "Analysis results folder already exists at path \"%s\", "
                "analysis aborted", self.storage)
            return False

        # If we're not able to create the analysis storage folder, we have to
        # abort the analysis.
        # Also create all directories that the ResultServer can use for file
        # uploads.
        try:
            Folders.create(self.storage, RESULT_DIRECTORIES)
        except CuckooOperationalError:
            log.error("Unable to create analysis folder %s", self.storage)
            return False

        self.store_task_info()

        if self.task.category == "file" or self.task.category == "archive":
            # Check if we have permissions to access the file.
            # And fail this analysis if we don't have access to the file.
            if not os.access(self.task.target, os.R_OK):
                log.error(
                    "Unable to access target file, please check if we have "
                    "permissions to access the file: \"%s\"", self.task.target)
                return False

            # Check whether the file has been changed for some unknown reason.
            # And fail this analysis if it has been modified.
            # TODO Absorb the file upon submission.
            sample = self.db.view_sample(self.task.sample_id)
            sha256 = File(self.task.target).get_sha256()
            if sha256 != sample.sha256:
                log.error(
                    "Target file has been modified after submission: \"%s\"",
                    self.task.target)
                return False

            # Store a copy of the original file if does not exist already.
            # TODO This should be done at submission time.
            self.binary = cwd("storage", "binaries", sha256)
            if not os.path.exists(self.binary):
                try:
                    shutil.copy(self.task.target, self.binary)
                except (IOError, shutil.Error):
                    log.error(
                        "Unable to store file from \"%s\" to \"%s\", "
                        "analysis aborted", self.task.target, self.binary)
                    return False

            # Each analysis directory contains a symlink/copy of the binary.
            try:
                self.storage_binary = os.path.join(self.storage, "binary")

                if hasattr(os, "symlink"):
                    os.symlink(self.binary, self.storage_binary)
                else:
                    shutil.copy(self.binary, self.storage_binary)
            except (AttributeError, OSError) as e:
                log.error(
                    "Unable to create symlink/copy from \"%s\" to "
                    "\"%s\": %s", self.binary, self.storage, e)
                return False

        # Initiates per-task logging.
        task_log_start(self.task.id)
        return True

    def store_task_info(self):
        """Grab latest task from db (if available) and update self.task"""
        dbtask = self.db.view_task(self.task.id)
        self.task = dbtask.to_dict()

        task_info_path = os.path.join(self.storage, "task.json")
        open(task_info_path, "w").write(dbtask.to_json())

    def acquire_machine(self):
        """Acquire an analysis machine from the pool of available ones."""
        machine = None

        # Start a loop to acquire the a machine to run the analysis on.
        while True:
            machine_lock.acquire()

            # In some cases it's possible that we enter this loop without
            # having any available machines. We should make sure this is not
            # such case, or the analysis task will fail completely.
            if not machinery.availables():
                machine_lock.release()
                time.sleep(1)
                continue

            # If the user specified a specific machine ID, a platform to be
            # used or machine tags acquire the machine accordingly.
            machine = machinery.acquire(machine_id=self.task.machine,
                                        platform=self.task.platform,
                                        tags=self.task.tags)

            # If no machine is available at this moment, wait for one second
            # and try again.
            if not machine:
                machine_lock.release()
                log.debug("Task #%d: no machine available yet", self.task.id)
                time.sleep(1)
            else:
                log.info("Task #%d: acquired machine %s (label=%s)",
                         self.task.id,
                         machine.name,
                         machine.label,
                         extra={
                             "action": "vm.acquire",
                             "status": "success",
                             "vmname": machine.name,
                         })
                break

        self.machine = machine

    def build_options(self):
        """Generate analysis options.
        @return: options dict.
        """
        options = {}

        if self.task.category == "file":
            options["file_name"] = File(self.task.target).get_name()
            options["file_type"] = File(self.task.target).get_type()
            options["pe_exports"] = \
                ",".join(File(self.task.target).get_exported_functions())

            package, activity = File(self.task.target).get_apk_entry()
            self.task.options["apk_entry"] = "%s:%s" % (package, activity)
        elif self.task.category == "archive":
            options["file_name"] = File(self.task.target).get_name()

        options["id"] = self.task.id
        options["ip"] = self.machine.resultserver_ip
        options["port"] = self.rs_port
        options["category"] = self.task.category
        options["target"] = self.task.target
        options["package"] = self.task.package
        options["options"] = emit_options(self.task.options)
        options["enforce_timeout"] = self.task.enforce_timeout
        options["clock"] = self.task.clock
        options["terminate_processes"] = self.cfg.cuckoo.terminate_processes

        if not self.task.timeout:
            options["timeout"] = self.cfg.timeouts.default
        else:
            options["timeout"] = self.task.timeout

        # copy in other analyzer specific options, TEMPORARY (most likely)
        vm_options = getattr(machinery.options, self.machine.name)
        for k in vm_options:
            if k.startswith("analyzer_"):
                options[k] = vm_options[k]

        return options

    def route_network(self):
        """Enable network routing if desired."""
        # Determine the desired routing strategy (none, internet, VPN).
        self.route = self.task.options.get("route",
                                           config("routing:routing:route"))

        if self.route == "none" or self.route == "drop":
            self.interface = None
            self.rt_table = None
        elif self.route == "inetsim":
            pass
        elif self.route == "tor":
            pass
        elif self.route == "internet":
            if config("routing:routing:internet") == "none":
                log.warning(
                    "Internet network routing has been specified, but not "
                    "configured, ignoring routing for this analysis",
                    extra={
                        "action": "network.route",
                        "status": "error",
                        "route": self.route,
                    })
                self.route = "none"
                self.task.options["route"] = "none"
                self.interface = None
                self.rt_table = None
            else:
                self.interface = config("routing:routing:internet")
                self.rt_table = config("routing:routing:rt_table")
        elif self.route in config("routing:vpn:vpns"):
            self.interface = config("routing:%s:interface" % self.route)
            self.rt_table = config("routing:%s:rt_table" % self.route)
        else:
            log.warning(
                "Unknown network routing destination specified, ignoring "
                "routing for this analysis: %r",
                self.route,
                extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                })
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # Check if the network interface is still available. If a VPN dies for
        # some reason, its tunX interface will no longer be available.
        if self.interface and not rooter("nic_available", self.interface):
            log.error(
                "The network interface '%s' configured for this analysis is "
                "not available at the moment, switching to route=none mode.",
                self.interface,
                extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                })
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # For now this doesn't work yet in combination with tor routing.
        if self.route == "drop" or self.route == "internet":
            rooter("drop_enable", self.machine.ip,
                   config("cuckoo:resultserver:ip"), str(self.rs_port))

        if self.route == "inetsim":
            machinery = config("cuckoo:cuckoo:machinery")
            rooter("inetsim_enable", self.machine.ip,
                   config("routing:inetsim:server"),
                   config("%s:%s:interface" % (machinery, machinery)),
                   str(self.rs_port),
                   config("routing:inetsim:ports") or "")

        if self.route == "tor":
            rooter("tor_enable", self.machine.ip,
                   str(config("cuckoo:resultserver:ip")),
                   str(config("routing:tor:dnsport")),
                   str(config("routing:tor:proxyport")))

        if self.interface:
            rooter("forward_enable", self.machine.interface, self.interface,
                   self.machine.ip)

        if self.rt_table:
            rooter("srcroute_enable", self.rt_table, self.machine.ip)

        # Propagate the taken route to the database.
        self.db.set_route(self.task.id, self.route)

    def unroute_network(self):
        """Disable any enabled network routing."""
        if self.interface:
            rooter("forward_disable", self.machine.interface, self.interface,
                   self.machine.ip)

        if self.rt_table:
            rooter("srcroute_disable", self.rt_table, self.machine.ip)

        if self.route == "drop" or self.route == "internet":
            rooter("drop_disable", self.machine.ip,
                   config("cuckoo:resultserver:ip"), str(self.rs_port))

        if self.route == "inetsim":
            machinery = config("cuckoo:cuckoo:machinery")
            rooter("inetsim_disable", self.machine.ip,
                   config("routing:inetsim:server"),
                   config("%s:%s:interface" % (machinery, machinery)),
                   str(self.rs_port),
                   config("routing:inetsim:ports") or "")

        if self.route == "tor":
            rooter("tor_disable", self.machine.ip,
                   str(config("cuckoo:resultserver:ip")),
                   str(config("routing:tor:dnsport")),
                   str(config("routing:tor:proxyport")))

        self.unrouted_network = True

    def wait_finish(self):
        """Some VMs don't have an actual agent. Mainly those that are used as
        assistance for an analysis through the services auxiliary module. This
        method just waits until the analysis is finished rather than actively
        trying to engage with the Cuckoo Agent."""
        self.db.guest_set_status(self.task.id, "running")
        while self.db.guest_get_status(self.task.id) == "running":
            time.sleep(1)

    def guest_manage(self, options):
        # Handle a special case where we're creating a baseline report of this
        # particular virtual machine - a report containing all the results
        # that are gathered if no additional samples are ran in the VM. These
        # results, such as loaded drivers and opened sockets in volatility, or
        # DNS requests to hostnames related to Microsoft Windows, etc may be
        # omitted or at the very least given less priority when creating a
        # report for an analysis that ran on this VM later on.
        if self.task.category == "baseline":
            time.sleep(options["timeout"])
        else:
            # Start the analysis.
            self.db.guest_set_status(self.task.id, "starting")
            monitor = self.task.options.get("monitor", "latest")
            self.guest_manager.start_analysis(options, monitor)

            # In case the Agent didn't respond and we force-quit the analysis
            # at some point while it was still starting the analysis the state
            # will be "stop" (or anything but "running", really).
            if self.db.guest_get_status(self.task.id) == "starting":
                self.db.guest_set_status(self.task.id, "running")
                self.guest_manager.wait_for_completion()

            self.db.guest_set_status(self.task.id, "stopping")

    def launch_analysis(self):
        """Start analysis."""
        succeeded = False

        if self.task.category == "file" or self.task.category == "archive":
            target = os.path.basename(self.task.target)
        else:
            target = self.task.target

        log.info("Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
                 self.task.category.upper(),
                 target,
                 self.task.id,
                 emit_options(self.task.options),
                 extra={
                     "action": "task.init",
                     "status": "starting",
                     "task_id": self.task.id,
                     "target": target,
                     "category": self.task.category,
                     "package": self.task.package,
                     "options": emit_options(self.task.options),
                     "custom": self.task.custom,
                 })

        # Initialize the analysis.
        if not self.init():
            logger("Failed to initialize", action="task.init", status="error")
            return False

        # Acquire analysis machine.
        try:
            self.acquire_machine()
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Cannot acquire machine: %s",
                      e,
                      extra={
                          "action": "vm.acquire",
                          "status": "error",
                      })
            return False

        self.rs_port = self.machine.resultserver_port or ResultServer().port

        # At this point we can tell the ResultServer about it.
        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            self.errors.put(e)

        # Initialize the guest manager.
        self.guest_manager = GuestManager(self.machine.name, self.machine.ip,
                                          self.machine.platform, self.task.id,
                                          self)

        self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
        self.aux.start()

        # Generate the analysis configuration file.
        options = self.build_options()

        # Check if the current task has remotecontrol
        # enabled before starting the machine.
        control_enabled = (config("cuckoo:remotecontrol:enabled")
                           and "remotecontrol" in self.task.options)
        if control_enabled:
            try:
                machinery.enable_remote_control(self.machine.label)
            except NotImplementedError:
                log.error(
                    "Remote control support has not been implemented for the "
                    "configured machinery module: %s",
                    config("cuckoo:cuckoo:machinery"))

        try:
            unlocked = False
            self.interface = None

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id, self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            logger("Starting VM",
                   action="vm.start",
                   status="pending",
                   vmname=self.machine.name)

            # Start the machine.
            machinery.start(self.machine.label, self.task)

            logger("Started VM",
                   action="vm.start",
                   status="success",
                   vmname=self.machine.name)

            # retrieve the port used for remote control
            if control_enabled:
                try:
                    params = machinery.get_remote_control_params(
                        self.machine.label)
                    self.db.set_machine_rcparams(self.machine.label, params)
                except NotImplementedError:
                    log.error(
                        "Remote control support has not been implemented for the "
                        "configured machinery module: %s",
                        config("cuckoo:cuckoo:machinery"))

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            # Run and manage the components inside the guest unless this
            # machine has the "noagent" option specified (please refer to the
            # wait_finish() function for more details on this function).
            if "noagent" not in self.machine.options:
                self.guest_manage(options)
            else:
                self.wait_finish()

            succeeded = True
        except CuckooMachineSnapshotError as e:
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s",
                self.machine.name,
                e,
                extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                })
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error starting Virtual Machine! VM: %s, error: %s",
                      self.machine.name,
                      e,
                      extra={
                          "action": "vm.start",
                          "status": "error",
                          "vmname": self.machine.name,
                      })
        except CuckooGuestCriticalTimeout as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s",
                self.machine.name,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                })
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error from the Cuckoo Guest: %s",
                      e,
                      extra={
                          "action": "guest.handle",
                          "status": "error",
                          "task_id": self.task.id,
                      })
        finally:
            # Stop Auxiliary modules.
            if not self.stopped_aux:
                self.stopped_aux = True
                self.aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                logger("Taking full memory dump",
                       action="vm.memdump",
                       status="pending",
                       vmname=self.machine.name)
                try:
                    dump_path = os.path.join(self.storage, "memory.dmp")
                    machinery.dump_memory(self.machine.label, dump_path)

                    logger("Taken full memory dump",
                           action="vm.memdump",
                           status="success",
                           vmname=self.machine.name)
                except NotImplementedError:
                    log.error(
                        "The memory dump functionality is not available for "
                        "the current machine manager.",
                        extra={
                            "action": "vm.memdump",
                            "status": "error",
                            "vmname": self.machine.name,
                        })
                except CuckooMachineError as e:
                    log.error("Machinery error: %s",
                              e,
                              extra={
                                  "action": "vm.memdump",
                                  "status": "error",
                              })

            logger("Stopping VM",
                   action="vm.stop",
                   status="pending",
                   vmname=self.machine.name)

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)
            except CuckooMachineError as e:
                log.warning("Unable to stop machine %s: %s",
                            self.machine.label,
                            e,
                            extra={
                                "action": "vm.stop",
                                "status": "error",
                                "vmname": self.machine.name,
                            })

            logger("Stopped VM",
                   action="vm.stop",
                   status="success",
                   vmname=self.machine.name)

            # Disable remote control after stopping the machine
            # if it was enabled for the task.
            if control_enabled:
                try:
                    machinery.disable_remote_control(self.machine.label)
                except NotImplementedError:
                    log.error(
                        "Remote control support has not been implemented for the "
                        "configured machinery module: %s",
                        config("cuckoo:cuckoo:machinery"))

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            if not self.unrouted_network:
                self.unroute_network()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)
            except CuckooMachineError as e:
                log.error(
                    "Unable to release machine %s, reason %s. You might need "
                    "to restore it manually.",
                    self.machine.label,
                    e,
                    extra={
                        "action": "vm.release",
                        "status": "error",
                        "vmname": self.machine.name,
                    })

        return succeeded

    def process_results(self):
        """Process the analysis results and generate the enabled reports."""
        logger("Starting task reporting",
               action="task.report",
               status="pending")

        # TODO Refactor this function as currently "cuckoo process" has a 1:1
        # copy of its code. TODO Also remove "archive" files.
        results = RunProcessing(task=self.task).run()
        RunSignatures(results=results).run()
        RunReporting(task=self.task, results=results).run()

        # If the target is a file and the user enabled the option,
        # delete the original copy.
        if self.task.category == "file" and self.cfg.cuckoo.delete_original:
            if not os.path.exists(self.task.target):
                log.warning(
                    "Original file does not exist anymore: \"%s\": "
                    "File not found.", self.task.target)
            else:
                try:
                    os.remove(self.task.target)
                except OSError as e:
                    log.error(
                        "Unable to delete original file at path "
                        "\"%s\": %s", self.task.target, e)

        # If the target is a file and the user enabled the delete copy of
        # the binary option, then delete the copy.
        if self.task.category == "file" and self.cfg.cuckoo.delete_bin_copy:
            if not os.path.exists(self.binary):
                log.warning(
                    "Copy of the original file does not exist anymore: \"%s\": File not found",
                    self.binary)
            else:
                try:
                    os.remove(self.binary)
                except OSError as e:
                    log.error(
                        "Unable to delete the copy of the original file at path \"%s\": %s",
                        self.binary, e)
            # Check if the binary in the analysis directory is an invalid symlink. If it is, delete it.
            if os.path.islink(self.storage_binary) and not os.path.exists(
                    self.storage_binary):
                try:
                    os.remove(self.storage_binary)
                except OSError as e:
                    log.error(
                        "Unable to delete symlink to the binary copy at path \"%s\": %s",
                        self.storage_binary, e)

        log.info("Task #%d: reports generation completed",
                 self.task.id,
                 extra={
                     "action": "task.report",
                     "status": "success",
                 })

        return True

    def run(self):
        """Run manager thread."""
        global active_analysis_count
        active_analysis_count += 1
        try:
            self.launch_analysis()

            log.debug("Released database task #%d", self.task.id)

            if self.cfg.cuckoo.process_results:
                self.store_task_info()
                self.db.set_status(self.task.id, TASK_COMPLETED)
                # TODO If self.process_results() is unified with apps.py's
                # process() method, then ensure that TASK_FAILED_PROCESSING is
                # handled correctly and not overwritten by the db.set_status()
                # at the end of this method.
                self.process_results()

            # We make a symbolic link ("latest") which links to the latest
            # analysis - this is useful for debugging purposes. This is only
            # supported under systems that support symbolic links.
            if hasattr(os, "symlink"):
                latest = cwd("storage", "analyses", "latest")

                # First we have to remove the existing symbolic link, then we
                # have to create the new one.
                # Deal with race conditions using a lock.
                latest_symlink_lock.acquire()
                try:
                    # As per documentation, lexists() returns True for dead
                    # symbolic links.
                    if os.path.lexists(latest):
                        os.remove(latest)

                    os.symlink(self.storage, latest)
                except OSError as e:
                    log.warning("Error pointing latest analysis symlink: %s" %
                                e)
                finally:
                    latest_symlink_lock.release()

            # overwrite task.json so we have the latest data inside
            self.store_task_info()
            log.info("Task #%d: analysis procedure completed",
                     self.task.id,
                     extra={
                         "action": "task.stop",
                         "status": "success",
                     })
        except:
            log.exception("Failure in AnalysisManager.run",
                          extra={
                              "action": "task.stop",
                              "status": "error",
                          })
        finally:
            if self.cfg.cuckoo.process_results:
                self.db.set_status(self.task.id, TASK_REPORTED)
            else:
                self.db.set_status(self.task.id, TASK_COMPLETED)
            task_log_stop(self.task.id)
            active_analysis_count -= 1

    def cleanup(self):
        # In case the analysis manager crashes, the network cleanup
        # should still be performed.
        if not self.unrouted_network:
            self.unroute_network()

        if not self.stopped_aux:
            self.stopped_aux = True
            self.aux.stop()

    def force_stop(self):
        # Make the guest manager stop the status checking loop and return
        # to the main analysis manager routine.
        if self.db.guest_get_status(self.task.id):
            self.db.guest_set_status(self.task.id, "stopping")

        self.guest_manager.stop()
        log.debug("Force stopping task #%s", self.task.id)
示例#3
0
    def launch_analysis(self):
        """Start analysis."""
        succeeded = False

        if self.task.category == "file" or self.task.category == "archive":
            target = os.path.basename(self.task.target)
        else:
            target = self.task.target

        log.info("Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
                 self.task.category.upper(),
                 target,
                 self.task.id,
                 emit_options(self.task.options),
                 extra={
                     "action": "task.init",
                     "status": "starting",
                     "task_id": self.task.id,
                     "target": target,
                     "category": self.task.category,
                     "package": self.task.package,
                     "options": emit_options(self.task.options),
                     "custom": self.task.custom,
                 })

        # Initialize the analysis.
        if not self.init():
            logger("Failed to initialize", action="task.init", status="error")
            return False

        # Acquire analysis machine.
        try:
            self.acquire_machine()
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Cannot acquire machine: %s",
                      e,
                      extra={
                          "action": "vm.acquire",
                          "status": "error",
                      })
            return False

        self.rs_port = self.machine.resultserver_port or ResultServer().port

        # At this point we can tell the ResultServer about it.
        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            self.errors.put(e)

        # Initialize the guest manager.
        self.guest_manager = GuestManager(self.machine.name, self.machine.ip,
                                          self.machine.platform, self.task.id,
                                          self)

        self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
        self.aux.start()

        # Generate the analysis configuration file.
        options = self.build_options()

        # Check if the current task has remotecontrol
        # enabled before starting the machine.
        control_enabled = (config("cuckoo:remotecontrol:enabled")
                           and "remotecontrol" in self.task.options)
        if control_enabled:
            try:
                machinery.enable_remote_control(self.machine.label)
            except NotImplementedError:
                log.error(
                    "Remote control support has not been implemented for the "
                    "configured machinery module: %s",
                    config("cuckoo:cuckoo:machinery"))

        try:
            unlocked = False
            self.interface = None

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id, self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            logger("Starting VM",
                   action="vm.start",
                   status="pending",
                   vmname=self.machine.name)

            # Start the machine.
            machinery.start(self.machine.label, self.task)

            logger("Started VM",
                   action="vm.start",
                   status="success",
                   vmname=self.machine.name)

            # retrieve the port used for remote control
            if control_enabled:
                try:
                    params = machinery.get_remote_control_params(
                        self.machine.label)
                    self.db.set_machine_rcparams(self.machine.label, params)
                except NotImplementedError:
                    log.error(
                        "Remote control support has not been implemented for the "
                        "configured machinery module: %s",
                        config("cuckoo:cuckoo:machinery"))

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            # Run and manage the components inside the guest unless this
            # machine has the "noagent" option specified (please refer to the
            # wait_finish() function for more details on this function).
            if "noagent" not in self.machine.options:
                self.guest_manage(options)
            else:
                self.wait_finish()

            succeeded = True
        except CuckooMachineSnapshotError as e:
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s",
                self.machine.name,
                e,
                extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                })
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error starting Virtual Machine! VM: %s, error: %s",
                      self.machine.name,
                      e,
                      extra={
                          "action": "vm.start",
                          "status": "error",
                          "vmname": self.machine.name,
                      })
        except CuckooGuestCriticalTimeout as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s",
                self.machine.name,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                })
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error from the Cuckoo Guest: %s",
                      e,
                      extra={
                          "action": "guest.handle",
                          "status": "error",
                          "task_id": self.task.id,
                      })
        finally:
            # Stop Auxiliary modules.
            if not self.stopped_aux:
                self.stopped_aux = True
                self.aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                logger("Taking full memory dump",
                       action="vm.memdump",
                       status="pending",
                       vmname=self.machine.name)
                try:
                    dump_path = os.path.join(self.storage, "memory.dmp")
                    machinery.dump_memory(self.machine.label, dump_path)

                    logger("Taken full memory dump",
                           action="vm.memdump",
                           status="success",
                           vmname=self.machine.name)
                except NotImplementedError:
                    log.error(
                        "The memory dump functionality is not available for "
                        "the current machine manager.",
                        extra={
                            "action": "vm.memdump",
                            "status": "error",
                            "vmname": self.machine.name,
                        })
                except CuckooMachineError as e:
                    log.error("Machinery error: %s",
                              e,
                              extra={
                                  "action": "vm.memdump",
                                  "status": "error",
                              })

            logger("Stopping VM",
                   action="vm.stop",
                   status="pending",
                   vmname=self.machine.name)

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)
            except CuckooMachineError as e:
                log.warning("Unable to stop machine %s: %s",
                            self.machine.label,
                            e,
                            extra={
                                "action": "vm.stop",
                                "status": "error",
                                "vmname": self.machine.name,
                            })

            logger("Stopped VM",
                   action="vm.stop",
                   status="success",
                   vmname=self.machine.name)

            # Disable remote control after stopping the machine
            # if it was enabled for the task.
            if control_enabled:
                try:
                    machinery.disable_remote_control(self.machine.label)
                except NotImplementedError:
                    log.error(
                        "Remote control support has not been implemented for the "
                        "configured machinery module: %s",
                        config("cuckoo:cuckoo:machinery"))

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            if not self.unrouted_network:
                self.unroute_network()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)
            except CuckooMachineError as e:
                log.error(
                    "Unable to release machine %s, reason %s. You might need "
                    "to restore it manually.",
                    self.machine.label,
                    e,
                    extra={
                        "action": "vm.release",
                        "status": "error",
                        "vmname": self.machine.name,
                    })

        return succeeded
示例#4
0
    def launch_analysis(self):
        """Start analysis."""
        succeeded = False

        if self.task.category == "file" or self.task.category == "archive":
            target = os.path.basename(self.task.target)
        else:
            target = self.task.target

        log.info(
            "Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
            self.task.category.upper(), target, self.task.id,
            emit_options(self.task.options), extra={
                "action": "task.init",
                "status": "starting",
                "task_id": self.task.id,
                "target": target,
                "category": self.task.category,
                "package": self.task.package,
                "options": emit_options(self.task.options),
                "custom": self.task.custom,
            }
        )

        # Initialize the analysis.
        if not self.init():
            logger("Failed to initialize", action="task.init", status="error")
            return False

        # Acquire analysis machine.
        try:
            self.acquire_machine()
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Cannot acquire machine: %s", e, extra={
                "action": "vm.acquire", "status": "error",
            })
            return False

        # At this point we can tell the ResultServer about it.
        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            self.errors.put(e)

        # Initialize the guest manager.
        self.guest_manager = GuestManager(
            self.machine.name, self.machine.ip,
            self.machine.platform, self.task.id, self
        )

        self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
        self.aux.start()

        # Generate the analysis configuration file.
        options = self.build_options()

        # Check if the current task has remotecontrol
        # enabled before starting the machine.
        control_enabled = (
            config("cuckoo:remotecontrol:enabled") and
            "remotecontrol" in self.task.options
        )
        if control_enabled:
            try:
                machinery.enable_remote_control(self.machine.label)
            except NotImplementedError:
                raise CuckooMachineError(
                    "Remote control support has not been implemented "
                    "for this machinery."
                )

        try:
            unlocked = False
            self.interface = None

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id,
                                            self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            logger(
                "Starting VM",
                action="vm.start", status="pending",
                vmname=self.machine.name
            )

            # Start the machine.
            machinery.start(self.machine.label, self.task)

            logger(
                "Started VM",
                action="vm.start", status="success",
                vmname=self.machine.name
            )

            # retrieve the port used for remote control
            if control_enabled:
                try:
                    params = machinery.get_remote_control_params(
                        self.machine.label
                    )
                    self.db.set_machine_rcparams(self.machine.label, params)
                except NotImplementedError:
                    raise CuckooMachineError(
                        "Remote control support has not been implemented "
                        "for this machinery."
                    )

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            # Run and manage the components inside the guest unless this
            # machine has the "noagent" option specified (please refer to the
            # wait_finish() function for more details on this function).
            if "noagent" not in self.machine.options:
                self.guest_manage(options)
            else:
                self.wait_finish()

            succeeded = True
        except CuckooMachineSnapshotError as e:
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error starting Virtual Machine! VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.start",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooGuestCriticalTimeout as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s",
                self.machine.name,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                }
            )
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error from the Cuckoo Guest: %s", e, extra={
                "action": "guest.handle",
                "status": "error",
                "task_id": self.task.id,
            })
        finally:
            # Stop Auxiliary modules.
            self.aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                logger(
                    "Taking full memory dump",
                    action="vm.memdump", status="pending",
                    vmname=self.machine.name
                )
                try:
                    dump_path = os.path.join(self.storage, "memory.dmp")
                    machinery.dump_memory(self.machine.label, dump_path)

                    logger(
                        "Taken full memory dump",
                        action="vm.memdump", status="success",
                        vmname=self.machine.name
                    )
                except NotImplementedError:
                    log.error(
                        "The memory dump functionality is not available for "
                        "the current machine manager.", extra={
                            "action": "vm.memdump",
                            "status": "error",
                            "vmname": self.machine.name,
                        }
                    )
                except CuckooMachineError as e:
                    log.error("Machinery error: %s", e, extra={
                        "action": "vm.memdump",
                        "status": "error",
                    })

            logger(
                "Stopping VM",
                action="vm.stop", status="pending",
                vmname=self.machine.name
            )

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)
            except CuckooMachineError as e:
                log.warning(
                    "Unable to stop machine %s: %s",
                    self.machine.label, e, extra={
                        "action": "vm.stop",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

            logger(
                "Stopped VM",
                action="vm.stop", status="success",
                vmname=self.machine.name
            )

            # Disable remote control after stopping the machine
            # if it was enabled for the task.
            if control_enabled:
                try:
                    machinery.disable_remote_control(self.machine.label)
                except NotImplementedError:
                    raise CuckooMachineError(
                        "Remote control support has not been implemented "
                        "for this machinery."
                    )

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            self.unroute_network()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)
            except CuckooMachineError as e:
                log.error(
                    "Unable to release machine %s, reason %s. You might need "
                    "to restore it manually.", self.machine.label, e, extra={
                        "action": "vm.release",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

        return succeeded
示例#5
0
class AnalysisManager(threading.Thread):
    """Analysis Manager.

    This class handles the full analysis process for a given task. It takes
    care of selecting the analysis machine, preparing the configuration and
    interacting with the guest agent and analyzer components to launch and
    complete the analysis and store, process and report its results.
    """

    def __init__(self, task_id, error_queue):
        """@param task: task object containing the details for the analysis."""
        threading.Thread.__init__(self)

        self.errors = error_queue
        self.cfg = Config()
        self.storage = ""
        self.binary = ""
        self.storage_binary = ""
        self.machine = None
        self.db = Database()
        self.task = self.db.view_task(task_id)
        self.guest_manager = None
        self.route = None
        self.interface = None
        self.rt_table = None

    def init(self):
        """Initialize the analysis."""
        self.storage = cwd(analysis=self.task.id)

        # If the analysis storage folder already exists, we need to abort the
        # analysis or previous results will be overwritten and lost.
        if os.path.exists(self.storage):
            log.error("Analysis results folder already exists at path \"%s\", "
                      "analysis aborted", self.storage)
            return False

        # If we're not able to create the analysis storage folder, we have to
        # abort the analysis.
        try:
            Folders.create(self.storage)
        except CuckooOperationalError:
            log.error("Unable to create analysis folder %s", self.storage)
            return False

        self.store_task_info()

        if self.task.category == "file" or self.task.category == "archive":
            # Check if we have permissions to access the file.
            # And fail this analysis if we don't have access to the file.
            if not os.access(self.task.target, os.R_OK):
                log.error(
                    "Unable to access target file, please check if we have "
                    "permissions to access the file: \"%s\"",
                    self.task.target
                )
                return False

            # Check whether the file has been changed for some unknown reason.
            # And fail this analysis if it has been modified.
            # TODO Absorb the file upon submission.
            sample = self.db.view_sample(self.task.sample_id)
            sha256 = File(self.task.target).get_sha256()
            if sha256 != sample.sha256:
                log.error(
                    "Target file has been modified after submission: \"%s\"",
                    self.task.target
                )
                return False

            # Store a copy of the original file if does not exist already.
            # TODO This should be done at submission time.
            self.binary = cwd("storage", "binaries", sha256)
            if not os.path.exists(self.binary):
                try:
                    shutil.copy(self.task.target, self.binary)
                except (IOError, shutil.Error):
                    log.error(
                        "Unable to store file from \"%s\" to \"%s\", "
                        "analysis aborted", self.task.target, self.binary
                    )
                    return False

            # Each analysis directory contains a symlink/copy of the binary.
            try:
                self.storage_binary = os.path.join(self.storage, "binary")

                if hasattr(os, "symlink"):
                    os.symlink(self.binary, self.storage_binary)
                else:
                    shutil.copy(self.binary, self.storage_binary)
            except (AttributeError, OSError) as e:
                log.error("Unable to create symlink/copy from \"%s\" to "
                          "\"%s\": %s", self.binary, self.storage, e)
                return False

        # Initiates per-task logging.
        task_log_start(self.task.id)
        return True

    def store_task_info(self):
        """grab latest task from db (if available) and update self.task"""
        dbtask = self.db.view_task(self.task.id)
        self.task = dbtask.to_dict()

        task_info_path = os.path.join(self.storage, "task.json")
        open(task_info_path, "w").write(dbtask.to_json())

    def acquire_machine(self):
        """Acquire an analysis machine from the pool of available ones."""
        machine = None

        # Start a loop to acquire the a machine to run the analysis on.
        while True:
            machine_lock.acquire()

            # In some cases it's possible that we enter this loop without
            # having any available machines. We should make sure this is not
            # such case, or the analysis task will fail completely.
            if not machinery.availables():
                machine_lock.release()
                time.sleep(1)
                continue

            # If the user specified a specific machine ID, a platform to be
            # used or machine tags acquire the machine accordingly.
            machine = machinery.acquire(machine_id=self.task.machine,
                                        platform=self.task.platform,
                                        tags=self.task.tags)

            # If no machine is available at this moment, wait for one second
            # and try again.
            if not machine:
                machine_lock.release()
                log.debug("Task #%d: no machine available yet", self.task.id)
                time.sleep(1)
            else:
                log.info(
                    "Task #%d: acquired machine %s (label=%s)",
                    self.task.id, machine.name, machine.label, extra={
                        "action": "vm.acquire",
                        "status": "success",
                        "vmname": machine.name,
                    }
                )
                break

        self.machine = machine

    def build_options(self):
        """Generate analysis options.
        @return: options dict.
        """
        options = {}

        if self.task.category == "file":
            options["file_name"] = File(self.task.target).get_name()
            options["file_type"] = File(self.task.target).get_type()
            options["pe_exports"] = \
                ",".join(File(self.task.target).get_exported_functions())

            package, activity = File(self.task.target).get_apk_entry()
            self.task.options["apk_entry"] = "%s:%s" % (package, activity)
        elif self.task.category == "archive":
            options["file_name"] = File(self.task.target).get_name()

        options["id"] = self.task.id
        options["ip"] = self.machine.resultserver_ip
        options["port"] = self.machine.resultserver_port
        options["category"] = self.task.category
        options["target"] = self.task.target
        options["package"] = self.task.package
        options["options"] = emit_options(self.task.options)
        options["enforce_timeout"] = self.task.enforce_timeout
        options["clock"] = self.task.clock
        options["terminate_processes"] = self.cfg.cuckoo.terminate_processes

        if not self.task.timeout:
            options["timeout"] = self.cfg.timeouts.default
        else:
            options["timeout"] = self.task.timeout

        # copy in other analyzer specific options, TEMPORARY (most likely)
        vm_options = getattr(machinery.options, self.machine.name)
        for k in vm_options:
            if k.startswith("analyzer_"):
                options[k] = vm_options[k]

        return options

    def route_network(self):
        """Enable network routing if desired."""
        # Determine the desired routing strategy (none, internet, VPN).
        self.route = self.task.options.get(
            "route", config("routing:routing:route")
        )

        if self.route == "none" or self.route == "drop":
            self.interface = None
            self.rt_table = None
        elif self.route == "inetsim":
            pass
        elif self.route == "tor":
            pass
        elif self.route == "internet":
            if config("routing:routing:internet") == "none":
                log.warning(
                    "Internet network routing has been specified, but not "
                    "configured, ignoring routing for this analysis", extra={
                        "action": "network.route",
                        "status": "error",
                        "route": self.route,
                    }
                )
                self.route = "none"
                self.task.options["route"] = "none"
                self.interface = None
                self.rt_table = None
            else:
                self.interface = config("routing:routing:internet")
                self.rt_table = config("routing:routing:rt_table")
        elif self.route in config("routing:vpn:vpns"):
            self.interface = config("routing:%s:interface" % self.route)
            self.rt_table = config("routing:%s:rt_table" % self.route)
        else:
            log.warning(
                "Unknown network routing destination specified, ignoring "
                "routing for this analysis: %r", self.route, extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                }
            )
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # Check if the network interface is still available. If a VPN dies for
        # some reason, its tunX interface will no longer be available.
        if self.interface and not rooter("nic_available", self.interface):
            log.error(
                "The network interface '%s' configured for this analysis is "
                "not available at the moment, switching to route=none mode.",
                self.interface, extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                }
            )
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # For now this doesn't work yet in combination with tor routing.
        if self.route == "drop" or self.route == "internet":
            rooter(
                "drop_enable", self.machine.ip,
                config("cuckoo:resultserver:ip"),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "inetsim":
            machinery = config("cuckoo:cuckoo:machinery")
            rooter(
                "inetsim_enable", self.machine.ip,
                config("routing:inetsim:server"),
                config("%s:%s:interface" % (machinery, machinery)),
                str(config("cuckoo:resultserver:port")),
                config("routing:inetsim:ports") or ""
            )

        if self.route == "tor":
            rooter(
                "tor_enable", self.machine.ip,
                str(config("cuckoo:resultserver:ip")),
                str(config("routing:tor:dnsport")),
                str(config("routing:tor:proxyport"))
            )

        if self.interface:
            rooter(
                "forward_enable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter(
                "srcroute_enable", self.rt_table, self.machine.ip
            )

        # Propagate the taken route to the database.
        self.db.set_route(self.task.id, self.route)

    def unroute_network(self):
        """Disable any enabled network routing."""
        if self.interface:
            rooter(
                "forward_disable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter(
                "srcroute_disable", self.rt_table, self.machine.ip
            )

        if self.route == "drop" or self.route == "internet":
            rooter(
                "drop_disable", self.machine.ip,
                config("cuckoo:resultserver:ip"),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "inetsim":
            machinery = config("cuckoo:cuckoo:machinery")
            rooter(
                "inetsim_disable", self.machine.ip,
                config("routing:inetsim:server"),
                config("%s:%s:interface" % (machinery, machinery)),
                str(config("cuckoo:resultserver:port")),
                config("routing:inetsim:ports") or ""
            )

        if self.route == "tor":
            rooter(
                "tor_disable", self.machine.ip,
                str(config("cuckoo:resultserver:ip")),
                str(config("routing:tor:dnsport")),
                str(config("routing:tor:proxyport"))
            )

    def wait_finish(self):
        """Some VMs don't have an actual agent. Mainly those that are used as
        assistance for an analysis through the services auxiliary module. This
        method just waits until the analysis is finished rather than actively
        trying to engage with the Cuckoo Agent."""
        self.db.guest_set_status(self.task.id, "running")
        while self.db.guest_get_status(self.task.id) == "running":
            time.sleep(1)

    def guest_manage(self, options):
        # Handle a special case where we're creating a baseline report of this
        # particular virtual machine - a report containing all the results
        # that are gathered if no additional samples are ran in the VM. These
        # results, such as loaded drivers and opened sockets in volatility, or
        # DNS requests to hostnames related to Microsoft Windows, etc may be
        # omitted or at the very least given less priority when creating a
        # report for an analysis that ran on this VM later on.
        if self.task.category == "baseline":
            time.sleep(options["timeout"])
        else:
            # Start the analysis.
            self.db.guest_set_status(self.task.id, "starting")
            monitor = self.task.options.get("monitor", "latest")
            self.guest_manager.start_analysis(options, monitor)

            # In case the Agent didn't respond and we force-quit the analysis
            # at some point while it was still starting the analysis the state
            # will be "stop" (or anything but "running", really).
            if self.db.guest_get_status(self.task.id) == "starting":
                self.db.guest_set_status(self.task.id, "running")
                self.guest_manager.wait_for_completion()

            self.db.guest_set_status(self.task.id, "stopping")

    def launch_analysis(self):
        """Start analysis."""
        succeeded = False

        if self.task.category == "file" or self.task.category == "archive":
            target = os.path.basename(self.task.target)
        else:
            target = self.task.target

        log.info(
            "Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
            self.task.category.upper(), target, self.task.id,
            emit_options(self.task.options), extra={
                "action": "task.init",
                "status": "starting",
                "task_id": self.task.id,
                "target": target,
                "category": self.task.category,
                "package": self.task.package,
                "options": emit_options(self.task.options),
                "custom": self.task.custom,
            }
        )

        # Initialize the analysis.
        if not self.init():
            logger("Failed to initialize", action="task.init", status="error")
            return False

        # Acquire analysis machine.
        try:
            self.acquire_machine()
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Cannot acquire machine: %s", e, extra={
                "action": "vm.acquire", "status": "error",
            })
            return False

        # At this point we can tell the ResultServer about it.
        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            self.errors.put(e)

        # Initialize the guest manager.
        self.guest_manager = GuestManager(
            self.machine.name, self.machine.ip,
            self.machine.platform, self.task.id, self
        )

        self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
        self.aux.start()

        # Generate the analysis configuration file.
        options = self.build_options()

        # Check if the current task has remotecontrol
        # enabled before starting the machine.
        control_enabled = (
            config("cuckoo:remotecontrol:enabled") and
            "remotecontrol" in self.task.options
        )
        if control_enabled:
            try:
                machinery.enable_remote_control(self.machine.label)
            except NotImplementedError:
                raise CuckooMachineError(
                    "Remote control support has not been implemented "
                    "for this machinery."
                )

        try:
            unlocked = False
            self.interface = None

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id,
                                            self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            logger(
                "Starting VM",
                action="vm.start", status="pending",
                vmname=self.machine.name
            )

            # Start the machine.
            machinery.start(self.machine.label, self.task)

            logger(
                "Started VM",
                action="vm.start", status="success",
                vmname=self.machine.name
            )

            # retrieve the port used for remote control
            if control_enabled:
                try:
                    params = machinery.get_remote_control_params(
                        self.machine.label
                    )
                    self.db.set_machine_rcparams(self.machine.label, params)
                except NotImplementedError:
                    raise CuckooMachineError(
                        "Remote control support has not been implemented "
                        "for this machinery."
                    )

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            # Run and manage the components inside the guest unless this
            # machine has the "noagent" option specified (please refer to the
            # wait_finish() function for more details on this function).
            if "noagent" not in self.machine.options:
                self.guest_manage(options)
            else:
                self.wait_finish()

            succeeded = True
        except CuckooMachineSnapshotError as e:
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error starting Virtual Machine! VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.start",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooGuestCriticalTimeout as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s",
                self.machine.name,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                }
            )
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error from the Cuckoo Guest: %s", e, extra={
                "action": "guest.handle",
                "status": "error",
                "task_id": self.task.id,
            })
        finally:
            # Stop Auxiliary modules.
            self.aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                logger(
                    "Taking full memory dump",
                    action="vm.memdump", status="pending",
                    vmname=self.machine.name
                )
                try:
                    dump_path = os.path.join(self.storage, "memory.dmp")
                    machinery.dump_memory(self.machine.label, dump_path)

                    logger(
                        "Taken full memory dump",
                        action="vm.memdump", status="success",
                        vmname=self.machine.name
                    )
                except NotImplementedError:
                    log.error(
                        "The memory dump functionality is not available for "
                        "the current machine manager.", extra={
                            "action": "vm.memdump",
                            "status": "error",
                            "vmname": self.machine.name,
                        }
                    )
                except CuckooMachineError as e:
                    log.error("Machinery error: %s", e, extra={
                        "action": "vm.memdump",
                        "status": "error",
                    })

            logger(
                "Stopping VM",
                action="vm.stop", status="pending",
                vmname=self.machine.name
            )

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)
            except CuckooMachineError as e:
                log.warning(
                    "Unable to stop machine %s: %s",
                    self.machine.label, e, extra={
                        "action": "vm.stop",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

            logger(
                "Stopped VM",
                action="vm.stop", status="success",
                vmname=self.machine.name
            )

            # Disable remote control after stopping the machine
            # if it was enabled for the task.
            if control_enabled:
                try:
                    machinery.disable_remote_control(self.machine.label)
                except NotImplementedError:
                    raise CuckooMachineError(
                        "Remote control support has not been implemented "
                        "for this machinery."
                    )

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            self.unroute_network()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)
            except CuckooMachineError as e:
                log.error(
                    "Unable to release machine %s, reason %s. You might need "
                    "to restore it manually.", self.machine.label, e, extra={
                        "action": "vm.release",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

        return succeeded

    def process_results(self):
        """Process the analysis results and generate the enabled reports."""
        logger(
            "Starting task reporting",
            action="task.report", status="pending"
        )

        # TODO Refactor this function as currently "cuckoo process" has a 1:1
        # copy of its code. TODO Also remove "archive" files.
        results = RunProcessing(task=self.task).run()
        RunSignatures(results=results).run()
        RunReporting(task=self.task, results=results).run()

        # If the target is a file and the user enabled the option,
        # delete the original copy.
        if self.task.category == "file" and self.cfg.cuckoo.delete_original:
            if not os.path.exists(self.task.target):
                log.warning("Original file does not exist anymore: \"%s\": "
                            "File not found.", self.task.target)
            else:
                try:
                    os.remove(self.task.target)
                except OSError as e:
                    log.error("Unable to delete original file at path "
                              "\"%s\": %s", self.task.target, e)

        # If the target is a file and the user enabled the delete copy of
        # the binary option, then delete the copy.
        if self.task.category == "file" and self.cfg.cuckoo.delete_bin_copy:
            if not os.path.exists(self.binary):
                log.warning("Copy of the original file does not exist anymore: \"%s\": File not found", self.binary)
            else:
                try:
                    os.remove(self.binary)
                except OSError as e:
                    log.error("Unable to delete the copy of the original file at path \"%s\": %s", self.binary, e)
            # Check if the binary in the analysis directory is an invalid symlink. If it is, delete it.
            if os.path.islink(self.storage_binary) and not os.path.exists(self.storage_binary):
                try:
                    os.remove(self.storage_binary)
                except OSError as e:
                    log.error("Unable to delete symlink to the binary copy at path \"%s\": %s", self.storage_binary, e)

        log.info(
            "Task #%d: reports generation completed",
            self.task.id, extra={
                "action": "task.report",
                "status": "success",
            }
        )

        return True

    def run(self):
        """Run manager thread."""
        global active_analysis_count
        active_analysis_count += 1
        try:
            self.launch_analysis()

            log.debug("Released database task #%d", self.task.id)

            if self.cfg.cuckoo.process_results:
                self.store_task_info()
                self.db.set_status(self.task.id, TASK_COMPLETED)
                # TODO If self.process_results() is unified with apps.py's
                # process() method, then ensure that TASK_FAILED_PROCESSING is
                # handled correctly and not overwritten by the db.set_status()
                # at the end of this method.
                self.process_results()

            # We make a symbolic link ("latest") which links to the latest
            # analysis - this is useful for debugging purposes. This is only
            # supported under systems that support symbolic links.
            if hasattr(os, "symlink"):
                latest = cwd("storage", "analyses", "latest")

                # First we have to remove the existing symbolic link, then we
                # have to create the new one.
                # Deal with race conditions using a lock.
                latest_symlink_lock.acquire()
                try:
                    # As per documentation, lexists() returns True for dead
                    # symbolic links.
                    if os.path.lexists(latest):
                        os.remove(latest)

                    os.symlink(self.storage, latest)
                except OSError as e:
                    log.warning("Error pointing latest analysis symlink: %s" % e)
                finally:
                    latest_symlink_lock.release()

            # overwrite task.json so we have the latest data inside
            self.store_task_info()
            log.info(
                "Task #%d: analysis procedure completed",
                self.task.id, extra={
                    "action": "task.stop",
                    "status": "success",
                }
            )
        except:
            log.exception("Failure in AnalysisManager.run", extra={
                "action": "task.stop",
                "status": "error",
            })
        finally:
            if self.cfg.cuckoo.process_results:
                self.db.set_status(self.task.id, TASK_REPORTED)
            else:
                self.db.set_status(self.task.id, TASK_COMPLETED)
            task_log_stop(self.task.id)
            active_analysis_count -= 1
示例#6
0
    def init(self, db):
        """Executed by the scheduler. Prepares the analysis for starting."""
        # Used at the processing and final stage to determine is processing
        # was run and successful
        self.processing_success = False

        # TODO remove this when the latest RDP/VNC machine additions/changes
        # have been added. Currently, the enabling requires a db change at
        # the start of a machine
        self.db = db

        # If for some reason the task dir does not exist, stop the analysis
        # because it should have been created upon submission
        if not self.task.dir_exists():
            log.error("Task directory for task #%s does not exist",
                      self.task.id)
            return False

        self.guest_manager = GuestManager(self.machine.name, self.machine.ip,
                                          self.machine.platform, self.task,
                                          self, self.analysis, self.target)

        self.aux = RunAuxiliary(self.task.task_dict, self.machine,
                                self.guest_manager)

        self.rt = RealTimeHandler()

        # Check if the current task has remotecontrol
        # enabled before starting the machine.
        self.control_enabled = (config("cuckoo:remotecontrol:enabled")
                                and "remotecontrol" in self.task.options)

        # Write task to disk in json file
        self.task.write_task_json()

        if not self.target.target:
            self.build_options()
            return True

        options = {
            "category": self.target.category,
            "target": self.target.target
        }

        if self.target.is_file:
            if not self.target.copy_exists():
                log.error("The file to submit '%s' does not exist",
                          self.target.copied_binary)
                return False

            if not self.target.helper.is_readable():
                log.error(
                    "Unable to read target file %s, please check if it is"
                    " readable for the user executing Cuckoo Sandbox",
                    self.target.copied_binary)
                return False

            task_options = {}
            package, activity = self.target.helper.get_apk_entry()
            if package and activity:
                task_options["apk_entry"] = "%s:%s" % (package, activity)

            options.update({
                "file_name":
                os.path.basename(self.target.target),
                "file_type":
                self.target.helper.get_type(),
                "pe_exports":
                ",".join(self.target.helper.get_exported_functions()),
                "options":
                task_options
            })

        self.build_options(options=options)

        return True
示例#7
0
class Regular(AnalysisManager):

    supports = ["regular", "baseline", "service"]

    def init(self, db):
        """Executed by the scheduler. Prepares the analysis for starting."""
        # Used at the processing and final stage to determine is processing
        # was run and successful
        self.processing_success = False

        # TODO remove this when the latest RDP/VNC machine additions/changes
        # have been added. Currently, the enabling requires a db change at
        # the start of a machine
        self.db = db

        # If for some reason the task dir does not exist, stop the analysis
        # because it should have been created upon submission
        if not self.task.dir_exists():
            log.error("Task directory for task #%s does not exist",
                      self.task.id)
            return False

        self.guest_manager = GuestManager(self.machine.name, self.machine.ip,
                                          self.machine.platform, self.task,
                                          self, self.analysis, self.target)

        self.aux = RunAuxiliary(self.task.task_dict, self.machine,
                                self.guest_manager)

        self.rt = RealTimeHandler()

        # Check if the current task has remotecontrol
        # enabled before starting the machine.
        self.control_enabled = (config("cuckoo:remotecontrol:enabled")
                                and "remotecontrol" in self.task.options)

        # Write task to disk in json file
        self.task.write_task_json()

        if not self.target.target:
            self.build_options()
            return True

        options = {
            "category": self.target.category,
            "target": self.target.target
        }

        if self.target.is_file:
            if not self.target.copy_exists():
                log.error("The file to submit '%s' does not exist",
                          self.target.copied_binary)
                return False

            if not self.target.helper.is_readable():
                log.error(
                    "Unable to read target file %s, please check if it is"
                    " readable for the user executing Cuckoo Sandbox",
                    self.target.copied_binary)
                return False

            task_options = {}
            package, activity = self.target.helper.get_apk_entry()
            if package and activity:
                task_options["apk_entry"] = "%s:%s" % (package, activity)

            options.update({
                "file_name":
                os.path.basename(self.target.target),
                "file_type":
                self.target.helper.get_type(),
                "pe_exports":
                ",".join(self.target.helper.get_exported_functions()),
                "options":
                task_options
            })

        self.build_options(options=options)

        return True

    def run(self):
        """Starts the analysis manager thread."""
        task_log_start(self.task.id)
        analysis_success = False

        try:
            analysis_success = self.start_and_wait()

            # See if the analysis did not fail in the analysis manager
            # and see if the status was not set to failed by
            # the guest manager
            if analysis_success:
                if self.analysis.status == Analysis.FAILED:
                    analysis_success = False
        except Exception as e:
            log.exception("Failure during the starting of task #%s. Error: %s",
                          self.task.id,
                          e,
                          extra={
                              "action": "task.start",
                              "status": "error"
                          })
        finally:
            try:
                self.stop_and_wait()
            except Exception as e:
                log.exception(
                    "Failure during the stopping of task #%s. Error: %s",
                    self.task.id,
                    e,
                    extra={
                        "action": "task.stop",
                        "status": "error"
                    })

        if analysis_success:
            self.set_analysis_status(Analysis.STOPPED, wait=True)
        else:
            self.set_analysis_status(Analysis.FAILED, wait=True)

        if not config("cuckoo:cuckoo:process_results"):
            log.debug("Cuckoo process_results is set to 'no',"
                      " not processing results")
            return

        log.info("Processing and reporting results for task #%s",
                 self.task.id,
                 extra={
                     "action": "task.report",
                     "status": "pending"
                 })
        try:
            self.processing_success = self.task.process()
        except Exception as e:
            log.exception("Error during processing of task #%s. Error: %s",
                          self.task.id,
                          e,
                          extra={
                              "action": "task.report",
                              "status": "failed"
                          })
            return

        log.info("Task #%d: analysis procedure completed",
                 self.task.id,
                 extra={
                     "action": "task.report",
                     "status": "finished",
                 })

    def start_and_wait(self):
        """Start the analysis by running the auxiliary modules,
        adding the task to the resultserver, starting the machine
        and running a guest manager."""
        # Set guest status to starting and start analysis machine
        self.set_analysis_status(Analysis.STARTING)

        target = self.target.target
        if self.target.target and self.target.is_file:
            target = os.path.basename(target)

        log.info(
            "Starting analysis (task #%s, options: '%s') type '%s'."
            " Target: %s '%s'",
            self.task.id,
            self.options["options"],
            self.task.type,
            self.target.category,
            target,
            extra={
                "action": "task.init",
                "status": "starting",
                "task_id": self.task.id,
                "target": target,
                "category": self.target.category,
                "package": self.task.package,
                "options": self.options["options"],
                "custom": self.task.custom,
                "type": self.task.type
            })

        ResultServer().add_task(self.task.db_task, self.machine, self.rt)

        # Start auxiliary modules
        self.aux.start()

        if self.control_enabled:
            try:
                self.machinery.enable_remote_control(self.machine.label)
            except NotImplementedError:
                self.control_enabled = False
                log.exception(
                    "Remote control support has not been implemented "
                    "for machinery %s.", self.machine.manager)

        # Json log for performance measurement purposes
        logger("Starting VM",
               action="vm.start",
               status="pending",
               vmname=self.machine.name)

        try:
            self.machinery.start(self.machine.label, self.task.db_task)
        except CuckooMachineSnapshotError as e:
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s",
                self.machine.name,
                e,
                extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                })
            return False
        except CuckooMachineError as e:
            log.error("Error starting Virtual Machine! VM: %s, error: %s",
                      self.machine.name,
                      e,
                      extra={
                          "action": "vm.start",
                          "status": "error",
                          "vmname": self.machine.name,
                      })
            return False

        logger("Started VM",
               action="vm.start",
               status="success",
               vmname=self.machine.name)

        # retrieve the port used for remote control
        if self.control_enabled:
            try:
                params = self.machinery.get_remote_control_params(
                    self.machine.label)
                self.db.set_machine_rcparams(self.machine.label, params)
            except NotImplementedError:
                log.exception(
                    "Remote control support has not been implemented "
                    "for machinery %s.", self.machine.manager)

        # Enable network routing
        self.route.route_network()

        # By the time start returns it will have fully started the Virtual
        # Machine. We can now safely release the machine lock.
        self.release_machine_lock()

        # Request scheduler action for status 'starting'
        self.request_scheduler_action(Analysis.STARTING)

        # Choose the correct way of waiting or managing the agent and
        # execute it
        try:
            self.manage()
        except CuckooGuestCriticalTimeout as e:
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s",
                self.machine.name,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                })

        except CuckooGuestError as e:
            log.error("Error from the Cuckoo Guest: %s",
                      e,
                      extra={
                          "action": "guest.handle",
                          "status": "error",
                          "task_id": self.task.id,
                      })

        return True

    def stop_and_wait(self):
        """Stop the analysis by stopping the aux modules, optionally
        dumping VM memory, stopping the VM and deleting the task from
        the resultserver."""
        self.set_analysis_status(Analysis.STOPPING)

        # Stop all Auxiliary modules
        self.aux.stop()

        # If enabled, make a full memory dump of the machine
        # before it shuts down
        if config("cuckoo:cuckoo:memory_dump") or self.task.memory:
            logger("Taking full memory dump",
                   action="vm.memdump",
                   status="pending",
                   vmname=self.machine.name)
            try:
                dump_path = os.path.join(self.task.path, "memory.dmp")
                self.machinery.dump_memory(self.machine.label, dump_path)

                logger("Taken full memory dump",
                       action="vm.memdump",
                       status="success",
                       vmname=self.machine.name)

            except NotImplementedError:
                log.error(
                    "The memory dump functionality is not available for "
                    "the current machine manager.",
                    extra={
                        "action": "vm.memdump",
                        "status": "error",
                        "vmname": self.machine.name,
                    })
            except CuckooMachineError as e:
                log.error("Machinery error: %s",
                          e,
                          extra={
                              "action": "vm.memdump",
                              "status": "error",
                          })

        logger("Stopping VM",
               action="vm.stop",
               status="pending",
               vmname=self.machine.name)

        # Stop the analysis machine.
        try:
            self.machinery.stop(self.machine.label)
        except CuckooMachineError as e:
            log.warning("Unable to stop machine %s: %s",
                        self.machine.label,
                        e,
                        extra={
                            "action": "vm.stop",
                            "status": "error",
                            "vmname": self.machine.name,
                        })

        logger("Stopped VM",
               action="vm.stop",
               status="success",
               vmname=self.machine.name)

        # Disable remote control after stopping the machine
        # if it was enabled for the task.
        if self.control_enabled:
            try:
                self.machinery.disable_remote_control(self.machine.label)
            except NotImplementedError:
                log.exception(
                    "Remote control support has not been implemented "
                    "for machinery %s.", self.machine.manager)

        # After all this, we can make the ResultServer forget about the
        # internal state for this analysis task.
        ResultServer().del_task(self.task.db_task, self.machine)

        # Drop the network routing rules if any.
        self.route.unroute_network()

    def manage(self):
        """Choose and use to method of waiting or managing the further steps
        to be taken of an analysis."""
        if "noagent" in self.machine.options:
            log.debug("Usage handler for the 'noagent' option")
            self.set_analysis_status(Analysis.RUNNING)
            self.wait_finish()
        elif self.task.type == "baseline":
            log.debug("Sleeping until timeout for baseline")
            self.set_analysis_status(Analysis.RUNNING)
            time.sleep(self.options["timeout"])
        else:
            log.debug("Using guest manager")
            monitor = self.task.options.get("monitor", "latest")
            self.guest_manager.start_analysis(self.options, monitor)

            if self.analysis.status == Analysis.STARTING:
                self.set_analysis_status(Analysis.RUNNING)
                self.guest_manager.wait_for_completion()

    def set_target(self, targets):
        if targets:
            self.target = targets[0]
        else:
            self.target = Target()

    def on_status_starting(self, db):
        """Is executed by the scheduler on analysis status starting
        Stores the chosen route in the db."""
        log.info("Using route '%s' for task #%s", self.route.route,
                 self.task.id)
        # Propagate the taken route to the database.
        db.set_route(self.task.id, self.route.route)

        # Store used machine in the task
        db.set_machine(self.task.id, self.machine.name)

    def on_status_stopped(self, db):
        """Is executed by the scheduler on analysis status stopped
        Sets the task to completed, writes task json to analysis folder
        and releases machine if it is locked."""
        log.debug("Setting task #%s status to %s", self.task.id,
                  TASK_COMPLETED)
        self.task.set_status(TASK_COMPLETED)

        # Update task obj and write json to disk
        self.task.write_task_json()

        if self.machine.locked:
            log.debug("Releasing machine lock on %s", self.machine.label)
            self.machine = self.machinery.release(self.machine.label)

    def on_status_failed(self, db):
        """Is executed by the scheduler on analysis status failed.
        Releases the locked machine if it is locked and updates task status
        to analysis failed."""
        log.error("Analysis for task #%s failed", self.task.id)
        if self.machine.locked:
            log.debug("Releasing machine lock on %s", self.machine.label)
            self.machine = self.machinery.release(self.machine.label)

    def finalize(self, db):
        """Executed by the scheduler when the analysis manager thread exists.
        Updates the task status to the correct one and updates the
        task.json."""
        self.task.set_latest()
        self.release_machine_lock()
        # If, at this point, the analysis is not stopped, it cannot
        # succeeded, since the manager thread already exited. Updated status
        # to failed if the results were not processed.
        if self.analysis.status != Analysis.STOPPED:
            log.warning("Analysis status is '%s' after exit.",
                        self.analysis.status)
            if not config("cuckoo:cuckoo:process_results"):
                log.debug("Setting task #%s status to %s",
                          TASK_FAILED_ANALYSIS)
                self.task.write_task_json(status=TASK_FAILED_ANALYSIS)
                self.task.set_status(TASK_FAILED_ANALYSIS)

        if config("cuckoo:cuckoo:process_results"):
            if self.processing_success:
                log.debug("Setting task #%s status to %s", self.task.id,
                          TASK_REPORTED)
                self.task.write_task_json(status=TASK_REPORTED)
                self.task.set_status(TASK_REPORTED)
            else:
                log.debug("Setting task #%s status to %s", self.task.id,
                          TASK_FAILED_PROCESSING)
                self.task.write_task_json(status=TASK_FAILED_PROCESSING)
                self.task.set_status(TASK_FAILED_PROCESSING)

        task_log_stop(self.task.id)
示例#8
0
class MassURL(AnalysisManager):

    supports = ["massurl"]
    URL_BLOCKSIZE = 5
    SECS_PER_BLOCK = 25

    def init(self, db):
        # If for some reason the task dir does not exist, stop the analysis
        # because it should have been created upon submission
        if not self.task.dir_exists():
            log.error("Task directory for task #%s does not exist",
                      self.task.id)
            return False

        if not URLDiaries.init_done:
            URLDiaries.init()

        self.curr_block = self.new_target_block()
        if not self.curr_block:
            log.error("Empty target list, cannot proceed.")
            return False

        self.rt = RealTimeHandler()
        self.ev_client = EventClient()
        self.URL_BLOCKSIZE = int(
            self.task.options.get("urlblocksize", self.URL_BLOCKSIZE))
        self.SECS_PER_BLOCK = int(
            self.task.options.get("blocktime", self.SECS_PER_BLOCK))
        self.aborted = False
        self.completed = False

        # Write task to disk in json file
        self.task.write_task_json()
        self.build_options(
            options={
                "category": "url",
                "target": ",".join(self.curr_block.keys()),
                "enforce_timeout": True,
                "timeout": len(self.task.targets) * self.SECS_PER_BLOCK * 3
            })

        self.guest_manager = GuestManager(
            self.machine.name, self.machine.ip, self.machine.platform,
            self.task, self, self.analysis,
            self.curr_block.get(
                self.curr_block.keys().pop(0)).get("target_obj"))

        self.aux = RunAuxiliary(self.task.task_dict, self.machine,
                                self.guest_manager)

        # The wait/agent status checking etc is run in a separate thread. This
        # allows the analysis manager to perform other actions while the
        # analysis is running
        self.gm_wait_th = threading.Thread(
            target=self.guest_manager.wait_for_completion)
        self.gm_wait_th.daemon = True
        self.detection_events = []
        self.netflow_events = []
        self.js_events = []
        self.realtime_finished = set()
        self.tlskeys_response = None
        self.realtime_error = False
        self.requestfinder = RequestFinder(self.task.id)
        self.all_pids_targets = {}

        return True

    def set_target(self, targets):
        blocksize = int(
            self.task.options.get("urlblocksize", self.URL_BLOCKSIZE))
        self.targets = []
        for i in range(0, len(targets), blocksize):
            self.targets.append(targets[i:i + blocksize])

    def new_target_block(self):
        block = {}
        if self.targets:
            for t in self.targets.pop(0):
                diary = URLDiary(t.target, t.sha256, self.machine.name,
                                 self.task.package)
                block[t.target] = {"diary": diary, "target_obj": t}
            return block
        return {}

    def start_run(self):
        self.set_analysis_status(Analysis.STARTING)
        log.info(
            "Starting analysis (task #%s, options: '%s') type '%s'. %d URLs",
            self.task.id, self.options["options"], self.task.type,
            len(self.task.targets))

        ResultServer().add_task(self.task.db_task, self.machine, self.rt)
        self.aux.start()

        try:
            self.machinery.start(self.machine.label, self.task.db_task)
        except CuckooMachineSnapshotError as e:
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s", self.machine.name, e)
            return False
        except CuckooMachineError as e:
            log.error("Error starting Virtual Machine! VM: %s, error: %s",
                      self.machine.name, e)
            return False

        # Enable network routing
        if not self.route.route_network():
            log.error("Failed to use chosen route for the analysis")
            self.ev_client.send_event(
                "massurltaskfailure", {
                    "taskid":
                    self.task.id,
                    "error":
                    "Failed to use chosen route '%s'. "
                    "Inspect the log" % self.route.route,
                    "status":
                    self.analysis.status
                })
            return False

        # By the time start returns it will have fully started the Virtual
        # Machine. We can now safely release the machine lock.
        self.release_machine_lock()

        # Request scheduler action for status 'starting'
        self.request_scheduler_action(Analysis.STARTING)

        try:
            self.guest_manager.start_analysis(
                self.options, self.task.options.get("monitor", "latest"))
        except CuckooGuestCriticalTimeout as e:
            log.error(
                "Critical timeout reached while starting virtual"
                " machine. %s", e)
            return False
        except CuckooGuestError as e:
            log.error("Failed to prepare guest for analysis: %s", e)
            return False

        return self.analysis.status == Analysis.STARTING

    def stop_and_wait(self):
        if self.rt.sock:
            try:
                # Use the realtime protocol to request the analyzer to stop. So
                # that the analysis, logs etc can be closed gracefully.
                self.rt.send_command_blocking(RealTimeMessages.stop_analyzer(),
                                              maxwait=3)
            except RealtimeError:
                log.warning("No response from analyzer to stopping request")

        self.set_analysis_status(Analysis.STOPPING)

        # Stop all Auxiliary modules
        self.aux.stop()

        # Wait for the guest manager wait to stop before stopping the machine.
        # We want any exception messages to be retrieved from the agent.
        if self.gm_wait_th.is_alive():
            self.gm_wait_th.join(timeout=6)

        # Stop the analysis machine.
        try:
            self.machinery.stop(self.machine.label)
        except CuckooMachineError as e:
            log.warning(
                "Unable to stop machine %s: %s",
                self.machine.label,
                e,
            )

        # After all this, we can make the ResultServer forget about the
        # internal state for this analysis task.
        ResultServer().del_task(self.task.db_task, self.machine)

        # Drop the network routing rules if any.
        self.route.unroute_network()

    def run_analysis(self):
        while self.curr_block:
            if not self.gm_wait_th.is_alive():
                return

            log.info("Uploaded new block of %d URLs", len(self.curr_block))

            pkg_info = {}
            tries = len(self.curr_block) * 10
            while not pkg_info:
                try:
                    pkg_info = self.rt.send_command_blocking(
                        RealTimeMessages.list_packages(),
                        maxwait=len(self.curr_block) * 2)
                except RealtimeError as e:
                    log.error(
                        "No response from guest or it failed to send analysis "
                        "package information. %s", e)
                    return

                tries -= 1
                if not pkg_info and tries <= 0:
                    log.error(
                        "Analyzer is not returning target PIDs. It might have "
                        "failed to start the targets.")
                    return
                time.sleep(1)

            pids_targets = {
                int(pid): target
                for p in pkg_info for pid, target in p.get("pids").items()
            }
            self.all_pids_targets.update(pids_targets)

            # Give the URLs some time to load and remain opened
            time.sleep(self.SECS_PER_BLOCK)

            # Request the analyzer to stop all running analysis packages
            try:
                self.rt.send_command(RealTimeMessages.stop_all_packages())
            except RealtimeError as e:
                log.error("Error sending real-time package stop command. %s",
                          e)

            # Ask realtime to process the generated onemon protobuf file.
            signature_events = self.handle_events(pids_targets)

            # The end of the URL block is reached, have the scheduler
            # do the database operations
            self.request_scheduler_action(for_status="stopurlblock")

            # Store URL diaries
            for url, info in self.curr_block.iteritems():
                diary = info.get("diary")
                if not diary.stored:
                    URLDiaries.store_diary(info.get("diary"))

            if signature_events:
                self.request_scheduler_action(for_status="aborted")
                return

            # Acquire the next block of URLs according to the defined URL
            # blocksize
            self.curr_block = self.new_target_block()
            if not self.curr_block:
                continue

            try:
                self.rt.send_command_blocking(RealTimeMessages.start_package(
                    target=self.curr_block.keys(),
                    category="url",
                    package=self.task.package,
                    options=self.task.options,
                    respond=True),
                                              maxwait=len(self.curr_block) *
                                              10)
            except RealtimeError as e:
                log.error(
                    "No response from guest or it failed to open new URLs. "
                    "Error: %s", e)
                return

        # The loop was broken because there are no targets left. This means
        # the analysis was completed.
        self.completed = True

    def attribute_js(self, pid_target):
        pid_js = {}
        ppid_pid = {}

        for pid, ppid, code in self.js_events:
            if pid not in pid_js:
                pid_js[pid] = []

            pid_js[pid].append(code)

            if ppid not in ppid_pid:
                ppid_pid[ppid] = set()
            ppid_pid[ppid].add(pid)

        def walk_childprocs(pid, t):
            for js in pid_js.get(pid, []):
                self.curr_block.get(target).get("diary").add_javascript(js)

            for child in ppid_pid.get(pid, []):
                walk_childprocs(child, t)

        for pid, target in pid_target.iteritems():
            walk_childprocs(pid, target)

    def extract_requests(self, pid_target):
        flow_target = {}
        flows = {}
        ppid_pid = {}
        ports = set()

        for flow, pid, ppid in self.netflow_events:
            ports.add(flow[3])
            if pid not in flows:
                flows[pid] = []
            flows[pid].append(flow)

            if ppid not in ppid_pid:
                ppid_pid[ppid] = set()
            ppid_pid[ppid].add(pid)

        def walk_childprocs(pid, t):
            for flow in flows.get(pid, []):
                flow_target[flow] = t

            for child in ppid_pid.get(pid, []):
                walk_childprocs(child, t)

        for pid, target in pid_target.iteritems():
            walk_childprocs(pid, target)

        reports = self.requestfinder.process(flow_target, ports=ports)
        for target_url, report in reports.iteritems():
            log.debug("Traffic extracted for %s", target_url)
            target_helpers = self.curr_block.get(target_url)
            diary = target_helpers.get("diary")
            diary.set_request_report(report)

    def handle_events(self, pid_target):
        # New queue for a new batch, to be sure it is empty
        self.detection_events = []
        self.netflow_events = []
        self.realtime_finished = set()
        self.tlskeys_response = None
        self.realtime_error = False
        self.js_events = []
        wait_for = set()

        # Tell onemon to process results.
        self.ev_client.send_event("massurltask", body={"taskid": self.task.id})
        wait_for.add("massurltask")

        # If IE was used, TLS master secrets van be extracted.
        # If not package is supplied, the analyzer will use IE.
        if not self.task.package or self.task.package.lower() == "ie":
            if config("massurl:massurl:extract_tls"):
                lsass_pid = get_lsass_pids(self.task.id)
                if lsass_pid:
                    log.debug("Running TLS key extraction for task #%s",
                              self.task.id)
                    self.ev_client.send_event("dumptls", {
                        "taskid": self.task.id,
                        "lsass_pid": lsass_pid
                    })
                    wait_for.add("dumptls")

        waited = 0
        while wait_for and not self.realtime_error:
            if waited >= 60:
                log.error(
                    "Timeout for realtime onemon processor reached. No results"
                    " received. Stopping analysis of URL current block: %r. "
                    "Was waiting for results of: %s", self.curr_block.keys(),
                    wait_for)
                break
            waited += 0.5
            time.sleep(0.5)
            wait_for -= self.realtime_finished

        if self.realtime_error:
            log.error("Realtime processor reported an error. %s",
                      self.realtime_error)

        if self.netflow_events:
            log.debug("Running request extraction for task: #%s", self.task.id)
            self.extract_requests(pid_target)

        if self.js_events:
            log.debug("Running Javascript attribution")
            self.attribute_js(pid_target)

        # If no events were sent by Onemon, no signatures were triggered.
        # Continue analysis.
        if self.detection_events:
            self.handle_signature_events()
            return True

        return False

    def handle_signature_events(self):
        log.info("%d realtime signature triggered for task #%d",
                 len(self.detection_events), self.task.id)
        # Collect all triggered signatures from the queue
        sigs = []
        for ev in self.detection_events:
            sigs.append({
                "signature": ev.get("signature"),
                "description": ev.get("description"),
                "ioc": ev.get("ioc")
            })

        # A signature was triggered while only a single URL was opened. Update
        # and store the URL diary, and send a detection event.
        if len(self.curr_block) == 1:
            diary = self.curr_block.itervalues().next().get("diary")
            diary.add_signature(sigs)
            diary_id = URLDiaries.store_diary(diary)

            self.ev_client.send_event("massurldetection",
                                      body={
                                          "taskid": self.task.id,
                                          "status": "aborted",
                                          "candidates": self.curr_block.keys(),
                                          "signatures": sigs,
                                          "diary_id": diary_id
                                      })

        else:
            # Multiple URLs were opened while signatures were triggered. Send
            # a detection event with all URLs that were opened. The massurl
            # scheduler will create a new task with only these URLs.
            self.ev_client.send_event("massurldetection",
                                      body={
                                          "taskid": self.task.id,
                                          "status": "aborted",
                                          "candidates": self.curr_block.keys(),
                                          "signatures": sigs
                                      })

    def run(self):
        task_log_start(self.task.id)
        if not self.ev_client.start(maxtries=2):
            log.error(
                "Could not connect to Cuckoo event messaging client. Aborting")
            self.set_analysis_status(Analysis.FAILED)
            return

        # Tell the client to ask the event server to send all events of
        # type 'signature' and 'netflow'. These events will be sent by onemon.
        self.ev_client.subscribe(self.realtime_sig_cb, "signature")
        self.ev_client.subscribe(self.realtime_netflow_cb, "netflow")
        self.ev_client.subscribe(self.realtime_finished_cb, "finished")
        self.ev_client.subscribe(self.realtime_tlskeys_cb, "tlskeys")
        self.ev_client.subscribe(self.realtime_javascript_cb, "javascript")
        self.ev_client.subscribe(self.realtime_error_cb, "error")

        try:
            if self.start_run():
                self.set_analysis_status(Analysis.RUNNING)
                self.gm_wait_th.start()
                self.run_analysis()

        except Exception as e:
            log.error("Failure during analysis run of task #%s. %s. %s",
                      self.task.id, e, traceback.format_exc())
            try:
                self.ev_client.send_event(
                    "massurltaskfailure", {
                        "taskid": self.task.id,
                        "error": "%s" % traceback.format_exc(4),
                        "status": self.analysis.status
                    })
            except Exception as e:
                log.exception("Failed to send failure notification event")

        finally:
            try:
                self.stop_and_wait()
            except Exception as e:
                log.exception(
                    "Failure while stopping analysis run of task #%s: %s",
                    self.task.id, e)

        if self.completed or self.aborted:
            self.set_analysis_status(Analysis.STOPPED, wait=True)
        else:
            self.set_analysis_status(Analysis.FAILED, wait=True)

    def realtime_sig_cb(self, message):
        """Handle incoming signature events from the realtime processor"""
        log.debug("Signature event for task #%s", self.task.id)
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        for k in ("description", "ioc", "signature"):
            if k not in message["body"]:
                return

        self.detection_events.append(message["body"])

    def realtime_netflow_cb(self, message):
        """Handle incoming netflow events from the realtime processor"""
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        for k in ("srcip", "srcport", "dstip", "dstport", "pid", "ppid"):
            if k not in message["body"]:
                return

        flow = message["body"]
        self.netflow_events.append(
            ((flow.get("srcip"), flow.get("srcport"), flow.get("dstip"),
              flow.get("dstport")), flow.get("pid"), flow.get("ppid")))

    def realtime_error_cb(self, message):
        """Handle an error event from the realtime processor. These events
        can occur after sending it an event that triggers its processing
        routine"""
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        self.realtime_error = message["body"].get("error", True)

    def realtime_tlskeys_cb(self, message):
        """Handle a tlskeys event containing tls master keys extracted by
        the realtime processor"""
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        tlskeys = message["body"].get("tlskeys")
        if not tlskeys:
            return

        if not isinstance(tlskeys, list):
            return

        with open(cwd("tlsmaster.txt", analysis=self.task.id), "wb") as fp:
            for entry in sorted(tlskeys):
                fp.write("RSA Session-ID:%s Master-Key:%s\n" %
                         (entry.get("session_id"), entry.get("master_secret")))

    def realtime_javascript_cb(self, message):
        """Handle incoming javascript events from the realtime processor"""
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        for k in ("pid", "ppid"):
            if k not in message["body"]:
                return

        js = message["body"]
        meta = js.get("meta") if js.get("meta") != "no context" else None
        code = js.get("code")

        if code:
            self.js_events.append((js.get("pid"), js.get("ppid"), code))

        if meta:
            self.js_events.append((js.get("pid"), js.get("ppid"), meta))

    def realtime_finished_cb(self, message):
        """Handle incoming finish events from the realtime processor"""
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        action = message["body"].get("action")
        if not action:
            return

        self.realtime_finished.add(action)

    def on_status_failed(self, db):
        """The mass url analysis failed"""
        # What should we do it failed? How can be prevent redundant work and
        # Be sure the mass url scheduler knows this task failed?
        if self.machine.locked:
            log.debug("Releasing machine lock on %s", self.machine.label)
            self.machine = self.machinery.release(self.machine.label)

    def on_status_starting(self, db):
        """Is executed by the scheduler on analysis status starting
        Stores the chosen route in the db."""
        log.info("Using route '%s' for task #%s", self.route.route,
                 self.task.id)
        # Propagate the taken route to the database.
        db.set_route(self.task.id, self.route.route)

        # Store used machine in the task
        db.set_machine(self.task.id, self.machine.name)

    def on_status_stopurlblock(self, db):
        """When a new block of URLs has finished, update their rows
        in the database. This way we can keep track of which were and were not
        analyzed in case of an abort/crash/detection and a re-submit
         is required."""
        updated = []
        for t in self.curr_block:
            target_obj = self.curr_block[t].get("target_obj")
            target_obj["analyzed"] = True
            updated.append(target_obj.target_dict)
        db.update_targets(updated)

    def on_status_aborted(self, db):
        """This status is reached when a potentially malicious action is
        detected and the remaining URLs should be analyzed in a new task"""
        log.info("Task #%s aborted", self.task.id)
        self.aborted = True
        self.task.set_status(TASK_ABORTED)
        self.task.write_task_json()

    def on_status_stopped(self, db):
        """Executed by the scheduler when the analysis reaches the stopped
        status."""
        if self.machine.locked:
            log.debug("Releasing machine lock on %s", self.machine.label)
            self.machine = self.machinery.release(self.machine.label)

    def finalize(self, db):
        self.ev_client.stop()
        self.task.set_latest()

        if self.machine.locked:
            log.debug("Releasing machine lock on %s", self.machine.label)
            self.machine = self.machinery.release(self.machine.label)

        self.release_machine_lock()
        with open(cwd("pids_targets.json", analysis=self.task.id), "wb") as fp:
            json.dump(self.all_pids_targets, fp, indent=2)

        if self.analysis.status != Analysis.STOPPED:
            log.warning("Analysis status is '%s' after exit.",
                        self.analysis.status)
            self.task.write_task_json(status=TASK_FAILED_ANALYSIS)
            self.task.set_status(TASK_FAILED_ANALYSIS)

        if self.completed:
            log.info("Setting task #%d to reported", self.task.id)
            self.task.write_task_json(status=TASK_REPORTED)
            self.task.set_status(TASK_REPORTED)

        task_log_stop(self.task.id)