def __init__(self):
    """Set up a Request from the "cuckoomx" configuration.

    The machine list and the pending-task limit are read from the
    ``cuckoomx`` section, the REST base URL from the ``cuckoo`` section,
    and a DatabaseMX instance is created for later task counting.
    """
    settings = Config("cuckoomx")

    self.machines = settings.cuckoomx.get("machines")
    self.api_url = settings.cuckoo.get("api_url")
    self.maximum_tasks_pending = settings.cuckoomx.get(
        "maximum_tasks_pending")

    self.dbmx = DatabaseMX()
def cuckoomx_clean():
    """Drop the CuckooMX database and delete the CuckooMX log file.

    A failure to delete the log file is reported as a warning and is
    otherwise ignored.
    """
    DatabaseMX().drop_database()

    # The log lives at <CUCKOOMX_ROOT>/log/cuckoomx.log
    log_path = os.path.join(CUCKOOMX_ROOT, "log", "cuckoomx.log")
    try:
        os.unlink(log_path)
    except (IOError, OSError) as err:
        log.warning("Error removing file %s: %s", log_path, err)
def inline():
    """Run the inline mode of CuckooMX.

    Inline mode is meant to capture, extract and analyze mails while they
    are being transferred. Because this may affect the mail service, using
    a SPAN port is recommended.

    NOTE: this mode is under development. The current implementation scans
    a store directory for ``*.msg`` files in the same way offside() does.

    @return: False when the mode is disabled or no store directory is
             configured; otherwise the scan loop never returns
    """
    cfg = Config("cuckoomx")

    # Prefer the correctly spelled option but keep accepting the legacy
    # misspelling so existing configuration files behave as before.
    enabled = cfg.inline.get("enabled", cfg.inline.get("enalbed"))
    if enabled is False:
        return False

    # The directory to scan was previously an undefined name here.
    # NOTE(review): assumes the [inline] section carries a `store` option
    # like [offside] does - confirm against the configuration file.
    store = cfg.inline.get("store")
    if not store:
        log.error("No store directory configured for inline mode")
        return False

    while True:
        nothing_to_check = True

        for root, _dirnames, filenames in os.walk(store):
            for filename in fnmatch.filter(filenames, "*.msg"):
                path = os.path.join(root, filename)
                mail = Mail(path)
                mail.parse()
                log.debug("Parsing mail %s at %s", mail.get_msg_id(), path)

                # Skip mails that are already stored or failed analysis
                if mail.is_exist() is True:
                    continue
                if mail.analyze() is False:
                    continue

                # Okay, add it to database
                dbmx = DatabaseMX()
                dbmx.add_mail(mail)
                nothing_to_check = False
                log.debug("Add mail %s to database", mail.get_msg_id())

        # Avoid a busy loop when the scan found nothing new
        if nothing_to_check:
            time.sleep(1)
def inline():
    """Run the inline mode of CuckooMX.

    Inline mode is meant to capture, extract and analyze mails while they
    are being transferred. Because this may affect the mail service, using
    a SPAN port is recommended.

    NOTE: this mode is under development. The current implementation scans
    a store directory for ``*.msg`` files in the same way offside() does.

    @return: False when the mode is disabled or no store directory is
             configured; otherwise the scan loop never returns
    """
    cfg = Config("cuckoomx")

    # Prefer the correctly spelled option but keep accepting the legacy
    # misspelling so existing configuration files behave as before.
    enabled = cfg.inline.get("enabled", cfg.inline.get("enalbed"))
    if enabled is False:
        return False

    # The directory to scan was previously an undefined name here.
    # NOTE(review): assumes the [inline] section carries a `store` option
    # like [offside] does - confirm against the configuration file.
    store = cfg.inline.get("store")
    if not store:
        log.error("No store directory configured for inline mode")
        return False

    while True:
        nothing_to_check = True

        for root, _dirnames, filenames in os.walk(store):
            for filename in fnmatch.filter(filenames, "*.msg"):
                path = os.path.join(root, filename)
                mail = Mail(path)
                mail.parse()
                log.debug("Parsing mail %s at %s", mail.get_msg_id(), path)

                # Skip mails that are already stored or failed analysis
                if mail.is_exist() is True:
                    continue
                if mail.analyze() is False:
                    continue

                # Okay, add it to database
                dbmx = DatabaseMX()
                dbmx.add_mail(mail)
                nothing_to_check = False
                log.debug("Add mail %s to database", mail.get_msg_id())

        # Avoid a busy loop when the scan found nothing new
        if nothing_to_check:
            time.sleep(1)
def offside():
    """Run the offside mode of CuckooMX.

    In this mode CuckooMX finds and analyzes mails that are stored on disk
    (files with the ``.msg`` extension), so the mail service itself is not
    affected. CuckooMX needs read access to the storage folder of the mail
    service; write permission is not required.

    @return: False when the mode is disabled or no store directory is
             configured; otherwise the scan loop never returns
    """
    cfg = Config("cuckoomx")

    # Prefer the correctly spelled option but keep accepting the legacy
    # misspelling so existing configuration files behave as before.
    enabled = cfg.offside.get("enabled", cfg.offside.get("enalbed"))
    store = cfg.offside.get("store")

    if enabled is False:
        return False

    # Without a directory there is nothing to walk
    if not store:
        log.error("No store directory configured for offside mode")
        return False

    while True:
        nothing_to_check = True

        for root, _dirnames, filenames in os.walk(store):
            for filename in fnmatch.filter(filenames, "*.msg"):
                path = os.path.join(root, filename)
                mail = Mail(path)
                mail.parse()
                log.debug("Parsing mail %s at %s", mail.get_msg_id(), path)

                # Skip mails that are already stored or failed analysis
                if mail.is_exist() is True:
                    continue
                if mail.analyze() is False:
                    continue

                dbmx = DatabaseMX()
                dbmx.add_mail(mail)
                nothing_to_check = False
                log.debug("Add mail %s to database", mail.get_msg_id())

        # Avoid a busy loop when the scan found nothing new
        if nothing_to_check:
            time.sleep(1)
def __init__(self, path):
    """Create an empty mail record for a message file

    @param path: path to a file mail.msg
    """
    self.path = path

    # Header related fields start out unset
    self.msg_id = self.msg_ori = None
    self.date = self.subject = None
    self.sender = self.sender_ip = None
    self.cc = None
    self.receiver = []

    # Body related fields
    self.content = self.content_length = None

    # Analysis state
    self.status = 0
    self.safebrowsing = None
    self.urls, self.attachments, self.tasks = [], [], []

    self.dbmx = DatabaseMX()
def index(request):
    """Render the CuckooMX dashboard page.

    Collects mail counters from DatabaseMX and sample/task counters from
    the Cuckoo Database, counts tasks per state and, when finished tasks
    exist, estimates how many tasks are processed per hour and per day.

    @param request: the Django request being served
    @return: the rendered "cuckoomx/index.html" response
    """
    db = Database()
    dbmx = DatabaseMX()

    report = dict(
        total_mails=dbmx.count_mails(),
        total_malwares=dbmx.count_malwares(),
        total_attachments=dbmx.count_attachments(),
        total_urls=dbmx.count_urls(),
        mails_have_malwares=dbmx.get_mails_have_malwares(),
        total_samples=db.count_samples(),
        total_tasks=db.count_tasks(),
        states_count={},
        estimate_hour=None,
        estimate_day=None
    )

    states = (
        TASK_PENDING,
        TASK_RUNNING,
        TASK_COMPLETED,
        TASK_RECOVERED,
        TASK_REPORTED,
        TASK_FAILED_ANALYSIS,
        TASK_FAILED_PROCESSING,
        TASK_FAILED_REPORTING
    )

    for state in states:
        report["states_count"][state] = db.count_tasks(state)

    # For the following stats we're only interested in completed tasks.
    tasks = db.list_tasks(offset=None, status=TASK_COMPLETED)
    tasks += db.list_tasks(offset=None, status=TASK_REPORTED)

    if tasks:
        # Get the time when the first task started.
        started = min(timestamp(task.started_on) for task in tasks)

        # Get the time when the last task completed.
        completed = max(timestamp(task.completed_on) for task in tasks)

        # Get the amount of tasks that actually completed.
        finished = len(tasks)

        # A zero interval (e.g. one task whose start and completion share
        # a timestamp) would divide by zero; in that case the estimates
        # keep their initial value of None.
        elapsed = completed - started
        if elapsed > 0:
            hourly = 60 * 60 * finished / elapsed

            report["estimate_hour"] = int(hourly)
            report["estimate_day"] = int(24 * hourly)

    return render_to_response("cuckoomx/index.html",
                              {"report": report},
                              context_instance=RequestContext(request))
def init_database():
    """Create the CuckooMX database through a fresh DatabaseMX instance"""
    DatabaseMX().create_database()
def checking():
    """Thread checking() will check a result of Cuckoo

    For every mail that is not done yet, look up the malscore of each
    unchecked task in the Cuckoo MongoDB database, store it through
    DatabaseMX and mark the mail as ended once all of its tasks have a
    result.

    @return: None, and only when the MongoDB handle cannot be set up;
             otherwise the polling loop never returns
    """
    cfg = Config("cuckoomx")
    critical_malscore = cfg.cuckoomx.get("critical_malscore", 6)
    warning_malscore = cfg.cuckoomx.get("warning_malscore", 2)

    dbmx = DatabaseMX()

    # The malscore is read directly from the Cuckoo database because it
    # could not be found in the API.
    try:
        host = cfg.mongodb.get("host", "127.0.0.1")
        port = cfg.mongodb.get("port", 27017)

        conn = MongoClient(host, port)
        dbcuckoo = conn["cuckoo"]
    except Exception:
        # Without a database handle the loop below could only fail on the
        # first task lookup, so stop here with the traceback logged.
        log.exception("Cannot connect to database Mongodb")
        return

    while True:
        nothing_to_check = True

        for mail in dbmx.get_mails_not_done():
            nothing_to_check = False

            if mail["tasks"] is None:
                # This mail doesn't have anything to check, it is okay.
                # Update status = 1 and continue with the next mail
                dbmx.set_mail_status(mail["id"], 1)
                continue

            # Keep calm and sleep 1s, we will check mail soon :)
            log.debug("Checking mail %s with %s tasks",
                      mail["id"], len(mail["tasks"]))
            time.sleep(1)

            check_all_tasks = True
            for task in mail["tasks"]:
                # Tasks that already have a check date are finished
                if task["date_checked"] is not None:
                    continue

                task_id = task["task_id"]
                document = dbcuckoo.analysis.find_one(
                    {"info.id": int(task_id)})

                if document is None:
                    # This task is not done yet, continue with the next one
                    check_all_tasks = False
                    continue

                malscore = document["malscore"]
                if malscore >= critical_malscore:
                    dbmx.inc_mails_have_malwares()
                    log.critical("Mail %s, task %s has malware",
                                 mail["id"], task_id)
                elif malscore >= warning_malscore:
                    log.critical("Mail %s, task %s have something wrong",
                                 mail["id"], task_id)

                dbmx.set_task_malscore(
                    mail["id"], task_id=task_id, malscore=malscore)

            if check_all_tasks:
                dbmx.set_mail_ended(mail["id"])

        # Do not spin at full speed while no mail is waiting for results
        if nothing_to_check:
            time.sleep(1)
class Mail(object):
    """A mail message file together with its analysis state

    parse() fills the header fields, the text content, the URLs found in
    the text and the attachments; analyze() submits URLs and attachments
    for analysis and records the created tasks in self.tasks.
    """

    def __init__(self, path):
        """Initialize
        @param path: path to a file mail.msg
        """
        self.path = path
        self.msg_id = None
        self.msg_ori = None
        self.date = None
        self.sender = None
        self.sender_ip = None
        self.subject = None
        self.receiver = []
        self.cc = None
        self.content = None
        self.content_length = None
        self.status = 0
        self.urls = []
        self.attachments = []
        self.tasks = []
        self.safebrowsing = None
        self.dbmx = DatabaseMX()

    def get_msg_id(self):
        """Get msg_id
        @return: self.msg_id
        """
        return self.msg_id

    def get_status(self):
        """Get status
        @return: self.status
        """
        return self.status

    def get_path(self):
        """Get path
        @return: self.path
        """
        return self.path

    def get_safebrowsing(self):
        """Get safebrowsing
        @return: self.safebrowsing
        """
        return self.safebrowsing

    def get_tasks(self):
        """Get tasks
        @return: self.tasks
        """
        return self.tasks

    def count_urls(self):
        """Count urls
        @return: number of items in self.urls
        """
        return len(self.urls)

    def count_attachments(self):
        """Count attachments
        @return: number of items in self.attachments
        """
        return len(self.attachments)

    def get_urls(self, content):
        """Get URLs from content of mail
        @param content: text to search; None yields no URLs, and a bytes
                        payload is decoded as UTF-8 before searching
        @return: List URLs
        """
        if content is None:
            return []

        # On Python 3 a decoded payload is bytes while the pattern is text;
        # on Python 2 bytes is str, so this branch is never taken there.
        if isinstance(content, bytes) and not isinstance(content, str):
            content = content.decode("utf-8", "replace")

        # Raw strings keep the pattern identical while avoiding invalid
        # escape sequences in the literal.
        url_pattern = (r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|"
                       r"(?:%[0-9a-fA-F][0-9a-fA-F]))+")
        return re.findall(url_pattern, content)

    def is_exist(self):
        """Check if this mail exist in database
        @return: True/False
        """
        return self.dbmx.mail_exist(self.msg_id)

    def process_urls(self, urls):
        """Process URLs
        @param urls: list of url need to be analyzed
        @return: False if a task could not be created, True otherwise
        """
        safebrowsing = SafeBrowsing()
        result = safebrowsing.lookup(urls)
        if result is not True:
            self.safebrowsing = result

        for url in urls:
            # Skip urls that already exist in our database
            # TODO: check a url again if it was last checked over 1 day ago
            if self.dbmx.url_exist(url):
                continue

            request = Request()
            task_ids = request.create_url(url)

            # A failed request is signalled with None or False instead of
            # a list; iterating over it would raise a TypeError.
            if task_ids is None or task_ids is False:
                return False

            # Okay, add it to task list
            for _id in task_ids:
                self.tasks.append({
                    "task_id": _id,
                    "malscore": None,
                    "url": url,
                    "date_checked": None
                })

        return True

    def process_attachments(self, attachments):
        """Process attachments
        @param attachments: list of [filename, payload] need to be analyzed
        @return: False if a task could not be created, True otherwise
        """
        for attachment in attachments:
            filename = attachment[0]
            payload = attachment[1]

            # Calculate a SHA-256 hash from payload
            sha256 = hashlib.sha256(payload).hexdigest()

            # Skip files whose hash already exists in our database
            if self.dbmx.attachment_exist(sha256):
                continue

            request = Request()
            task_ids = request.create_file(filename, payload)

            # A failed request is signalled with None or False instead of
            # a list; iterating over it would raise a TypeError.
            if task_ids is None or task_ids is False:
                return False

            # Okay, add it to task list
            for _id in task_ids:
                self.tasks.append({
                    "task_id": _id,
                    "malscore": None,
                    "attachment": filename,
                    "sha256": sha256,
                    "date_checked": None
                })

        return True

    def parse(self):
        """Parse mail

        Read a message file from self.path and get informations, urls
        and/or attachments in mail
        """
        # Close the file as soon as the message has been read
        with open(self.path) as msg_file:
            message = email.message_from_file(msg_file)

        self.msg_id = message['message-id']
        self.msg_ori = message.as_string()
        self.date = message['date']
        self.sender = message['from']
        self.sender_ip = message['x-originating-ip']
        self.subject = message['subject']
        self.receiver = message.get_all('to', [])
        self.cc = message.get_all('cc', [])
        self.content = []
        self.content_length = message['content-length']

        for part in message.walk():
            ctype = part.get_content_maintype()
            if ctype == 'multipart':
                # Nothing to do
                continue

            if ctype == 'text':
                content = part.get_payload(decode=True)
                self.content.append(content)
                self.urls.extend(self.get_urls(content))

            if ctype in ('image', 'application', 'audio', 'video', 'font'):
                filename = part.get_filename()
                payload = part.get_payload(decode=True)
                # We can work with an array [filename, payload], so we don't
                # have to store this file on hard disk
                self.attachments.append([filename, payload])

    def analyze(self):
        """Analyze mail

        After parse mail, foreach url and attachment in it, analyze and add
        it to database
        @return: False if urls or attachments could not be processed,
                 True otherwise
        """
        # We should remove unchecking-task if process have any error
        if self.process_urls(self.urls) is False:
            log.error("%s:Skip this mail", self.msg_id)
            return False

        if self.process_attachments(self.attachments) is False:
            log.error("%s:Skip this mail", self.msg_id)
            return False

        return True
class Request(object):
    """Submit urls and files to the Cuckoo REST API as analysis tasks"""

    def __init__(self):
        """Initialize info for Request"""
        cfg = Config("cuckoomx")
        self.machines = cfg.cuckoomx.get("machines")
        self.api_url = cfg.cuckoo.get("api_url")
        self.maximum_tasks_pending = cfg.cuckoomx.get("maximum_tasks_pending")
        self.dbmx = DatabaseMX()

    def _wait_for_free_slot(self):
        """Block until fewer than maximum_tasks_pending tasks are not done"""
        while not (self.dbmx.count_tasks_not_done() <
                   self.maximum_tasks_pending):
            time.sleep(1)

    def create_url(self, url):
        """Create a url analysis task

        Waits for a free task slot, then posts the url once for every
        machine listed in self.machines.

        @param url: url to analyze
        @return: list of created task ids (requests answered with a non-ok
                 status are skipped), or None as soon as an ok response
                 carries no task id
        """
        task_ids = []
        rest_url = self.api_url + "/tasks/create/url"

        self._wait_for_free_slot()

        for machine in self.machines.split(","):
            machine = Machine().get_available_machine(machine)
            response = requests.post(
                rest_url,
                files={"url": ("", url)},
                data={"priority": 1, "machine": machine})

            # Check for response.status_code
            if response.status_code != requests.codes.ok:
                log.warning(
                    "response for url \"%s\" return status_code = %s",
                    url, response.status_code)
                continue

            # A missing key is handled like an empty task id below
            task_id = response.json().get("task_id")
            if not task_id:
                log.warning("response for url \"%s\" return task_id = %s",
                            url, task_id)
                return None

            task_ids.append(task_id)

        return task_ids

    def create_file(self, filename, attachment):
        """Create a file analysis task

        Waits for a free task slot, then posts the file once for every
        machine listed in self.machines.

        @param filename: name to submit the file under
        @param attachment: content of the file
        @return: list with the first task id of every successful
                 submission (requests answered with a non-ok status are
                 skipped), or None as soon as an ok response carries no
                 task ids
        """
        task_ids = []
        rest_url = self.api_url + "/tasks/create/file"

        self._wait_for_free_slot()

        for machine in self.machines.split(","):
            machine = Machine().get_available_machine(machine)
            response = requests.post(
                rest_url,
                files={"file": (filename, attachment)},
                data={"priority": 1, "machine": machine})

            # Check for response.status_code
            if response.status_code != requests.codes.ok:
                log.warning(
                    "response for file \"%s\" return status_code = %s",
                    filename, response.status_code)
                continue

            # A missing key is handled like an empty id list below
            task_id = response.json().get("task_ids")
            if not task_id:
                log.warning("response for file \"%s\" return task_id = %s",
                            filename, task_id)
                return None

            task_ids.append(task_id[0])

        return task_ids
class Request(object):
    """Submit urls and files to the Cuckoo REST API as analysis tasks"""

    def __init__(self):
        """Initialize info for Request"""
        cfg = Config("cuckoomx")
        self.machines = cfg.cuckoomx.get("machines")
        self.api_url = cfg.cuckoo.get("api_url")
        self.maximum_tasks_pending = cfg.cuckoomx.get("maximum_tasks_pending")
        self.dbmx = DatabaseMX()

    def _wait_for_free_slot(self):
        """Block until fewer than maximum_tasks_pending tasks are not done"""
        while not (self.dbmx.count_tasks_not_done() <
                   self.maximum_tasks_pending):
            time.sleep(1)

    def create_url(self, url):
        """Create a url analysis task

        Waits for a free task slot, then posts the url once for every
        machine listed in self.machines.

        @param url: url to analyze
        @return: list of created task ids (requests answered with a non-ok
                 status are skipped), or None as soon as an ok response
                 carries no task id
        """
        task_ids = []
        rest_url = self.api_url + "/tasks/create/url"

        self._wait_for_free_slot()

        for machine in self.machines.split(","):
            machine = Machine().get_available_machine(machine)
            response = requests.post(
                rest_url,
                files={"url": ("", url)},
                data={"priority": 1, "machine": machine})

            # Check for response.status_code
            if response.status_code != requests.codes.ok:
                log.warning(
                    "response for url \"%s\" return status_code = %s",
                    url, response.status_code)
                continue

            # A missing key is handled like an empty task id below
            task_id = response.json().get("task_id")
            if not task_id:
                log.warning("response for url \"%s\" return task_id = %s",
                            url, task_id)
                return None

            task_ids.append(task_id)

        return task_ids

    def create_file(self, filename, attachment):
        """Create a file analysis task

        Waits for a free task slot, then posts the file once for every
        machine listed in self.machines.

        @param filename: name to submit the file under
        @param attachment: content of the file
        @return: list with the first task id of every successful
                 submission (requests answered with a non-ok status are
                 skipped), or None as soon as an ok response carries no
                 task ids
        """
        task_ids = []
        rest_url = self.api_url + "/tasks/create/file"

        self._wait_for_free_slot()

        for machine in self.machines.split(","):
            machine = Machine().get_available_machine(machine)
            response = requests.post(
                rest_url,
                files={"file": (filename, attachment)},
                data={"priority": 1, "machine": machine})

            # Check for response.status_code
            if response.status_code != requests.codes.ok:
                log.warning(
                    "response for file \"%s\" return status_code = %s",
                    filename, response.status_code)
                continue

            # A missing key is handled like an empty id list below
            task_id = response.json().get("task_ids")
            if not task_id:
                log.warning("response for file \"%s\" return task_id = %s",
                            filename, task_id)
                return None

            task_ids.append(task_id[0])

        return task_ids
def checking():
    """Thread checking() will check a result of Cuckoo

    For every mail that is not done yet, look up the malscore of each
    unchecked task in the Cuckoo MongoDB database, store it through
    DatabaseMX and mark the mail as ended once all of its tasks have a
    result.

    @return: None, and only when the MongoDB handle cannot be set up;
             otherwise the polling loop never returns
    """
    cfg = Config("cuckoomx")
    critical_malscore = cfg.cuckoomx.get("critical_malscore", 6)
    warning_malscore = cfg.cuckoomx.get("warning_malscore", 2)

    dbmx = DatabaseMX()

    # The malscore is read directly from the Cuckoo database because it
    # could not be found in the API.
    try:
        host = cfg.mongodb.get("host", "127.0.0.1")
        port = cfg.mongodb.get("port", 27017)

        conn = MongoClient(host, port)
        dbcuckoo = conn["cuckoo"]
    except Exception:
        # Without a database handle the loop below could only fail on the
        # first task lookup, so stop here with the traceback logged.
        log.exception("Cannot connect to database Mongodb")
        return

    while True:
        nothing_to_check = True

        for mail in dbmx.get_mails_not_done():
            nothing_to_check = False

            if mail["tasks"] is None:
                # This mail doesn't have anything to check, it is okay.
                # Update status = 1 and continue with the next mail
                dbmx.set_mail_status(mail["id"], 1)
                continue

            # Keep calm and sleep 1s, we will check mail soon :)
            log.debug("Checking mail %s with %s tasks",
                      mail["id"], len(mail["tasks"]))
            time.sleep(1)

            check_all_tasks = True
            for task in mail["tasks"]:
                # Tasks that already have a check date are finished
                if task["date_checked"] is not None:
                    continue

                task_id = task["task_id"]
                document = dbcuckoo.analysis.find_one(
                    {"info.id": int(task_id)})

                if document is None:
                    # This task is not done yet, continue with the next one
                    check_all_tasks = False
                    continue

                malscore = document["malscore"]
                if malscore >= critical_malscore:
                    dbmx.inc_mails_have_malwares()
                    log.critical("Mail %s, task %s has malware",
                                 mail["id"], task_id)
                elif malscore >= warning_malscore:
                    log.critical("Mail %s, task %s have something wrong",
                                 mail["id"], task_id)

                dbmx.set_task_malscore(
                    mail["id"], task_id=task_id, malscore=malscore)

            if check_all_tasks:
                dbmx.set_mail_ended(mail["id"])

        # Do not spin at full speed while no mail is waiting for results
        if nothing_to_check:
            time.sleep(1)