def execute(self, request):
    # We have the sha1 digest in the task object so there is no need to
    # fetch the sample for NSRL execution.
    cur_result = Result()
    try:
        dbresults = self.connection.query(request.sha1)
    except NSRLDatasource.DatabaseException:
        raise RecoverableError("Query failed")

    # If we found a result in the NSRL database, drop this task as we
    # don't want to process it further.
    if dbresults:
        request.drop()
        benign = "This file was found in the NSRL database. It is not malware."
        res = ResultSection(title_text=benign)
        res.score = SCORE.NOT
        for dbresult in dbresults[:10]:
            res.add_line(dbresult[0] + " - %s (%s) - v: %s - by: %s [%s]" %
                         (dbresult[1], dbresult[2], dbresult[3], dbresult[4], dbresult[5]))
        if len(dbresults) > 10:
            res.add_line("And %s more..." % str(len(dbresults) - 10))
        cur_result.add_section(res)

    request.result = cur_result
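# Illustrative sketch only: the column order of the rows returned by the NSRL
# datasource is an assumption inferred from the dbresult[0..5] formatting above.
# A row is presumed to look roughly like:
#   sample_row = ("calc.exe", "Windows Calculator", "2016", "10.0", "Microsoft", "WIN")
# which the loop would render as:
#   calc.exe - Windows Calculator (2016) - v: 10.0 - by: Microsoft [WIN]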
def process_signatures(sigs, al_result, classification):
    log.debug("Processing signature results.")
    if len(sigs) > 0:
        sigs_score = 0
        sigs_res = ResultSection(title_text="Signatures", classification=classification)
        skipped_sigs = ['dead_host', 'has_authenticode', 'network_icmp', 'network_http',
                        'allocates_rwx', 'has_pdb']
        # Severity is 0-5ish with 0 being least severe.
        for sig in sigs:
            severity = float(sig.get('severity', 0))
            actor = sig.get('actor', '')
            sig_classification = sig.get('classification', CLASSIFICATION.UNRESTRICTED)
            sig_score = int(severity * 100)
            sig_name = sig.get('name', 'unknown')
            sig_categories = sig.get('categories', [])
            sig_families = sig.get('families', [])

            # Skipped Signature Checks:
            if sig_name in skipped_sigs:
                continue

            sigs_score += sig_score
            sigs_res.add_line(sig_name + ' [' + str(sig_score) + ']')
            # Default to an empty string so a missing description doesn't raise a TypeError.
            sigs_res.add_line('\tDescription: ' + sig.get('description', ''))
            if len(sig_categories) > 0:
                sigs_res.add_line('\tCategories: ' + ','.join([safe_str(x) for x in sig_categories]))
                for category in sig_categories:
                    al_result.add_tag(tag_type=TAG_TYPE.DYNAMIC_SIGNATURE_CATEGORY,
                                      value=category,
                                      weight=TAG_WEIGHT.HIGH,
                                      classification=sig_classification)
            if len(sig_families) > 0:
                sigs_res.add_line('\tFamilies: ' + ','.join([safe_str(x) for x in sig_families]))
                for family in sig_families:
                    al_result.add_tag(tag_type=TAG_TYPE.DYNAMIC_SIGNATURE_FAMILY,
                                      value=family,
                                      weight=TAG_WEIGHT.VHIGH,
                                      classification=sig_classification)
            if sig_name != 'unknown' and sig_name != '':
                al_result.add_tag(tag_type=TAG_TYPE.DYNAMIC_SIGNATURE_NAME,
                                  value=sig_name,
                                  weight=TAG_WEIGHT.VHIGH,
                                  classification=sig_classification)
            sigs_res.add_line('')
            if actor and actor != '':
                al_result.add_tag(tag_type=TAG_TYPE.THREAT_ACTOR,
                                  value=actor,
                                  weight=TAG_WEIGHT.VHIGH,
                                  classification=sig_classification)

        # We don't want to get carried away..
        sigs_res.score = min(1000, sigs_score)
        al_result.add_section(sigs_res)
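# Illustrative only: a signature entry consumed above is assumed to be a
# Cuckoo-style dict along these lines. Every key shown is one the code actually
# reads; the values themselves are made up.
#   example_sig = {
#       'name': 'injection_runpe',
#       'description': 'Executed a process and injected code into it',
#       'severity': 3,                      # 0-5ish, scaled to a 0-500 score above
#       'categories': ['injection'],
#       'families': ['upatre'],
#       'actor': '',
#       'classification': CLASSIFICATION.UNRESTRICTED,
#   }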
def lookup_callouts(response):
    results = response.get('callout', None)

    if not results:
        return None, []

    tags = []
    r_section = ResultSection(title_text='Sandbox Call-Outs')
    r_section.score = SCORE.HIGH
    analyser = ''
    r_sub_section = None
    for result in results[:10]:
        if analyser != result['analyser']:
            title = '%s (Analysed on %s)' % (result['analyser'], result['date'])
            r_sub_section = ResultSection(title_text=title, parent=r_section)
            analyser = result['analyser']

        channel = result['request']
        if channel is not None:
            channel = "(%s)" % channel.split('~~')[0]
        else:
            channel = ""

        r_sub_section.add_line("{0:s}:{1:d}{2:s}".format(result['callout'], result['port'], channel))

        try:
            p1, p2, p3, p4 = result['callout'].split(".")
            if int(p1) <= 255 and int(p2) <= 255 and int(p3) <= 255 and int(p4) <= 255:
                tags.append(Tag(TAG_TYPE.NET_IP, result['callout'], TAG_WEIGHT.MED,
                                context=Context.BEACONS))
        except ValueError:
            tags.append(Tag(TAG_TYPE.NET_DOMAIN_NAME, result['callout'], TAG_WEIGHT.MED,
                            context=Context.BEACONS))

        if result['port'] != 0:
            tags.append(Tag(TAG_TYPE.NET_PORT, str(result['port']), TAG_WEIGHT.MED,
                            context=Context.BEACONS))

    if len(results) > 10:
        r_section.add_line("And %s more..." % str(len(results) - 10))

    return r_section, tags
def lookup_upatre_downloader(response):
    result = response.get('upatre', None)
    if not result:
        return None

    result = result[0]
    r_section = ResultSection(title_text='Upatre activity')
    r_section.score = SCORE.VHIGH
    r_section.add_line('The file %s decodes to %s using XOR key %s'
                       % (result['firstSeen'], result['decrypted_md5'], result['decryption_key']))

    return r_section
def lookup_source(response):
    result = response.get('source', None)
    if not result:
        return None

    if result['count'] > 0:
        r_section = ResultSection(title_text='File Frequency')
        r_section.score = SCORE.NULL
        r_section.add_line('First Seen: %s' % result['first_seen'])
        r_section.add_line('Last Seen: %s' % result['last_seen'])
        r_section.add_line('Source Count: %d' % result['count'])
        return r_section
def lookup_spam_feed(response):
    result = response.get('spam_feed', None)
    if not result:
        return None

    result = result[0]
    r_section = ResultSection(title_text='SPAM feed')
    r_section.score = SCORE.HIGH
    r_section.add_line('Found %d related spam emails' % result['count'])
    r_section.add_line('\tFirst Seen: %s' % result['first_seen'])
    r_section.add_line('\tLast Seen: %s' % result['last_seen'])

    r_sub_section = ResultSection(title_text='Attachments', parent=r_section)
    r_sub_section.add_line('%s - md5: %s' % (result['filename'], result['filename_md5']))
    if result['attachment']:
        r_sub_section.add_line('\t%s - md5: %s' % (result['attachment'], result['attachment_md5']))

    return r_section
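# Illustrative only: the 'response' dict handled by the lookup_* helpers above
# is assumed to be keyed by data source. Only the fields those helpers read are
# shown, and the values are made up.
#   response = {
#       'callout':   [{'analyser': 'sandbox01', 'date': '2016-01-01',
#                      'callout': '10.0.0.1', 'port': 80, 'request': 'GET /~~extra'}],
#       'upatre':    [{'firstSeen': '...', 'decrypted_md5': '...', 'decryption_key': '...'}],
#       'source':    {'count': 2, 'first_seen': '...', 'last_seen': '...'},
#       'spam_feed': [{'count': 5, 'first_seen': '...', 'last_seen': '...',
#                      'filename': 'invoice.doc', 'filename_md5': '...',
#                      'attachment': None, 'attachment_md5': None}],
#   }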
def parse_api(data):
    result = Result()

    # Info block
    hash_info = data.get('hash_info')
    if not hash_info:
        return result
    r_info = ResultSection(title_text='File Info')
    r_info.score = SCORE.NULL
    r_info.add_line('Received Date: %s-%s-%s' % (data['received_date'][:4],
                                                 data['received_date'][4:6],
                                                 data['received_date'][6:]))
    r_info.add_line('Size: %s' % hash_info.get('filesize', ""))
    r_info.add_line('MD5: %s' % hash_info.get('md5', ""))
    r_info.add_line('SHA1: %s' % hash_info.get('sha1', ""))
    r_info.add_line('SHA256: %s' % hash_info.get('sha256', ""))
    r_info.add_line('SSDeep Blocksize: %s' % hash_info.get('ssdeep_blocksize', ""))
    r_info.add_line('SSDeep Hash1: %s' % hash_info.get('ssdeep_hash1', ""))
    r_info.add_line('SSDeep Hash2: %s' % hash_info.get('ssdeep_hash2', ""))
    result.add_result(r_info)

    callouts = data.get('callouts', [])
    if len(callouts) > 0:
        max_callouts = 10
        r_callouts = ResultSection(title_text='Sandbox Call-Outs')
        r_callouts.score = SCORE.VHIGH
        analyser = ''
        r_call_sub_section = None

        reported_count = 0
        for callout in callouts:
            reported_count += 1
            if reported_count <= max_callouts:
                if analyser != callout['ip']:
                    title = '%s (Analysed on %s)' % (callout['ip'], callout['addedDate'])
                    r_call_sub_section = ResultSection(title_text=title, parent=r_callouts)
                    analyser = callout['ip']

                channel = callout['channel']
                if channel is not None:
                    channel = "(%s)" % channel.split('~~')[0]
                else:
                    channel = ""

                r_call_sub_section.add_line("{0:s}:{1:d}{2:s}".format(callout['callout'],
                                                                      callout['port'], channel))

            try:
                p1, p2, p3, p4 = callout['callout'].split(".")
                if int(p1) <= 255 and int(p2) <= 255 and int(p3) <= 255 and int(p4) <= 255:
                    result.append_tag(Tag(TAG_TYPE.NET_IP, callout['callout'], TAG_WEIGHT.MED,
                                          context=Context.BEACONS))
            except ValueError:
                result.append_tag(Tag(TAG_TYPE.NET_DOMAIN_NAME, callout['callout'], TAG_WEIGHT.MED,
                                      context=Context.BEACONS))

            if callout['port'] != 0:
                result.append_tag(Tag(TAG_TYPE.NET_PORT, str(callout['port']), TAG_WEIGHT.MED,
                                      context=Context.BEACONS))

        if len(callouts) > max_callouts:
            r_callouts.add_line("And %s more..." % str(len(callouts) - max_callouts))
        result.add_result(r_callouts)

    spamcount = data.get('spamCount', {})
    if spamcount:
        r_spam = ResultSection(title_text='SPAM feed')
        r_spam.score = SCORE.VHIGH
        r_spam.add_line('Found %d related spam emails' % spamcount['count'])
        email_sample = spamcount.get("email_sample", {})
        r_spam.add_line('\tFirst Seen: %s' % email_sample['firstSeen'])
        r_spam.add_line('\tLast Seen: %s' % email_sample['lastSeen'])

        r_sub_section = ResultSection(title_text='Attachments', parent=r_spam)
        if email_sample['filename']:
            r_sub_section.add_line('%s - md5: %s' % (email_sample['filename'],
                                                     email_sample['filenameMD5']))
        if email_sample['attachment']:
            r_sub_section.add_line('%s - md5: %s' % (email_sample['attachment'],
                                                     email_sample['attachmentMD5']))
        result.add_result(r_spam)

    av_results = data.get('av_results', [])
    if len(av_results) > 0:
        r_av_sec = ResultSection(title_text='Anti-Virus Detections')
        r_av_sec.add_line('Found %d AV hit(s).' % len(av_results))
        for av_result in av_results:
            r_av_sec.add_section(AvHitSection(av_result['scannerID'], av_result['name'], SCORE.SURE))
            result.append_tag(VirusHitTag(av_result['name'],
                                          context="scanner:%s" % av_result['scannerID']))
        result.add_result(r_av_sec)

    return result
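# Illustrative only: the API response consumed by parse_api() is assumed to
# carry at least the fields read above; this shape is inferred from that code
# and every value shown is made up.
#   data = {
#       'received_date': '20160101',
#       'hash_info': {'filesize': 1024, 'md5': '...', 'sha1': '...', 'sha256': '...',
#                     'ssdeep_blocksize': '...', 'ssdeep_hash1': '...', 'ssdeep_hash2': '...'},
#       'callouts': [{'ip': '192.168.1.10', 'addedDate': '2016-01-01', 'channel': None,
#                     'callout': '10.0.0.1', 'port': 80}],
#       'spamCount': {'count': 5, 'email_sample': {'firstSeen': '...', 'lastSeen': '...',
#                                                  'filename': 'invoice.doc', 'filenameMD5': '...',
#                                                  'attachment': None, 'attachmentMD5': None}},
#       'av_results': [{'scannerID': 'ScannerA', 'name': 'Trojan.Generic'}],
#   }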
def process_network(network, al_result, guest_ip, classification):
    global country_code_map
    if not country_code_map:
        country_code_map = forge.get_country_code_map()

    log.debug("Processing network results.")
    result_map = {}

    network_res = ResultSection(title_text="Network Activity",
                                classification=classification,
                                body_format=TEXT_FORMAT.MEMORY_DUMP)
    network_score = 0

    # IP activity
    hosts = network.get("hosts", [])
    if len(hosts) > 0 and isinstance(hosts[0], dict):
        hosts = [host['ip'] for host in network.get("hosts", [])]

    udp = parse_protocol_data(network.get("udp", []), group_fields=['dport'])
    tcp = parse_protocol_data(network.get("tcp", []), group_fields=['dport'])
    smtp = parse_protocol_data(network.get("smtp", []), group_fields=['raw'])
    dns = parse_protocol_data(network.get("dns", []), group_by='request', group_fields=['type'])
    icmp = parse_protocol_data(network.get("icmp", []), group_fields=['type'])

    # Domain activity
    domains = parse_protocol_data(network.get("domains", []), group_by='domain')

    http = parse_protocol_data(network.get("http", []), group_by='host',
                               group_fields=['port', 'uri', 'method'])
    http_ex = parse_protocol_data(network.get("http_ex", []), group_by='host',
                                  group_fields=['dport', 'uri', 'method'])
    _add_ex_data(http, http_ex, 'http', 80)

    https = parse_protocol_data(network.get("https", []), group_by='host',
                                group_fields=['port', 'uri', 'method'])
    https_ex = parse_protocol_data(network.get("https_ex", []), group_by='host',
                                   group_fields=['dport', 'uri', 'method'])
    _add_ex_data(https, https_ex, 'https', 443)

    # Miscellaneous activity
    # irc = network.get("irc")

    # Add missing ip hosts
    for proto in [udp, tcp, http, https, icmp, smtp]:
        for hst in proto.keys():
            if hst not in hosts and re.match(r"^[0-9.]+$", hst):
                hosts.append(hst)

    # network['hosts'] has all unique non-local network ips.
    for host in hosts:
        if host == guest_ip or wlist_check_ip(host):
            continue
        add_host_flows(host, 'udp', udp.get(host), result_map)
        add_host_flows(host, 'tcp', tcp.get(host), result_map)
        add_host_flows(host, 'smtp', smtp.get(host), result_map)
        add_host_flows(host, 'icmp', icmp.get(host), result_map)
        add_host_flows(host, 'http', http.get(host), result_map)
        add_host_flows(host, 'https', https.get(host), result_map)

    if hosts != [] and 'host_flows' not in result_map:
        # This only occurs if for some reason we don't parse corresponding flows out from the
        # network dump. So we'll just manually add the IPs so they're at least being reported.
        result_map['host_flows'] = {}
        for host in hosts:
            if host == guest_ip or wlist_check_ip(host):
                continue
            result_map['host_flows'][host] = []

    for domain in domains:
        if wlist_check_domain(domain):
            continue
        add_domain_flows(domain, 'dns', dns.get(domain), result_map)
        add_domain_flows(domain, 'http', http.get(domain), result_map)
        add_domain_flows(domain, 'https', https.get(domain), result_map)

    if 'host_flows' in result_map:
        # host_flows is a map of host:protocol entries
        # protocol is a map of protocol_name:flows
        # flows is a set of unique flows by the groupings above
        host_lines = []
        for host in sorted(result_map['host_flows']):
            network_score += 100
            protocols = result_map['host_flows'].get(host, [])
            host_cc = country_code_map[host] or '??'
            host_cc = '(' + host_cc + ')'
            al_result.add_tag(tag_type=TAG_TYPE.NET_IP, value=host, weight=TAG_WEIGHT.VHIGH,
                              classification=classification, usage="CORRELATION",
                              context=Context.CONNECTS_TO)
            for protocol in sorted(protocols):
                flows = protocols[protocol]
                if 'http' in protocol:
                    for flow in flows:
                        uri = flow.get('uri', None)
                        if uri:
                            al_result.add_tag(tag_type=TAG_TYPE.NET_FULL_URI, value=uri,
                                              weight=TAG_WEIGHT.VHIGH, classification=classification,
                                              usage="CORRELATION", context=Context.CONNECTS_TO)
                flow_lines = dict_list_to_fixedwidth_str_list(flows)
                for line in flow_lines:
                    proto_line = "{0:<8}{1:<19}{2:<8}{3}".format(protocol, host, host_cc, line)
                    host_lines.append(proto_line)

        network_res.add_lines(host_lines)

    if 'domain_flows' in result_map:
        # domain_flows is a map of domain:protocol entries
        # protocol is a map of protocol_name:flows
        # flows is a set of unique flows by the groupings above

        # Formatting..
        max_domain_len = 0
        for domain in result_map['domain_flows']:
            max_domain_len = max(max_domain_len, len(domain) + 4)
        proto_fmt = "{0:<8}{1:<" + str(max_domain_len) + "}{2}"

        domain_lines = []
        network_score += 100
        for domain in sorted(result_map['domain_flows']):
            protocols = result_map['domain_flows'][domain]
            al_result.add_tag(tag_type=TAG_TYPE.NET_DOMAIN_NAME, value=domain, weight=TAG_WEIGHT.VHIGH,
                              classification=classification, context=Context.CONNECTS_TO)
            for protocol in sorted(protocols):
                flows = protocols[protocol]
                if 'http' in protocol:
                    for flow in flows:
                        uri = flow.get('uri', None)
                        if uri:
                            al_result.add_tag(tag_type=TAG_TYPE.NET_FULL_URI, value=uri,
                                              weight=TAG_WEIGHT.VHIGH, classification=classification,
                                              usage="CORRELATION", context=Context.CONNECTS_TO)
                flow_lines = dict_list_to_fixedwidth_str_list(flows)
                for line in flow_lines:
                    proto_line = proto_fmt.format(protocol, domain, line)
                    domain_lines.append(proto_line)

        network_res.add_lines(domain_lines)

    network_score = min(500, network_score)
    if len(network_res.body) > 0:
        network_res.score = network_score
        al_result.add_section(network_res)
    log.debug("Network processing complete.")
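# The helpers parse_protocol_data, _add_ex_data, add_host_flows, add_domain_flows,
# wlist_check_ip and wlist_check_domain are defined elsewhere in this service.
# Below is a minimal, commented-out sketch of what parse_protocol_data is assumed
# to do, based solely on how it is called above: group protocol records by one
# key and keep a de-duplicated list of the requested fields per group. The
# default group key 'dst' is an assumption, not the service's actual code.
#
# def parse_protocol_data(records, group_by='dst', group_fields=None):
#     group_fields = group_fields or []
#     grouped = {}
#     for record in records:
#         key = record.get(group_by)
#         if key is None:
#             continue
#         flow = {field: record.get(field) for field in group_fields}
#         if flow not in grouped.setdefault(key, []):
#             grouped[key].append(flow)
#     return grouped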
def extract_streams(self, file_name, file_contents):
    oles = {}
    try:
        streams_res = ResultSection(score=SCORE.INFO,
                                    title_text="Embedded document stream(s)")

        is_zip = False
        is_ole = False
        # Get the OLEs
        if zipfile.is_zipfile(file_name):
            is_zip = True
            z = zipfile.ZipFile(file_name)
            for f in z.namelist():
                if f in oles:
                    continue
                bin_data = z.open(f).read()
                bin_fname = os.path.join(self.working_directory,
                                         "{}.tmp".format(hashlib.sha256(bin_data).hexdigest()))
                # Write the stream out in binary mode before probing it for OLE content.
                with open(bin_fname, 'wb') as bin_fh:
                    bin_fh.write(bin_data)
                if olefile.isOleFile(bin_fname):
                    oles[f] = olefile.OleFileIO(bin_fname)
                elif olefile2.isOleFile(bin_fname):
                    oles[f] = olefile2.OleFileIO(bin_fname)
            z.close()

        if olefile.isOleFile(file_name):
            is_ole = True
            oles[file_name] = olefile.OleFileIO(file_name)
        elif olefile2.isOleFile(file_name):
            is_ole = True
            oles[file_name] = olefile2.OleFileIO(file_name)

        if is_zip and is_ole:
            streams_res.report_heuristics(Oletools.AL_Oletools_002)

        decompressed_macros = False
        for ole_filename in oles.iterkeys():
            try:
                decompressed_macros |= self.process_ole_stream(oles[ole_filename], streams_res)
            except Exception:
                continue

        if decompressed_macros:
            streams_res.score = SCORE.HIGH

        for _, offset, rtfobject in rtf_iter_objects(file_contents):
            rtfobject_name = hex(offset) + '.rtfobj'
            extracted_obj = os.path.join(self.working_directory, rtfobject_name)
            with open(extracted_obj, 'wb') as fh:
                fh.write(rtfobject)
            self.request.add_extracted(extracted_obj,
                                       'Embedded RTF Object at offset %s' % hex(offset),
                                       rtfobject_name)

        if len(streams_res.body) > 0:
            self.ole_result.add_section(streams_res)

    except Exception:
        self.log.debug("Error extracting streams: {}".format(traceback.format_exc(limit=2)))

    finally:
        for fd in oles.itervalues():
            try:
                fd.close()
            except:
                pass
def check_xml_strings(self, path):
    xml_target_res = ResultSection(score=SCORE.NULL, title_text="Attached External Template Targets in XML")
    xml_ioc_res = ResultSection(score=SCORE.NULL, title_text="IOCs in XML:")
    xml_b64_res = ResultSection(score=SCORE.NULL, title_text="Base64 in XML:")
    try:
        template_re = re.compile(r'/attachedTemplate"\s+[Tt]arget="((?!file)[^"]+)"\s+[Tt]argetMode="External"')
        uris = []
        zip_uris = []
        b64results = {}
        b64_extracted = set()
        if zipfile.is_zipfile(path):
            try:
                patterns = PatternMatch()
            except:
                patterns = None
            z = zipfile.ZipFile(path)
            for f in z.namelist():
                data = z.open(f).read()
                if len(data) > 500000:
                    data = data[:500000]
                    xml_ioc_res.report_heuristics(Oletools.AL_Oletools_003)
                    xml_ioc_res.score = min(xml_ioc_res.score, 1)
                zip_uris.extend(template_re.findall(data))

                # Use FrankenStrings modules to find other strings of interest
                # Plain IOCs
                if patterns:
                    pat_strs = ["http://purl.org", "schemas.microsoft.com",
                                "schemas.openxmlformats.org", "www.w3.org"]
                    pat_ends = ["themeManager.xml", "MSO.DLL", "stdole2.tlb",
                                "vbaProject.bin", "VBE6.DLL", "VBE7.DLL"]
                    pat_whitelist = ['Management', 'Manager', "microsoft.com"]

                    st_value = patterns.ioc_match(data, bogon_ip=True)
                    if len(st_value) > 0:
                        for ty, val in st_value.iteritems():
                            if val == "":
                                asc_asc = unicodedata.normalize('NFKC', val).encode('ascii', 'ignore')
                                if any(x in asc_asc for x in pat_strs) \
                                        or asc_asc.endswith(tuple(pat_ends)) \
                                        or asc_asc in pat_whitelist:
                                    continue
                                else:
                                    xml_ioc_res.score += 1
                                    xml_ioc_res.add_line("Found %s string: %s in file %s"
                                                         % (TAG_TYPE[ty].replace("_", " "), asc_asc, f))
                                    xml_ioc_res.add_tag(TAG_TYPE[ty], asc_asc, TAG_WEIGHT.LOW)
                            else:
                                ulis = list(set(val))
                                for v in ulis:
                                    if any(x in v for x in pat_strs) \
                                            or v.endswith(tuple(pat_ends)) \
                                            or v in pat_whitelist:
                                        continue
                                    else:
                                        xml_ioc_res.score += 1
                                        xml_ioc_res.add_line("Found %s string: %s in file %s"
                                                             % (TAG_TYPE[ty].replace("_", " "), v, f))
                                        xml_ioc_res.add_tag(TAG_TYPE[ty], v, TAG_WEIGHT.LOW)

                # Base64
                b64_matches = set()
                for b64_tuple in re.findall('(([\x20]{0,2}[A-Za-z0-9+/]{3,}={0,2}[\r]?[\n]?){6,})', data):
                    b64 = b64_tuple[0].replace('\n', '').replace('\r', '').replace(' ', '')
                    uniq_char = ''.join(set(b64))
                    if len(uniq_char) > 6:
                        if len(b64) >= 16 and len(b64) % 4 == 0:
                            b64_matches.add(b64)
                """
                Using some selected code from 'base64dump.py' by Didier Stevens@https://DidierStevens.com
                """
                for b64_string in b64_matches:
                    try:
                        b64_extract = False
                        base64data = binascii.a2b_base64(b64_string)
                        sha256hash = hashlib.sha256(base64data).hexdigest()
                        if sha256hash in b64_extracted:
                            continue
                        # Search for embedded files of interest
                        if 500 < len(base64data) < 8000000:
                            m = magic.Magic(mime=True)
                            ftype = m.from_buffer(base64data)
                            if 'octet-stream' not in ftype:
                                for ft in self.filetypes:
                                    if ft in ftype:
                                        b64_file_path = os.path.join(self.working_directory,
                                                                     "{}_b64_decoded".format(sha256hash[0:10]))
                                        self.request.add_extracted(b64_file_path,
                                                                   "Extracted b64 file during "
                                                                   "OLETools analysis.")
                                        with open(b64_file_path, 'wb') as b64_file:
                                            b64_file.write(base64data)
                                            self.log.debug("Submitted dropped file for analysis: {}"
                                                           .format(b64_file_path))
                                        b64results[sha256hash] = [len(b64_string), b64_string[0:50],
                                                                  "[Possible base64 file contents in {}. "
                                                                  "See extracted files.]".format(f), "", ""]
                                        b64_extract = True
                                        b64_extracted.add(sha256hash)
                                        break
                        if not b64_extract and len(base64data) > 30:
                            if all(ord(c) < 128 for c in base64data):
                                check_utf16 = base64data.decode('utf-16').encode('ascii', 'ignore')
                                if check_utf16 != "":
                                    asc_b64 = check_utf16
                                else:
                                    asc_b64 = self.ascii_dump(base64data)
                                # If data has less than 7 unique chars then ignore
                                uniq_char = ''.join(set(asc_b64))
                                if len(uniq_char) > 6:
                                    if patterns:
                                        st_value = patterns.ioc_match(asc_b64, bogon_ip=True)
                                        if len(st_value) > 0:
                                            for ty, val in st_value.iteritems():
                                                if val == "":
                                                    asc_asc = unicodedata.normalize('NFKC', val)\
                                                        .encode('ascii', 'ignore')
                                                    xml_ioc_res.add_tag(TAG_TYPE[ty], asc_asc, TAG_WEIGHT.LOW)
                                                else:
                                                    ulis = list(set(val))
                                                    for v in ulis:
                                                        xml_ioc_res.add_tag(TAG_TYPE[ty], v, TAG_WEIGHT.LOW)
                                    b64results[sha256hash] = [len(b64_string), b64_string[0:50], asc_b64,
                                                              base64data, "{}".format(f)]
                    except:
                        pass

                b64index = 0
                for b64k, b64l in b64results.iteritems():
                    xml_b64_res.score = 100
                    b64index += 1
                    sub_b64_res = ResultSection(SCORE.NULL,
                                                title_text="Result {0} in file {1}".format(b64index, f),
                                                parent=xml_b64_res)
                    sub_b64_res.add_line('BASE64 TEXT SIZE: {}'.format(b64l[0]))
                    sub_b64_res.add_line('BASE64 SAMPLE TEXT: {}[........]'.format(b64l[1]))
                    sub_b64_res.add_line('DECODED SHA256: {}'.format(b64k))
                    subb_b64_res = ResultSection(SCORE.NULL, title_text="DECODED ASCII DUMP:",
                                                 body_format=TEXT_FORMAT.MEMORY_DUMP,
                                                 parent=sub_b64_res)
                    subb_b64_res.add_line('{}'.format(b64l[2]))
                    if b64l[3] != "":
                        if patterns:
                            st_value = patterns.ioc_match(b64l[3], bogon_ip=True)
                            if len(st_value) > 0:
                                xml_b64_res.score += 1
                                for ty, val in st_value.iteritems():
                                    if val == "":
                                        asc_asc = unicodedata.normalize('NFKC', val).encode('ascii', 'ignore')
                                        xml_b64_res.add_tag(TAG_TYPE[ty], asc_asc, TAG_WEIGHT.LOW)
                                    else:
                                        ulis = list(set(val))
                                        for v in ulis:
                                            xml_b64_res.add_tag(TAG_TYPE[ty], v, TAG_WEIGHT.LOW)
            z.close()

        for uri in zip_uris:
            if self.parse_uri(uri):
                uris.append(uri)

        uris = list(set(uris))
        # If there are domains or IPs, report them
        if uris:
            xml_target_res.score = 500
            xml_target_res.add_lines(uris)
            xml_target_res.report_heuristics(Oletools.AL_Oletools_001)

    except Exception as e:
        self.log.debug("Failed to analyze XML: {}".format(e))

    if xml_target_res.score > 0:
        self.ole_result.add_section(xml_target_res)
    if xml_ioc_res.score > 0:
        self.ole_result.add_section(xml_ioc_res)
    if xml_b64_res.score > 0:
        self.ole_result.add_section(xml_b64_res)