def LSB_visual(self):
    """Convert pixel data so that each value in a pixel is either 0 (if LSB == 0) or 255 (if LSB == 1)"""
    img = Image.new(self.imode, self.isize)
    if self.working_directory is None:
        self.working_directory = path.dirname(__file__)
    try:
        # Single-channel images are walked as grayscale; everything else as RGBA.
        if self.channels_to_process == 1:
            img.putdata(self.iter_grayscale_pixels())
        else:
            img.putdata(self.iter_rgba_pixels())
        success = True
    except Exception:
        # Was a bare `except:`, which also swallows KeyboardInterrupt/SystemExit.
        # Pixel iteration is best-effort; any failure just skips the attack image.
        success = False
    if success:
        lsb_visual_path = path.join(
            self.working_directory,
            "LSB_visual_attack.{}".format(self.iformat.lower()))
        img.save(lsb_visual_path)
        # Save to AL supplementary file. Request should therefore be set and working_directory given.
        if self.request is not None:
            self.request.add_supplementary(
                lsb_visual_path, "LSB_visual_attack",
                "Pixaxe LSB visual attack image")
        if self.result is not None:
            visres = ResultSection('Visual LSB Analysis.\t')
            visres.add_line(
                'Visual LSB analysis successful, see extracted files.')
            self.working_result.add_subsection(visres)
        else:
            # Standalone (non-service) run: display the image interactively.
            # NOTE(review): the original source was whitespace-mangled; this
            # `else` most plausibly pairs with `self.result is not None` — confirm.
            img.show()
    return
def additional_parsing(self, file_path: str) -> Optional[ResultSection]:
    """Collect URIs from PDF /Link annotations and return them as a tagged section.

    Returns None when no URLs are found or when pikepdf cannot parse the file.
    """
    found_urls = set()
    try:
        with pikepdf.open(file_path) as pdf:
            num_pages = len(pdf.pages)
            for page in pdf.pages:
                if '/Annots' not in page:
                    continue
                for annotation in page['/Annots'].as_list():
                    # Only link annotations carrying an action dictionary matter.
                    if annotation.get('/Subtype') != '/Link' or '/A' not in annotation:
                        continue
                    raw_uri = annotation['/A'].get('/URI')
                    if not hasattr(raw_uri, '__str__'):
                        continue
                    candidate = str(raw_uri)
                    if re.match(FULL_URI, candidate):
                        found_urls.add(candidate)
        if not found_urls:
            return None
        body = '\n'.join(found_urls)
        patterns = PatternMatch()
        tags: dict[str, set[bytes]] = patterns.ioc_match(body.encode())
        # Links on a single-page document score via a dedicated signature.
        section = ResultSection(
            'URL in Annotations',
            heuristic=Heuristic(
                27, signature='one_page' if num_pages == 1 else None),
            body=body)
        for tag_type, values in tags.items():
            for tag_value in values:
                section.add_tag(tag_type, tag_value)
        return section
    except Exception as e:
        self.log.warning(f'pikepdf failed to parse sample: {e}')
        return None
def _get_category_section(self, category: str, tags: Iterator[AVClassTag]) -> ResultSection:
    """
    Gets a `ResultSection` for a list of tags from a single category.

    Result contains a table with AVclass tag information in descending order by rank.

    :param category: Category of tags
    :param tags: Tags belonging to category
    :return: `ResultSection`
    """
    ordered_tags = sorted(tags, key=lambda t: t.rank, reverse=True)
    category_name, heur_id, tag_type = AVCLASS_CATEGORY[category]

    # One table row per tag, highest rank first.
    rows = []
    for avclass_tag in ordered_tags:
        rows.append({
            'name': avclass_tag.name,
            'category': category_name,
            'path': avclass_tag.path,
            'rank': avclass_tag.rank,
        })

    heuristic = None if heur_id is None else Heuristic(heur_id)
    section = ResultSection(
        f'AVclass extracted {len(ordered_tags)} {category_name} tags',
        body=json.dumps(rows),
        body_format=BODY_FORMAT.TABLE,
        heuristic=heuristic)

    # Some categories have no corresponding Assemblyline tag type.
    if tag_type is not None:
        for avclass_tag in ordered_tags:
            section.add_tag(tag_type, avclass_tag.name)
    return section
def dump_invalid_properties(self, parent_res):
    """Attach a heuristic-bearing section when properties with unexpected IDs were seen."""
    count = self.invalid_properties_count
    if not count:
        return
    section = ResultSection(
        f"We've found {count} properties with IDs different than "
        f"1 (storage), 2 (stream) and 5 (root)",
        parent=parent_res)
    section.set_heuristic(50)
def parse_link(self, parent_res, path):
    """Parse a Windows shortcut (.lnk) file and report its metadata.

    :param parent_res: Parent section the metadata section is attached to
    :param path: Path of the .lnk file on disk
    :return: True if the file decoded as a LNK, False otherwise
    """
    with open(path, "rb") as fh:
        metadata = decode_lnk(fh.read())
    if metadata is None:
        # Not a parsable LNK file.
        return False
    # Flatten nested metadata and drop empty values for a KEY_VALUE body.
    body_output = {
        build_key(k): v
        for k, v in flatten(metadata).items() if v
    }
    res = ResultSection("Metadata extracted by parse_lnk",
                        body_format=BODY_FORMAT.KEY_VALUE,
                        body=json.dumps(body_output),
                        parent=parent_res)
    bp = metadata.get("BasePath", "").strip()
    rp = metadata.get("RELATIVE_PATH", "").strip()
    nn = metadata.get("NetName", "").strip()
    cla = metadata.get("COMMAND_LINE_ARGUMENTS", "").strip()
    # Flag shortcuts whose command-line arguments match known-bad patterns.
    s = BAD_LINK_RE.search(cla.lower())
    if s:
        res.set_heuristic(1)
    # Tag the target's file name (last path component, first non-empty of
    # base path / relative path / net name) and the reconstructed command line.
    res.add_tag(tag_type="file.name.extracted",
                value=(bp or rp or nn).rsplit("\\")[-1])
    res.add_tag(tag_type="dynamic.process.command_line",
                value=f"{(rp or bp or nn)} {cla}".strip())
    # Map known LNK metadata keys to Assemblyline tag types via TAG_MAP.
    for k, v in body_output.items():
        tag_type = TAG_MAP.get("LNK", {}).get(k, None) or \
            TAG_MAP.get(None, {}).get(k, None)
        if tag_type:
            res.add_tag(tag_type, v)
    return True
def _create_random_section(self):
    """Build a randomly populated ResultSection (random body format, lines,
    tags, heuristic, and possibly one nested subsection) for testing."""
    # Pick the body format first to keep the RNG call order unchanged.
    fmt = random.choice(FORMAT_LIST)
    section = ResultSection(get_random_phrase(3, 7), body_format=fmt)

    # Between one and four random body lines.
    for _ in range(1, 5):
        section.add_line(get_random_phrase(5, 10))

    # Attach every generated tag value.
    for tag_type, values in flatten(get_random_tags()).items():
        for value in values:
            section.add_tag(tag_type, value)

    # Set a heuristic roughly one time in three.
    if random.choice([False, False, True]):
        section.set_heuristic(random.randint(1, 4))

    # Recurse into a random subsection roughly one time in three.
    if random.choice([False, False, True]):
        section.add_subsection(self._create_random_section())

    return section
def execute(self, request):
    """Main Module. See README for details.

    Runs DeXRAY on the submitted quarantine file and reports any
    un-quarantined files and quarantine metadata.
    """
    result = Result()
    self.sha = request.sha256
    local = request.file_path
    text_section = None
    kv_section = None
    # dexray() extracts embedded files (via request.add_extracted) and
    # returns any quarantine metadata it recovered.
    extracted, metadata = self.dexray(request, local)

    num_extracted = len(request.extracted)
    if num_extracted != 0:
        text_section = ResultSection("DeXRAY found files:")
        # Renamed loop variable: it previously shadowed `extracted` above.
        for extracted_file in request.extracted:
            file_name = extracted_file.get('name')
            text_section.add_line(
                f"Resubmitted un-quarantined file as : {file_name}")

    if metadata:
        # Can contain live URLs to the original content source
        kv_section = ResultSection("DeXRAY Quarantine Metadata",
                                   body_format=BODY_FORMAT.JSON,
                                   body=json.dumps(metadata))
        # Fixed: kv_section was previously added here AND in the loop below,
        # producing a duplicate section in the result.

    for section in (text_section, kv_section):
        if section:
            result.add_section(section)

    # Fixed: the result was built but never attached to the request.
    request.result = result
def test_handle_artefact(artefact, expected_result_section_title):
    from assemblyline_v4_service.common.dynamic_service_helper import SandboxOntology, Artefact
    from assemblyline_v4_service.common.result import ResultSection

    # A missing artefact must raise.
    if artefact is None:
        with pytest.raises(Exception):
            SandboxOntology._handle_artefact(artefact, None)
        return

    # Build the section we expect to be attached, if any.
    expected = None
    if expected_result_section_title is not None:
        expected = ResultSection(expected_result_section_title)
        expected.add_tag("dynamic.process.file_name", artefact["path"])

    parent = ResultSection("blah")
    item = Artefact(name=artefact["name"],
                    path=artefact["path"],
                    description=artefact["description"],
                    to_be_extracted=artefact["to_be_extracted"])
    SandboxOntology._handle_artefact(item, parent)

    actual = parent.subsections[0] if parent.subsections else None
    if expected is None and actual is None:
        assert True
    else:
        assert check_section_equality(actual, expected)
def test_set_heuristic_by_verdict(intezer_static_class_instance):
    from assemblyline_v4_service.common.result import ResultSection

    # Verdicts that must leave the heuristic unset (applied to one shared
    # section, matching the original call sequence).
    shared_section = ResultSection("blah")
    for verdict in (None, "blah", "trusted"):
        intezer_static_class_instance._set_heuristic_by_verdict(
            shared_section, verdict)
        assert shared_section.heuristic is None

    # "malicious" is applied to that same section and sets heuristic 1.
    intezer_static_class_instance._set_heuristic_by_verdict(
        shared_section, "malicious")
    assert shared_section.heuristic.heur_id == 1

    # Remaining verdicts each get a fresh section.
    for verdict, heur_id in (("known_malicious", 1), ("suspicious", 2)):
        section = ResultSection("blah")
        intezer_static_class_instance._set_heuristic_by_verdict(
            section, verdict)
        assert section.heuristic.heur_id == heur_id
def stack_result(section: List[bytes]) -> Optional[ResultSection]:
    """ Generates a ResultSection from floss stacked strings output section

    :param section: Floss output section (header line followed by strings)
    :return: Section grouping similar stacked strings, or None if there are none
    """
    # Fixed typo in the displayed title: was 'Sacked Strings'.
    result = ResultSection('FLARE FLOSS Stacked Strings',
                           body_format=BODY_FORMAT.MEMORY_DUMP,
                           heuristic=Heuristic(3))
    assert result.heuristic
    strings = section[1:]
    if not strings:
        return None
    groups = group_strings(s.decode() for s in strings)
    for group in groups:
        res = ResultSection(
            f"Group: '{min(group, key=len)}' Strings: {len(group)}",
            body='\n'.join(group),
            body_format=BODY_FORMAT.MEMORY_DUMP)
        for string in group:
            # Very large groups are noisy: restrict tagging to network IOCs.
            ioc_tag(string.encode(), res, just_network=len(group) > 1000)
        result.add_subsection(res)
    # Raise the signature only if some group actually produced tags.
    if any(res.tags for res in result.subsections):
        result.heuristic.add_signature_id('stacked_ioc')
    return result
def _validate_tag(
        result_section: ResultSection,
        tag: str,
        value: Any,
        safelist: Dict[str, Dict[str, List[str]]] = None
) -> bool:
    """
    This method validates the value relative to the tag type before adding the value as a tag to the ResultSection.
    :param result_section: The ResultSection that the tag will be added to
    :param tag: The tag type that the value will be tagged under
    :param value: The item that will be tagged under the tag type
    :param safelist: The safelist containing matches and regexs. The product of a service using
                     self.get_api_interface().get_safelist().
    :return: Tag was successfully added
    """
    if safelist is None:
        safelist = {}

    # Tag-type-specific syntax check, when a regex exists for this tag type.
    regex = _get_regex_for_tag(tag)
    if regex and not match(regex, value):
        return False

    if "ip" in tag and not is_valid_ip(value):
        return False

    if "domain" in tag:
        if not is_valid_domain(value):
            return False
        elif value in FALSE_POSITIVE_DOMAINS_FOUND_IN_PATHS:
            # Known look-alike "domains" that actually come from file paths.
            return False
        elif isinstance(value, str) and value.split(".")[-1] in COMMON_FILE_EXTENSIONS:
            # A file name like "payload.exe" parses as a domain; reject by extension.
            return False

    if is_tag_safelisted(value, [tag], safelist):
        return False

    # if "uri" is in the tag, let's try to extract its domain/ip and tag it.
    if "uri_path" not in tag and "uri" in tag:
        # First try to get the domain
        valid_domain = False
        domain = search(DOMAIN_REGEX, value)
        if domain:
            domain = domain.group()
            # Recurse: the extracted host is validated/tagged as a domain.
            valid_domain = _validate_tag(result_section, "network.dynamic.domain", domain, safelist)
        # Then try to get the IP
        valid_ip = False
        ip = search(IP_REGEX, value)
        if ip:
            ip = ip.group()
            valid_ip = _validate_tag(result_section, "network.dynamic.ip", ip, safelist)
        # Tag the full URI only when it is more than a bare domain/IP and its
        # host component validated successfully.
        if value not in [domain, ip] and (valid_domain or valid_ip):
            result_section.add_tag(tag, safe_str(value))
        else:
            return False
    else:
        result_section.add_tag(tag, safe_str(value))

    return True
def execute(self, request):
    """Submit the URL to urlscan.io, build a report section, and attach the
    site screenshot as an extracted file."""
    result = Result()
    url = request.task.metadata.get('submitted_url')
    api_key = request.get_param("api_key")
    public = request.get_param("public")

    u = UrlScan(apikey=api_key, url=url, public=public)
    u.submit()

    # We need to wait for the API to process our request
    response = self.wait_processing(u)
    # Parse the JSON once instead of re-parsing it for every key.
    response_json = response.json()

    # We get the response parts that we want and merge them all together
    report = {
        **response_json["verdicts"]["overall"],
        **response_json["lists"],
        **response_json["page"]
    }

    # We convert the "certificates" section from a list of dictionaries to a
    # dictionary of lists. Guard against an absent or empty list, which
    # previously raised (certificates[0] on an empty list).
    certificates = report.pop("certificates", None)
    if certificates:
        certificates = {
            k: [dic[k] for dic in certificates]
            for k in certificates[0]
        }
        # We add the converted section to the report
        report = {**report, **certificates}

    # We create the KEY_VALUE section to add the report to the result page
    kv_section = ResultSection("Urlscan.io report",
                               body_format=BODY_FORMAT.KEY_VALUE,
                               body=json.dumps(report))
    for domain in report["domains"]:
        kv_section.add_tag("network.static.domain", domain.strip())
    result.add_section(kv_section)

    # We get the preview of the website
    preview_path = self.working_directory + "/preview.png"
    screenshot = u.getScreenshot()
    with open(preview_path, "wb") as ofile:
        ofile.write(screenshot)

    # Adding the preview on the result page
    url_section = ResultSection(
        'Urlscan.io website screenshot',
        body_format=BODY_FORMAT.URL,
        body=json.dumps({
            "name": "The preview is also available here !",
            "url": response_json["task"]["screenshotURL"]
        }))
    result.add_section(url_section)
    request.add_extracted(preview_path, "preview.png",
                          "Here's the preview of the site")
    request.result = result
def parse_plist(self, pdict):
    """Attempts to extract and identify all known and unknown keys of a plist file.

    Args:
        pdict: Plist dictionary item.

    Returns:
        A list of known keys and a list of unknown keys.
    """
    idenkey_sec = ResultSection("Identified Keys")
    unkkey_sec = ResultSection("Unidentified Keys:")
    # Sometimes plist is a list of dictionaries, or it is just a list. Will merge dict /convert to dict for now
    if isinstance(pdict, list):
        pdict = self.transform_dicts(pdict)
    for k, i in list(pdict.items()):
        # Prepare Keys
        k = str(safe_str(k))
        # "~ipad" key variants are treated as their base key for de-duplication.
        k_noipad = k.replace("~ipad", "")
        # Prepare values: normalize to a list (None becomes [""]).
        if i is None:
            i = [""]
        elif not isinstance(i, list):
            i = [i]
        # Many plist files are duplicates of info.plist, do not report on keys already identified
        if k_noipad in self.reported_keys:
            if i in self.reported_keys[k_noipad]:
                continue
            self.reported_keys[k_noipad].append(i)
        else:
            self.reported_keys[k_noipad] = [i]
        # Process known keys
        if k_noipad in self.known_keys:
            desc, create_tag = self.known_keys[k_noipad]
            idenkey_sec.add_line(f"{k} ({desc}): {', '.join([safe_str(x, force_str=True) for x in i])}")
            if create_tag:
                # Known keys with create_tag set map to a tag type via TAG_MAP.
                for val in i:
                    idenkey_sec.add_tag(TAG_MAP[k_noipad.upper()], safe_str(val, force_str=True))
        else:
            unkkey_sec.add_line(f"{k}: {', '.join([safe_str(x, force_str=True) for x in i])}")
    # Sections that received no lines are dropped (returned as None).
    if idenkey_sec.body is None:
        idenkey_sec = None
    if unkkey_sec.body is None:
        unkkey_sec = None
    return idenkey_sec, unkkey_sec
def get_result_subsection(result, title, heuristic):
    """Return the subsection of `result` titled `title`, creating and
    attaching a new one when absent; (re)apply `heuristic` before returning."""
    # Keep the LAST subsection with a matching title, mirroring the original
    # linear scan that overwrote earlier matches.
    result_subsection = next(
        (sub for sub in reversed(result.subsections) if sub.title_text == title),
        None)
    if result_subsection is None:
        result_subsection = ResultSection(title)
        result.add_subsection(result_subsection)
    result_subsection.set_heuristic(heuristic)
    return result_subsection
def resubmit_dex2jar_output(self, apk_file: str, target: str, result: Result, request):
    """Extract classes.dex from the APK, recompile it to a JAR with dex2jar,
    and resubmit the JAR for analysis."""
    dex_path = os.path.join(self.working_directory, "classes.dex")
    self.get_dex(apk_file, dex_path)
    if not os.path.exists(dex_path):
        return

    proc = Popen([self.dex2jar, "--output", target, dex_path],
                 stdout=PIPE, stderr=PIPE)
    proc.communicate()
    if not os.path.exists(target):
        return

    section = ResultSection(
        "Classes.dex file was recompiled as a JAR and re-submitted for analysis")
    section.add_line(f"JAR file resubmitted as: {os.path.basename(target)}")
    request.add_extracted(target, os.path.basename(target),
                          "Dex2Jar output JAR file")
    result.add_section(section)
def _section_traverser(section: ResultSection = None) -> ResultSection:
    """
    This function goes through each section and sends the tags to a function
    that will reduce specific tags

    :param section: An Assemblyline result section
    :return: Reduced Assemblyline result section
    """
    # Depth-first: reduce all subsections before this section's own tags.
    for child in section.subsections:
        _section_traverser(child)
    current_tags = section.tags
    if current_tags:
        section.set_tags(_reduce_specific_tags(current_tags))
    return section
def add_results(result, data, data_deobfuscated):
    """Tag IoCs and suspicious formulas found in the document and attach the
    corresponding sections to `result` when they are non-empty."""
    ioc_section = ResultSection('Found the following IoCs')
    formula_section = ResultSection('Suspicious formulas found in document')

    # tag_data populates both sections with subsections as it finds matches.
    tag_data(data, data_deobfuscated, ioc_section, formula_section)

    # Only report sections that actually gained content.
    for section in (ioc_section, formula_section):
        if section.subsections:
            result.add_section(section)
def manage_threat_level(self, data, result):
    """Add a section whose heuristic reflects the reported threat level.

    :param data: Report dict containing a 'threat_level' string
    :param result: Result the section is added to

    Unknown threat levels are ignored; previously they caused a NameError
    because `threat_section` was only bound inside the three `if` branches.
    """
    heuristic_by_level = {
        'Low Risk': 1,
        'Moderate Risk': 2,
        'High Risk': 3,
    }
    level = data['threat_level']
    heur_id = heuristic_by_level.get(level)
    if heur_id is None:
        return
    threat_section = ResultSection(
        "threat level : {0}".format(level),
        heuristic=Heuristic(heur_id))
    result.add_section(threat_section)
def recurse_add_res(self, file_res, res_list, new_files, parent=None):
    """Build ResultSections from a list of result dictionaries.

    :param file_res: Top-level Result that root-level sections are added to
    :param res_list: List of dicts describing sections (title_text, heur_id, ...)
    :param new_files: List mutated in place with files to resubmit
    :param parent: Optional parent section for nested sections
    """
    for res_dic in res_list:
        # Check if condition is OK
        if self.pass_condition(res_dic.get("condition", None)):
            res = ResultSection(res_dic['title_text'],
                                classification=res_dic.get('classification',
                                                           Classification.UNRESTRICTED),
                                parent=parent,
                                body_format=res_dic.get('body_format',
                                                        BODY_FORMAT.TEXT))
            # A score_condition may substitute a different heuristic ID.
            heur_id = self.heuristic_alteration(res_dic.get('score_condition', None),
                                                res_dic['heur_id'])
            res.set_heuristic(heur_id)
            # Add Tags
            tags = res_dic.get('tags', [])
            for res_tag in tags:
                res.add_tag(res_tag[0], res_tag[1])
            # Add body
            body = res_dic.get('body', None)
            if body:
                res.set_body(body)
            # File for resubmit
            files = res_dic.get('files', [])
            for res_file in files:
                if isinstance(res_file, tuple):
                    # Tuples carry the file entry in position 1 (position 0 is
                    # presumably a label) — TODO confirm against callers.
                    res_file = res_file[1]
                new_files.append(res_file)
            # Add to file res if root result
            if parent is None:
                file_res.add_section(res)
def static_result(section: List[bytes], max_length: int, st_max_size: int) -> Optional[ResultSection]:
    """ Generates a ResultSection from floss static strings output section

    :param section: Floss output section (header line followed by strings)
    :param max_length: Strings longer than this are skipped
    :param st_max_size: Above this many strings, only network IOCs are tagged
    :return: Section listing tagged static strings, or None if none were tagged
    """
    result = ResultSection(section[0].decode(errors='ignore'),
                           body_format=BODY_FORMAT.MEMORY_DUMP)
    candidates = section[1:]
    network_only = len(candidates) > st_max_size
    for raw in candidates:
        # Only keep strings of reportable length that produced at least one tag.
        if len(raw) <= max_length and ioc_tag(raw, result, just_network=network_only):
            result.add_line(raw.decode(errors='ignore'))
    return result if result.body else None
def check_for_b64(self, data, section):
    """Search and decode base64 strings in sample data.

    Args:
        data: Data to be parsed
        section: Sub-section to be modified if base64 found

    Returns:
        decoded: Boolean which is true if base64 found
    """
    b64_matches = []
    # b64_matches_raw will be used for replacing in case b64_matches are modified
    b64_matches_raw = []
    decoded_param = data
    decoded = False
    for b64_match in re.findall(
            '([\x20]{0,2}(?:[A-Za-z0-9+/]{10,}={0,2}[\r]?[\n]?){2,})',
            re.sub('\x3C\x00\x20{2}\x00', '', data)):
        b64 = b64_match.replace('\n', '').replace('\r', '').replace(
            ' ', '').replace('<', '')
        uniq_char = ''.join(set(b64))
        # Require enough character diversity and a plausible base64 length.
        if len(uniq_char) > 6 and len(b64) >= 16 and len(b64) % 4 == 0:
            b64_matches.append(b64)
            b64_matches_raw.append(b64_match)
    for b64_string, b64_string_raw in zip(b64_matches, b64_matches_raw):
        try:
            base64data = binascii.a2b_base64(b64_string)
            # Decode base64 bytes, add a space to beginning as it may be stripped off while using regex.
            # Fixed: the original concatenated str + bytes, which raises
            # TypeError on Python 3 and was silently swallowed below, so the
            # decoded path was never reached. Decode back to str instead.
            base64data_decoded = ' ' + base64data.decode('utf-16').encode(
                'ascii', 'ignore').decode('ascii')
            # Replace base64 from param with decoded string. Fixed: the raw
            # match is arbitrary text, not a regex — use a plain replace
            # instead of re.sub (which also mis-handled escapes in the
            # replacement string).
            decoded_param = decoded_param.replace(b64_string_raw,
                                                  base64data_decoded)
            decoded = True
        except Exception:
            # Best-effort: not every candidate is valid base64/UTF-16.
            pass
    if decoded:
        decoded_section = ResultSection('Possible Base64 found',
                                        parent=section,
                                        heuristic=Heuristic(5))
        decoded_section.add_line(
            f'Possible Base64 Decoded Parameters: {decoded_param}')
        self.find_ip(decoded_param)
    return decoded
def extract_powershell(self, parameter, section):
    """Searches parameter for PowerShell, adds as extracted if found

    Args:
        parameter: String to be searched
        section: Section to be modified if PowerShell found
    """
    # re.search is enough for a boolean presence test (was re.findall).
    if not re.search(r'(?:powershell)|(?:pwsh)', parameter, re.IGNORECASE):
        return
    self.found_powershell = True
    # Unicode-objects must be encoded before hashing
    if isinstance(parameter, str):
        sha256hash = hashlib.sha256(parameter.encode()).hexdigest()
    else:
        sha256hash = hashlib.sha256(parameter).hexdigest()
    # Attaching via parent= is the side effect; the section object itself
    # does not need to be kept.
    ResultSection('Discovered PowerShell code in parameter.', parent=section)
    # Add PowerShell code as extracted, account for duplicates
    if sha256hash not in self.file_hashes:
        self.file_hashes.append(sha256hash)
        powershell_filename = f'{sha256hash[0:25]}_extracted_powershell'
        powershell_file_path = os.path.join(self.working_directory,
                                            powershell_filename)
        with open(powershell_file_path, 'w') as f:
            f.write(parameter)
        self.request.add_extracted(
            powershell_file_path, powershell_filename,
            'Discovered PowerShell code in parameter')
def handle_artefacts(artefact_list: list, request: ServiceRequest) -> ResultSection:
    """
    Goes through each artefact in artefact_list, uploading them and adding
    result sections accordingly

    Positional arguments:
    artefact_list -- list of dictionaries that each represent an artefact
    """
    parent_section = ResultSection("Sandbox Artefacts")
    for artefact in SandboxOntology._validate_artefacts(artefact_list):
        SandboxOntology._handle_artefact(artefact, parent_section)
        if not artefact.to_be_extracted:
            request.add_supplementary(artefact.path, artefact.name,
                                      artefact.description)
            continue
        try:
            request.add_extracted(artefact.path, artefact.name,
                                  artefact.description)
        except MaxExtractedExceeded:
            # To avoid errors from being raised when too many files have been extracted
            pass
    return parent_section if parent_section.subsections else None
def execute(self, request):
    """Skip oversized PDFs, then run XDP-embed detection and peepdf analysis."""
    temp_filename = request.file_path

    # Reject documents over the configured size limit up front.
    if os.path.getsize(temp_filename) > self.max_pdf_size:
        file_res = Result()
        file_res.add_section(ResultSection(
            f"PDF Analysis of the file was skipped because the "
            f"file is too big (limit is {(self.max_pdf_size / 1000 / 1000)} MB)."))
        request.result = file_res
        return

    filename = os.path.basename(temp_filename)
    with open(temp_filename, 'rb') as f:
        file_content = f.read()

    # XDP wrappers can embed a PDF inside XML; handle those first.
    if b'<xdp:xdp' in file_content:
        self.find_xdp_embedded(filename, file_content, request)

    self.peepdf_analysis(temp_filename, file_content, request)
def execute(self, request):
    """Run StegExpose LSB steganalysis on the submitted image and report the
    resulting CSV as a key/value section."""
    result = Result()
    file_path = request.file_path
    shutil.copyfile(file_path, self.working_directory + "/analyzed")

    report_path = self.working_directory + "/report.csv"
    # Build argv explicitly (shell=False): the original concatenated a shell
    # string with shell=True, which breaks (or worse, injects) if
    # working_directory ever contains spaces or shell metacharacters.
    p1 = subprocess.Popen([
        "java", "-jar", "/var/lib/assemblyline/StegExpose/StegExpose.jar",
        self.working_directory, "standard", "default", report_path
    ])
    p1.wait()

    lsb_steg_results = self.read_csv(report_path)
    lsb_steg_results = self.beautify_dict(lsb_steg_results)

    kv_section = ResultSection("Result of the LSB steganalysis",
                               body_format=BODY_FORMAT.KEY_VALUE,
                               body=json.dumps(lsb_steg_results))
    result.add_section(kv_section)
    request.result = result
def test_validate_tag(tag, value, expected_tags, added_tag):
    from assemblyline_v4_service.common.result import ResultSection
    from assemblyline_v4_service.common.tag_helper import add_tag

    section = ResultSection("blah")
    safelist = {"match": {"domain": ["blah.ca"]}}
    # add_tag reports whether the value was accepted for this tag type.
    was_added = add_tag(section, tag, value, safelist)
    assert was_added == added_tag
    assert section.tags == expected_tags
def dump_dir(self, dir_index, path, parent_res, is_orphan):
    """Dump one OLE2 storage directory (and recursively its children) into result sections.

    :param dir_index: Index of the directory entry in self.property_dict
    :param path: Accumulated display path of the enclosing storages
    :param parent_res: Section any generated subsection is attached to
    :param is_orphan: Whether this entry is detached from the root tree
    """
    # 1. make sure the directory wasn't dumped already
    if dir_index in self.property_dict and self.property_dict[dir_index][
            1] is False:
        # Mark as dumped before recursing to avoid revisiting this entry.
        self.property_dict[dir_index][1] = True
        field = self.property_dict[dir_index][0]
        # [1:-1] strips the surrounding quote characters from the display name.
        field_name = field['name'].display[1:-1]
        field_full_name = path + field_name
        # 2. create a res with it's name
        res = ResultSection(f"OLE2 STORAGE: {field_full_name}")
        # 3. Dump the dir property
        self.dump_property(self.property_dict[dir_index][0], path, dir_index,
                           res, parent_res, is_orphan)
        # 3. navigate the red-black tree
        self.dump_siblings(field['child'].display, field_full_name, res,
                           parent_res, is_orphan)
        # Only keep the section if it actually produced content.
        if len(res.subsections) > 0:
            parent_res.add_subsection(res)
        # call recursively our children when there is a children
        if dir_index in self.children:
            for sub_dir in self.children[dir_index][1]:
                self.dump_dir(sub_dir, field_full_name + '\\', parent_res,
                              is_orphan)
def run(self):
    """Parse self.file_path with hachoir and, for OLE2 containers, run deep analysis.

    Resets all per-run parser state before parsing.
    """
    # Silence hachoir's own warning output.
    hachoir_config.quiet = True
    # Per-run state consumed by the OLE2 deep-analysis helpers.
    self.additional_parsing_fields = {}
    self.ole2parser = None
    self.office_root_entry_parser = None
    self.children = {}
    self.parent = {}
    self.property_dict = {}
    self.invalid_streams = []
    self.invalid_properties_count = 0
    parser = createParser(self.file_path)
    if parser is not None:
        with parser:
            tags = parser.getParserTags()
            parser_id = tags.get('id', 'unknown')
            # Do OLE2 deep analysis if requested
            if parser_id == 'ole2':
                ole2_res = ResultSection(f"Hachoir OLE2 Deep Analysis",
                                         parent=self.parent_res)
                # this is just so that we don't bail on the NULL property type and we keep on going.
                for (key, value) in PropertyContent.TYPE_INFO.items():
                    if value[1] is None:
                        PropertyContent.TYPE_INFO[key] = (value[0], DummyObject)
                self.parse_ole2(parser, ole2_res)
def test_add_tag(value, expected_tags, tags_were_added):
    from assemblyline_v4_service.common.result import ResultSection
    from assemblyline_v4_service.common.tag_helper import add_tag

    section = ResultSection("blah")
    safelist = {"match": {"domain": ["blah.ca"]}}
    # add_tag reports whether anything was actually tagged for this value.
    outcome = add_tag(section, "blah", value, safelist)
    assert outcome == tags_were_added
    assert section.tags == expected_tags
def test_section_traverser(tags, correct_tags):
    from assemblyline_v4_service.common.section_reducer import _section_traverser
    from assemblyline_v4_service.common.result import ResultSection

    # Build a parent with a single tagged child subsection.
    parent = ResultSection("blah")
    child = ResultSection("subblah")
    for tag_type, values in tags.items():
        for value in values:
            child.add_tag(tag_type, value)
    parent.add_subsection(child)

    # Traversal must reduce the child's tags in place.
    assert _section_traverser(parent).subsections[0].tags == correct_tags