def _scan(self, context):
    """Parse context.data as an OLE office document and report results.

    Records the office header version, one 'directory' result per OLE
    stream (md5/size/timestamps), and one 'doc_meta' result per summary
    property. When config['save_streams'] == 1, raw stream bytes are
    attached as extracted child files.

    NOTE(review): assumes OfficeParser, self._error, self._add_result and
    self._add_file are provided by the enclosing service framework --
    confirm against the rest of the file.
    """
    oparser = OfficeParser(context.data)
    oparser.parse_office_doc()
    # A missing major version means the CDF header never parsed.
    if not oparser.office_header.get('maj_ver'):
        self._error("Could not parse file as an office document")
        return
    self._add_result('office_header',
                     '%d.%d' % (oparser.office_header.get('maj_ver'),
                                oparser.office_header.get('min_ver')))
    for curr_dir in oparser.directory:
        result = {
            'md5': curr_dir.get('md5', ''),
            'size': curr_dir.get('stream_size', 0),
            'mod_time': oparser.timestamp_string(curr_dir['modify_time'])[1],
            'create_time': oparser.timestamp_string(curr_dir['create_time'])[1],
        }
        self._add_result('directory', curr_dir['norm_name'], result)
        # Only attach raw stream bytes when explicitly configured.
        if self.config.get('save_streams', 0) == 1 and 'data' in curr_dir:
            self._add_file(curr_dir['data'],
                           curr_dir['norm_name'],
                           relationship="Extracted_From")
    for prop_list in oparser.properties:
        for prop in prop_list['property_list']:
            # BUGFIX: an unknown CLSID made .get() return None, and the
            # following prop_summary.get() raised AttributeError. Fall
            # back to an empty dict so unknown CLSIDs report 'Unknown'.
            prop_summary = oparser.summary_mapping.get(
                binascii.unhexlify(prop['clsid'])) or {}
            prop_name = prop_summary.get('name', 'Unknown')
            for item in prop['properties']['properties']:
                result = {
                    'name': item.get('name', 'Unknown'),
                    # Prefer the decoded date form when present.
                    'value': item.get('date', item['value']),
                    'result': item.get('result', ''),
                }
                self._add_result('doc_meta', prop_name, result)
def run(self, obj, config):
    """CRITs service entry point: extract office metadata from a sample.

    Adds an 'office_header' result, a 'directory' result per OLE stream,
    a 'doc_meta' result per summary property, and a 'file_added' result
    for every stream saved back into CRITs. Streams are only saved when
    the user has SampleACL.WRITE and config['save_streams'] == 1.

    NOTE(review): assumes OfficeParser, handle_file, SampleACL and
    RelationshipTypes are imported at module level -- confirm against
    the rest of the file.
    """
    oparser = OfficeParser(obj.filedata.read())
    oparser.parse_office_doc()
    added_files = []
    user = self.current_task.user
    # A missing major version means the CDF header never parsed.
    if not oparser.office_header.get('maj_ver'):
        self._error("Could not parse file as an office document")
        return
    self._add_result('office_header',
                     '%d.%d' % (oparser.office_header.get('maj_ver'),
                                oparser.office_header.get('min_ver')))
    for curr_dir in oparser.directory:
        result = {
            'md5': curr_dir.get('md5', ''),
            'size': curr_dir.get('stream_size', 0),
            'mod_time': oparser.timestamp_string(curr_dir['modify_time'])[1],
            'create_time': oparser.timestamp_string(curr_dir['create_time'])[1],
        }
        # Stream names are raw bytes; drop non-ASCII for display/storage.
        name = curr_dir['norm_name'].decode('ascii', errors='ignore')
        self._add_result('directory', name, result)
        if (user.has_access_to(SampleACL.WRITE)
                and config.get('save_streams', 0) == 1
                and 'data' in curr_dir):
            handle_file(name, curr_dir['data'], obj.source,
                        related_id=str(obj.id),
                        related_type=str(obj._meta['crits_type']),
                        campaign=obj.campaign,
                        source_method=self.name,
                        relationship=RelationshipTypes.CONTAINED_WITHIN,
                        user=self.current_task.user)
            stream_md5 = hashlib.md5(curr_dir['data']).hexdigest()
            added_files.append((name, stream_md5))
    for prop_list in oparser.properties:
        for prop in prop_list['property_list']:
            # BUGFIX: an unknown CLSID made .get() return None, and the
            # following prop_summary.get() raised AttributeError. Fall
            # back to an empty dict so unknown CLSIDs report 'Unknown'.
            prop_summary = oparser.summary_mapping.get(
                binascii.unhexlify(prop['clsid'])) or {}
            prop_name = prop_summary.get('name', 'Unknown')
            for item in prop['properties']['properties']:
                result = {
                    'name': item.get('name', 'Unknown'),
                    # Prefer the decoded date form when present.
                    'value': item.get('date', item['value']),
                    'result': item.get('result', ''),
                }
                self._add_result('doc_meta', prop_name, result)
    for f in added_files:
        self._add_result("file_added", f[0], {'md5': f[1]})
def run(data):
    """Extract office metadata from raw file bytes into a plain dict.

    Returns a dict with keys:
      - 'office_header': 'maj.min' version string
      - 'directory': {stream_name: {md5, size, mod_time, create_time}}
      - 'doc_meta': {summary_name: {property_name: value}}
    Returns None (bare return) when the file does not parse as an
    office document.
    """
    ret = {'directory': {}, 'doc_meta': {}}
    oparser = OfficeParser(data)
    oparser.parse_office_doc()
    # A missing major version means the CDF header never parsed.
    if not oparser.office_header.get('maj_ver'):
        print('officemeta', 'Could not parse file as an office document')
        return  # NOTE: callers receive None on parse failure
    ret['office_header'] = '%d.%d' % (oparser.office_header.get('maj_ver'),
                                      oparser.office_header.get('min_ver'))
    for curr_dir in oparser.directory:
        result = {
            'md5': curr_dir.get('md5', ''),
            'size': curr_dir.get('stream_size', 0),
            'mod_time': oparser.timestamp_string(curr_dir['modify_time'])[1],
            'create_time': oparser.timestamp_string(curr_dir['create_time'])[1],
        }
        # Stream names are raw bytes; drop non-ASCII for use as dict keys.
        name = curr_dir['norm_name'].decode('ascii', errors='ignore')
        # TODO: why is this '' sometimes?
        if name:
            ret['directory'][name] = result
    for prop_list in oparser.properties:
        for prop in prop_list['property_list']:
            # BUGFIX: an unknown CLSID made .get() return None, and the
            # following prop_summary.get() raised AttributeError. Fall
            # back to an empty dict so unknown CLSIDs report 'Unknown'.
            prop_summary = oparser.summary_mapping.get(
                binascii.unhexlify(prop['clsid'])) or {}
            prop_name = prop_summary.get('name', 'Unknown')
            if prop_name not in ret['doc_meta']:
                ret['doc_meta'][prop_name] = {}
            for item in prop['properties']['properties']:
                # Prefer the decoded date form when present.
                name = item.get('name', 'Unknown')
                value = item.get('date', item['value'])
                ret['doc_meta'][prop_name][name] = value
    return ret
def run(self, obj, config):
    """CRITs service entry point: extract office metadata from a sample.

    Adds an 'office_header' result, a 'directory' result per OLE stream,
    a 'doc_meta' result per summary property, and a 'file_added' result
    for every stream saved back into CRITs. Streams are only saved when
    the user has SampleACL.WRITE and config['save_streams'] == 1.

    NOTE(review): assumes OfficeParser, handle_file, SampleACL and
    RelationshipTypes are imported at module level -- confirm against
    the rest of the file.
    """
    oparser = OfficeParser(obj.filedata.read())
    oparser.parse_office_doc()
    added_files = []
    user = self.current_task.user
    # A missing major version means the CDF header never parsed.
    if not oparser.office_header.get('maj_ver'):
        self._error("Could not parse file as an office document")
        return
    self._add_result(
        'office_header',
        '%d.%d' % (oparser.office_header.get('maj_ver'),
                   oparser.office_header.get('min_ver')))
    for curr_dir in oparser.directory:
        result = {
            'md5': curr_dir.get('md5', ''),
            'size': curr_dir.get('stream_size', 0),
            'mod_time': oparser.timestamp_string(curr_dir['modify_time'])[1],
            'create_time': oparser.timestamp_string(curr_dir['create_time'])[1],
        }
        # Stream names are raw bytes; drop non-ASCII for display/storage.
        name = curr_dir['norm_name'].decode('ascii', errors='ignore')
        self._add_result('directory', name, result)
        if (user.has_access_to(SampleACL.WRITE)
                and config.get('save_streams', 0) == 1
                and 'data' in curr_dir):
            handle_file(name, curr_dir['data'], obj.source,
                        related_id=str(obj.id),
                        related_type=str(obj._meta['crits_type']),
                        campaign=obj.campaign,
                        source_method=self.name,
                        relationship=RelationshipTypes.CONTAINED_WITHIN,
                        user=self.current_task.user)
            stream_md5 = hashlib.md5(curr_dir['data']).hexdigest()
            added_files.append((name, stream_md5))
    for prop_list in oparser.properties:
        for prop in prop_list['property_list']:
            # BUGFIX: an unknown CLSID made .get() return None, and the
            # following prop_summary.get() raised AttributeError. Fall
            # back to an empty dict so unknown CLSIDs report 'Unknown'.
            prop_summary = oparser.summary_mapping.get(
                binascii.unhexlify(prop['clsid'])) or {}
            prop_name = prop_summary.get('name', 'Unknown')
            for item in prop['properties']['properties']:
                result = {
                    'name': item.get('name', 'Unknown'),
                    # Prefer the decoded date form when present.
                    'value': item.get('date', item['value']),
                    'result': item.get('result', ''),
                }
                self._add_result('doc_meta', prop_name, result)
    for f in added_files:
        self._add_result("file_added", f[0], {'md5': f[1]})
def _scan(self, context):
    """Parse context.data as an OLE office document and report results.

    Records the office header version, one 'directory' result per OLE
    stream (md5/size/timestamps), and one 'doc_meta' result per summary
    property. When config['save_streams'] == 1, raw stream bytes are
    attached as extracted child files.

    NOTE(review): assumes OfficeParser, self._error, self._add_result and
    self._add_file are provided by the enclosing service framework --
    confirm against the rest of the file.
    """
    oparser = OfficeParser(context.data)
    oparser.parse_office_doc()
    # A missing major version means the CDF header never parsed.
    if not oparser.office_header.get('maj_ver'):
        self._error("Could not parse file as an office document")
        return
    self._add_result(
        'office_header',
        '%d.%d' % (oparser.office_header.get('maj_ver'),
                   oparser.office_header.get('min_ver')))
    for curr_dir in oparser.directory:
        result = {
            'md5': curr_dir.get('md5', ''),
            'size': curr_dir.get('stream_size', 0),
            'mod_time': oparser.timestamp_string(curr_dir['modify_time'])[1],
            'create_time': oparser.timestamp_string(curr_dir['create_time'])[1],
        }
        self._add_result('directory', curr_dir['norm_name'], result)
        # Only attach raw stream bytes when explicitly configured.
        if self.config.get('save_streams', 0) == 1 and 'data' in curr_dir:
            self._add_file(curr_dir['data'],
                           curr_dir['norm_name'],
                           relationship="Extracted_From")
    for prop_list in oparser.properties:
        for prop in prop_list['property_list']:
            # BUGFIX: an unknown CLSID made .get() return None, and the
            # following prop_summary.get() raised AttributeError. Fall
            # back to an empty dict so unknown CLSIDs report 'Unknown'.
            prop_summary = oparser.summary_mapping.get(
                binascii.unhexlify(prop['clsid'])) or {}
            prop_name = prop_summary.get('name', 'Unknown')
            for item in prop['properties']['properties']:
                result = {
                    'name': item.get('name', 'Unknown'),
                    # Prefer the decoded date form when present.
                    'value': item.get('date', item['value']),
                    'result': item.get('result', ''),
                }
                self._add_result('doc_meta', prop_name, result)