def _print_meta_table(title, attribs, meta, outfile):
    """
    Print one table of OLE metadata properties (name/value) to outfile.

    Implements the "move similar code to a function" TODO: the same
    table-printing logic was previously duplicated for the Summary and
    DocumentSummary streams.

    :param title: str, heading printed above the table
    :param attribs: iterable of attribute names to read from meta
    :param meta: metadata object returned by OleFileIO.get_metadata()
    :param outfile: file-like object the table is written to
    """
    print(title)
    t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'],
                                outfile=outfile)
    for prop in attribs:
        value = getattr(meta, prop)
        # only display properties that are actually set in the file:
        if value is not None:
            # TODO: pretty printing for strings, dates, numbers
            # TODO: better unicode handling
            t.write_row([prop, value], colors=[None, 'yellow'])
    t.close()


def main():
    """
    Command-line entry point: open the OLE file given as the first argument
    and display the properties found in its SummaryInformation and
    DocumentSummaryInformation streams.

    Exits with the module docstring as a usage message when no filename is
    given on the command line.
    """
    try:
        ole = olefile.OleFileIO(sys.argv[1])
    except IndexError:
        # no filename argument: show usage (module docstring) and exit
        sys.exit(__doc__)
    # parse and display metadata:
    meta = ole.get_metadata()
    # console output with UTF8 encoding:
    # It looks like we do not need the UTF8 codec anymore, both for Python 2 and 3
    console_utf8 = sys.stdout  # codecs.getwriter('utf8')(sys.stdout)
    _print_meta_table('Properties from the SummaryInformation stream:',
                      meta.SUMMARY_ATTRIBS, meta, console_utf8)
    print('')
    _print_meta_table('Properties from the DocumentSummaryInformation stream:',
                      meta.DOCSUM_ATTRIBS, meta, console_utf8)
    ole.close()
def main():
    """
    Command-line entry point: display the FAT and MiniFAT sector maps of the
    OLE file given as the first argument.
    """
    # print banner with version
    print('olemap %s - http://decalage.info/python/oletools' % __version__)
    fname = sys.argv[1]
    ole = olefile.OleFileIO(fname)

    print('FAT:')
    table = tablestream.TableStream([8, 12, 8, 8],
                                    header_row=['Sector #', 'Type', 'Offset', 'Next #'])
    for sect in range(ole.nb_sect):
        next_sect = ole.fat[sect]
        sect_type = FAT_TYPES.get(next_sect, '<Data>')
        type_color = FAT_COLORS.get(next_sect, FAT_COLORS['default'])
        # compute offset based on sector size (the header occupies the first
        # sector-sized chunk, hence the +1):
        sect_offset = ole.sectorsize * (sect + 1)
        table.write_row(['%8X' % sect, sect_type,
                         '%08X' % sect_offset, '%8X' % next_sect],
                        colors=[None, type_color, None, None])

    print('')
    print('MiniFAT:')
    # load MiniFAT if it wasn't already done:
    ole.loadminifat()
    for sect, next_sect in enumerate(ole.minifat):
        sect_type = FAT_TYPES.get(next_sect, 'Data')
        print('%8X: %-12s offset=%08X next=%8X' % (sect, sect_type, 0, next_sect))
    ole.close()
def main():
    """
    Main function, called when mraptor is run from the command line.

    Parses the options, scans every input file (optionally inside zip
    archives) for VBA macros with MacroRaptor, prints one result row per
    file, and exits with an exit code reflecting the worst result found.
    """
    global log
    DEFAULT_LOG_LEVEL = "warning" # Default log level
    LOG_LEVELS = {
        'debug':    logging.DEBUG,
        'info':     logging.INFO,
        'warning':  logging.WARNING,
        'error':    logging.ERROR,
        'critical': logging.CRITICAL
    }
    usage = 'usage: %prog [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
        help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
        help='if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
        help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
    parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
        help="logging level debug/info/warning/error/critical (default=%default)")
    parser.add_option("-m", '--matches', action="store_true", dest="show_matches",
        help='Show matched strings.')
    # TODO: add logfile option

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print __doc__
        parser.print_help()
        print '\nAn exit code is returned based on the analysis result:'
        for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK,
                       Result_Error, Result_Suspicious):
            print ' - %d: %s' % (result.exit_code, result.name)
        sys.exit()

    # print banner with version
    print 'MacroRaptor %s - http://decalage.info/python/oletools' % __version__
    print 'This is work in progress, please report issues at %s' % URL_ISSUES

    logging.basicConfig(level=LOG_LEVELS[options.loglevel],
                        format='%(levelname)-8s %(message)s')
    # enable logging in the modules:
    log.setLevel(logging.NOTSET)

    t = tablestream.TableStream(style=tablestream.TableStyleSlim,
        header_row=['Result', 'Flags', 'Type', 'File'],
        column_width=[10, 5, 4, 56])

    # exitcode tracks the highest (worst) result seen across all files:
    exitcode = -1
    global_result = None
    # TODO: handle errors in xglob, to continue processing the next files
    for container, filename, data in xglob.iter_files(args,
            recursive=options.recursive,
            zip_password=options.zip_password,
            zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        full_name = '%s in %s' % (filename, container) if container else filename
        # try:
        #     # Open the file
        #     if data is None:
        #         data = open(filename, 'rb').read()
        # except:
        #     log.exception('Error when opening file %r' % full_name)
        #     continue
        if isinstance(data, Exception):
            # xglob yields an Exception object as data when a file could not
            # be read: report it as an error row instead of crashing
            result = Result_Error
            t.write_row([result.name, '', '', full_name],
                        colors=[result.color, None, None, None])
            t.write_row(['', '', '', str(data)],
                        colors=[None, None, None, result.color])
        else:
            filetype = '???'
            try:
                vba_parser = olevba.VBA_Parser(filename=filename, data=data,
                                               container=container)
                filetype = TYPE2TAG[vba_parser.type]
            except Exception as e:
                # log.error('Error when parsing VBA macros from file %r' % full_name)
                # TODO: distinguish actual errors from non-MSOffice files
                result = Result_Error
                t.write_row([result.name, '', filetype, full_name],
                            colors=[result.color, None, None, None])
                t.write_row(['', '', '', str(e)],
                            colors=[None, None, None, result.color])
                continue
            if vba_parser.detect_vba_macros():
                # concatenate all VBA modules so MacroRaptor scans them as one:
                vba_code_all_modules = ''
                try:
                    for (subfilename, stream_path, vba_filename,
                         vba_code) in vba_parser.extract_all_macros():
                        vba_code_all_modules += vba_code + '\n'
                except Exception as e:
                    # log.error('Error when parsing VBA macros from file %r' % full_name)
                    result = Result_Error
                    t.write_row([result.name, '', TYPE2TAG[vba_parser.type],
                                 full_name],
                                colors=[result.color, None, None, None])
                    t.write_row(['', '', '', str(e)],
                                colors=[None, None, None, result.color])
                    continue
                mraptor = MacroRaptor(vba_code_all_modules)
                mraptor.scan()
                if mraptor.suspicious:
                    result = Result_Suspicious
                else:
                    result = Result_MacroOK
                t.write_row([result.name, mraptor.get_flags(), filetype,
                             full_name],
                            colors=[result.color, None, None, None])
                if mraptor.matches and options.show_matches:
                    t.write_row(['', '', '', 'Matches: %r' % mraptor.matches])
            else:
                result = Result_NoMacro
                t.write_row([result.name, '', filetype, full_name],
                            colors=[result.color, None, None, None])
        # keep the worst result seen so far as the global exit status:
        if result.exit_code > exitcode:
            global_result = result
            exitcode = result.exit_code

    print ''
    print 'Flags: A=AutoExec, W=Write, X=Execute'
    print 'Exit code: %d - %s' % (exitcode, global_result.name)
    sys.exit(exitcode)
def process_file(container, filename, data, output_dir=None, save_object=False):
    """
    Analyze one RTF file: display a table of the embedded OLE objects and
    OLE Packages found in it, and optionally save the embedded objects to
    files.

    :param container: str, path of the container (e.g. zip) or None
    :param filename: str, name/path of the file to analyze
    :param data: bytes, file content, or None to read it from filename
    :param output_dir: str, directory where extracted objects are saved
        (created if missing); if None, files are saved next to the input file
    :param save_object: False to save nothing, 'all' to save every object,
        or an object index (str/int) to save a single object
    """
    if output_dir:
        if not os.path.isdir(output_dir):
            log.info('creating output directory %s' % output_dir)
            os.mkdir(output_dir)
        fname_prefix = os.path.join(output_dir, sanitize_filename(filename))
    else:
        base_dir = os.path.dirname(filename)
        sane_fname = sanitize_filename(filename)
        fname_prefix = os.path.join(base_dir, sane_fname)
    # TODO: option to extract objects to files (false by default)
    if data is None:
        # use a context manager so the file handle is always closed:
        with open(filename, 'rb') as f:
            data = f.read()
    print('=' * 79)
    print('File: %r - size: %d bytes' % (filename, len(data)))
    tstream = tablestream.TableStream(
        column_width=(3, 10, 31, 31),
        header_row=('id', 'index', 'OLE Object', 'OLE Package'),
        style=tablestream.TableStyleSlim)
    rtfp = RtfObjParser(data)
    rtfp.parse()
    # enumerate instead of rtfp.objects.index(rtfobj): same ids, without an
    # O(n) search per object
    for obj_id, rtfobj in enumerate(rtfp.objects):
        pkg_color = None
        if rtfobj.is_ole:
            ole_column = 'format_id: %d\n' % rtfobj.format_id
            ole_column += 'class name: %r\n' % rtfobj.class_name
            ole_column += 'data size: %d' % rtfobj.oledata_size
            if rtfobj.is_package:
                pkg_column = 'Filename: %r\n' % rtfobj.filename
                pkg_column += 'Source path: %r\n' % rtfobj.src_path
                pkg_column += 'Temp path = %r' % rtfobj.temp_path
                pkg_color = 'yellow'
                # check if the file extension is executable:
                _, ext = os.path.splitext(rtfobj.filename)
                log.debug('File extension: %r' % ext)
                if re_executable_extensions.match(ext):
                    pkg_color = 'red'
                    pkg_column += '\nEXECUTABLE FILE'
            else:
                pkg_column = 'Not an OLE Package'
        else:
            pkg_column = ''
            ole_column = 'Not a well-formed OLE object'
        tstream.write_row((obj_id, '%08Xh' % rtfobj.start, ole_column, pkg_column),
                          colors=(None, None, None, pkg_color))
        tstream.write_sep()
    if save_object:
        if save_object == 'all':
            objects = rtfp.objects
        else:
            try:
                i = int(save_object)
                objects = [rtfp.objects[i]]
            # catch only what int() and indexing can raise, instead of a bare
            # except that would also hide real bugs:
            except (ValueError, IndexError):
                log.error(
                    'The -s option must be followed by an object index or all, such as "-s 2" or "-s all"'
                )
                return
        for rtfobj in objects:
            # report the index within the full object list, so "-s 2" prints
            # "object #2" (not #0, the index within the selection):
            i = rtfp.objects.index(rtfobj)
            if rtfobj.is_package:
                print('Saving file from OLE Package in object #%d:' % i)
                print(' Filename = %r' % rtfobj.filename)
                print(' Source path = %r' % rtfobj.src_path)
                print(' Temp path = %r' % rtfobj.temp_path)
                if rtfobj.filename:
                    fname = '%s_%s' % (fname_prefix,
                                       sanitize_filename(rtfobj.filename))
                else:
                    fname = '%s_object_%08X.noname' % (fname_prefix, rtfobj.start)
                print(' saving to file %s' % fname)
                with open(fname, 'wb') as f:
                    f.write(rtfobj.olepkgdata)
            elif rtfobj.is_ole:
                print('Saving file embedded in OLE object #%d:' % i)
                print(' format_id = %d' % rtfobj.format_id)
                print(' class name = %r' % rtfobj.class_name)
                print(' data size = %d' % rtfobj.oledata_size)
                # set a file extension according to the class name:
                class_name = rtfobj.class_name.lower()
                if class_name.startswith(b'word'):
                    ext = 'doc'
                elif class_name.startswith(b'package'):
                    ext = 'package'
                else:
                    ext = 'bin'
                fname = '%s_object_%08X.%s' % (fname_prefix, rtfobj.start, ext)
                print(' saving to file %s' % fname)
                with open(fname, 'wb') as f:
                    f.write(rtfobj.oledata)
            else:
                print('Saving raw data in object #%d:' % i)
                fname = '%s_object_%08X.raw' % (fname_prefix, rtfobj.start)
                print(' saving object to file %s' % fname)
                with open(fname, 'wb') as f:
                    f.write(rtfobj.rawdata)
olefile.FATSECT: "cyan", olefile.DIFSECT: "blue", 'default': None, } # === MAIN =================================================================== if __name__ == '__main__': # print banner with version print 'olemap %s - http://decalage.info/python/oletools' % __version__ fname = sys.argv[1] ole = olefile.OleFileIO(fname) print 'FAT:' t = tablestream.TableStream( [8, 12, 8, 8], header_row=['Sector #', 'Type', 'Offset', 'Next #']) for i in xrange(ole.nb_sect): fat_value = ole.fat[i] fat_type = FAT_TYPES.get(fat_value, '<Data>') color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default']) # compute offset based on sector size: offset = ole.sectorsize * (i + 1) # print '%8X: %-12s offset=%08X next=%8X' % (i, fat_type, 0, fat_value) t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value], colors=[None, color_type, None, None]) print '' print 'MiniFAT:' # load MiniFAT if it wasn't already done:
try: ole = olefile.OleFileIO(sys.argv[1]) except IndexError: sys.exit(__doc__) # parse and display metadata: meta = ole.get_metadata() # console output with UTF8 encoding: console_utf8 = codecs.getwriter('utf8')(sys.stdout) # TODO: move similar code to a function print('Properties from the SummaryInformation stream:') t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'], outfile=console_utf8) for prop in meta.SUMMARY_ATTRIBS: value = getattr(meta, prop) if value is not None: # TODO: pretty printing for strings, dates, numbers # TODO: better unicode handling # print('- %s: %s' % (prop, value)) if isinstance(value, unicode): # encode to UTF8, avoiding errors value = value.encode('utf-8', errors='replace') else: value = str(value) t.write_row([prop, value], colors=[None, 'yellow']) t.close() print ''
def extract_hunt_info(filter_str, limit, print_only=False, date_from=None):
    """
    Fetch VirusTotal hunting notifications matching a filter and either print
    them as a table or return them as a dict.

    :param filter_str: str, VT hunting notification filter string
    :param limit: int, maximum number of notifications to fetch
    :param print_only: bool; if True, print a table and return None,
        otherwise return the collected info
    :param date_from: str 'YYYY-MM-DD' or None; notifications first seen
        before this date are skipped
    :return: None when print_only, else a dict
        {"count": <int>, "info": [<dict per hunt>, ...]}
    """
    logging.debug("Filter String: {} - Limit: {}".format(filter_str, limit))
    hunts = vt3.get_hunting_notification_files(filter=filter_str, limit=limit)
    hunt_infos = {"count": len(hunts), "info": []}
    color_scheme = None
    if print_only:
        tstream = tablestream.TableStream(
            column_width=(33, 4, 20, 100, 10),
            header_row=('md5', 'hits', 'rule', 'Details', 'first_seen'),
            style=tablestream.TableStyleSlim)
    # parse the cutoff date once, instead of once per hunt:
    fs_date_from = None
    if date_from:
        fs_date_from = datetime.datetime.strptime(date_from, '%Y-%m-%d')
    for hunt in hunts:
        # bind the nested dicts once instead of repeated .get() chains:
        hunt_attributes = hunt.get('attributes', {})
        context = hunt.get('context_attributes', {})
        sha256 = hunt_attributes.get('sha256')
        md5 = hunt_attributes.get('md5')
        first_seen_ts = hunt_attributes.get('first_submission_date')
        first_seen = datetime.datetime.fromtimestamp(first_seen_ts)
        if fs_date_from is not None and first_seen < fs_date_from:
            logging.debug("Skipping this sha256: {} first_seen: {}".format(
                sha256, first_seen))
            continue
        rule_name = context.get('rule_name')
        ruleset_name = context.get('ruleset_name')
        positives = hunt_attributes.get('last_analysis_stats', {}).get('malicious')
        meaningful_name = hunt_attributes.get('meaningful_name')
        names = hunt_attributes.get('names')
        tags = hunt_attributes.get('tags')
        file_type = hunt_attributes.get('type_description')
        match_in_subfile = context.get('match_in_subfile')
        times_submitted = hunt_attributes.get('times_submitted')
        unique_sources = hunt_attributes.get('unique_sources')
        malware_name_info = vt3.getClassification(
            {"data": {"attributes": hunt_attributes}})
        malware_name = "{}.{}".format(malware_name_info.get('category'),
                                      malware_name_info.get('family'))
        info = {
            "sha256": sha256,
            "md5": md5,
            "rule_name": rule_name,
            "ruleset_name": ruleset_name,
            "first_seen": str(first_seen),
            "positives": positives,
            "file_type": file_type,
            "meaningful_name": meaningful_name,
            "names": names,
            "tags": tags,
            "times_submitted": times_submitted,
            "unique_sources": unique_sources,
            "match_in_subfile": match_in_subfile,
            "malware_name": malware_name
        }
        if print_only:
            # highlight low-detection samples; guard against a missing
            # (None) detection count which would raise TypeError on '<':
            color_scheme = None
            if positives is not None:
                if positives < 10:
                    color_scheme = 'yellow'
                if positives < 5:
                    color_scheme = 'red'
            # 'tags'/'names' may be absent from the API response: fall back
            # to an empty list so join() does not crash on None
            details = "MALWARE_NAME: {}\nNAME: {}\nFILE_TYPE: {}\nTAGS: {}\nNAMES: {}".format(
                malware_name, meaningful_name, file_type,
                ','.join(tags or []), ','.join(names or []))
            tstream.write_row(
                (md5, positives, rule_name, details, first_seen),
                colors=(color_scheme, color_scheme, None, color_scheme, None))
            tstream.write_sep()
        else:
            hunt_infos["info"].append(info)
    if print_only:
        print("Count of files: {}".format(len(hunts)))
        return
    else:
        return hunt_infos
# t = prettytable.PrettyTable(('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size')) # t.align = 'l' # t.max_width['id'] = 4 # t.max_width['Status'] = 6 # t.max_width['Type'] = 10 # t.max_width['Name'] = 10 # t.max_width['Left'] = 5 # t.max_width['Right'] = 5 # t.max_width['Child'] = 5 # t.max_width['1st Sect'] = 8 # t.max_width['Size'] = 6 table = tablestream.TableStream(column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6], header_row=('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child', '1st Sect', 'Size'), style=tablestream.TableStyleSlim) # TODO: read ALL the actual directory entries from the directory stream, because olefile does not! # TODO: OR fix olefile! # TODO: olefile should store or give access to the raw direntry data on demand # TODO: oledir option to hexdump the raw direntries # TODO: olefile should be less picky about incorrect directory structures for id in xrange(len(ole.direntries)): d = ole.direntries[id] if d is None: # this direntry is not part of the tree: either unused or an orphan d = ole._load_direntry(id) #ole.direntries[id] # print('%03d: %s *** ORPHAN ***' % (id, d.name))