def main():
    try:
        ole = olefile.OleFileIO(sys.argv[1])
    except IndexError:
        sys.exit(__doc__)

    # parse and display metadata:
    meta = ole.get_metadata()

    # console output with UTF8 encoding:
    # It looks like we do not need the UTF8 codec anymore, both for Python 2 and 3
    console_utf8 = sys.stdout  # codecs.getwriter('utf8')(sys.stdout)

    # TODO: move similar code to a function
    print('Properties from the SummaryInformation stream:')
    t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'],
                                outfile=console_utf8)
    for prop in meta.SUMMARY_ATTRIBS:
        value = getattr(meta, prop)
        if value is not None:
            # TODO: pretty printing for strings, dates, numbers
            # TODO: better unicode handling
            t.write_row([prop, value], colors=[None, 'yellow'])
    t.close()
    print('')

    print('Properties from the DocumentSummaryInformation stream:')
    t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'],
                                outfile=console_utf8)
    for prop in meta.DOCSUM_ATTRIBS:
        value = getattr(meta, prop)
        if value is not None:
            # TODO: same pretty printing / unicode handling as above
            t.write_row([prop, value], colors=[None, 'yellow'])
    t.close()

    ole.close()
def process_ole(ole):
    # parse and display metadata:
    meta = ole.get_metadata()

    # console output with UTF8 encoding:
    ensure_stdout_handles_unicode()

    # TODO: move similar code to a function
    print('Properties from the SummaryInformation stream:')
    t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'])
    for prop in meta.SUMMARY_ATTRIBS:
        value = getattr(meta, prop)
        if value is not None:
            # TODO: pretty printing for strings, dates, numbers
            # TODO: better unicode handling
            t.write_row([prop, value], colors=[None, 'yellow'])
    t.close()
    print('')

    print('Properties from the DocumentSummaryInformation stream:')
    t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'])
    for prop in meta.DOCSUM_ATTRIBS:
        value = getattr(meta, prop)
        if value is not None:
            # TODO: same pretty printing / unicode handling as above
            t.write_row([prop, value], colors=[None, 'yellow'])
    t.close()
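A minimal, hypothetical driver for process_ole(), shown for illustration only (the real olemeta entry point also handles zip/xglob input); it assumes olefile is importable and that the caller owns the OleFileIO lifecycle:

import sys
import olefile

def process_one_file(path):
    ole = olefile.OleFileIO(path)  # parse the OLE container
    try:
        process_ole(ole)           # print both property tables
    finally:
        ole.close()                # always release the file handle

if __name__ == '__main__':
    for path in sys.argv[1:]:
        process_one_file(path)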
def show_fat(ole):
    print('FAT:')
    t = tablestream.TableStream([8, 12, 8, 8],
                                header_row=['Sector #', 'Type', 'Offset', 'Next #'])
    for i in range(len(ole.fat)):
        fat_value = ole.fat[i]
        fat_type = FAT_TYPES.get(fat_value, '<Data>')
        color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default'])
        # compute the offset based on the sector size: sector 0 starts right
        # after the header, so sector i is at offset (i+1) * sector size
        offset = ole.sectorsize * (i + 1)
        t.write_row(['%8X' % i, fat_type, '%08X' % offset, '%8X' % fat_value],
                    colors=[None, color_type, None, None])
    t.close()
    print('')
def show_minifat(ole):
    print('MiniFAT:')
    # load the MiniFAT if it wasn't already done:
    ole.loadminifat()
    t = tablestream.TableStream([8, 12, 8, 8],
                                header_row=['Sector #', 'Type', 'Offset', 'Next #'])
    for i in range(len(ole.minifat)):
        fat_value = ole.minifat[i]
        fat_type = FAT_TYPES.get(fat_value, 'Data')
        color_type = FAT_COLORS.get(fat_value, FAT_COLORS['default'])
        # TODO: compute the offset of each mini sector within the mini stream
        t.write_row(['%8X' % i, fat_type, 'N/A', '%8X' % fat_value],
                    colors=[None, color_type, None, None])
    t.close()
    print('')
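A short sketch of how these display functions might be driven, assuming FAT_TYPES and FAT_COLORS are defined as elsewhere in olemap ('sample.doc' is a hypothetical input file):

import olefile

ole = olefile.OleFileIO('sample.doc')
show_fat(ole)        # regular FAT chains, with sector offsets
show_minifat(ole)    # MiniFAT chains (offsets not computed yet)
ole.close()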
def main(): """Called when running this file as script. Shows all info on input file.""" # print banner with version print('oleid %s - http://decalage.info/oletools' % __version__) print('THIS IS WORK IN PROGRESS - Check updates regularly!') print('Please report any issue at ' 'https://github.com/decalage2/oletools/issues') print('') parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('input', type=str, nargs='*', metavar='FILE', help='Name of files to process') # parser.add_argument('-o', '--ole', action='store_true', dest='ole', # help='Parse an OLE file (e.g. Word, Excel) to look for ' # 'SWF in each stream') args = parser.parse_args() # Print help if no argurments are passed if len(args.input) == 0: parser.print_help() return log_helper.enable_logging() for filename in args.input: print('Filename:', filename) oleid = OleID(filename) indicators = oleid.check() #TODO: add description #TODO: highlight suspicious indicators table = tablestream.TableStream( [20, 20, 10, 26], header_row=['Indicator', 'Value', 'Risk', 'Description'], style=tablestream.TableStyleSlimSep) for indicator in indicators: if not (indicator.hide_if_false and not indicator.value): #print '%s: %s' % (indicator.name, indicator.value) color = risk_color.get(indicator.risk, None) table.write_row((indicator.name, indicator.value, indicator.risk, indicator.description), colors=(color, color, color, None)) table.close()
def main(): """ Main function, called when olevba is run from the command line """ global log DEFAULT_LOG_LEVEL = "warning" # Default log level LOG_LEVELS = { 'debug': logging.DEBUG, 'info': logging.INFO, 'warning': logging.WARNING, 'error': logging.ERROR, 'critical': logging.CRITICAL } usage = 'usage: %prog [options] <filename> [filename2 ...]' parser = optparse.OptionParser(usage=usage) parser.add_option("-r", action="store_true", dest="recursive", help='find files recursively in subdirectories.') parser.add_option( "-z", "--zip", dest='zip_password', type='str', default=None, help= 'if the file is a zip archive, open all files from it, using the provided password (requires Python 2.6+)' ) parser.add_option( "-f", "--zipfname", dest='zip_fname', type='str', default='*', help= 'if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)' ) parser.add_option( '-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL, help= "logging level debug/info/warning/error/critical (default=%default)") parser.add_option("-m", '--matches', action="store_true", dest="show_matches", help='Show matched strings.') # TODO: add logfile option (options, args) = parser.parse_args() # Print help if no arguments are passed if len(args) == 0: print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__) print('This is work in progress, please report issues at %s' % URL_ISSUES) print(__doc__) parser.print_help() print('\nAn exit code is returned based on the analysis result:') for result in (Result_NoMacro, Result_NotMSOffice, Result_MacroOK, Result_Error, Result_Suspicious): print(' - %d: %s' % (result.exit_code, result.name)) sys.exit() # print banner with version print('MacroRaptor %s - http://decalage.info/python/oletools' % __version__) print('This is work in progress, please report issues at %s' % URL_ISSUES) logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s') # enable logging in the modules: log.setLevel(logging.NOTSET) t = tablestream.TableStream(style=tablestream.TableStyleSlim, header_row=['Result', 'Flags', 'Type', 'File'], column_width=[10, 5, 4, 56]) exitcode = -1 global_result = None # TODO: handle errors in xglob, to continue processing the next files for container, filename, data in xglob.iter_files( args, recursive=options.recursive, zip_password=options.zip_password, zip_fname=options.zip_fname): # ignore directory names stored in zip files: if container and filename.endswith('/'): continue full_name = '%s in %s' % (filename, container) if container else filename # try: # # Open the file # if data is None: # data = open(filename, 'rb').read() # except: # log.exception('Error when opening file %r' % full_name) # continue if isinstance(data, Exception): result = Result_Error t.write_row([result.name, '', '', full_name], colors=[result.color, None, None, None]) t.write_row(['', '', '', str(data)], colors=[None, None, None, result.color]) else: filetype = '???' 
try: vba_parser = olevba.VBA_Parser(filename=filename, data=data, container=container) filetype = TYPE2TAG[vba_parser.type] except Exception as e: # log.error('Error when parsing VBA macros from file %r' % full_name) # TODO: distinguish actual errors from non-MSOffice files result = Result_Error t.write_row([result.name, '', filetype, full_name], colors=[result.color, None, None, None]) t.write_row(['', '', '', str(e)], colors=[None, None, None, result.color]) continue if vba_parser.detect_vba_macros(): vba_code_all_modules = '' try: for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_all_macros(): vba_code_all_modules += vba_code.decode( 'utf-8', 'replace') + '\n' except Exception as e: # log.error('Error when parsing VBA macros from file %r' % full_name) result = Result_Error t.write_row([ result.name, '', TYPE2TAG[vba_parser.type], full_name ], colors=[result.color, None, None, None]) t.write_row(['', '', '', str(e)], colors=[None, None, None, result.color]) continue mraptor = MacroRaptor(vba_code_all_modules) mraptor.scan() if mraptor.suspicious: result = Result_Suspicious else: result = Result_MacroOK t.write_row( [result.name, mraptor.get_flags(), filetype, full_name], colors=[result.color, None, None, None]) if mraptor.matches and options.show_matches: t.write_row(['', '', '', 'Matches: %r' % mraptor.matches]) else: result = Result_NoMacro t.write_row([result.name, '', filetype, full_name], colors=[result.color, None, None, None]) if result.exit_code > exitcode: global_result = result exitcode = result.exit_code print('') print('Flags: A=AutoExec, W=Write, X=Execute') print('Exit code: %d - %s' % (exitcode, global_result.name)) sys.exit(exitcode)
def show_header(ole, extra_data=False):
    print("OLE HEADER:")
    t = tablestream.TableStream(
        [24, 16, 79 - (4 + 24 + 16)],
        header_row=['Attribute', 'Value', 'Description'])
    t.write_row(['OLE Signature (hex)', binascii.b2a_hex(ole.header_signature).upper(),
                 'Should be D0CF11E0A1B11AE1'])
    t.write_row(['Header CLSID (hex)', binascii.b2a_hex(ole.header_clsid).upper(),
                 'Should be 0'])
    t.write_row(['Minor Version', '%04X' % ole.minor_version, 'Should be 003E'])
    t.write_row(['Major Version', '%04X' % ole.dll_version, 'Should be 3 or 4'])
    t.write_row(['Byte Order', '%04X' % ole.byte_order, 'Should be FFFE (little endian)'])
    t.write_row(['Sector Shift', '%04X' % ole.sector_shift, 'Should be 0009 or 000C'])
    t.write_row(['# of Dir Sectors', ole.num_dir_sectors,
                 'Should be 0 if major version is 3'])
    t.write_row(['# of FAT Sectors', ole.num_fat_sectors, ''])
    t.write_row(['First Dir Sector', '%08X' % ole.first_dir_sector, '(hex)'])
    t.write_row(['Transaction Sig Number', ole.transaction_signature_number,
                 'Should be 0'])
    t.write_row(['MiniStream cutoff', ole.mini_stream_cutoff_size,
                 'Should be 4096 bytes'])
    t.write_row(['First MiniFAT Sector', '%08X' % ole.first_mini_fat_sector, '(hex)'])
    t.write_row(['# of MiniFAT Sectors', ole.num_mini_fat_sectors, ''])
    t.write_row(['First DIFAT Sector', '%08X' % ole.first_difat_sector, '(hex)'])
    t.write_row(['# of DIFAT Sectors', ole.num_difat_sectors, ''])
    t.close()
    print('')

    print("CALCULATED ATTRIBUTES:")
    t = tablestream.TableStream(
        [24, 16, 79 - (4 + 24 + 16)],
        header_row=['Attribute', 'Value', 'Description'])
    t.write_row(['Sector Size (bytes)', ole.sector_size,
                 'Should be 512 or 4096 bytes'])
    t.write_row(['Actual File Size (bytes)', ole._filesize,
                 'Real file size on disk'])
    # each FAT entry is 4 bytes, so one FAT sector covers sector_size/4 sectors
    # (integer division, otherwise this becomes a float on Python 3):
    num_sectors_per_fat_sector = ole.sector_size // 4
    num_sectors_in_fat = num_sectors_per_fat_sector * ole.num_fat_sectors
    # Need to add one sector for the header:
    max_filesize_fat = (num_sectors_in_fat + 1) * ole.sector_size
    t.write_row(['Max File Size in FAT', max_filesize_fat,
                 'Max file size covered by FAT'])
    if ole._filesize > max_filesize_fat:
        extra_size_beyond_fat = ole._filesize - max_filesize_fat
        color = 'red'
    else:
        extra_size_beyond_fat = 0
        color = None
    t.write_row(['Extra data beyond FAT', extra_size_beyond_fat,
                 'Only if file is larger than FAT coverage'],
                colors=[color, color, color])
    # Find the last used sector, scanning the FAT backwards.
    # By default, it's the last sector in the FAT:
    last_used_sector = len(ole.fat) - 1
    for i in range(len(ole.fat) - 1, 0, -1):
        last_used_sector = i
        if ole.fat[i] != olefile.FREESECT:
            break
    # Extra data would start at the next sector (+1 for the header, +1 for next):
    offset_extra_data = ole.sectorsize * (last_used_sector + 2)
    t.write_row(['Extra data offset in FAT', '%08X' % offset_extra_data,
                 'Offset of the 1st free sector at end of FAT'])
    extra_data_size = ole._filesize - offset_extra_data
    color = 'red' if extra_data_size > 0 else None
    t.write_row(['Extra data size', extra_data_size,
                 'Size of data starting at the 1st free sector at end of FAT'],
                colors=[color, color, color])
    t.close()
    print('')

    if extra_data:
        # hex dump of the extra data at end of file:
        print('HEX DUMP OF EXTRA DATA:\n')
        if extra_data_size <= 0:
            print('No extra data found at end of file.')
        else:
            ole.fp.seek(offset_extra_data)
            # read until end of file:
            exdata = ole.fp.read()
            assert len(exdata) == extra_data_size
            print(hexdump3(exdata, length=16, startindex=offset_extra_data))
        print('')
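To make the FAT-coverage arithmetic above concrete: each FAT entry is 4 bytes, so a 512-byte FAT sector addresses 128 data sectors. A quick check with hypothetical values:

sector_size = 512
num_fat_sectors = 2
entries_per_fat_sector = sector_size // 4                      # 128 entries per FAT sector
num_sectors_in_fat = entries_per_fat_sector * num_fat_sectors  # 256 addressable data sectors
max_filesize_fat = (num_sectors_in_fat + 1) * sector_size      # +1 sector for the header
print(max_filesize_fat)  # 131584 bytes: anything past this offset is outside the FAT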
def process_file(container, filename, data, output_dir=None, save_object=False):
    if output_dir:
        if not os.path.isdir(output_dir):
            log.info('creating output directory %s' % output_dir)
            os.mkdir(output_dir)
        fname_prefix = os.path.join(output_dir, sanitize_filename(filename))
    else:
        base_dir = os.path.dirname(filename)
        sane_fname = sanitize_filename(filename)
        fname_prefix = os.path.join(base_dir, sane_fname)
    # TODO: option to extract objects to files (false by default)
    if data is None:
        data = open(filename, 'rb').read()
    print('=' * 79)
    print('File: %r - size: %d bytes' % (filename, len(data)))
    tstream = tablestream.TableStream(
        column_width=(3, 10, 31, 31),
        header_row=('id', 'index', 'OLE Object', 'OLE Package'),
        style=tablestream.TableStyleSlim)
    rtfp = RtfObjParser(data)
    rtfp.parse()
    for rtfobj in rtfp.objects:
        ole_color = None
        pkg_color = None
        if rtfobj.is_ole:
            ole_column = 'format_id: %d ' % rtfobj.format_id
            if rtfobj.format_id == oleobj.OleObject.TYPE_EMBEDDED:
                ole_column += '(Embedded)\n'
            elif rtfobj.format_id == oleobj.OleObject.TYPE_LINKED:
                ole_column += '(Linked)\n'
            else:
                ole_column += '(Unknown)\n'
            ole_column += 'class name: %r\n' % rtfobj.class_name
            # if the object is linked and not embedded, oledata_size is None:
            if rtfobj.oledata_size is None:
                ole_column += 'data size: N/A'
            else:
                ole_column += 'data size: %d' % rtfobj.oledata_size
            if rtfobj.is_package:
                pkg_column = 'Filename: %r\n' % rtfobj.filename
                pkg_column += 'Source path: %r\n' % rtfobj.src_path
                pkg_column += 'Temp path: %r' % rtfobj.temp_path
                pkg_color = 'yellow'
                # check if the file extension is executable:
                _, ext = os.path.splitext(rtfobj.filename)
                log.debug('File extension: %r' % ext)
                if re_executable_extensions.match(ext):
                    pkg_color = 'red'
                    pkg_column += '\nEXECUTABLE FILE'
            else:
                pkg_column = 'Not an OLE Package'
            # Detect the OLE2Link exploit
            # http://www.kb.cert.org/vuls/id/921560
            # (class_name is bytes, so compare against a bytes literal):
            if rtfobj.class_name == b'OLE2Link':
                ole_color = 'red'
                ole_column += ('\nPossibly an exploit for the OLE2Link vulnerability '
                               '(VU#921560, CVE-2017-0199)')
        else:
            pkg_column = ''
            ole_column = 'Not a well-formed OLE object'
        tstream.write_row((
            rtfp.objects.index(rtfobj),
            '%08Xh' % rtfobj.start,
            ole_column,
            pkg_column),
            colors=(None, None, ole_color, pkg_color))
        tstream.write_sep()
    if save_object:
        if save_object == 'all':
            objects = rtfp.objects
        else:
            try:
                i = int(save_object)
                objects = [rtfp.objects[i]]
            except (ValueError, IndexError):
                log.error('The -s option must be followed by an object index '
                          'or "all", such as "-s 2" or "-s all"')
                return
        for rtfobj in objects:
            i = objects.index(rtfobj)
            if rtfobj.is_package:
                print('Saving file from OLE Package in object #%d:' % i)
                print(' Filename = %r' % rtfobj.filename)
                print(' Source path = %r' % rtfobj.src_path)
                print(' Temp path = %r' % rtfobj.temp_path)
                if rtfobj.filename:
                    fname = '%s_%s' % (fname_prefix, sanitize_filename(rtfobj.filename))
                else:
                    fname = '%s_object_%08X.noname' % (fname_prefix, rtfobj.start)
                print(' saving to file %s' % fname)
                open(fname, 'wb').write(rtfobj.olepkgdata)
            # when format_id is TYPE_LINKED, oledata_size is None:
            elif rtfobj.is_ole and rtfobj.oledata_size is not None:
                print('Saving file embedded in OLE object #%d:' % i)
                print(' format_id = %d' % rtfobj.format_id)
                print(' class name = %r' % rtfobj.class_name)
                print(' data size = %d' % rtfobj.oledata_size)
                # set a file extension according to the class name:
                class_name = rtfobj.class_name.lower()
                if class_name.startswith(b'word'):
                    ext = 'doc'
                elif class_name.startswith(b'package'):
                    ext = 'package'
                else:
                    ext = 'bin'
                fname = '%s_object_%08X.%s' % (fname_prefix, rtfobj.start, ext)
                print(' saving to file %s' % fname)
                open(fname, 'wb').write(rtfobj.oledata)
            else:
                print('Saving raw data in object #%d:' % i)
                fname = '%s_object_%08X.raw' % (fname_prefix, rtfobj.start)
                print(' saving object to file %s' % fname)
                open(fname, 'wb').write(rtfobj.rawdata)
def main():
    usage = 'usage: oledir [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-r", action="store_true", dest="recursive",
                      help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
                      help='if the file is a zip archive, open all files from it, '
                           'using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
                      help='if the file is a zip archive, file(s) to be opened within '
                           'the zip. Wildcards * and ? are supported. (default:*)')
    # TODO: add loglevel option (see other tools)
    # TODO: add logfile option

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0:
        print(BANNER)
        print(__doc__)
        parser.print_help()
        sys.exit()

    # print banner with version
    print(BANNER)

    if os.name == 'nt':
        colorclass.Windows.enable(auto_colors=True, reset_atexit=True)

    for container, filename, data in xglob.iter_files(
            args, recursive=options.recursive,
            zip_password=options.zip_password, zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        full_name = '%s in %s' % (filename, container) if container else filename
        print('OLE directory entries in file %s:' % full_name)
        if data is not None:
            # data extracted from zip file
            ole = olefile.OleFileIO(data)
        else:
            # normal filename
            ole = olefile.OleFileIO(filename)
        table = tablestream.TableStream(
            column_width=[4, 6, 7, 22, 5, 5, 5, 8, 6],
            header_row=('id', 'Status', 'Type', 'Name', 'Left', 'Right', 'Child',
                        '1st Sect', 'Size'),
            style=tablestream.TableStyleSlim)
        # TODO: read ALL the actual directory entries from the directory stream,
        #       because olefile does not! (or fix olefile)
        # TODO: olefile should store or give access to the raw direntry data on demand
        # TODO: oledir option to hexdump the raw direntries
        # TODO: olefile should be less picky about incorrect directory structures
        for id in range(len(ole.direntries)):
            d = ole.direntries[id]
            if d is None:
                # this direntry is not part of the tree: either unused or an orphan
                d = ole._load_direntry(id)
                if d.entry_type == olefile.STGTY_EMPTY:
                    status = 'unused'
                else:
                    status = 'ORPHAN'
            else:
                status = '<Used>'
            if d.name.startswith('\x00'):
                # this may happen with unused entries: the name is filled with zeroes
                name = ''
            else:
                # handle non-printable chars using repr(), remove the quotes:
                name = repr(d.name)[1:-1]
            left = sid_display(d.sid_left)
            right = sid_display(d.sid_right)
            child = sid_display(d.sid_child)
            entry_type = STORAGE_NAMES.get(d.entry_type, 'Unknown')
            etype_color = STORAGE_COLORS.get(d.entry_type, 'red')
            status_color = STATUS_COLORS.get(status, 'red')
            table.write_row((id, status, entry_type, name, left, right, child,
                             '%X' % d.isectStart, d.size),
                            colors=(None, status_color, etype_color,
                                    None, None, None, None, None, None))
        ole.close()
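sid_display() is used above but not shown in this excerpt. A plausible implementation (an assumption for illustration, not necessarily the actual oledir helper) maps olefile's NOSTREAM marker to a dash so unused sibling/child links stay readable:

def sid_display(sid):
    # assumption: olefile.NOSTREAM (0xFFFFFFFF) means "no sibling/child entry"
    if sid == olefile.NOSTREAM:
        return '-'
    return sid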
def process_output(meta, output):
    # console output with UTF8 encoding:
    ensure_stdout_handles_unicode()
    # TODO: move similar code to a function
    if output == 'table':
        print('Properties from the SummaryInformation stream:')
        t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'])
        for prop in meta.SUMMARY_ATTRIBS:
            value = getattr(meta, prop)
            if value is not None:
                # TODO: pretty printing for strings, dates, numbers
                # TODO: better unicode handling
                t.write_row([prop, value], colors=[None, 'yellow'])
        t.close()
        print('')
        print('Properties from the DocumentSummaryInformation stream:')
        t = tablestream.TableStream([21, 30], header_row=['Property', 'Value'])
        for prop in meta.DOCSUM_ATTRIBS:
            value = getattr(meta, prop)
            if value is not None:
                # TODO: same pretty printing / unicode handling as above
                t.write_row([prop, value], colors=[None, 'yellow'])
        t.close()
    else:
        # initialize a dictionary with a key for each type of attribute,
        # then fill in props/values like the table output would:
        output_dict = {"SUMMARY_ATTRIBS": {}, "DOCSUM_ATTRIBS": {}}
        for prop in meta.SUMMARY_ATTRIBS:
            value = getattr(meta, prop)
            if value:
                output_dict['SUMMARY_ATTRIBS'][prop] = clean_output(value)
            else:
                # pass for now; when logging is enabled, log as a warning:
                # logger.warning("Unable to log {}: {}".format(prop, value))
                pass
        for prop in meta.DOCSUM_ATTRIBS:
            value = getattr(meta, prop)
            if value:
                output_dict['DOCSUM_ATTRIBS'][prop] = clean_output(value)
            else:
                # pass for now; when logging is enabled, log as a warning:
                # logger.warning("Unable to log {}: {}".format(prop, value))
                pass
        return output_dict
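A hypothetical usage sketch for process_output(): any output mode other than 'table' returns a plain dict, which is convenient for JSON serialization (clean_output() is assumed to stringify dates and other non-JSON types):

import json
import olefile

ole = olefile.OleFileIO('sample.doc')   # hypothetical input file
meta = ole.get_metadata()
process_output(meta, 'table')           # prints the two property tables
props = process_output(meta, 'json')    # {'SUMMARY_ATTRIBS': {...}, 'DOCSUM_ATTRIBS': {...}}
print(json.dumps(props, default=str))   # default=str as a safety net for datetimes
ole.close()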
def process_file(container, filename, data, output_dir=None, save_object=False):
    if output_dir:
        if not os.path.isdir(output_dir):
            log.info('creating output directory %s' % output_dir)
            os.mkdir(output_dir)
        fname_prefix = os.path.join(output_dir, sanitize_filename(filename))
    else:
        base_dir = os.path.dirname(filename)
        sane_fname = sanitize_filename(filename)
        fname_prefix = os.path.join(base_dir, sane_fname)
    # TODO: option to extract objects to files (false by default)
    if data is None:
        data = open(filename, 'rb').read()
    print('=' * 79)
    print('File: %r - size: %d bytes' % (filename, len(data)))
    tstream = tablestream.TableStream(
        column_width=(3, 10, 63),
        header_row=('id', 'index', 'OLE Object'),
        style=tablestream.TableStyleSlim)
    rtfp = RtfObjParser(data)
    rtfp.parse()
    for rtfobj in rtfp.objects:
        ole_color = None
        if rtfobj.is_ole:
            ole_column = 'format_id: %d ' % rtfobj.format_id
            if rtfobj.format_id == oleobj.OleObject.TYPE_EMBEDDED:
                ole_column += '(Embedded)\n'
            elif rtfobj.format_id == oleobj.OleObject.TYPE_LINKED:
                ole_column += '(Linked)\n'
            else:
                ole_column += '(Unknown)\n'
            ole_column += 'class name: %r\n' % rtfobj.class_name
            # if the object is linked and not embedded, oledata_size is None:
            if rtfobj.oledata_size is None:
                ole_column += 'data size: N/A'
            else:
                ole_column += 'data size: %d' % rtfobj.oledata_size
            if rtfobj.is_package:
                ole_column += '\nOLE Package object:'
                ole_column += '\nFilename: %r' % rtfobj.filename
                ole_column += '\nSource path: %r' % rtfobj.src_path
                ole_column += '\nTemp path: %r' % rtfobj.temp_path
                ole_column += '\nMD5: %r' % rtfobj.olepkgdata_md5
                ole_color = 'yellow'
                # a mismatch between the temp path extension and the filename
                # extension is a common masquerading trick:
                _, temp_ext = os.path.splitext(rtfobj.temp_path)
                log.debug('Temp path extension: %r' % temp_ext)
                _, file_ext = os.path.splitext(rtfobj.filename)
                log.debug('File extension: %r' % file_ext)
                if temp_ext != file_ext:
                    ole_column += "\nMODIFIED FILE EXTENSION"
                # check if either file extension is executable:
                if (re_executable_extensions.match(temp_ext)
                        or re_executable_extensions.match(file_ext)):
                    ole_color = 'red'
                    ole_column += '\nEXECUTABLE FILE'
            else:
                ole_column += '\nMD5: %r' % rtfobj.oledata_md5
            if rtfobj.clsid is not None:
                ole_column += '\nCLSID: %s' % rtfobj.clsid
                ole_column += '\n%s' % rtfobj.clsid_desc
                if 'CVE' in rtfobj.clsid_desc:
                    ole_color = 'red'
            # Detect the OLE2Link exploit
            # http://www.kb.cert.org/vuls/id/921560
            if rtfobj.class_name == b'OLE2Link':
                ole_color = 'red'
                ole_column += ('\nPossibly an exploit for the OLE2Link vulnerability '
                               '(VU#921560, CVE-2017-0199)\n')
                # extract URLs from the hex-encoded UTF-16LE payload
                # https://bitbucket.org/snippets/Alexander_Hanel/7Adpp
                # (data is bytes, so the patterns must be bytes too):
                found_list = re.findall(b'[a-fA-F0-9\x0D\x0A]{128,}', data)
                urls = []
                for item in found_list:
                    try:
                        # .decode('hex') was Python 2 only; binascii.unhexlify
                        # works on both (requires "import binascii"):
                        temp = binascii.unhexlify(item.replace(b'\x0D\x0A', b''))
                    except binascii.Error:
                        continue
                    pat = re.compile(b'(?:[\x20-\x7E][\x00]){3,}')
                    words = [w.decode('utf-16le') for w in pat.findall(temp)]
                    for w in words:
                        if 'http' in w:
                            urls.append(w)
                urls = sorted(set(urls))
                if urls:
                    ole_column += 'URL extracted: ' + ', '.join(urls)
            # Detect the Equation Editor exploit
            # https://www.kb.cert.org/vuls/id/421280/
            elif rtfobj.class_name.lower() == b'equation.3':
                ole_color = 'red'
                ole_column += ('\nPossibly an exploit for the Equation Editor '
                               'vulnerability (VU#421280, CVE-2017-11882)')
        else:
            ole_column = 'Not a well-formed OLE object'
        tstream.write_row((
            rtfp.objects.index(rtfobj),
            '%08Xh' % rtfobj.start,
            ole_column),
            colors=(None, None, ole_color))
        tstream.write_sep()
    if save_object:
        if save_object == 'all':
            objects = rtfp.objects
        else:
            try:
                i = int(save_object)
                objects = [rtfp.objects[i]]
            except (ValueError, IndexError):
                log.error('The -s option must be followed by an object index '
                          'or "all", such as "-s 2" or "-s all"')
                return
        for rtfobj in objects:
            i = objects.index(rtfobj)
            if rtfobj.is_package:
                print('Saving file from OLE Package in object #%d:' % i)
                print(' Filename = %r' % rtfobj.filename)
                print(' Source path = %r' % rtfobj.src_path)
                print(' Temp path = %r' % rtfobj.temp_path)
                if rtfobj.filename:
                    fname = '%s_%s' % (fname_prefix, sanitize_filename(rtfobj.filename))
                else:
                    fname = '%s_object_%08X.noname' % (fname_prefix, rtfobj.start)
                print(' saving to file %s' % fname)
                print(' md5 %s' % rtfobj.olepkgdata_md5)
                open(fname, 'wb').write(rtfobj.olepkgdata)
            # when format_id is TYPE_LINKED, oledata_size is None:
            elif rtfobj.is_ole and rtfobj.oledata_size is not None:
                print('Saving file embedded in OLE object #%d:' % i)
                print(' format_id = %d' % rtfobj.format_id)
                print(' class name = %r' % rtfobj.class_name)
                print(' data size = %d' % rtfobj.oledata_size)
                # set a file extension according to the class name:
                class_name = rtfobj.class_name.lower()
                if class_name.startswith(b'word'):
                    ext = 'doc'
                elif class_name.startswith(b'package'):
                    ext = 'package'
                else:
                    ext = 'bin'
                fname = '%s_object_%08X.%s' % (fname_prefix, rtfobj.start, ext)
                print(' saving to file %s' % fname)
                print(' md5 %s' % rtfobj.oledata_md5)
                open(fname, 'wb').write(rtfobj.oledata)
            else:
                print('Saving raw data in object #%d:' % i)
                fname = '%s_object_%08X.raw' % (fname_prefix, rtfobj.start)
                print(' saving object to file %s' % fname)
                print(' md5 %s' % rtfobj.rawdata_md5)
                open(fname, 'wb').write(rtfobj.rawdata)
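The OLE2Link URL extraction above relies on the payload being hex-encoded (with CRLF line breaks) UTF-16LE text. A self-contained round-trip demo of that decoding trick, using a made-up URL:

import binascii
import re

# encode a URL the way it can appear inside an OLE2Link object:
url = 'http://example.com/payload'
hexdata = binascii.hexlify(url.encode('utf-16le'))

# decode it back, mirroring the detection code:
temp = binascii.unhexlify(hexdata.replace(b'\x0D\x0A', b''))
pat = re.compile(b'(?:[\x20-\x7E][\x00]){3,}')    # runs of printable UTF-16LE chars
words = [w.decode('utf-16le') for w in pat.findall(temp)]
print([w for w in words if 'http' in w])          # ['http://example.com/payload']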