def testOutput(self):
  with TempDirectory() as dirname:
    dump_file = os.path.join(dirname, 'plaso.db')

    # Copy events to pstorage dump.
    with storage.StorageFile(self.test_filename, read_only=True) as store:
      formatter_cls = output.GetOutputFormatter('Pstorage')
      formatter = formatter_cls(store, dump_file)
      with output.EventBuffer(
          formatter, check_dedups=False) as output_buffer:
        event_object = formatter.FetchEntry()
        while event_object:
          output_buffer.Append(event_object)
          event_object = formatter.FetchEntry()

    # Make sure original and dump have the same events.
    original = storage.StorageFile(self.test_filename, read_only=True)
    dump = storage.StorageFile(dump_file, read_only=True)
    event_object_original = original.GetSortedEntry()
    event_object_dump = dump.GetSortedEntry()

    original_list = []
    dump_list = []

    while event_object_original:
      original_list.append(event_object_original.EqualityString())
      dump_list.append(event_object_dump.EqualityString())
      event_object_original = original.GetSortedEntry()
      event_object_dump = dump.GetSortedEntry()

    self.assertFalse(event_object_dump)

    for original_str, dump_str in zip(
        sorted(original_list), sorted(dump_list)):
      self.assertEqual(original_str, dump_str)

def testFlush(self):
  """Tests that the buffer is emptied and its content sent to the output."""
  with tempfile.NamedTemporaryFile() as fh:

    def CheckBufferLength(event_buffer, expected):
      if not event_buffer.check_dedups:
        expected = 0
      # pylint: disable=protected-access
      self.assertEqual(len(event_buffer._buffer_dict), expected)

    formatter = TestOutput(fh)
    event_buffer = output.EventBuffer(formatter, False)

    event_buffer.Append(DummyEvent(123456, u'Now is now'))
    CheckBufferLength(event_buffer, 1)

    # Add three events.
    event_buffer.Append(DummyEvent(123456, u'OMG I AM DIFFERENT'))
    event_buffer.Append(DummyEvent(123456, u'Now is now'))
    event_buffer.Append(DummyEvent(123456, u'Now is now'))
    CheckBufferLength(event_buffer, 2)

    event_buffer.Flush()
    CheckBufferLength(event_buffer, 0)

    event_buffer.Append(DummyEvent(123456, u'Now is now'))
    event_buffer.Append(DummyEvent(123456, u'Now is now'))
    event_buffer.Append(DummyEvent(123456, u'Different again :)'))
    CheckBufferLength(event_buffer, 2)
    event_buffer.Append(DummyEvent(123457, u'Now is different'))
    CheckBufferLength(event_buffer, 1)

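# A minimal illustrative sketch, assuming the TestOutput and DummyEvent
# helpers defined in this module: with de-duplication enabled, events that
# share a timestamp and equality string collapse into one buffered entry,
# and an event with a later timestamp flushes the previous group. It mirrors
# the expectations exercised by testFlush above.
def testDedupSketch(self):
  """Illustrates de-duplication of identical events in the buffer."""
  with tempfile.NamedTemporaryFile() as fh:
    formatter = TestOutput(fh)
    event_buffer = output.EventBuffer(formatter, check_dedups=True)

    # Two identical events plus one distinct event at the same timestamp
    # should leave two entries in the internal buffer.
    event_buffer.Append(DummyEvent(123456, u'Now is now'))
    event_buffer.Append(DummyEvent(123456, u'Now is now'))
    event_buffer.Append(DummyEvent(123456, u'Something else'))
    # pylint: disable=protected-access
    self.assertEqual(len(event_buffer._buffer_dict), 2)

    # An event with a later timestamp flushes the previous group.
    event_buffer.Append(DummyEvent(123457, u'Later event'))
    self.assertEqual(len(event_buffer._buffer_dict), 1)
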
def NoDuplicates(self, dump_filename):
  """Saves a de-duplicated Plaso storage file.

  This goes through the Plaso storage file and saves a new dump with
  duplicates removed. The new file is named '.[dump_hash]_dedup' and its
  filename is returned at the end of the function.

  Note that if this function is interrupted, incomplete results are
  recorded and that file must be deleted before subsequent runs, otherwise
  the incomplete data will be reused.

  Args:
    dump_filename: the filename of the Plaso storage file to be de-duplicated.

  Returns:
    The filename of the de-duplicated storage file.
  """
  sys.stdout.write(u'Removing duplicates...\n')
  sys.stdout.flush()

  # Whether these incremental files should remain a feature or not is still
  # being decided. They're just here for now to make development faster.
  nodup_filename = '.{}_dedup'.format(self.plaso_hash)
  if os.path.isfile(nodup_filename):
    sys.stdout.write(u'Using previously calculated results.\n')
  else:
    with SetupStorage(dump_filename) as store:
      total_events = store.GetNumberOfEvents()
      events_per_dot = operator.floordiv(total_events, 80)
      formatter_cls = output_lib.GetOutputFormatter('Pstorage')
      store_dedup = open(nodup_filename, 'wb')
      formatter = formatter_cls(store, store_dedup)
      with output_lib.EventBuffer(
          formatter, check_dedups=True) as output_buffer:
        event_object = formatter.FetchEntry()
        counter = 0
        while event_object:
          output_buffer.Append(event_object)
          counter += 1
          if counter % events_per_dot == 0:
            sys.stdout.write(u'.')
            sys.stdout.flush()
          event_object = formatter.FetchEntry()
    sys.stdout.write(u'\n')

  return nodup_filename

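# A small convenience sketch (an assumption, not part of the original
# interface): the NoDuplicates() docstring warns that an interrupted run
# leaves an incomplete '.[dump_hash]_dedup' file behind that must be deleted
# before the next run. A helper along these lines could do that cleanup; it
# only relies on the 'os' module and the 'self.plaso_hash' value used above.
def RemoveIncrementalDedupFile(self):
  """Removes a stale incremental de-duplication file, if one exists."""
  nodup_filename = '.{}_dedup'.format(self.plaso_hash)
  if os.path.isfile(nodup_filename):
    os.remove(nodup_filename)
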
def ParseStorage(self, options):
  """Opens a storage file and parses through it.

  Args:
    options: the command line arguments (instance of argparse.Namespace).

  Returns:
    A counter.

  Raises:
    RuntimeError: if a non-recoverable situation is encountered.
  """
  counter = None

  if options.slice:
    if options.timezone == 'UTC':
      zone = pytz.utc
    else:
      zone = pytz.timezone(options.timezone)

    timestamp = timelib.Timestamp.FromTimeString(options.slice, timezone=zone)

    # Convert number of minutes to microseconds.
    range_operator = self._slice_size * 60 * 1000000

    # Set the time range.
    pfilter.TimeRangeCache.SetLowerTimestamp(timestamp - range_operator)
    pfilter.TimeRangeCache.SetUpperTimestamp(timestamp + range_operator)

  if options.analysis_plugins:
    read_only = False
  else:
    read_only = True

  try:
    storage_file = self.OpenStorageFile(read_only=read_only)
  except IOError as exception:
    raise RuntimeError(
        u'Unable to open storage file: {0:s} with error: {1:s}.'.format(
            self._storage_file_path, exception))

  with storage_file:
    storage_file.SetStoreLimit(self._filter_object)

    try:
      output_module = self._output_module_class(
          storage_file, self._output_stream, options, self._filter_object)
    except IOError as exception:
      raise RuntimeError(
          u'Unable to create output module with error: {0:s}'.format(
              exception))

    if not output_module:
      raise RuntimeError(u'Missing output module.')

    if options.analysis_plugins:
      logging.info(u'Starting analysis plugins.')
      # Within all preprocessing objects, try to get the last one that has
      # time zone information stored in it, the highest chance of it
      # containing the information we are seeking (defaulting to the last
      # one).
      pre_objs = storage_file.GetStorageInformation()
      pre_obj = pre_objs[-1]
      for obj in pre_objs:
        if getattr(obj, 'time_zone_str', ''):
          pre_obj = obj

      # Fill in the collection information.
      pre_obj.collection_information = {}
      encoding = getattr(pre_obj, 'preferred_encoding', None)
      if encoding:
        cmd_line = ' '.join(sys.argv)
        try:
          pre_obj.collection_information['cmd_line'] = cmd_line.decode(
              encoding)
        except UnicodeDecodeError:
          pass
      pre_obj.collection_information['file_processed'] = (
          self._storage_file_path)
      pre_obj.collection_information['method'] = 'Running Analysis Plugins'
      pre_obj.collection_information['plugins'] = options.analysis_plugins
      time_of_run = timelib.Timestamp.GetNow()
      pre_obj.collection_information['time_of_run'] = time_of_run
      pre_obj.counter = collections.Counter()

      # Assign the preprocessing object to the storage.
      # This is normally done in the construction of the storage object,
      # however we cannot do that here since the preprocessing object is
      # stored inside the storage file, so we need to open it first to
      # be able to read it in, before we make changes to it. Thus we need
      # to access this protected member of the class.
      # pylint: disable=protected-access
      storage_file._pre_obj = pre_obj

      # Start queues and load up plugins.
      analysis_output_queue = queue.MultiThreadedQueue()
      analysis_producers = []
      analysis_queues = []
      analysis_plugins_list = [
          x.strip() for x in options.analysis_plugins.split(',')]
      for _ in xrange(0, len(analysis_plugins_list)):
        analysis_queues.append(queue.MultiThreadedQueue())
        analysis_producers.append(
            queue.AnalysisPluginProducer(analysis_queues[-1]))

      analysis_plugins = analysis.LoadPlugins(
          analysis_plugins_list, pre_obj, analysis_queues,
          analysis_output_queue)

      # Now we need to start all the plugins.
      for analysis_plugin in analysis_plugins:
        self._analysis_processes.append(multiprocessing.Process(
            name='Analysis {0:s}'.format(analysis_plugin.plugin_name),
            target=analysis_plugin.RunPlugin))
        self._analysis_processes[-1].start()
        logging.info(u'Plugin: [{0:s}] started.'.format(
            analysis_plugin.plugin_name))
    else:
      analysis_producers = []

    output_buffer = output_lib.EventBuffer(output_module, options.dedup)
    with output_buffer:
      counter = ProcessOutput(
          output_buffer, output_module, self._filter_object,
          self._filter_buffer, analysis_producers)

    for information in storage_file.GetStorageInformation():
      if hasattr(information, 'counter'):
        counter['Stored Events'] += information.counter['total']

    if not options.quiet:
      logging.info(u'Output processing is done.')

    # Get all reports and tags from analysis plugins.
    if options.analysis_plugins:
      logging.info(u'Processing data from analysis plugins.')
      for analysis_producer in analysis_producers:
        analysis_producer.SignalEndOfInput()

      # Wait for all analysis plugins to complete.
      for number, analysis_process in enumerate(self._analysis_processes):
        logging.debug(
            u'Waiting for analysis plugin: {0:d} to complete.'.format(number))
        if analysis_process.is_alive():
          analysis_process.join(10)
        else:
          logging.warning(u'Plugin {0:d} already stopped.'.format(number))
          analysis_process.terminate()
      logging.debug(u'All analysis plugins are now stopped.')

      # Close the output queue.
      analysis_output_queue.SignalEndOfInput()

      # Go over each output.
      analysis_queue_consumer = PsortAnalysisReportQueueConsumer(
          analysis_output_queue, storage_file, self._filter_expression,
          self.preferred_encoding)
      analysis_queue_consumer.ConsumeAnalysisReports()
      if analysis_queue_consumer.tags:
        storage_file.StoreTagging(analysis_queue_consumer.tags)

      # TODO: analysis_queue_consumer.anomalies:

      for item, value in analysis_queue_consumer.counter.iteritems():
        counter[item] = value

  if self._filter_object and not counter['Limited By']:
    counter['Filter By Date'] = (
        counter['Stored Events'] - counter['Events Included'] -
        counter['Events Filtered Out'])

  return counter

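# A minimal standalone sketch of the time-slice arithmetic used in
# ParseStorage() above: the --slice timestamp is widened on both sides by the
# slice size in minutes, converted to microseconds. The helper name and its
# placement are assumptions for illustration only; ParseStorage() itself
# passes the resulting bounds to pfilter.TimeRangeCache.
def _ComputeSliceRange(timestamp, slice_size_minutes):
  """Returns the (lower, upper) microsecond bounds around a slice timestamp."""
  # Convert the number of minutes to microseconds, as ParseStorage() does.
  range_in_microseconds = slice_size_minutes * 60 * 1000000
  return timestamp - range_in_microseconds, timestamp + range_in_microseconds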