def ParseOptions(cls, options, configuration_object):
  """Parses and validates options.

  Args:
    options (argparse.Namespace): parser options.
    configuration_object (CLITool): object to be configured by the argument
        helper.

  Raises:
    BadConfigObject: when the configuration object is of the wrong type.
    BadConfigOption: when the location of the data files cannot be
        determined.
  """
  if not isinstance(configuration_object, tools.CLITool):
    raise errors.BadConfigObject(
        'Configuration object is not an instance of CLITool')

  data_location = cls._ParseStringOption(options, 'data_location')
  if not data_location:
    # Walk up from the location of this helper to the source root and probe
    # the layouts used when running from an egg or from a source checkout.
    source_root = cls._PATH
    for _ in range(4):
      source_root = os.path.dirname(source_root)

    egg_candidate = os.path.join(source_root, 'share', 'plaso')
    source_candidate = os.path.join(source_root, 'data')

    data_location = None
    # The plaso-data.README marker file confirms the directory actually
    # contains the plaso data files.
    if os.path.exists(egg_candidate) and os.path.isfile(
        os.path.join(egg_candidate, 'plaso-data.README')):
      data_location = egg_candidate
    elif os.path.exists(source_candidate) and os.path.isfile(
        os.path.join(source_candidate, 'plaso-data.README')):
      data_location = source_candidate

    if not data_location or not os.path.exists(data_location):
      # Fall back to the system-wide installation locations, keeping the
      # first candidate that exists on disk.
      candidate_paths = [
          os.path.join(sys.prefix, 'share', 'plaso'),
          os.path.join(sys.prefix, 'local', 'share', 'plaso')]
      if sys.prefix != '/usr':
        candidate_paths.extend([
            os.path.join('/usr', 'share', 'plaso'),
            os.path.join('/usr', 'local', 'share', 'plaso')])

      for candidate_path in candidate_paths:
        data_location = candidate_path
        if os.path.exists(data_location):
          break

      # Require the marker file also for the system-wide locations.
      if not os.path.exists(data_location) or not os.path.isfile(
          os.path.join(data_location, 'plaso-data.README')):
        data_location = None

  if not data_location:
    raise errors.BadConfigOption(
        'Unable to determine location of data files.')

  logger.info('Determined data location: {0:s}'.format(data_location))

  setattr(configuration_object, '_data_location', data_location)
def ParseOptions(self, options): """Parses the options and initializes the front-end. Args: options: the command line arguments (instance of argparse.Namespace). source_option: optional name of the source option. The default is source. Raises: BadConfigOption: if the options are invalid. """ # The data location is required to list signatures. self._ParseDataLocationOption(options) # Check the list options first otherwise required options will raise. signature_identifiers = getattr(options, u'signature_identifiers', None) if signature_identifiers == u'list': self.list_signature_identifiers = True if self.list_signature_identifiers: return super(ImageExportTool, self).ParseOptions(options) format_string = u'%(asctime)s [%(levelname)s] %(message)s' if self._debug_mode: log_level = logging.DEBUG else: log_level = logging.INFO log_file = getattr(options, u'log_file', None) self._ConfigureLogging(filename=log_file, format_string=format_string, log_level=log_level) self._destination_path = getattr(options, u'path', u'export') self._ParseFilterOptions(options) if (getattr(options, u'no_vss', False) or getattr(options, u'include_duplicates', False)): self._remove_duplicates = False date_filters = getattr(options, u'date_filters', None) try: self._front_end.ParseDateFilters(date_filters) except ValueError as exception: raise errors.BadConfigOption(exception) extensions_string = getattr(options, u'extensions_string', None) self._front_end.ParseExtensionsString(extensions_string) names_string = getattr(options, u'names_string', None) self._front_end.ParseNamesString(names_string) if not self._data_location: logging.warning( u'Unable to automatically determine data location.') signature_identifiers = getattr(options, u'signature_identifiers', None) try: self._front_end.ParseSignatureIdentifiers(self._data_location, signature_identifiers) except (IOError, ValueError) as exception: raise errors.BadConfigOption(exception) if self._filter_file: self.has_filters = True else: 
self.has_filters = self._front_end.HasFilters()
def ParseOptions(self, options):
  """Parses the options.

  Args:
    options (argparse.Namespace): the command line arguments.

  Raises:
    BadConfigOption: if the options are invalid.
  """
  # Check the list options first otherwise required options will raise.
  self._ParseExtractionOptions(options)
  self._front_end.SetUseOldPreprocess(self._old_preprocess)
  self._ParseTimezoneOption(options)

  self.show_info = getattr(options, u'show_info', False)

  if getattr(options, u'use_markdown', False):
    self._views_format_type = cli_views.ViewsFactory.FORMAT_TYPE_MARKDOWN

  # Any list or informational option short-circuits all further parsing.
  if (self.list_hashers or self.list_parsers_and_plugins or
      self.list_timezones or self.show_info):
    return

  super(Log2TimelineTool, self).ParseOptions(options)
  self._ParseOutputOptions(options)
  self._ParseProcessingOptions(options)

  format_string = (
      u'%(asctime)s [%(levelname)s] (%(processName)-10s) PID:%(process)d '
      u'<%(module)s> %(message)s')

  if self._debug_mode:
    logging_level = logging.DEBUG
  elif self._quiet_mode:
    logging_level = logging.WARNING
  else:
    logging_level = logging.INFO

  self.ParseLogFileOptions(options)
  self._ConfigureLogging(
      filename=self._log_file, format_string=format_string,
      log_level=logging_level)

  if self._debug_mode:
    # Filter out noisy log records when running in debug mode.
    logging_filter = log2timeline.LoggingFilter()
    root_logger = logging.getLogger()
    root_logger.addFilter(logging_filter)

  self._output = self.ParseStringOption(options, u'output')
  if not self._output:
    raise errors.BadConfigOption(u'No output defined.')

  # TODO: where is this defined?
  self._operating_system = getattr(options, u'os', None)

  if self._operating_system:
    self._mount_path = getattr(options, u'filename', None)

  self._filter_expression = self.ParseStringOption(options, u'filter')
  if self._filter_expression:
    # TODO: refactor self._filter_object out the tool into the frontend.
    self._filter_object = self._GetMatcher(self._filter_expression)
    if not self._filter_object:
      raise errors.BadConfigOption(
          u'Invalid filter expression: {0:s}'.format(
              self._filter_expression))

  self._status_view_mode = getattr(options, u'status_view_mode', u'linear')
  self._enable_sigsegv_handler = getattr(options, u'sigsegv_handler', False)
def ParseOptions(self, options):
  """Parses tool specific options.

  Args:
    options (argparse.Namespace): command line arguments.

  Raises:
    BadConfigOption: if the options are invalid.
  """
  # The extraction options are dependent on the data location.
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['data_location'])

  self._ReadParserPresetsFromFile()
  self._ReadEventFormatters()

  # The output modules options are dependent on the preferred_language
  # and output_time_zone options.
  self._ParseOutputTimeZoneOption(options)

  argument_helper_names = [
      'artifact_definitions', 'hashers', 'language', 'parsers']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=argument_helper_names)

  self._ParseTimeZoneOption(options)

  # The sentinel value 'list' requests listing instead of processing.
  self.list_hashers = self._hasher_names_string == 'list'
  self.list_language_identifiers = self._preferred_language == 'list'
  self.list_parsers_and_plugins = self._parser_filter_expression == 'list'

  self.show_troubleshooting = getattr(options, 'show_troubleshooting', False)

  self.dependencies_check = getattr(options, 'dependencies_check', True)

  # Check the list options first otherwise required options will raise.
  if (self.list_hashers or self.list_language_identifiers or
      self.list_parsers_and_plugins or self.list_time_zones or
      self.show_troubleshooting):
    return

  # Check output modules after the other listable options, otherwise
  # it could raise with "requires an output file".
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['output_modules'])

  self.list_output_modules = self._output_format == 'list'
  if self.list_output_modules:
    return

  self._ParseInformationalOptions(options)

  argument_helper_names = ['extraction', 'status_view']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=argument_helper_names)

  self._ParseLogFileOptions(options)

  self._ParseStorageMediaOptions(options)

  self._ParsePerformanceOptions(options)
  self._ParseProcessingOptions(options)

  # Fall back to an auto-generated storage file name when none was given.
  self._storage_file_path = getattr(options, 'storage_file', None)
  if not self._storage_file_path:
    self._storage_file_path = self._GenerateStorageFileName()

  self._output_filename = getattr(options, 'write', None)

  if not self._output_filename:
    raise errors.BadConfigOption(
        ('Output format: {0:s} requires an output file '
         '(-w OUTPUT_FILE)').format(self._output_format))

  # Refuse to clobber an existing output file.
  if os.path.exists(self._output_filename):
    raise errors.BadConfigOption(
        'Output file already exists: {0:s}.'.format(self._output_filename))

  self._EnforceProcessMemoryLimit(self._process_memory_limit)

  self._output_module = self._CreateOutputModule(options)
def ParseOptions(cls, options, output_module):
  """Parses and validates options.

  Reconstructs a span that was corrupted by secret redaction ("******"),
  which had removed the getpass call terminator and the SetFlushInterval,
  SetIndexName and SetFields configuration calls, leaving the function
  syntactically invalid.

  Args:
    options (argparse.Namespace): parser options.
    output_module (OutputModule): output module to configure.

  Raises:
    BadConfigObject: when the output module object is of the wrong type.
    BadConfigOption: when a configuration parameter fails validation.
  """
  if not isinstance(
      output_module, shared_elastic.SharedElasticsearchOutputModule):
    raise errors.BadConfigObject(
        'Output module is not an instance of ElasticsearchOutputModule')

  index_name = cls._ParseStringOption(
      options, 'index_name', default_value=cls._DEFAULT_INDEX_NAME)
  flush_interval = cls._ParseNumericOption(
      options, 'flush_interval', default_value=cls._DEFAULT_FLUSH_INTERVAL)

  # Additional fields are appended to the default field list.
  fields = ','.join(cls._DEFAULT_FIELDS)
  additional_fields = cls._ParseStringOption(options, 'additional_fields')
  if additional_fields:
    fields = ','.join([fields, additional_fields])

  mappings_file_path = cls._ParseStringOption(options, 'elastic_mappings')
  elastic_user = cls._ParseStringOption(options, 'elastic_user')
  elastic_password = cls._ParseStringOption(options, 'elastic_password')
  use_ssl = getattr(options, 'use_ssl', False)

  ca_certificates_path = cls._ParseStringOption(
      options, 'ca_certificates_file_path')
  elastic_url_prefix = cls._ParseStringOption(options, 'elastic_url_prefix')

  # Fall back to the environment variable when no password option was given.
  if elastic_password is None:
    elastic_password = os.getenv('PLASO_ELASTIC_PASSWORD', None)

  if elastic_password is not None:
    logger.warning(
        'Note that specifying your Elasticsearch password via '
        '--elastic_password or the environment PLASO_ELASTIC_PASSWORD can '
        'expose the password to other users on the system.')

  # Prompt interactively when a user name was given without a password.
  if elastic_user is not None and elastic_password is None:
    elastic_password = getpass.getpass('Enter your Elasticsearch password: ')

  if flush_interval:
    output_module.SetFlushInterval(flush_interval)

  output_module.SetIndexName(index_name)
  output_module.SetFields([
      field_name.strip() for field_name in fields.split(',')])
  output_module.SetUsername(elastic_user)
  output_module.SetPassword(elastic_password)
  output_module.SetUseSSL(use_ssl)
  output_module.SetCACertificatesPath(ca_certificates_path)
  output_module.SetURLPrefix(elastic_url_prefix)

  # Derive the mappings file location when none was provided or the provided
  # path does not exist.
  if not mappings_file_path or not os.path.isfile(mappings_file_path):
    mappings_filename = output_module.MAPPINGS_FILENAME

    mappings_path = getattr(output_module, 'MAPPINGS_PATH', None)
    if mappings_path:
      mappings_file_path = os.path.join(mappings_path, mappings_filename)
    else:
      data_location = getattr(options, '_data_location', None) or 'data'
      mappings_file_path = os.path.join(data_location, mappings_filename)

  if not mappings_file_path or not os.path.isfile(mappings_file_path):
    raise errors.BadConfigOption(
        'No such Elasticsearch mappings file: {0!s}.'.format(
            mappings_file_path))

  with open(mappings_file_path, 'r') as file_object:
    mappings_json = json.load(file_object)

  output_module.SetMappings(mappings_json)
def ParseOptions(self, options):
  """Parses the options.

  Args:
    options (argparse.Namespace): command line arguments.

  Raises:
    BadConfigOption: if the options are invalid.
  """
  # The extraction options are dependent on the data location.
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['data_location'])

  self._ReadParserPresetsFromFile()

  # Check the list options first otherwise required options will raise.
  argument_helper_names = ['hashers', 'parsers', 'profiling']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=argument_helper_names)

  self._ParseExtractionOptions(options)

  # The sentinel value 'list' requests listing instead of processing.
  self.list_hashers = self._hasher_names_string == 'list'
  self.list_parsers_and_plugins = self._parser_filter_expression == 'list'
  self.list_profilers = self._profilers == 'list'

  self.show_info = getattr(options, 'show_info', False)
  self.show_troubleshooting = getattr(options, 'show_troubleshooting', False)

  if getattr(options, 'use_markdown', False):
    self._views_format_type = views.ViewsFactory.FORMAT_TYPE_MARKDOWN

  self.dependencies_check = getattr(options, 'dependencies_check', True)

  # Any list or informational option short-circuits all further parsing.
  if (self.list_hashers or self.list_language_tags or
      self.list_parsers_and_plugins or self.list_profilers or
      self.list_time_zones or self.show_info or self.show_troubleshooting):
    return

  self._ParseInformationalOptions(options)

  argument_helper_names = [
      'artifact_definitions', 'artifact_filters', 'extraction',
      'filter_file', 'status_view', 'storage_format', 'text_prepend',
      'yara_rules']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=argument_helper_names)

  self._ParseLogFileOptions(options)

  self._ParseStorageMediaOptions(options)

  self._ParsePerformanceOptions(options)
  self._ParseProcessingOptions(options)

  # Fall back to an auto-generated storage file name when none was given.
  self._storage_file_path = self.ParseStringOption(options, 'storage_file')
  if not self._storage_file_path:
    self._storage_file_path = self._GenerateStorageFileName()

  if not self._storage_file_path:
    raise errors.BadConfigOption('Missing storage file option.')

  serializer_format = getattr(
      options, 'serializer_format', definitions.SERIALIZER_FORMAT_JSON)
  if serializer_format not in definitions.SERIALIZER_FORMATS:
    raise errors.BadConfigOption(
        'Unsupported storage serializer format: {0:s}.'.format(
            serializer_format))
  self._storage_serializer_format = serializer_format

  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['status_view'])

  self._enable_sigsegv_handler = getattr(options, 'sigsegv_handler', False)

  self._EnforceProcessMemoryLimit(self._process_memory_limit)
def AnalyzeEvents(self):
  """Analyzes events from a plaso storage file and generate a report.

  Raises:
    BadConfigOption: when a configuration parameter fails validation or the
        storage file cannot be opened with read access.
    RuntimeError: if a non-recoverable situation is encountered.
  """
  session = engine.BaseEngine.CreateSession(
      command_line_arguments=self._command_line_arguments,
      preferred_encoding=self.preferred_encoding)

  storage_reader = storage_factory.StorageFactory.CreateStorageReaderForFile(
      self._storage_file_path)
  if not storage_reader:
    raise errors.BadConfigOption(
        'Format of storage file: {0:s} not supported'.format(
            self._storage_file_path))

  # Record the report count up-front; a fresh reader is opened below for
  # the analysis run itself.
  self._number_of_analysis_reports = (
      storage_reader.GetNumberOfAnalysisReports())
  storage_reader.Close()

  processing_configuration = self._CreateProcessingConfiguration(
      self._knowledge_base)

  reports_counter = collections.Counter()
  if self._output_format != 'null':
    self._status_view.SetMode(self._status_view_mode)
    self._status_view.SetStorageFileInformation(self._storage_file_path)

    status_update_callback = (
        self._status_view.GetAnalysisStatusUpdateCallback())

    storage_reader = (
        storage_factory.StorageFactory.CreateStorageReaderForFile(
            self._storage_file_path))

    # TODO: add single processing support.
    analysis_engine = psort.PsortMultiProcessEngine(
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

    analysis_engine.ExportEvents(
        self._knowledge_base, storage_reader, self._output_module,
        processing_configuration,
        deduplicate_events=self._deduplicate_events,
        status_update_callback=status_update_callback,
        time_slice=self._time_slice, use_time_slicer=self._use_time_slicer)

    # reports_counter starts empty so update() copies the session counts.
    reports_counter.update(session.analysis_reports_counter)

  if self._quiet_mode:
    return

  self._output_writer.Write('Processing completed.\n')

  counter_table_view = views.ViewsFactory.GetTableView(
      self._views_format_type, title='Counter')
  for label, number_of_reports in reports_counter.most_common():
    # Empty labels are rendered as 'N/A'.
    counter_table_view.AddRow([label or 'N/A', number_of_reports])
  counter_table_view.Write(self._output_writer)

  storage_reader = storage_factory.StorageFactory.CreateStorageReaderForFile(
      self._storage_file_path)
  self._PrintAnalysisReportsDetails(
      storage_reader, self._number_of_analysis_reports)

  self._output_writer.Write('Storage file is {0:s}\n'.format(
      self._storage_file_path))
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid, or the storage
        format not supported, or there was a failure to writing to the
        storage.
    IOError: if the extraction engine could not write to the storage.
    OSError: if the extraction engine could not write to the storage.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  self.ScanSource(self._source_path)

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type,
      artifact_filters=self._artifact_filters,
      filter_file=self._filter_file)

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)

  self._output_writer.Write('Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      artifact_filter_names=self._artifact_filters,
      command_line_arguments=self._command_line_arguments,
      debug_mode=self._debug_mode,
      filter_file_path=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year,
      text_prepend=self._text_prepend)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(self._storage_format))

  try:
    storage_writer.Open(path=self._storage_file_path)
  except IOError as exception:
    raise IOError(
        'Unable to open storage with error: {0!s}'.format(exception))

  processing_status = None
  try:
    storage_writer.WriteSessionStart(session)

    try:
      processing_status = self._ProcessSources(session, storage_writer)

    finally:
      # Always write the session completion, marking the session aborted
      # when processing did not produce a status.
      session.aborted = getattr(processing_status, 'aborted', True)

      storage_writer.WriteSessionCompletion(session)

  except IOError as exception:
    raise IOError(
        'Unable to write to storage with error: {0!s}'.format(exception))

  finally:
    # Ensure the storage is closed even when writing failed.
    storage_writer.Close()

  self._status_view.PrintExtractionSummary(processing_status)
def ParseOptions(cls, options, configuration_object):
  """Parses and validates options.

  Args:
    options (argparse.Namespace): parser options.
    configuration_object (CLITool): object to be configured by the argument
        helper.

  Raises:
    BadConfigObject: when the configuration object is of the wrong type.
    BadConfigOption: if the required artifact definitions are not defined.
  """
  if not isinstance(configuration_object, tools.CLITool):
    raise errors.BadConfigObject(
        'Configuration object is not an instance of CLITool')

  artifacts_path = getattr(options, 'artifact_definitions_path', None)

  if ((not artifacts_path or not os.path.exists(artifacts_path)) and
      configuration_object.data_location):
    artifacts_path = os.path.dirname(configuration_object.data_location)
    artifacts_path = os.path.join(artifacts_path, 'artifacts')

  # Guard against artifacts_path being None here: os.path.exists(None)
  # raises TypeError, which previously escaped instead of the documented
  # BadConfigOption when neither the option nor data_location was set.
  if not artifacts_path or not os.path.exists(artifacts_path):
    # Probe the known installation locations and keep the first one that
    # exists on disk.
    fallback_paths = []
    if 'VIRTUAL_ENV' in os.environ:
      fallback_paths.append(os.path.join(
          os.environ['VIRTUAL_ENV'], 'share', 'artifacts'))

    fallback_paths.append(os.path.join(sys.prefix, 'share', 'artifacts'))
    fallback_paths.append(os.path.join(
        sys.prefix, 'local', 'share', 'artifacts'))

    if sys.prefix != '/usr':
      fallback_paths.append(os.path.join('/usr', 'share', 'artifacts'))
      fallback_paths.append(os.path.join(
          '/usr', 'local', 'share', 'artifacts'))

    artifacts_path = None
    for fallback_path in fallback_paths:
      if os.path.exists(fallback_path):
        artifacts_path = fallback_path
        break

  if not artifacts_path or not os.path.exists(artifacts_path):
    raise errors.BadConfigOption(
        'Unable to determine path to artifact definitions.')

  custom_artifacts_path = getattr(
      options, 'custom_artifact_definitions_path', None)

  if custom_artifacts_path and not os.path.isfile(custom_artifacts_path):
    raise errors.BadConfigOption(
        'No such artifacts filter file: {0:s}.'.format(custom_artifacts_path))

  if custom_artifacts_path:
    logger.info(
        'Custom artifact filter file: {0:s}'.format(custom_artifacts_path))

  registry = artifacts_registry.ArtifactDefinitionsRegistry()
  reader = artifacts_reader.YamlArtifactsReader()

  logger.info(
      'Determined artifact definitions path: {0:s}'.format(artifacts_path))

  try:
    registry.ReadFromDirectory(reader, artifacts_path)

  except (KeyError, artifacts_errors.FormatError) as exception:
    raise errors.BadConfigOption((
        'Unable to read artifact definitions from: {0:s} with error: '
        '{1!s}').format(artifacts_path, exception))

  # Every preprocessing plugin requires a matching artifact definition.
  for name in preprocessors_manager.PreprocessPluginsManager.GetNames():
    if not registry.GetDefinitionByName(name):
      raise errors.BadConfigOption(
          'Missing required artifact definition: {0:s}'.format(name))

  if custom_artifacts_path:
    try:
      registry.ReadFromFile(reader, custom_artifacts_path)

    except (KeyError, artifacts_errors.FormatError) as exception:
      raise errors.BadConfigOption((
          'Unable to read artifact definitions from: {0:s} with error: '
          '{1!s}').format(custom_artifacts_path, exception))

  setattr(configuration_object, '_artifact_definitions_path', artifacts_path)
  setattr(
      configuration_object, '_custom_artifacts_path', custom_artifacts_path)
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid or the storage
        format not supported or an invalid collection filter was specified.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource_CARPE(self._source_path, self.par_name)
  self._source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type,
      artifact_filters=self._artifact_filters,
      filter_file=self._filter_file)

  session = engine.BaseEngine.CreateSession(
      artifact_filter_names=self._artifact_filters,
      command_line_arguments=self._command_line_arguments,
      debug_mode=self._debug_mode,
      filter_file_path=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(self._storage_format))

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine()

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  self._SetExtractionParsersAndPlugins(configuration, session)
  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration)
  else:
    logger.debug('Starting extraction in multi process mode.')

    # The multi process engine identifies tasks by the session identifier.
    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit)
def _ProcessSources(self, session, storage_writer):
  """Processes the sources and extract events.

  Args:
    session (Session): session in which the sources are processed.
    storage_writer (StorageWriter): storage writer for a session storage.

  Returns:
    ProcessingStatus: processing status.

  Raises:
    BadConfigOption: if an invalid collection filter was specified.
  """
  is_archive = False
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    is_archive = self._IsArchiveFile(self._source_path_specs[0])
    if is_archive:
      self._source_type = definitions.SOURCE_TYPE_ARCHIVE

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # A single file source is processed in a single process unless it is an
    # archive and archive processing was requested.
    if not self._process_archives or not is_archive:
      single_process_mode = True

  if single_process_mode:
    extraction_engine = single_extraction_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_extraction_engine.ExtractionMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine, session, storage_writer)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  session.enabled_parser_names = (
      configuration.parser_filter_expression.split(','))
  session.parser_filter_expression = self._parser_filter_expression

  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  # TODO: set mount path in knowledge base with
  # extraction_engine.knowledge_base.SetMountPath()
  extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  # TODO: decouple session and storage writer?
  session.source_configurations = (
      extraction_engine.knowledge_base.GetSourceConfigurationArtifacts())
  storage_writer.WriteSessionConfiguration(session)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  if single_process_mode:
    force_parser = False
    number_of_parsers = len(
        configuration.parser_filter_expression.split(','))
    # Force the single enabled parser when processing a single non-archive
    # file with exactly one parser.
    if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
        not is_archive and number_of_parsers == 1):
      force_parser = True

    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer,
        self._resolver_context, configuration, force_parser=force_parser,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    # The following overrides are needed because pylint 2.6.0 gets confused
    # about which ProcessSources to check against.
    # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer, configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback,
        storage_file_path=self._storage_file_path)

  return processing_status
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid, or the storage
        format not supported, or there was a failure to writing to the
        storage.
    IOError: if the extraction engine could not write to the storage.
    OSError: if the extraction engine could not write to the storage.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  try:
    self.ScanSource(self._source_path)
  except dfvfs_errors.UserAbort as exception:
    # Translate the dfVFS abort into the tool's own exception type.
    raise errors.UserAbort(exception)

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type,
      artifact_filters=self._artifact_filters,
      filter_file=self._filter_file)

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)

  self._output_writer.Write('Processing started.\n')

  # TODO: attach processing configuration to session?
  session = engine.BaseEngine.CreateSession()

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(self._storage_format))

  try:
    storage_writer.Open(path=self._storage_file_path)
  except IOError as exception:
    raise IOError(
        'Unable to open storage with error: {0!s}'.format(exception))

  processing_status = None
  try:
    session_start = session.CreateSessionStart()
    storage_writer.AddAttributeContainer(session_start)

    try:
      processing_status = self._ProcessSources(session, storage_writer)

    finally:
      # Always write the session completion, marking the session aborted
      # when processing did not produce a status.
      session.aborted = getattr(processing_status, 'aborted', True)

      session_completion = session.CreateSessionCompletion()
      storage_writer.AddAttributeContainer(session_completion)

  except IOError as exception:
    raise IOError(
        'Unable to write to storage with error: {0!s}'.format(exception))

  finally:
    # Ensure the storage is closed even when writing failed.
    storage_writer.Close()

  self._status_view.PrintExtractionSummary(processing_status)
def _ProcessSources(self, session, storage_writer):
  """Processes the sources and extract events.

  Args:
    session (Session): session in which the sources are processed.
    storage_writer (StorageWriter): storage writer for a session storage.

  Returns:
    ProcessingStatus: processing status.

  Raises:
    BadConfigOption: if an invalid collection filter was specified.
  """
  is_archive = False
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    is_archive = self._IsArchiveFile(self._source_path_specs[0])
    if is_archive:
      self._source_type = definitions.SOURCE_TYPE_ARCHIVE

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # A single file source is processed in a single process unless it is an
    # archive and archive processing was requested.
    if not self._process_archives or not is_archive:
      single_process_mode = True

  if single_process_mode:
    extraction_engine = single_extraction_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_extraction_engine.ExtractionMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine, session, storage_writer)

  self._expanded_parser_filter_expression = (
      self._GetExpandedParserFilterExpression(
          extraction_engine.knowledge_base))

  enabled_parser_names = self._expanded_parser_filter_expression.split(',')
  number_of_enabled_parsers = len(enabled_parser_names)

  force_parser = False
  if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
      not is_archive and number_of_enabled_parsers == 1):
    # Force the single enabled parser when processing a single non-archive
    # file; EventLog resource extraction is not applicable in that case.
    force_parser = True

    self._extract_winevt_resources = False

  elif ('winevt' not in enabled_parser_names and
        'winevtx' not in enabled_parser_names):
    # No Windows EventLog parser is enabled, so there is nothing to extract
    # resources for.
    self._extract_winevt_resources = False

  elif (self._extract_winevt_resources and
        'pe' not in enabled_parser_names):
    logger.warning(
        'A Windows EventLog parser is enabled in combination with '
        'extraction of Windows EventLog resources, but the Portable '
        'Executable (PE) parser is disabled. Therefore Windows EventLog '
        'resources cannot be extracted.')

    self._extract_winevt_resources = False

  configuration = self._CreateExtractionProcessingConfiguration()

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  session_configuration = self._CreateExtractionSessionConfiguration(
      session, enabled_parser_names)
  storage_writer.AddAttributeContainer(session_configuration)

  source_configurations = []
  for path_spec in self._source_path_specs:
    source_configuration = artifacts.SourceConfigurationArtifact(
        path_spec=path_spec)
    source_configurations.append(source_configuration)

  # TODO: improve to detect more than 1 system configurations.
  # TODO: improve to add volumes to system configuration.
  system_configuration = (
      extraction_engine.knowledge_base.GetSystemConfigurationArtifact())
  storage_writer.AddAttributeContainer(system_configuration)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        source_configurations, storage_writer, self._resolver_context,
        configuration, force_parser=force_parser,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    # The following overrides are needed because pylint 2.6.0 gets confused
    # about which ProcessSources to check against.
    # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
    processing_status = extraction_engine.ProcessSources(
        source_configurations, storage_writer, session.identifier,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback,
        storage_file_path=self._storage_file_path)

  return processing_status
def _GetExpandedParserFilterExpression(self, knowledge_base):
  """Determines the expanded parser filter expression.

  Args:
    knowledge_base (KnowledgeBase): contains information from the source
        data needed for parsing.

  Returns:
    str: expanded parser filter expression.

  Raises:
    BadConfigOption: if presets in the parser filter expression could not
        be expanded or if an invalid parser or plugin name is specified.
  """
  filter_expression = self._parser_filter_expression

  if not filter_expression and not self._single_process_mode:
    # No explicit expression: derive one from the presets matching the
    # operating system detected during pre-processing.
    system_artifact = artifacts.OperatingSystemArtifact(
        family=knowledge_base.GetValue('operating_system'),
        product=knowledge_base.GetValue('operating_system_product'),
        version=knowledge_base.GetValue('operating_system_version'))

    matching_presets = self._presets_manager.GetPresetsByOperatingSystem(
        system_artifact)
    if matching_presets:
      self._parser_filter_expression = ','.join(
          preset_definition.name for preset_definition in matching_presets)

      logger.debug(
          'Parser filter expression set to preset: {0:s}'.format(
              self._parser_filter_expression))

  expression_helper = parser_filter.ParserFilterExpressionHelper()

  try:
    # Replace preset names with the parsers and plugins they represent.
    filter_expression = expression_helper.ExpandPresets(
        self._presets_manager, self._parser_filter_expression)
    logger.debug('Parser filter expression set to: {0:s}'.format(
        filter_expression or 'N/A'))
  except RuntimeError as exception:
    raise errors.BadConfigOption(
        ('Unable to expand presets in parser filter expression with '
         'error: {0!s}').format(exception))

  valid_elements, invalid_elements = (
      parsers_manager.ParsersManager.CheckFilterExpression(
          filter_expression))

  if invalid_elements:
    raise errors.BadConfigOption(
        'Unknown parser or plugin names in element(s): "{0:s}" of '
        'parser filter expression: {1:s}'.format(
            ','.join(invalid_elements), filter_expression))

  return ','.join(sorted(valid_elements))
def ParseOptions(cls, options, unused_config_object): """Parse and validate the configuration options.""" if not getattr(options, 'dynamic', u''): raise errors.BadConfigOption(u'Always set this.')
def ParseOptions(cls, options, configuration_object):
  """Parses and validates options.

  Args:
    options (argparse.Namespace): parser options.
    configuration_object (CLITool): object to be configured by the argument
        helper.

  Raises:
    BadConfigObject: when the configuration object is of the wrong type.
    BadConfigOption: when a configuration parameter fails validation.
  """
  if not isinstance(configuration_object, tools.CLITool):
    raise errors.BadConfigObject(
        'Configuration object is not an instance of CLITool')

  expression = cls._ParseStringOption(options, 'filter')

  compiled_filter = None
  if expression:
    compiled_filter = filters_manager.FiltersManager.GetFilterObject(
        expression)
    if not compiled_filter:
      raise errors.BadConfigOption('Invalid filter expression: {0:s}'.format(
          expression))

  slice_time_string = getattr(options, 'slice', None)
  slice_duration = getattr(options, 'slice_size', 5)
  slicer_enabled = getattr(options, 'slicer', False)

  # The slice and slicer options are mutually exclusive.
  if slice_time_string and slicer_enabled:
    raise errors.BadConfigOption(
        'Time slice and slicer cannot be used at the same time.')

  slice_timestamp = None
  if slice_time_string:
    # Note self._preferred_time_zone is None when not set but represents
    # UTC.
    time_zone_name = getattr(
        configuration_object, '_preferred_time_zone', None) or 'UTC'
    time_zone = pytz.timezone(time_zone_name)
    slice_timestamp = timelib.Timestamp.FromTimeString(
        slice_time_string, timezone=time_zone)
    if slice_timestamp is None:
      raise errors.BadConfigOption(
          'Unsupported time slice event date and time: {0:s}'.format(
              slice_time_string))

  setattr(configuration_object, '_event_filter_expression', expression)

  if compiled_filter:
    setattr(configuration_object, '_event_filter', compiled_filter)

  setattr(configuration_object, '_use_time_slicer', slicer_enabled)

  if slice_timestamp is not None or slicer_enabled:
    # Note that the time slicer uses the time slice to determine the
    # duration.
    # TODO: refactor TimeSlice to filters.
    setattr(
        configuration_object, '_time_slice',
        time_slices.TimeSlice(slice_timestamp, duration=slice_duration))
def _CreateProcessingConfiguration(self, knowledge_base):
  """Creates a processing configuration.

  Args:
    knowledge_base (KnowledgeBase): contains information from the source
        data needed for parsing.

  Returns:
    ProcessingConfiguration: processing configuration.

  Raises:
    BadConfigOption: if presets in the parser filter expression could not
        be expanded or if an invalid parser or plugin name is specified.
  """
  parser_filter_expression = self._parser_filter_expression
  if not parser_filter_expression:
    # No explicit filter expression: derive one from presets matching the
    # operating system detected during pre-processing.
    operating_system_family = knowledge_base.GetValue('operating_system')
    operating_system_product = knowledge_base.GetValue(
        'operating_system_product')
    operating_system_version = knowledge_base.GetValue(
        'operating_system_version')

    operating_system_artifact = artifacts.OperatingSystemArtifact(
        family=operating_system_family, product=operating_system_product,
        version=operating_system_version)

    preset_definitions = self._presets_manager.GetPresetsByOperatingSystem(
        operating_system_artifact)
    if preset_definitions:
      preset_names = [
          preset_definition.name
          for preset_definition in preset_definitions]
      filter_expression = ','.join(preset_names)

      logger.info('Parser filter expression set to: {0:s}'.format(
          filter_expression))
      parser_filter_expression = filter_expression

  parser_filter_helper = parser_filter.ParserFilterExpressionHelper()

  try:
    # Replace preset names in the expression with the parsers and plugins
    # they represent.
    parser_filter_expression = parser_filter_helper.ExpandPresets(
        self._presets_manager, parser_filter_expression)
  except RuntimeError as exception:
    raise errors.BadConfigOption((
        'Unable to expand presets in parser filter expression with '
        'error: {0!s}').format(exception))

  # Only the invalid elements are needed here; the expanded expression is
  # stored on the configuration as-is below.
  _, invalid_parser_elements = (
      parsers_manager.ParsersManager.CheckFilterExpression(
          parser_filter_expression))

  if invalid_parser_elements:
    invalid_parser_names_string = ','.join(invalid_parser_elements)
    raise errors.BadConfigOption(
        'Unknown parser or plugin names in element(s): "{0:s}" of '
        'parser filter expression: {1:s}'.format(
            invalid_parser_names_string, parser_filter_expression))

  # TODO: pass preferred_encoding.
  configuration = configurations.ProcessingConfiguration()
  configuration.artifact_filters = self._artifact_filters
  configuration.credentials = self._credential_configurations
  configuration.debug_output = self._debug_mode
  configuration.extraction.hasher_file_size_limit = (
      self._hasher_file_size_limit)
  configuration.extraction.hasher_names_string = self._hasher_names_string
  configuration.extraction.process_archives = self._process_archives
  configuration.extraction.process_compressed_streams = (
      self._process_compressed_streams)
  configuration.extraction.yara_rules_string = self._yara_rules_string
  configuration.filter_file = self._filter_file
  configuration.log_filename = self._log_file
  configuration.parser_filter_expression = parser_filter_expression
  configuration.preferred_year = self._preferred_year
  configuration.profiling.directory = self._profiling_directory
  configuration.profiling.sample_rate = self._profiling_sample_rate
  configuration.profiling.profilers = self._profilers
  configuration.task_storage_format = self._task_storage_format
  configuration.temporary_directory = self._temporary_directory
  return configuration
def ParseOptions(self, options): """Parses the options. Args: options (argparse.Namespace): command line arguments. Raises: BadConfigOption: if the options are invalid. """ self._ParseInformationalOptions(options) self._verbose = getattr(options, 'verbose', False) self._sections = getattr(options, 'sections', '') self.list_sections = self._sections == 'list' self.show_troubleshooting = getattr(options, 'show_troubleshooting', False) if self.list_sections or self.show_troubleshooting: return if self._sections != 'all': self._sections = self._sections.split(',') self._output_filename = getattr(options, 'write', None) argument_helper_names = ['process_resources', 'storage_file'] helpers_manager.ArgumentHelperManager.ParseOptions( options, self, names=argument_helper_names) # TODO: move check into _CheckStorageFile. if not self._storage_file_path: raise errors.BadConfigOption('Missing storage file option.') if not os.path.isfile(self._storage_file_path): raise errors.BadConfigOption('No such storage file: {0:s}.'.format( self._storage_file_path)) compare_storage_file_path = self.ParseStringOption( options, 'compare_storage_file') if compare_storage_file_path: if not os.path.isfile(compare_storage_file_path): raise errors.BadConfigOption( 'No such storage file: {0:s}.'.format( compare_storage_file_path)) self._compare_storage_file_path = compare_storage_file_path self.compare_storage_information = True self._output_format = self.ParseStringOption(options, 'output_format') if self._output_filename: if os.path.exists(self._output_filename): raise errors.BadConfigOption( 'Output file already exists: {0:s}.'.format( self._output_filename)) output_file_object = open(self._output_filename, 'wb') self._output_writer = tools.FileObjectOutputWriter( output_file_object) self._EnforceProcessMemoryLimit(self._process_memory_limit)
def _ExtractWithFilter(
    self, source_path_specs, destination_path, output_writer,
    artifact_filters, filter_file, artifact_definitions_path,
    custom_artifacts_path, skip_duplicates=True):
  """Extracts files using a filter expression.

  This method runs the file extraction process on the image and
  potentially on every VSS if that is wanted.

  Args:
    source_path_specs (list[dfvfs.PathSpec]): path specifications to
        extract.
    destination_path (str): path where the extracted files should be
        stored.
    output_writer (CLIOutputWriter): output writer.
    artifact_filters (list[str]): names of artifact definitions that are
        used for filtering file system and Windows Registry key paths.
    filter_file (str): path of the file that contains the filter file path
        filters.
    artifact_definitions_path (str): path to artifact definitions file.
    custom_artifacts_path (str): path to custom artifact definitions file.
    skip_duplicates (Optional[bool]): True if files with duplicate content
        should be skipped.

  Raises:
    BadConfigOption: if an invalid collection filter was specified.
  """
  extraction_engine = engine.BaseEngine()

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  for source_path_spec in source_path_specs:
    file_system, mount_point = self._GetSourceFileSystem(
        source_path_spec, resolver_context=self._resolver_context)

    # Ensure the file system is closed even when building the collection
    # filters or extracting a file entry raises.
    try:
      display_name = path_helper.PathHelper.GetDisplayNameForPathSpec(
          source_path_spec)
      output_writer.Write(
          'Extracting file entries from: {0:s}\n'.format(display_name))

      try:
        extraction_engine.BuildCollectionFilters(
            artifact_definitions_path, custom_artifacts_path,
            extraction_engine.knowledge_base, artifact_filters,
            filter_file)
      except errors.InvalidFilter as exception:
        raise errors.BadConfigOption(
            'Unable to build collection filters with error: {0!s}'.format(
                exception))

      searcher = file_system_searcher.FileSystemSearcher(
          file_system, mount_point)

      filters_helper = extraction_engine.collection_filters_helper
      for path_spec in searcher.Find(find_specs=(
          filters_helper.included_file_system_find_specs)):
        self._ExtractFileEntry(
            path_spec, destination_path, output_writer,
            skip_duplicates=skip_duplicates)

    finally:
      file_system.Close()
def ProcessStorage(self):
  """Processes a plaso storage file.

  Raises:
    BadConfigOption: when a configuration parameter fails validation or the
        storage file cannot be opened with read access.
    RuntimeError: if a non-recoverable situation is encountered.
  """
  self._CheckStorageFile(self._storage_file_path)

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetStorageFileInformation(self._storage_file_path)

  status_update_callback = (
      self._status_view.GetAnalysisStatusUpdateCallback())

  session = engine.BaseEngine.CreateSession(
      command_line_arguments=self._command_line_arguments,
      preferred_encoding=self.preferred_encoding)

  storage_reader = storage_factory.StorageFactory.CreateStorageReaderForFile(
      self._storage_file_path)
  if not storage_reader:
    raise errors.BadConfigOption(
        'Format of storage file: {0:s} not supported'.format(
            self._storage_file_path))

  # Note: this loop rebinds "session"; after the loop it refers to the
  # last session read from the storage file, not the session created
  # above.
  for session in storage_reader.GetSessions():
    if not session.source_configurations:
      storage_reader.ReadSystemConfiguration(self._knowledge_base)
    else:
      for source_configuration in session.source_configurations:
        self._knowledge_base.ReadSystemConfigurationArtifact(
            source_configuration.system_configuration,
            session_identifier=session.identifier)

    self._knowledge_base.SetTextPrepend(session.text_prepend)

  self._number_of_analysis_reports = (
      storage_reader.GetNumberOfAnalysisReports())
  storage_reader.Close()

  configuration = configurations.ProcessingConfiguration()
  configuration.data_location = self._data_location
  configuration.debug_output = self._debug_mode
  configuration.log_filename = self._log_file
  configuration.profiling.directory = self._profiling_directory
  configuration.profiling.sample_rate = self._profiling_sample_rate
  configuration.profiling.profilers = self._profilers

  analysis_counter = None
  if self._analysis_plugins:
    storage_writer = (
        storage_factory.StorageFactory.CreateStorageWriterForFile(
            session, self._storage_file_path))
    if not storage_writer:
      raise errors.BadConfigOption(
          'Format of storage file: {0:s} not supported for writing'.
          format(self._storage_file_path))

    # TODO: add single processing support.
    analysis_engine = psort.PsortMultiProcessEngine(
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

    analysis_engine.AnalyzeEvents(
        self._knowledge_base, storage_writer, self._data_location,
        self._analysis_plugins, configuration,
        event_filter=self._event_filter,
        event_filter_expression=self._event_filter_expression,
        status_update_callback=status_update_callback)

    # Copy the per-report counts into a plain counter for the summary
    # table below.
    analysis_counter = collections.Counter()
    for item, value in session.analysis_reports_counter.items():
      analysis_counter[item] = value

  if self._output_format != 'null':
    storage_reader = (
        storage_factory.StorageFactory.CreateStorageReaderForFile(
            self._storage_file_path))

    # TODO: add single processing support.
    analysis_engine = psort.PsortMultiProcessEngine(
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

    analysis_engine.ExportEvents(
        self._knowledge_base, storage_reader, self._output_module,
        configuration, deduplicate_events=self._deduplicate_events,
        event_filter=self._event_filter,
        status_update_callback=status_update_callback,
        time_slice=self._time_slice, use_time_slicer=self._use_time_slicer)

    self._output_module.Close()
    self._output_module = None

  if self._quiet_mode:
    return

  self._output_writer.Write('Processing completed.\n')

  if analysis_counter:
    table_view = views.ViewsFactory.GetTableView(
        self._views_format_type, title='Analysis reports generated')
    for element, count in analysis_counter.most_common():
      if element != 'total':
        table_view.AddRow([element, count])

    table_view.AddRow(['Total', analysis_counter['total']])
    table_view.Write(self._output_writer)

  storage_reader = storage_factory.StorageFactory.CreateStorageReaderForFile(
      self._storage_file_path)
  self._PrintAnalysisReportsDetails(storage_reader)
def ExtractEventsFromSources(self):
  """Processes the sources and extract events.

  This is a stripped down copy of tools/log2timeline.py that doesn't
  support the full set of flags. The defaults for these are hard coded in
  the constructor of this class.

  Raises:
    BadConfigOption: if the storage file path is invalid or the storage
        format not supported or an invalid collection filter was specified.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource(self._source_path)
  source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, source_type,
      artifact_filters=self._artifact_filters,
      filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      artifact_filter_names=self._artifact_filters,
      command_line_arguments=self._command_line_arguments,
      filter_file_path=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(
            self._storage_format))

  single_process_mode = self._single_process_mode
  if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  self._SetExtractionParsersAndPlugins(configuration, session)
  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  # TODO: set mount path in knowledge base with
  # extraction_engine.knowledge_base.SetMountPath()
  extraction_engine.knowledge_base.SetTextPrepend(self._text_prepend)

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  processing_status = None
  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer,
        self._resolver_context, configuration,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    processing_status = extraction_engine.ProcessSources(
        session, self._source_path_specs, storage_writer, configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback)

  self._status_view.PrintExtractionSummary(processing_status)
def ParseOptions(cls, options, configuration_object):
  """Parses and validates options.

  Args:
    options (argparse.Namespace): parser options.
    configuration_object (CLITool): object to be configured by the argument
        helper.

  Raises:
    BadConfigObject: when the configuration object is of the wrong type.
    BadConfigOption: when a configuration parameter fails validation.
  """
  if not isinstance(configuration_object, tools.CLITool):
    raise errors.BadConfigObject(
        'Configuration object is not an instance of CLITool')

  expression = cls._ParseStringOption(options, 'filter')

  compiled_filter = None
  if expression:
    compiled_filter = event_filter.EventObjectFilter()
    try:
      compiled_filter.CompileFilter(expression)
    except errors.ParseError as exception:
      raise errors.BadConfigOption(
          ('Unable to compile filter expression with error: '
           '{0!s}').format(exception))

  slice_time_string = getattr(options, 'slice', None)
  slice_duration = getattr(options, 'slice_size', 5)
  slicer_enabled = getattr(options, 'slicer', False)

  # The slice and slicer options are mutually exclusive.
  if slice_time_string and slicer_enabled:
    raise errors.BadConfigOption(
        'Time slice and slicer cannot be used at the same time.')

  slice_timestamp = None
  if slice_time_string:
    # A space indicates a non-ISO 8601 date and time string.
    if ' ' in slice_time_string:
      raise errors.BadConfigOption(
          'Time slice date and time must be defined in ISO 8601 format, '
          'for example: 20200619T20:09:23+02:00.')

    date_time = dfdatetime_time_elements.TimeElements()
    try:
      date_time.CopyFromStringISO8601(slice_time_string)
    except ValueError:
      raise errors.BadConfigOption((
          'Unsupported time slice date and time: {0:s}. The date and time '
          'must be defined in ISO 8601 format, for example: '
          '20200619T20:09:23+02:00').format(slice_time_string))

    # TODO: directly use dfDateTime objects in time slice.
    slice_timestamp = date_time.GetPlasoTimestamp()

  setattr(configuration_object, '_event_filter_expression', expression)

  if compiled_filter:
    setattr(configuration_object, '_event_filter', compiled_filter)

  setattr(configuration_object, '_use_time_slicer', slicer_enabled)

  if slice_timestamp is not None or slicer_enabled:
    # Note that the time slicer uses the time slice to determine the
    # duration.
    # TODO: refactor TimeSlice to filters.
    setattr(
        configuration_object, '_time_slice',
        time_slices.TimeSlice(slice_timestamp, duration=slice_duration))
def _ParseStorageMediaImageOptions(self, options): """Parses the storage media image options. Args: options (argparse.Namespace): command line arguments. Raises: BadConfigOption: if the options are invalid. """ partitions = getattr(options, 'partitions', None) self._partitions = self._ParsePartitionsString(partitions) partition = getattr(options, 'partition', None) if self._partitions and partition is not None: raise errors.BadConfigOption( ('Option "--partition" can not be used in combination ' 'with "--partitions".')) if not self._partitions and partition is not None: self._partitions = self._ParsePartitionsString(partition) image_offset_bytes = getattr(options, 'image_offset_bytes', None) if self._partitions and image_offset_bytes is not None: raise errors.BadConfigOption(( 'Option "--image_offset_bytes" can not be used in combination ' 'with "--partitions" or "--partition".')) image_offset = getattr(options, 'image_offset', None) if self._partitions and image_offset is not None: raise errors.BadConfigOption( ('Option "--image_offset" can not be used in combination with ' '"--partitions" or "--partition".')) if (image_offset_bytes is not None and isinstance(image_offset_bytes, py2to3.STRING_TYPES)): try: image_offset_bytes = int(image_offset_bytes, 10) except ValueError: raise errors.BadConfigOption( 'Invalid image offset bytes: {0:s}.'.format( image_offset_bytes)) if image_offset_bytes is None and image_offset is not None: bytes_per_sector = getattr(options, 'bytes_per_sector', self._DEFAULT_BYTES_PER_SECTOR) if isinstance(image_offset, py2to3.STRING_TYPES): try: image_offset = int(image_offset, 10) except ValueError: raise errors.BadConfigOption( 'Invalid image offset: {0:s}.'.format(image_offset)) if isinstance(bytes_per_sector, py2to3.STRING_TYPES): try: bytes_per_sector = int(bytes_per_sector, 10) except ValueError: raise errors.BadConfigOption( 'Invalid bytes per sector: {0:s}.'.format( bytes_per_sector)) if image_offset_bytes: self._partition_offset = 
image_offset_bytes elif image_offset: self._partition_offset = image_offset * bytes_per_sector
def ParseOptions(self, options):
  """Parses tool specific options.

  Args:
    options (argparse.Namespace): command line arguments.

  Raises:
    BadConfigOption: if the options are invalid.
  """
  # The extraction options are dependent on the data location.
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['data_location'])

  # The output modules options are dependent on the preferred language
  # and preferred time zone options.
  self._ParseTimezoneOption(options)

  argument_helper_names = [
      'artifact_definitions', 'hashers', 'language', 'parsers']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=argument_helper_names)

  if self._preferred_language == 'list':
    self.list_language_identifiers = True

  if self._hasher_names_string == 'list':
    self.list_hashers = True

  if self._parser_filter_expression == 'list':
    self.list_parsers_and_plugins = True

  # Check the list options first otherwise required options will raise.
  # Fixed: "self.list_hashers" was previously tested twice in this
  # condition.
  if (self.list_language_identifiers or self.list_timezones or
      self.list_hashers or self.list_parsers_and_plugins):
    return

  if self.list_timezones or self.list_output_modules:
    return

  # Check output modules after the other listable options, as otherwise
  # a required argument will raise.
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['output_modules'])

  if self._output_format == 'list':
    self.list_output_modules = True
    return

  self._ParseInformationalOptions(options)

  argument_helper_names = ['extraction', 'status_view']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=argument_helper_names)

  self._ParseLogFileOptions(options)

  self._ParseStorageMediaOptions(options)

  # These arguments are parsed from argparse.Namespace, so we can make
  # tests consistent with the log2timeline/psort ones. The legacy u''
  # literals were removed; runtime values are unchanged.
  self._single_process_mode = getattr(options, 'single_process', False)

  self._storage_file_path = getattr(options, 'storage_file', None)
  if not self._storage_file_path:
    self._storage_file_path = self._GenerateStorageFileName()

  self._output_filename = getattr(options, 'write', None)
  if not self._output_filename:
    raise errors.BadConfigOption((
        'Output format: {0:s} requires an output file (-w OUTPUT_FILE)'
        ).format(self._output_format))

  if os.path.exists(self._output_filename):
    raise errors.BadConfigOption(
        'Output file already exists: {0:s}.'.format(
            self._output_filename))

  self._output_module = self._CreateOutputModule(options)
def ParseOptions(self, options):
  """Parses the options.

  Args:
    options (argparse.Namespace): command line arguments.

  Raises:
    BadConfigOption: if the options are invalid.
  """
  # The output modules options are dependent on the preferred language
  # and preferred time zone options.
  self._ParseTimezoneOption(options)

  names = ['analysis_plugins', 'language', 'profiling']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=names)

  self.list_analysis_plugins = self._analysis_plugins == 'list'
  self.list_language_identifiers = self._preferred_language == 'list'
  self.list_profilers = self._profilers == 'list'

  # Listing options require no further parsing.
  if (self.list_analysis_plugins or self.list_language_identifiers or
      self.list_profilers or self.list_timezones):
    return

  # Check output modules after the other listable options, otherwise
  # it could raise with "requires an output file".
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['output_modules'])

  self.list_output_modules = self._output_format == 'list'
  if self.list_output_modules:
    return

  self._ParseInformationalOptions(options)

  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['data_location'])

  self._ParseLogFileOptions(options)

  self._ParseProcessingOptions(options)

  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['event_filters'])

  self._deduplicate_events = getattr(options, 'dedup', True)

  if self._data_location:
    # Update the data location with the calculated value.
    options.data_location = self._data_location
  else:
    logging.warning('Unable to automatically determine data location.')

  self._command_line_arguments = self.GetCommandLineArguments()

  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['storage_file'])

  # TODO: move check into _CheckStorageFile.
  if not self._storage_file_path:
    raise errors.BadConfigOption('Missing storage file option.')

  if not os.path.isfile(self._storage_file_path):
    raise errors.BadConfigOption(
        'No such storage file: {0:s}.'.format(self._storage_file_path))

  self._analysis_plugins = self._CreateAnalysisPlugins(options)
  self._output_module = self._CreateOutputModule(options)
def ExtractEventsFromSources(self):
  """Processes the sources and extracts events.

  Raises:
    BadConfigOption: if the storage file path is invalid or the storage
        format not supported.
    SourceScannerError: if the source scanner could not find a supported
        file system.
    UserAbort: if the user initiated an abort.
  """
  self._CheckStorageFile(self._storage_file_path, warn_about_existing=True)

  scan_context = self.ScanSource(self._source_path)
  self._source_type = scan_context.source_type

  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetSourceInformation(
      self._source_path, self._source_type,
      filter_file=self._filter_file)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  self._output_writer.Write('\n')
  self._status_view.PrintExtractionStatusHeader(None)
  self._output_writer.Write('Processing started.\n')

  session = engine.BaseEngine.CreateSession(
      command_line_arguments=self._command_line_arguments,
      debug_mode=self._debug_mode,
      filter_file=self._filter_file,
      preferred_encoding=self.preferred_encoding,
      preferred_time_zone=self._preferred_time_zone,
      preferred_year=self._preferred_year)

  storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
      self._storage_format, session, self._storage_file_path)
  if not storage_writer:
    raise errors.BadConfigOption(
        'Unsupported storage format: {0:s}'.format(
            self._storage_format))

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    # No need to multi process a single file source.
    single_process_mode = True

  if single_process_mode:
    extraction_engine = single_process_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_process_engine.TaskMultiProcessEngine(
        use_zeromq=self._use_zeromq)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  configuration = self._CreateProcessingConfiguration(
      extraction_engine.knowledge_base)

  self._SetExtractionParsersAndPlugins(configuration, session)
  self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

  # Build the file system find specifications from the filter file, if
  # one was provided.
  filter_find_specs = None
  if configuration.filter_file:
    environment_variables = (
        extraction_engine.knowledge_base.GetEnvironmentVariables())
    filter_file_object = filter_file.FilterFile(
        configuration.filter_file)
    filter_find_specs = filter_file_object.BuildFindSpecs(
        environment_variables=environment_variables)

  processing_status = None
  if single_process_mode:
    logging.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        self._source_path_specs, storage_writer, self._resolver_context,
        configuration, filter_find_specs=filter_find_specs,
        status_update_callback=status_update_callback)

  else:
    logging.debug('Starting extraction in multi process mode.')

    processing_status = extraction_engine.ProcessSources(
        session.identifier, self._source_path_specs, storage_writer,
        configuration,
        enable_sigsegv_handler=self._enable_sigsegv_handler,
        filter_find_specs=filter_find_specs,
        number_of_worker_processes=self._number_of_extraction_workers,
        status_update_callback=status_update_callback,
        worker_memory_limit=self._worker_memory_limit)

  self._status_view.PrintExtractionSummary(processing_status)
def ParseOptions(self, options):
  """Parses the options.

  Args:
    options (argparse.Namespace): command line arguments.

  Raises:
    BadConfigOption: if the options are invalid.
  """
  # The extraction options are dependent on the data location.
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['data_location'])

  # Check the list options first otherwise required options will raise.
  argument_helper_names = ['hashers', 'parsers', 'profiling']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=argument_helper_names)

  self._ParseTimezoneOption(options)

  # The literal value 'list' in these options means "list the available
  # choices and exit" rather than an actual selection.
  self.list_hashers = self._hasher_names_string == 'list'
  self.list_parsers_and_plugins = self._parser_filter_expression == 'list'
  self.list_profilers = self._profilers == 'list'

  self.show_info = getattr(options, 'show_info', False)

  if getattr(options, 'use_markdown', False):
    self._views_format_type = views.ViewsFactory.FORMAT_TYPE_MARKDOWN

  self.dependencies_check = getattr(options, 'dependencies_check', True)

  # Early return: when only listing information, the remaining (required)
  # options must not be validated.
  if (self.list_hashers or self.list_parsers_and_plugins or
      self.list_profilers or self.list_timezones or self.show_info):
    return

  self._ParseInformationalOptions(options)

  argument_helper_names = [
      'artifact_definitions', 'extraction', 'filter_file', 'status_view',
      'storage_file', 'storage_format', 'text_prepend']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=argument_helper_names)

  self._ParseLogFileOptions(options)

  self._ParseStorageMediaOptions(options)

  self._ParsePerformanceOptions(options)
  self._ParseProcessingOptions(options)

  format_string = (
      '%(asctime)s [%(levelname)s] (%(processName)-10s) PID:%(process)d '
      '<%(module)s> %(message)s')

  # Debug wins over quiet when both are set.
  if self._debug_mode:
    logging_level = logging.DEBUG
  elif self._quiet_mode:
    logging_level = logging.WARNING
  else:
    logging_level = logging.INFO

  self._ConfigureLogging(
      filename=self._log_file, format_string=format_string,
      log_level=logging_level)

  if self._debug_mode:
    # Filter noisy records from third-party libraries in debug output.
    log_filter = logging_filter.LoggingFilter()
    root_logger = logging.getLogger()
    root_logger.addFilter(log_filter)

  if not self._storage_file_path:
    raise errors.BadConfigOption('Missing storage file option.')

  serializer_format = getattr(
      options, 'serializer_format', definitions.SERIALIZER_FORMAT_JSON)
  if serializer_format not in definitions.SERIALIZER_FORMATS:
    raise errors.BadConfigOption(
        'Unsupported storage serializer format: {0:s}.'.format(
            serializer_format))
  self._storage_serializer_format = serializer_format

  # TODO: where is this defined?
  self._operating_system = getattr(options, 'os', None)

  if self._operating_system:
    self._mount_path = getattr(options, 'filename', None)

  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['status_view'])

  self._enable_sigsegv_handler = getattr(options, 'sigsegv_handler', False)
def ParseOptions(self, options):
  """Parses the options and initializes the front-end.

  Args:
    options: the command line arguments (instance of argparse.Namespace).

  Raises:
    BadConfigOption: if the options are invalid.
  """
  super(ImageExportTool, self).ParseOptions(options)

  format_str = u'%(asctime)s [%(levelname)s] %(message)s'

  debug = getattr(options, u'debug', False)
  if debug:
    logging.basicConfig(level=logging.DEBUG, format=format_str)
  else:
    logging.basicConfig(level=logging.INFO, format=format_str)

  self._destination_path = getattr(options, u'path', u'export')

  filter_file = getattr(options, u'filter', None)
  if filter_file and not os.path.isfile(filter_file):
    raise errors.BadConfigOption(
        u'Unable to proceed, filter file: {0:s} does not exist.'.format(
            filter_file))

  self._filter_file = filter_file

  # Duplicate removal only makes sense when VSS stores are processed;
  # either opt-out flag disables it.
  if (getattr(options, u'no_vss', False) or
      getattr(options, u'include_duplicates', False)):
    self._remove_duplicates = False

  # TODO: move data location code to a location shared with psort.
  data_location = getattr(options, u'data_location', None)
  if not data_location:
    # Determine if we are running from the source directory.
    data_location = os.path.dirname(__file__)
    data_location = os.path.dirname(data_location)
    data_location = os.path.join(data_location, u'data')

    if not os.path.exists(data_location):
      # Otherwise determine if there is shared plaso data location.
      data_location = os.path.join(sys.prefix, u'share', u'plaso')

    if not os.path.exists(data_location):
      logging.warning(
          u'Unable to automatically determine data location.')
      data_location = None

  self._data_location = data_location

  date_filters = getattr(options, u'date_filters', None)
  try:
    self._front_end.ParseDateFilters(date_filters)
  except ValueError as exception:
    raise errors.BadConfigOption(exception)

  extensions_string = getattr(options, u'extensions_string', None)
  self._front_end.ParseExtensionsString(extensions_string)

  names_string = getattr(options, u'names_string', None)
  self._front_end.ParseNamesString(names_string)

  signature_identifiers = getattr(options, u'signature_identifiers', None)
  if signature_identifiers == u'list':
    self.list_signature_identifiers = True
  else:
    try:
      # Fixed: was self._frontend, which is inconsistent with the
      # self._front_end attribute used everywhere else in this method
      # and would raise AttributeError at runtime.
      self._front_end.ParseSignatureIdentifiers(
          self._data_location, signature_identifiers)
    except (IOError, ValueError) as exception:
      raise errors.BadConfigOption(exception)

  # Fixed: was self._frontend (same inconsistency as above).
  self.has_filters = self._front_end.HasFilters()
def _Extract(
    self, source_path_specs, destination_path, output_writer,
    artifact_filters, filter_file, artifact_definitions_path,
    custom_artifacts_path, skip_duplicates=True):
  """Extracts files.

  This method runs the file extraction process on the image
  and potentially on every VSS if that is wanted.

  Args:
    source_path_specs (list[dfvfs.PathSpec]): path specifications to
        extract.
    destination_path (str): path where the extracted files should be
        stored.
    output_writer (CLIOutputWriter): output writer.
    artifact_filters (list[str]): names of artifact definitions that are
        used for filtering file system and Windows Registry key paths.
    filter_file (str): path of the file that contains the filter file path
        filters.
    artifact_definitions_path (str): path to artifact definitions file.
    custom_artifacts_path (str): path to custom artifact definitions file.
    skip_duplicates (Optional[bool]): True if files with duplicate content
        should be skipped.

  Raises:
    BadConfigOption: if an invalid collection filter was specified.
  """
  extraction_engine = engine.BaseEngine()

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  try:
    extraction_engine.BuildCollectionFilters(
        artifact_definitions_path, custom_artifacts_path,
        extraction_engine.knowledge_base, artifact_filters, filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  filters_helper = extraction_engine.collection_filters_helper

  # Inclusion filters are applied while walking the file system;
  # exclusion filters are applied per resolved file entry below.
  excluded_find_specs = None
  included_find_specs = None
  if filters_helper:
    excluded_find_specs = filters_helper.excluded_file_system_find_specs
    included_find_specs = filters_helper.included_file_system_find_specs

  output_writer.Write('Extracting file entries.\n')
  path_spec_generator = self._path_spec_extractor.ExtractPathSpecs(
      source_path_specs, find_specs=included_find_specs,
      resolver_context=self._resolver_context)

  for path_spec in path_spec_generator:
    file_entry = path_spec_resolver.Resolver.OpenFileEntry(
        path_spec, resolver_context=self._resolver_context)

    # Unresolvable entries are logged and skipped rather than aborting
    # the whole extraction.
    if not file_entry:
      logger.warning(
          'Unable to open file entry for path spec: {0:s}'.format(
              path_spec.comparable))
      continue

    # A single matching exclusion find spec is enough to skip the entry.
    skip_file_entry = False
    for find_spec in excluded_find_specs or []:
      skip_file_entry = find_spec.CompareLocation(file_entry)
      if skip_file_entry:
        break

    if skip_file_entry:
      logger.info(
          'Skipped: {0:s} because of exclusion filter.'.format(
              file_entry.path_spec.location))
      continue

    self._ExtractFileEntry(
        file_entry, destination_path, skip_duplicates=skip_duplicates)
def ProcessStorage(self):
  """Processes a plaso storage file.

  Creates the output module, interactively resolves any of its missing
  arguments, optionally runs analysis plugins and then exports events,
  printing a summary counter table unless quiet mode is set.

  Raises:
    BadConfigOption: when a configuration parameter fails validation.
    RuntimeError: if a non-recoverable situation is encountered.
  """
  output_module = self._front_end.CreateOutputModule(
      self._output_format, preferred_encoding=self.preferred_encoding,
      timezone=self._timezone)

  if isinstance(output_module, output_interface.LinearOutputModule):
    if not self._output_filename:
      # TODO: Remove "no longer supported" after 1.5 release.
      raise errors.BadConfigOption((
          u'Output format: {0:s} requires an output file, output to stdout '
          u'is no longer supported.').format(self._output_format))

    if self._output_filename and os.path.exists(self._output_filename):
      raise errors.BadConfigOption(
          (u'Output file already exists: {0:s}. Aborting.').format(
              self._output_filename))

    # NOTE(review): this file object is handed to the output writer and
    # appears to stay open for the output module's lifetime; there is no
    # visible close() here — presumably the output module closes it.
    # TODO: confirm and prefer a context-managed lifetime.
    output_file_object = open(self._output_filename, u'wb')
    output_writer = cli_tools.FileObjectOutputWriter(
        output_file_object)
    output_module.SetOutputWriter(output_writer)

  helpers_manager.ArgumentHelperManager.ParseOptions(
      self._options, output_module)

  # Check if there are parameters that have not been defined and need to
  # in order for the output module to continue. Prompt user to supply
  # those that may be missing.
  # NOTE(review): if the user never supplies a value (value is None each
  # time), this loop can repeat indefinitely — TODO confirm intended.
  missing_parameters = output_module.GetMissingArguments()
  while missing_parameters:
    # TODO: refactor this.
    configuration_object = PsortOptions()
    setattr(configuration_object, u'output_format', output_module.NAME)
    for parameter in missing_parameters:
      value = self._PromptUserForInput(
          u'Missing parameter {0:s} for output module'.format(
              parameter))
      if value is None:
        logging.warning(
            u'Unable to set the missing parameter for: {0:s}'.format(
                parameter))
        continue

      setattr(configuration_object, parameter, value)

    helpers_manager.ArgumentHelperManager.ParseOptions(
        configuration_object, output_module)
    missing_parameters = output_module.GetMissingArguments()

  analysis_plugins = self._front_end.GetAnalysisPlugins(
      self._analysis_plugins)
  for analysis_plugin in analysis_plugins:
    helpers_manager.ArgumentHelperManager.ParseOptions(
        self._options, analysis_plugin)

  # Select the progress reporting style; None disables status updates.
  if self._status_view_mode == u'linear':
    status_update_callback = self._PrintStatusUpdateStream
  elif self._status_view_mode == u'window':
    status_update_callback = self._PrintStatusUpdate
  else:
    status_update_callback = None

  session = self._front_end.CreateSession(
      command_line_arguments=self._command_line_arguments,
      preferred_encoding=self.preferred_encoding)

  # Open the storage once just to count existing analysis reports, then
  # close it before analysis/export reopen it.
  storage_reader = self._front_end.CreateStorageReader(
      self._storage_file_path)
  self._number_of_analysis_reports = (
      storage_reader.GetNumberOfAnalysisReports())
  storage_reader.Close()

  if analysis_plugins:
    storage_writer = self._front_end.CreateStorageWriter(
        session, self._storage_file_path)
    # TODO: handle errors.BadConfigOption
    self._front_end.AnalyzeEvents(
        storage_writer, analysis_plugins,
        status_update_callback=status_update_callback)

  counter = collections.Counter()
  # The 'null' output format suppresses event export entirely.
  if self._output_format != u'null':
    storage_reader = self._front_end.CreateStorageReader(
        self._storage_file_path)

    events_counter = self._front_end.ExportEvents(
        storage_reader, output_module,
        deduplicate_events=self._deduplicate_events,
        status_update_callback=status_update_callback,
        time_slice=self._time_slice,
        use_time_slicer=self._use_time_slicer)

    counter += events_counter

  for item, value in iter(session.analysis_reports_counter.items()):
    counter[item] = value

  if self._quiet_mode:
    return

  self._output_writer.Write(u'Processing completed.\n')

  table_view = cli_views.ViewsFactory.GetTableView(
      self._views_format_type, title=u'Counter')
  for element, count in counter.most_common():
    if not element:
      element = u'N/A'
    table_view.AddRow([element, count])
  table_view.Write(self._output_writer)

  storage_reader = self._front_end.CreateStorageReader(
      self._storage_file_path)
  self._PrintAnalysisReportsDetails(storage_reader)