def _BuildFindSpecsFromRegistrySourceKey(self, key_path):
  """Build find specifications from a Windows Registry source type.

  Args:
    key_path (str): Windows Registry key path defined by the source.

  Returns:
    list[dfwinreg.FindSpec]: find specifications for the Windows Registry
        source type.
  """
  find_specs = []
  for key_path_glob in path_helper.PathHelper.ExpandGlobStars(
      key_path, '\\'):
    logger.debug('building find spec from key path glob: {0:s}'.format(
        key_path_glob))

    key_path_glob_upper = key_path_glob.upper()
    if key_path_glob_upper.startswith(
        'HKEY_LOCAL_MACHINE\\SYSTEM\\CURRENTCONTROLSET'):
      # Rewrite CurrentControlSet to ControlSet* for Windows NT.
      key_path_glob = 'HKEY_LOCAL_MACHINE\\System\\ControlSet*{0:s}'.format(
          key_path_glob[43:])

    elif key_path_glob_upper.startswith('HKEY_USERS\\%%USERS.SID%%'):
      key_path_glob = 'HKEY_CURRENT_USER{0:s}'.format(key_path_glob[26:])

    find_spec = registry_searcher.FindSpec(key_path_glob=key_path_glob)
    find_specs.append(find_spec)

  return find_specs

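# A minimal, self-contained sketch (not plaso code) of the key path rewrite
# applied above: a CurrentControlSet prefix is replaced by a ControlSet* glob
# so the resulting find specification matches ControlSet001, ControlSet002,
# and so on. The sample key path is hypothetical.
_CURRENT_CONTROL_SET_PREFIX = 'HKEY_LOCAL_MACHINE\\SYSTEM\\CURRENTCONTROLSET'


def RewriteCurrentControlSet(key_path_glob):
  """Rewrites a CurrentControlSet key path glob into a ControlSet* glob."""
  if key_path_glob.upper().startswith(_CURRENT_CONTROL_SET_PREFIX):
    suffix = key_path_glob[len(_CURRENT_CONTROL_SET_PREFIX):]
    return 'HKEY_LOCAL_MACHINE\\System\\ControlSet*{0:s}'.format(suffix)
  return key_path_glob


# Example: prints HKEY_LOCAL_MACHINE\System\ControlSet*\Services\*
print(RewriteCurrentControlSet(
    'HKEY_LOCAL_MACHINE\\System\\CurrentControlSet\\Services\\*'))
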
def BuildFindSpecs(self, artifact_filter_names, environment_variables=None):
  """Builds find specifications from artifact definitions.

  Args:
    artifact_filter_names (list[str]): names of artifact definitions that are
        used for filtering file system and Windows Registry key paths.
    environment_variables (Optional[list[EnvironmentVariableArtifact]]):
        environment variables.
  """
  find_specs = []
  for name in artifact_filter_names:
    definition = self._artifacts_registry.GetDefinitionByName(name)
    if not definition:
      logger.debug('undefined artifact definition: {0:s}'.format(name))
      continue

    logger.debug('building find spec from artifact definition: {0:s}'.format(
        name))
    artifact_find_specs = self._BuildFindSpecsFromArtifact(
        definition, environment_variables)
    find_specs.extend(artifact_find_specs)

  for find_spec in find_specs:
    if isinstance(find_spec, file_system_searcher.FindSpec):
      self.included_file_system_find_specs.append(find_spec)

    elif isinstance(find_spec, registry_searcher.FindSpec):
      self.registry_find_specs.append(find_spec)

    else:
      logger.warning('Unsupported find specification type: {0!s}'.format(
          type(find_spec)))

def _BuildFindSpecsFromRegistrySourceKey(self, key_path):
  """Build find specifications from a Windows Registry source type.

  Args:
    key_path (str): Windows Registry key path defined by the source.

  Returns:
    list[dfwinreg.FindSpec]: find specifications for the Windows Registry
        source type.
  """
  find_specs = []
  for key_path_glob in path_helper.PathHelper.ExpandRecursiveGlobs(
      key_path, '\\'):
    logger.debug('building find spec from key path glob: {0:s}'.format(
        key_path_glob))

    key_path_glob_upper = key_path_glob.upper()
    if key_path_glob_upper.startswith('HKEY_USERS\\%%USERS.SID%%'):
      key_path_glob = 'HKEY_CURRENT_USER{0:s}'.format(key_path_glob[26:])

    find_spec = registry_searcher.FindSpec(key_path_glob=key_path_glob)
    find_specs.append(find_spec)

  return find_specs

def BuildFindSpecs(self, artifact_filter_names, environment_variables=None):
  """Builds find specifications from artifact definitions.

  Args:
    artifact_filter_names (list[str]): names of artifact definitions that are
        used for filtering file system and Windows Registry key paths.
    environment_variables (Optional[list[EnvironmentVariableArtifact]]):
        environment variables.
  """
  find_specs = []
  for name in artifact_filter_names:
    definition = self._artifacts_registry.GetDefinitionByName(name)
    if not definition:
      logger.debug('undefined artifact definition: {0:s}'.format(name))
      continue

    logger.debug('building find spec from artifact definition: {0:s}'.format(
        name))
    artifact_find_specs = self._BuildFindSpecsFromArtifact(
        definition, environment_variables)
    find_specs.extend(artifact_find_specs)

  for find_spec in find_specs:
    if isinstance(find_spec, file_system_searcher.FindSpec):
      self.included_file_system_find_specs.append(find_spec)

    elif isinstance(find_spec, registry_searcher.FindSpec):
      self.registry_find_specs.append(find_spec)

    else:
      # Use !s conversion since a type object does not support the :s format
      # specifier.
      logger.warning('Unsupported find specification type: {0!s}'.format(
          type(find_spec)))

def _ParseFileEntryWithParser(
    self, parser_mediator, parser, file_entry, file_object=None):
  """Parses a file entry with a specific parser.

  Args:
    parser_mediator (ParserMediator): parser mediator.
    parser (BaseParser): parser.
    file_entry (dfvfs.FileEntry): file entry.
    file_object (Optional[file]): file-like object to parse. If not set the
        parser will use the parser mediator to open the file entry's default
        data stream as a file-like object.

  Returns:
    int: parse result, which is _PARSE_RESULT_FAILURE if the file entry could
        not be parsed, _PARSE_RESULT_SUCCESS if the file entry was
        successfully parsed or _PARSE_RESULT_UNSUPPORTED when
        UnableToParseFile was raised.

  Raises:
    TypeError: if parser object is not a supported parser type.
  """
  if not isinstance(parser, (
      parsers_interface.FileEntryParser, parsers_interface.FileObjectParser)):
    raise TypeError('Unsupported parser object type.')

  parser_mediator.ClearParserChain()

  parser_mediator.SampleStartTiming(parser.NAME)

  try:
    if isinstance(parser, parsers_interface.FileEntryParser):
      parser.Parse(parser_mediator)

    elif isinstance(parser, parsers_interface.FileObjectParser):
      parser.Parse(parser_mediator, file_object)

    result = self._PARSE_RESULT_SUCCESS

  # We catch IOError so we can determine the parser that generated the error.
  except (IOError, dfvfs_errors.BackEndError) as exception:
    display_name = parser_mediator.GetDisplayName(file_entry)
    logger.warning(
        '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
            parser.NAME, display_name, exception))
    result = self._PARSE_RESULT_FAILURE

  except errors.UnableToParseFile as exception:
    display_name = parser_mediator.GetDisplayName(file_entry)
    logger.debug(
        '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
            parser.NAME, display_name, exception))
    result = self._PARSE_RESULT_UNSUPPORTED

  finally:
    parser_mediator.SampleStopTiming(parser.NAME)
    parser_mediator.SampleMemoryUsage(parser.NAME)

  return result

def _ProcessCompressedStreamTypes(self, mediator, path_spec, type_indicators):
  """Processes a data stream containing compressed stream types such as: bz2.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    path_spec (dfvfs.PathSpec): path specification.
    type_indicators (list[str]): dfVFS compressed stream type indicators
        found in the data stream.
  """
  number_of_type_indicators = len(type_indicators)
  if number_of_type_indicators == 0:
    return

  self.processing_status = definitions.STATUS_INDICATOR_COLLECTING

  if number_of_type_indicators > 1:
    display_name = mediator.GetDisplayName()
    logger.debug((
        'Found multiple format type indicators: {0:s} for '
        'compressed stream file: {1:s}').format(
            type_indicators, display_name))

  for type_indicator in type_indicators:
    if type_indicator == dfvfs_definitions.TYPE_INDICATOR_BZIP2:
      compressed_stream_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
          compression_method=dfvfs_definitions.COMPRESSION_METHOD_BZIP2,
          parent=path_spec)

    elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_GZIP:
      compressed_stream_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_GZIP, parent=path_spec)

    elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_XZ:
      compressed_stream_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
          compression_method=dfvfs_definitions.COMPRESSION_METHOD_XZ,
          parent=path_spec)

    else:
      compressed_stream_path_spec = None

      warning_message = (
          'unsupported compressed stream format type indicators: '
          '{0:s}').format(type_indicator)
      mediator.ProduceExtractionWarning(warning_message, path_spec=path_spec)

    if compressed_stream_path_spec:
      event_source = event_sources.FileEntryEventSource(
          path_spec=compressed_stream_path_spec)
      event_source.file_entry_type = dfvfs_definitions.FILE_ENTRY_TYPE_FILE
      mediator.ProduceEventSource(event_source)

      self.last_activity_timestamp = time.time()

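# A minimal sketch (not plaso code) of how a compressed stream path
# specification chains on top of another dfVFS path specification, as done
# above. The local file name is hypothetical; the dfVFS factory, definitions
# and resolver modules are standard dfVFS APIs.
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import resolver

os_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_OS, location='/tmp/messages.gz')

# GZIP has its own path specification type; bzip2 and xz use the generic
# compressed stream type with a compression method, as in the code above.
gzip_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_GZIP, parent=os_path_spec)

file_entry = resolver.Resolver.OpenFileEntry(gzip_path_spec)
print(file_entry.name)
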
def BuildFilterFindSpecs(
    cls, artifact_definitions_path, custom_artifacts_path,
    knowledge_base_object, artifact_filter_names=None, filter_file_path=None):
  """Builds find specifications from artifacts or filter file if available.

  Args:
    artifact_definitions_path (str): path to artifact definitions file.
    custom_artifacts_path (str): path to custom artifact definitions file.
    knowledge_base_object (KnowledgeBase): knowledge base.
    artifact_filter_names (Optional[list[str]]): names of artifact
        definitions that are used for filtering file system and Windows
        Registry key paths.
    filter_file_path (Optional[str]): path of filter file.

  Returns:
    list[dfvfs.FindSpec]: find specifications for the file source type.

  Raises:
    InvalidFilter: if no valid FindSpecs are built.
  """
  environment_variables = knowledge_base_object.GetEnvironmentVariables()
  find_specs = None
  if artifact_filter_names:
    logger.debug(
        'building find specification based on artifacts: {0:s}'.format(
            ', '.join(artifact_filter_names)))

    artifacts_registry_object = cls.BuildArtifactsRegistry(
        artifact_definitions_path, custom_artifacts_path)
    artifact_filters_object = (
        artifact_filters.ArtifactDefinitionsFilterHelper(
            artifacts_registry_object, artifact_filter_names,
            knowledge_base_object))
    artifact_filters_object.BuildFindSpecs(
        environment_variables=environment_variables)
    find_specs = knowledge_base_object.GetValue(
        artifact_filters_object.KNOWLEDGE_BASE_VALUE)[
            artifact_types.TYPE_INDICATOR_FILE]

  elif filter_file_path:
    logger.debug(
        'building find specification based on filter file: {0:s}'.format(
            filter_file_path))

    filter_file_object = filter_file.FilterFile(filter_file_path)
    find_specs = filter_file_object.BuildFindSpecs(
        environment_variables=environment_variables)

  if (artifact_filter_names or filter_file_path) and not find_specs:
    raise errors.InvalidFilter(
        'Error processing filters, no valid specifications built.')

  return find_specs

def _ParseFileEntryWithParsers(
    self, parser_mediator, parser_names, file_entry, file_object=None):
  """Parses a file entry with specific parsers.

  Args:
    parser_mediator (ParserMediator): parser mediator.
    parser_names (list[str]): names of parsers.
    file_entry (dfvfs.FileEntry): file entry.
    file_object (Optional[file]): file-like object to parse. If not set the
        parser will use the parser mediator to open the file entry's default
        data stream as a file-like object.

  Returns:
    int: parse result, which is _PARSE_RESULT_FAILURE if the file entry could
        not be parsed, _PARSE_RESULT_SUCCESS if the file entry was
        successfully parsed or _PARSE_RESULT_UNSUPPORTED when
        UnableToParseFile was raised or no parser names were provided.

  Raises:
    RuntimeError: if the parser object is missing.
  """
  parse_results = self._PARSE_RESULT_UNSUPPORTED
  for parser_name in parser_names:
    parser = self._parsers.get(parser_name, None)
    if not parser:
      raise RuntimeError(
          'Parser object missing for parser: {0:s}'.format(parser_name))

    if parser.FILTERS:
      if not self._CheckParserCanProcessFileEntry(parser, file_entry):
        parse_results = self._PARSE_RESULT_SUCCESS
        continue

    display_name = parser_mediator.GetDisplayName(file_entry)
    logger.debug((
        '[ParseFileEntryWithParsers] parsing file: {0:s} with parser: '
        '{1:s}').format(display_name, parser_name))

    parse_result = self._ParseFileEntryWithParser(
        parser_mediator, parser, file_entry, file_object=file_object)
    if parse_result == self._PARSE_RESULT_FAILURE:
      return self._PARSE_RESULT_FAILURE

    if parse_result == self._PARSE_RESULT_SUCCESS:
      parse_results = self._PARSE_RESULT_SUCCESS

  return parse_results

def PopItem(self):
  """Pops an item off the queue.

  If no ZeroMQ socket has been created, one will be created the first time
  this method is called.

  Returns:
    object: item from the queue.

  Raises:
    KeyboardInterrupt: if the process is sent a KeyboardInterrupt while
        popping an item.
    QueueEmpty: if the queue is empty, and no item could be popped within
        the queue timeout.
    RuntimeError: if terminate event is missing.
    zmq.error.ZMQError: if an error occurs in ZeroMQ.
  """
  if not self._zmq_socket:
    self._CreateZMQSocket()

  if not self._terminate_event:
    raise RuntimeError('Missing terminate event.')

  logger.debug('Pop on {0:s} queue, port {1:d}'.format(self.name, self.port))

  last_retry_time = time.time() + self.timeout_seconds
  while not self._terminate_event.is_set():
    try:
      self._zmq_socket.send_pyobj(None)
      break

    except zmq.error.Again:
      # The existing socket is now out of sync, so we need to open a new one.
      self._CreateZMQSocket()
      if time.time() > last_retry_time:
        logger.warning('{0:s} timeout requesting item'.format(self.name))
        raise errors.QueueEmpty

      continue

  while not self._terminate_event.is_set():
    try:
      return self._ReceiveItemOnActivity(self._zmq_socket)

    except errors.QueueEmpty:
      continue

    except KeyboardInterrupt:
      self.Close(abort=True)
      raise

def _ProcessCompressedStreamTypes(self, mediator, path_spec, type_indicators):
  """Processes a data stream containing compressed stream types such as: bz2.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    path_spec (dfvfs.PathSpec): path specification.
    type_indicators (list[str]): dfVFS compressed stream type indicators
        found in the data stream.
  """
  number_of_type_indicators = len(type_indicators)
  if number_of_type_indicators == 0:
    return

  self.processing_status = definitions.PROCESSING_STATUS_COLLECTING

  if number_of_type_indicators > 1:
    display_name = mediator.GetDisplayName()
    logger.debug((
        'Found multiple format type indicators: {0:s} for '
        'compressed stream file: {1:s}').format(
            type_indicators, display_name))

  for type_indicator in type_indicators:
    if type_indicator == dfvfs_definitions.TYPE_INDICATOR_BZIP2:
      compressed_stream_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
          compression_method=dfvfs_definitions.COMPRESSION_METHOD_BZIP2,
          parent=path_spec)

    elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_GZIP:
      compressed_stream_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_GZIP, parent=path_spec)

    else:
      compressed_stream_path_spec = None

      error_message = (
          'unsupported compressed stream format type indicators: '
          '{0:s}').format(type_indicator)
      mediator.ProduceExtractionError(error_message, path_spec=path_spec)

    if compressed_stream_path_spec:
      event_source = event_sources.FileEntryEventSource(
          path_spec=compressed_stream_path_spec)
      event_source.file_entry_type = dfvfs_definitions.FILE_ENTRY_TYPE_FILE
      mediator.ProduceEventSource(event_source)

      self.last_activity_timestamp = time.time()

def _ZeroMQResponder(self, source_queue):
  """Listens for requests and replies to clients.

  Args:
    source_queue (Queue.queue): queue to use to pull items from.

  Raises:
    RuntimeError: if closed or terminate event is missing.
  """
  if not self._closed_event or not self._terminate_event:
    raise RuntimeError('Missing closed or terminate event.')

  logger.debug('{0:s} responder thread started'.format(self.name))

  item = None
  while not self._terminate_event.is_set():
    if not item:
      try:
        if self._closed_event.is_set():
          item = source_queue.get_nowait()
        else:
          item = source_queue.get(True, self._buffer_timeout_seconds)

      except Queue.Empty:
        if self._closed_event.is_set():
          break

        continue

    try:
      # We need to receive a request before we can reply with the item.
      self._ReceiveItemOnActivity(self._zmq_socket)

    except errors.QueueEmpty:
      if self._closed_event.is_set() and self._queue.empty():
        break

      continue

    sent_successfully = self._SendItem(self._zmq_socket, item)
    item = None
    if not sent_successfully:
      logger.error('Queue {0:s} unable to send item.'.format(self.name))
      break

  logger.info('Queue {0:s} responder exiting.'.format(self.name))
  self._zmq_socket.close(self._linger_seconds)

def _AnalyzeDataStream(
    self, file_entry, data_stream_name, display_name, event_data_stream):
  """Analyzes the contents of a specific data stream of a file entry.

  The results of the analyzers are set in the event data stream as
  attributes that are added to produced event objects. Note that some file
  systems allow directories to have data streams, such as NTFS.

  Args:
    file_entry (dfvfs.FileEntry): file entry whose data stream is to be
        analyzed.
    data_stream_name (str): name of the data stream.
    display_name (str): human readable representation of the file entry
        currently being analyzed.
    event_data_stream (EventDataStream): event data stream attribute
        container.

  Raises:
    RuntimeError: if the file-like object cannot be retrieved from the file
        entry.
  """
  logger.debug(
      '[AnalyzeDataStream] analyzing file: {0:s}'.format(display_name))

  if self._processing_profiler:
    self._processing_profiler.StartTiming('analyzing')

  try:
    file_object = file_entry.GetFileObject(data_stream_name=data_stream_name)
    if not file_object:
      raise RuntimeError((
          'Unable to retrieve file-like object for file entry: '
          '{0:s}.').format(display_name))

    try:
      self._AnalyzeFileObject(file_object, display_name, event_data_stream)
    finally:
      file_object.close()

  finally:
    if self._processing_profiler:
      self._processing_profiler.StopTiming('analyzing')

  logger.debug(
      '[AnalyzeDataStream] completed analyzing file: {0:s}'.format(
          display_name))

def _ParseFileEntryWithParsers(
    self, parser_mediator, parser_names, file_entry, file_object=None):
  """Parses a file entry with specific parsers.

  Args:
    parser_mediator (ParserMediator): parser mediator.
    parser_names (list[str]): names of parsers.
    file_entry (dfvfs.FileEntry): file entry.
    file_object (Optional[file]): file-like object to parse. If not set the
        parser will use the parser mediator to open the file entry's default
        data stream as a file-like object.

  Returns:
    int: parse result, which is _PARSE_RESULT_FAILURE if the file entry could
        not be parsed, _PARSE_RESULT_SUCCESS if the file entry was
        successfully parsed or _PARSE_RESULT_UNSUPPORTED when
        UnableToParseFile was raised or no parser names were provided.

  Raises:
    RuntimeError: if the parser object is missing.
  """
  parse_results = self._PARSE_RESULT_UNSUPPORTED
  for parser_name in parser_names:
    parser = self._parsers.get(parser_name, None)
    if not parser:
      raise RuntimeError(
          'Parser object missing for parser: {0:s}'.format(parser_name))

    if parser.FILTERS:
      if not self._CheckParserCanProcessFileEntry(parser, file_entry):
        parse_results = self._PARSE_RESULT_SUCCESS
        continue

    display_name = parser_mediator.GetDisplayName(file_entry)
    logger.debug((
        '[ParseFileEntryWithParsers] parsing file: {0:s} with parser: '
        '{1:s}').format(display_name, parser_name))

    parse_result = self._ParseFileEntryWithParser(
        parser_mediator, parser, file_entry, file_object=file_object)
    if parse_result == self._PARSE_RESULT_FAILURE:
      return self._PARSE_RESULT_FAILURE

    elif parse_result == self._PARSE_RESULT_SUCCESS:
      parse_results = self._PARSE_RESULT_SUCCESS

  return parse_results

def _AnalyzeDataStream(self, mediator, file_entry, data_stream_name):
  """Analyzes the contents of a specific data stream of a file entry.

  The results of the analyzers are set in the parser mediator as attributes
  that are added to produced event objects. Note that some file systems
  allow directories to have data streams, e.g. NTFS.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    file_entry (dfvfs.FileEntry): file entry whose data stream is to be
        analyzed.
    data_stream_name (str): name of the data stream.

  Raises:
    RuntimeError: if the file-like object cannot be retrieved from the file
        entry.
  """
  display_name = mediator.GetDisplayName()
  logger.debug(
      '[AnalyzeDataStream] analyzing file: {0:s}'.format(display_name))

  if self._processing_profiler:
    self._processing_profiler.StartTiming('analyzing')

  try:
    file_object = file_entry.GetFileObject(data_stream_name=data_stream_name)
    if not file_object:
      raise RuntimeError((
          'Unable to retrieve file-like object for file entry: '
          '{0:s}.').format(display_name))

    try:
      self._AnalyzeFileObject(mediator, file_object)
    finally:
      file_object.close()

  finally:
    if self._processing_profiler:
      self._processing_profiler.StopTiming('analyzing')

  logger.debug(
      '[AnalyzeDataStream] completed analyzing file: {0:s}'.format(
          display_name))

def Close(self, abort=False):
  """Closes the queue.

  Args:
    abort (Optional[bool]): whether the Close is the result of an abort
        condition. If True, queue contents may be lost.

  Raises:
    QueueAlreadyClosed: if the queue is not started, or has already been
        closed.
    RuntimeError: if closed or terminate event is missing.
  """
  if not self._closed_event or not self._terminate_event:
    raise RuntimeError('Missing closed or terminate event.')

  if not abort and self._closed_event.is_set():
    raise errors.QueueAlreadyClosed()

  self._closed_event.set()

  if abort:
    if not self._closed_event.is_set():
      logger.warning(
          '{0:s} queue aborting. Contents may be lost.'.format(self.name))

    # We can't determine whether there might be an operation being performed
    # on the socket in a separate method or thread, so we'll signal that any
    # such operation should cease.
    self._terminate_event.set()

    self._linger_seconds = 0

    if self._zmq_thread:
      logger.debug('[{0:s}] Waiting for thread to exit.'.format(self.name))
      self._zmq_thread.join(timeout=self.timeout_seconds)
      if self._zmq_thread.is_alive():
        logger.error((
            '{0:s} ZMQ responder thread did not exit within timeout').format(
                self.name))

  else:
    logger.debug(
        '{0:s} queue closing, will linger for up to {1:d} seconds'.format(
            self.name, self._linger_seconds))

def _AnalyzeDataStream(self, mediator, file_entry, data_stream_name):
  """Analyzes the contents of a specific data stream of a file entry.

  The results of the analyzers are set in the parser mediator as attributes
  that are added to produced event objects. Note that some file systems
  allow directories to have data streams, e.g. NTFS.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    file_entry (dfvfs.FileEntry): file entry whose data stream is to be
        analyzed.
    data_stream_name (str): name of the data stream.

  Raises:
    RuntimeError: if the file-like object cannot be retrieved from the file
        entry.
  """
  display_name = mediator.GetDisplayName()
  logger.debug(
      '[AnalyzeDataStream] analyzing file: {0:s}'.format(display_name))

  if self._processing_profiler:
    self._processing_profiler.StartTiming('analyzing')

  try:
    file_object = file_entry.GetFileObject(data_stream_name=data_stream_name)
    if not file_object:
      raise RuntimeError((
          'Unable to retrieve file-like object for file entry: '
          '{0:s}.').format(display_name))

    try:
      self._AnalyzeFileObject(mediator, file_object)
    finally:
      file_object.close()

  finally:
    if self._processing_profiler:
      self._processing_profiler.StopTiming('analyzing')

  logger.debug(
      '[AnalyzeDataStream] completed analyzing file: {0:s}'.format(
          display_name))

def _InitializeParserObjects(self, parser_filter_expression=None):
  """Initializes the parser objects.

  Args:
    parser_filter_expression (Optional[str]): the parser filter expression,
        None represents all parsers and plugins.

        The parser filter expression is a comma separated value string that
        denotes a list of parser names to include and/or exclude. Each entry
        can have the value of:

        * An exact match of a list of parsers, or a preset (see
          plaso/parsers/presets.py for a full list of available presets).
        * A name of a single parser (case insensitive), e.g. msiecf.
        * A glob name for a single parser, e.g. '*msie*' (case insensitive).
  """
  self._specification_store, non_sigscan_parser_names = (
      parsers_manager.ParsersManager.GetSpecificationStore(
          parser_filter_expression=parser_filter_expression))

  self._non_sigscan_parser_names = []
  for parser_name in non_sigscan_parser_names:
    if parser_name in ('filestat', 'usnjrnl'):
      continue
    self._non_sigscan_parser_names.append(parser_name)

  self._file_scanner = parsers_manager.ParsersManager.GetScanner(
      self._specification_store)

  self._parsers = parsers_manager.ParsersManager.GetParserObjects(
      parser_filter_expression=parser_filter_expression)

  active_parser_names = ', '.join(sorted(self._parsers.keys()))
  logger.debug('Active parsers: {0:s}'.format(active_parser_names))

  self._filestat_parser = self._parsers.get('filestat', None)
  if 'filestat' in self._parsers:
    del self._parsers['filestat']

  self._mft_parser = self._parsers.get('mft', None)

  self._usnjrnl_parser = self._parsers.get('usnjrnl', None)
  if 'usnjrnl' in self._parsers:
    del self._parsers['usnjrnl']

def PushItem(self, item, block=True):
  """Push an item on to the queue.

  If no ZeroMQ socket has been created, one will be created the first time
  this method is called.

  Args:
    item (object): item to push on the queue.
    block (Optional[bool]): whether the push should be performed in blocking
        or non-blocking mode.

  Raises:
    KeyboardInterrupt: if the process is sent a KeyboardInterrupt while
        pushing an item.
    QueueFull: if it was not possible to push the item to the queue within
        the timeout.
    RuntimeError: if terminate event is missing.
    zmq.error.ZMQError: if a ZeroMQ specific error occurs.
  """
  if not self._zmq_socket:
    self._CreateZMQSocket()

  if not self._terminate_event:
    raise RuntimeError('Missing terminate event.')

  logger.debug('Push on {0:s} queue, port {1:d}'.format(self.name, self.port))

  last_retry_timestamp = time.time() + self.timeout_seconds
  while not self._terminate_event.is_set():
    try:
      send_successful = self._SendItem(self._zmq_socket, item, block)
      if send_successful:
        break

      if time.time() > last_retry_timestamp:
        logger.error('{0:s} unable to push item, raising.'.format(self.name))
        raise errors.QueueFull

    except KeyboardInterrupt:
      self.Close(abort=True)
      raise

def BuildFindSpecs(self, environment_variables=None):
  """Builds find specifications from artifact definitions.

  The resulting find specifications are set in the knowledge base.

  Args:
    environment_variables (Optional[list[EnvironmentVariableArtifact]]):
        environment variables.
  """
  find_specs = []
  for name in self._artifacts:
    definition = self._artifacts_registry.GetDefinitionByName(name)
    if not definition:
      logger.debug('undefined artifact definition: {0:s}'.format(name))
      continue

    logger.debug('building find spec from artifact definition: {0:s}'.format(
        name))
    artifact_find_specs = self._BuildFindSpecsFromArtifact(
        definition, environment_variables)
    find_specs.extend(artifact_find_specs)

  find_specs_per_source_type = defaultdict(list)
  for find_spec in find_specs:
    if isinstance(find_spec, registry_searcher.FindSpec):
      artifact_list = find_specs_per_source_type[
          artifact_types.TYPE_INDICATOR_WINDOWS_REGISTRY_KEY]
      artifact_list.append(find_spec)
      continue

    if isinstance(find_spec, file_system_searcher.FindSpec):
      artifact_list = find_specs_per_source_type[
          artifact_types.TYPE_INDICATOR_FILE]
      artifact_list.append(find_spec)
      continue

    # Use !s conversion since a type object does not support the :s format
    # specifier.
    logger.warning('Unknown find specification type: {0!s}'.format(
        type(find_spec)))

  self._knowledge_base.SetValue(
      self.KNOWLEDGE_BASE_VALUE, find_specs_per_source_type)

def _InitializeParserObjects(self, parser_filter_expression=None):
  """Initializes the parser objects.

  Args:
    parser_filter_expression (Optional[str]): the parser filter expression,
        None represents all parsers and plugins.

        The parser filter expression is a comma separated value string that
        denotes a list of parser names to include and/or exclude. Each entry
        can have the value of:

        * An exact match of a list of parsers, or a preset (see
          plaso/parsers/presets.py for a full list of available presets).
        * A name of a single parser (case insensitive), e.g. msiecf.
        * A glob name for a single parser, e.g. '*msie*' (case insensitive).
  """
  self._formats_with_signatures, non_sigscan_parser_names = (
      parsers_manager.ParsersManager.GetFormatsWithSignatures(
          parser_filter_expression=parser_filter_expression))

  self._non_sigscan_parser_names = []
  for parser_name in non_sigscan_parser_names:
    if parser_name not in ('filestat', 'usnjrnl'):
      self._non_sigscan_parser_names.append(parser_name)

  self._file_scanner = parsers_manager.ParsersManager.CreateSignatureScanner(
      self._formats_with_signatures)

  self._parsers = parsers_manager.ParsersManager.GetParserObjects(
      parser_filter_expression=parser_filter_expression)

  active_parser_names = ', '.join(sorted(self._parsers.keys()))
  logger.debug('Active parsers: {0:s}'.format(active_parser_names))

  self._filestat_parser = self._parsers.get('filestat', None)
  if 'filestat' in self._parsers:
    del self._parsers['filestat']

  self._mft_parser = self._parsers.get('mft', None)

  self._usnjrnl_parser = self._parsers.get('usnjrnl', None)
  if 'usnjrnl' in self._parsers:
    del self._parsers['usnjrnl']

def _InitializeParserObjects(self, parser_filter_expression=None):
  """Initializes the parser objects.

  Args:
    parser_filter_expression (Optional[str]): parser filter expression,
        where None represents all parsers and plugins.

        The parser filter expression is a comma separated value string that
        denotes a list of parser names to include and/or exclude. Each entry
        can have the value of:

        * A name of a single parser (case insensitive), such as msiecf.
        * A glob name for a single parser, such as '*msie*' (case
          insensitive).
  """
  self._formats_with_signatures, non_sigscan_parser_names = (
      parsers_manager.ParsersManager.GetFormatsWithSignatures(
          parser_filter_expression=parser_filter_expression))

  self._non_sigscan_parser_names = []
  for parser_name in non_sigscan_parser_names:
    if parser_name not in ('filestat', 'usnjrnl'):
      self._non_sigscan_parser_names.append(parser_name)

  self._file_scanner = parsers_manager.ParsersManager.CreateSignatureScanner(
      self._formats_with_signatures)

  self._parsers = parsers_manager.ParsersManager.GetParserObjects(
      parser_filter_expression=parser_filter_expression)

  active_parser_names = ', '.join(sorted(self._parsers.keys()))
  logger.debug('Active parsers: {0:s}'.format(active_parser_names))

  self._filestat_parser = self._parsers.get('filestat', None)
  if 'filestat' in self._parsers:
    del self._parsers['filestat']

  self._mft_parser = self._parsers.get('mft', None)

  self._usnjrnl_parser = self._parsers.get('usnjrnl', None)
  if 'usnjrnl' in self._parsers:
    del self._parsers['usnjrnl']

def _ParserFileEntryWithParsers(
    self, parser_mediator, parser_names, file_entry, file_object=None):
  """Parses a file entry with specific parsers.

  Args:
    parser_mediator (ParserMediator): parser mediator.
    parser_names (list[str]): names of parsers.
    file_entry (dfvfs.FileEntry): file entry.
    file_object (Optional[file]): file-like object to parse. If not set the
        parser will use the parser mediator to open the file entry's default
        data stream as a file-like object.

  Returns:
    bool: False if the file could not be parsed and UnableToParseFile was
        raised.

  Raises:
    RuntimeError: if the parser object is missing.
  """
  for parser_name in parser_names:
    parser = self._parsers.get(parser_name, None)
    if not parser:
      raise RuntimeError(
          'Parser object missing for parser: {0:s}'.format(parser_name))

    if parser.FILTERS:
      if not self._CheckParserCanProcessFileEntry(parser, file_entry):
        continue

    display_name = parser_mediator.GetDisplayName(file_entry)
    logger.debug((
        '[ParseDataStream] parsing file: {0:s} with parser: '
        '{1:s}').format(display_name, parser_name))

    self._ParseFileEntryWithParser(
        parser_mediator, parser, file_entry, file_object=file_object)

def _CreateZMQSocket(self):
  """Creates a ZeroMQ socket."""
  logger.debug('Creating socket for {0:s}'.format(self.name))

  if not self._zmq_context:
    self._zmq_context = zmq.Context()

  # The terminate and close threading events need to be created when the
  # socket is opened. Threading events are unpickleable objects and cannot
  # be passed in multiprocessing on Windows.
  if not self._terminate_event:
    self._terminate_event = threading.Event()

  if not self._closed_event:
    self._closed_event = threading.Event()

  if self._zmq_socket:
    logger.debug('Closing old socket for {0:s}'.format(self.name))
    self._zmq_socket.close()
    self._zmq_socket = None

  self._zmq_socket = self._zmq_context.socket(self._SOCKET_TYPE)
  self._SetSocketTimeouts()
  self._SetSocketHighWaterMark()

  if self.port:
    address = '{0:s}:{1:d}'.format(self._SOCKET_ADDRESS, self.port)
    if self.SOCKET_CONNECTION_TYPE == self.SOCKET_CONNECTION_CONNECT:
      self._zmq_socket.connect(address)
      logger.debug('{0:s} connected to {1:s}'.format(self.name, address))
    else:
      self._zmq_socket.bind(address)
      logger.debug('{0:s} bound to specified port {1:s}'.format(
          self.name, address))
  else:
    self.port = self._zmq_socket.bind_to_random_port(self._SOCKET_ADDRESS)
    logger.debug('{0:s} bound to random port {1:d}'.format(
        self.name, self.port))

def _SendItem(self, zmq_socket, item, block=True):
  """Attempts to send an item to a ZeroMQ socket.

  Args:
    zmq_socket (zmq.Socket): used to send the item.
    item (object): sent on the queue. Will be pickled prior to sending.
    block (Optional[bool]): whether the send should be performed in blocking
        or non-blocking mode.

  Returns:
    bool: whether the item was sent successfully.
  """
  try:
    logger.debug('{0:s} sending item'.format(self.name))
    if block:
      zmq_socket.send_pyobj(item)
    else:
      zmq_socket.send_pyobj(item, zmq.DONTWAIT)
    logger.debug('{0:s} sent item'.format(self.name))
    return True

  except zmq.error.Again:
    logger.debug('{0:s} could not send an item'.format(self.name))

  except zmq.error.ZMQError as exception:
    if exception.errno == errno.EINTR:
      logger.error('ZMQ syscall interrupted in {0:s}.'.format(self.name))

  return False

def PopItem(self):
  """Pops an item off the queue.

  If no ZeroMQ socket has been created, one will be created the first time
  this method is called.

  Returns:
    object: item from the queue.

  Raises:
    KeyboardInterrupt: if the process is sent a KeyboardInterrupt while
        popping an item.
    QueueEmpty: if the queue is empty, and no item could be popped within
        the queue timeout.
    RuntimeError: if closed or terminate event is missing.
    zmq.error.ZMQError: if a ZeroMQ error occurs.
  """
  if not self._zmq_socket:
    self._CreateZMQSocket()

  if not self._closed_event or not self._terminate_event:
    raise RuntimeError('Missing closed or terminate event.')

  logger.debug('Pop on {0:s} queue, port {1:d}'.format(self.name, self.port))

  last_retry_timestamp = time.time() + self.timeout_seconds
  while not self._closed_event.is_set() or not self._terminate_event.is_set():
    try:
      return self._ReceiveItemOnActivity(self._zmq_socket)

    except errors.QueueEmpty:
      if time.time() > last_retry_timestamp:
        raise

    except KeyboardInterrupt:
      self.Close(abort=True)
      raise

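# A minimal, self-contained pyzmq sketch (not plaso code) of the timed retry
# pattern PushItem and PopItem implement above: a receive timeout is set on
# the socket, zmq.error.Again signals that a single attempt timed out, and
# the caller keeps retrying until an overall deadline passes. Port, timeout
# values and payload are arbitrary.
import time

import zmq

context = zmq.Context()

server = context.socket(zmq.REP)
port = server.bind_to_random_port('tcp://127.0.0.1')

client = context.socket(zmq.REQ)
client.setsockopt(zmq.RCVTIMEO, 500)  # per-attempt receive timeout (ms).
client.connect('tcp://127.0.0.1:{0:d}'.format(port))

client.send_pyobj(None)       # request an item, like PopItem does.
server.recv_pyobj()           # responder receives the request.
server.send_pyobj('an item')  # responder replies with an item.

deadline = time.time() + 5    # overall timeout, like timeout_seconds above.
while True:
  try:
    item = client.recv_pyobj()
    break
  except zmq.error.Again:     # single attempt timed out; retry until deadline.
    if time.time() > deadline:
      raise

print(item)
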
def _ProcessFileEntry(self, mediator, file_entry):
  """Processes a file entry.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    file_entry (dfvfs.FileEntry): file entry.
  """
  display_name = mediator.GetDisplayName()
  logger.debug(
      '[ProcessFileEntry] processing file entry: {0:s}'.format(display_name))

  reference_count = mediator.resolver_context.GetFileObjectReferenceCount(
      file_entry.path_spec)

  try:
    if self._IsMetadataFile(file_entry):
      self._ProcessMetadataFile(mediator, file_entry)

    else:
      file_entry_processed = False
      for data_stream in file_entry.data_streams:
        if self._abort:
          break

        if self._CanSkipDataStream(file_entry, data_stream):
          logger.debug((
              '[ProcessFileEntry] Skipping datastream {0:s} for {1:s}: '
              '{2:s}').format(
                  data_stream.name, file_entry.type_indicator, display_name))
          continue

        self._ProcessFileEntryDataStream(mediator, file_entry, data_stream)

        file_entry_processed = True

      if not file_entry_processed:
        # For when the file entry does not contain a data stream.
        self._ProcessFileEntryDataStream(mediator, file_entry, None)

  finally:
    new_reference_count = (
        mediator.resolver_context.GetFileObjectReferenceCount(
            file_entry.path_spec))
    if reference_count != new_reference_count:
      # Clean up after parsers that do not call close explicitly.
      if mediator.resolver_context.ForceRemoveFileObject(
          file_entry.path_spec):
        logger.warning(
            'File-object not explicitly closed for file: {0:s}'.format(
                display_name))

  logger.debug(
      '[ProcessFileEntry] done processing file entry: {0:s}'.format(
          display_name))

def _InitializeParserObjects(self, parser_filter_expression=None):
  """Initializes the parser objects.

  Args:
    parser_filter_expression (Optional[str]): parser filter expression,
        where None represents all parsers and plugins.

        A parser filter expression is a comma separated value string that
        denotes which parsers and plugins should be used. See
        filters/parser_filter.py for details of the expression syntax.
  """
  self._formats_with_signatures, non_sigscan_parser_names = (
      parsers_manager.ParsersManager.GetFormatsWithSignatures(
          parser_filter_expression=parser_filter_expression))

  self._non_sigscan_parser_names = []
  for parser_name in non_sigscan_parser_names:
    if parser_name not in ('filestat', 'usnjrnl'):
      self._non_sigscan_parser_names.append(parser_name)

  self._file_scanner = parsers_manager.ParsersManager.CreateSignatureScanner(
      self._formats_with_signatures)

  self._parsers = parsers_manager.ParsersManager.GetParserObjects(
      parser_filter_expression=parser_filter_expression)

  active_parser_names = ', '.join(sorted(self._parsers.keys()))
  logger.debug('Active parsers: {0:s}'.format(active_parser_names))

  self._filestat_parser = self._parsers.get('filestat', None)
  if 'filestat' in self._parsers:
    del self._parsers['filestat']

  self._mft_parser = self._parsers.get('mft', None)

  self._usnjrnl_parser = self._parsers.get('usnjrnl', None)
  if 'usnjrnl' in self._parsers:
    del self._parsers['usnjrnl']

def _ExtractMetadataFromFileEntry(self, mediator, file_entry, data_stream):
  """Extracts metadata from a file entry.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    file_entry (dfvfs.FileEntry): file entry to extract metadata from.
    data_stream (dfvfs.DataStream): data stream or None if the file entry
        has no data stream.
  """
  # Do not extract metadata from the root file entry when it is virtual.
  if file_entry.IsRoot() and file_entry.type_indicator not in (
      self._TYPES_WITH_ROOT_METADATA):
    return

  # We always want to extract the file entry metadata but we only want
  # to parse it once per file entry, so we only use it if we are
  # processing the default data stream of regular files.
  if data_stream and not data_stream.IsDefault():
    return

  display_name = mediator.GetDisplayName()
  logger.debug(
      '[ExtractMetadataFromFileEntry] processing file entry: {0:s}'.format(
          display_name))

  self.processing_status = definitions.PROCESSING_STATUS_EXTRACTING

  if self._processing_profiler:
    self._processing_profiler.StartTiming('extracting')

  self._event_extractor.ParseFileEntryMetadata(mediator, file_entry)

  if self._processing_profiler:
    self._processing_profiler.StopTiming('extracting')

  self.processing_status = definitions.PROCESSING_STATUS_RUNNING

def _ExtractMetadataFromFileEntry(self, mediator, file_entry, data_stream):
  """Extracts metadata from a file entry.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    file_entry (dfvfs.FileEntry): file entry to extract metadata from.
    data_stream (dfvfs.DataStream): data stream or None if the file entry
        has no data stream.
  """
  # Do not extract metadata from the root file entry when it is virtual.
  if file_entry.IsRoot() and file_entry.type_indicator not in (
      self._TYPES_WITH_ROOT_METADATA):
    return

  # We always want to extract the file entry metadata but we only want
  # to parse it once per file entry, so we only use it if we are
  # processing the default data stream of regular files.
  if data_stream and not data_stream.IsDefault():
    return

  display_name = mediator.GetDisplayName()
  logger.debug(
      '[ExtractMetadataFromFileEntry] processing file entry: {0:s}'.format(
          display_name))

  self.processing_status = definitions.PROCESSING_STATUS_EXTRACTING

  if self._processing_profiler:
    self._processing_profiler.StartTiming('extracting')

  self._event_extractor.ParseFileEntryMetadata(mediator, file_entry)

  if self._processing_profiler:
    self._processing_profiler.StopTiming('extracting')

  self.processing_status = definitions.PROCESSING_STATUS_RUNNING

def _BuildFindSpecsFromRegistrySourceKey(self, key_path):
  """Build find specifications from a Windows Registry source type.

  Args:
    key_path (str): Windows Registry key path defined by the source.

  Returns:
    list[dfwinreg.FindSpec]: find specifications for the Windows Registry
        source type.
  """
  find_specs = []
  for key_path_glob in path_helper.PathHelper.ExpandRecursiveGlobs(
      key_path, '\\'):
    logger.debug('building find spec from key path glob: {0:s}'.format(
        key_path_glob))

    key_path_glob_upper = key_path_glob.upper()
    if key_path_glob_upper.startswith('HKEY_USERS\\%%USERS.SID%%'):
      key_path_glob = 'HKEY_CURRENT_USER{0:s}'.format(key_path_glob[26:])

    find_spec = registry_searcher.FindSpec(key_path_glob=key_path_glob)
    find_specs.append(find_spec)

  return find_specs

def _ProcessFileEntry(self, mediator, file_entry):
  """Processes a file entry.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    file_entry (dfvfs.FileEntry): file entry.
  """
  display_name = mediator.GetDisplayName()
  logger.debug(
      '[ProcessFileEntry] processing file entry: {0:s}'.format(display_name))

  reference_count = mediator.resolver_context.GetFileObjectReferenceCount(
      file_entry.path_spec)

  try:
    if self._IsMetadataFile(file_entry):
      self._ProcessMetadataFile(mediator, file_entry)

    else:
      file_entry_processed = False
      for data_stream in file_entry.data_streams:
        if self._abort:
          break

        if self._CanSkipDataStream(file_entry, data_stream):
          logger.debug((
              '[ProcessFileEntry] Skipping datastream {0:s} for {1:s}: '
              '{2:s}').format(
                  data_stream.name, file_entry.type_indicator, display_name))
          continue

        self._ProcessFileEntryDataStream(mediator, file_entry, data_stream)

        file_entry_processed = True

      if not file_entry_processed:
        # For when the file entry does not contain a data stream.
        self._ProcessFileEntryDataStream(mediator, file_entry, None)

  finally:
    new_reference_count = (
        mediator.resolver_context.GetFileObjectReferenceCount(
            file_entry.path_spec))
    if reference_count != new_reference_count:
      # Clean up after parsers that do not call close explicitly.
      if mediator.resolver_context.ForceRemoveFileObject(
          file_entry.path_spec):
        logger.warning(
            'File-object not explicitly closed for file: {0:s}'.format(
                display_name))

  logger.debug(
      '[ProcessFileEntry] done processing file entry: {0:s}'.format(
          display_name))

def _BuildFindSpecsFromFileSourcePath(
    self, source_path, path_separator, environment_variables, user_accounts):
  """Builds find specifications from a file source type.

  Args:
    source_path (str): file system path defined by the source.
    path_separator (str): file system path segment separator.
    environment_variables (list[str]): environment variable attributes used
        to dynamically populate environment variables in the path.
    user_accounts (list[str]): identified user accounts stored in the
        knowledge base.

  Returns:
    list[dfvfs.FindSpec]: find specifications for the file source type.
  """
  find_specs = []
  for path_glob in path_helper.PathHelper.ExpandGlobStars(
      source_path, path_separator):
    logger.debug('building find spec from path glob: {0:s}'.format(
        path_glob))

    for path in path_helper.PathHelper.ExpandUsersVariablePath(
        path_glob, path_separator, user_accounts):
      logger.debug('building find spec from path: {0:s}'.format(path))

      if '%' in path:
        path = path_helper.PathHelper.ExpandWindowsPath(
            path, environment_variables)
        logger.debug('building find spec from expanded path: {0:s}'.format(
            path))

      if not path.startswith(path_separator):
        logger.warning((
            'The path filter must be defined as an absolute path: '
            '"{0:s}"').format(path))
        continue

      try:
        find_spec = file_system_searcher.FindSpec(
            case_sensitive=False, location_glob=path,
            location_separator=path_separator)
      except ValueError as exception:
        logger.error((
            'Unable to build find specification for path: "{0:s}" with '
            'error: {1!s}').format(path, exception))
        continue

      find_specs.append(find_spec)

  return find_specs

def _ProcessFileEntry(self, mediator, file_entry):
  """Processes a file entry.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    file_entry (dfvfs.FileEntry): file entry.
  """
  display_name = mediator.GetDisplayName()
  logger.debug(
      '[ProcessFileEntry] processing file entry: {0:s}'.format(display_name))

  if self._IsMetadataFile(file_entry):
    self._ProcessMetadataFile(mediator, file_entry)

  else:
    file_entry_processed = False
    for data_stream in file_entry.data_streams:
      if self._abort:
        break

      if self._CanSkipDataStream(file_entry, data_stream):
        logger.debug((
            '[ProcessFileEntry] Skipping datastream {0:s} for {1:s}: '
            '{2:s}').format(
                data_stream.name, file_entry.type_indicator, display_name))
        continue

      self._ProcessFileEntryDataStream(mediator, file_entry, data_stream)

      file_entry_processed = True

    if not file_entry_processed:
      # For when the file entry does not contain a data stream.
      self._ProcessFileEntryDataStream(mediator, file_entry, None)

  logger.debug(
      '[ProcessFileEntry] done processing file entry: {0:s}'.format(
          display_name))

def _BuildFindSpecsFromFileSourcePath(
    self, source_path, path_separator, environment_variables, user_accounts):
  """Builds find specifications from a file source type.

  Args:
    source_path (str): file system path defined by the source.
    path_separator (str): file system path segment separator.
    environment_variables (list[str]): environment variable attributes used
        to dynamically populate environment variables in the path.
    user_accounts (list[str]): identified user accounts stored in the
        knowledge base.

  Returns:
    list[dfvfs.FindSpec]: find specifications for the file source type.
  """
  find_specs = []
  for path_glob in path_helper.PathHelper.ExpandRecursiveGlobs(
      source_path, path_separator):
    logger.debug('building find spec from path glob: {0:s}'.format(
        path_glob))

    for path in path_helper.PathHelper.ExpandUsersVariablePath(
        path_glob, path_separator, user_accounts):
      logger.debug('building find spec from path: {0:s}'.format(path))

      if '%' in path:
        path = path_helper.PathHelper.ExpandWindowsPath(
            path, environment_variables)
        logger.debug('building find spec from expanded path: {0:s}'.format(
            path))

      if not path.startswith(path_separator):
        logger.warning((
            'The path filter must be defined as an absolute path: '
            '"{0:s}"').format(path))
        continue

      # Convert the path filters into a list of path segments and
      # strip the root path segment.
      path_segments = path.split(path_separator)

      # Remove the initial root entry.
      path_segments.pop(0)

      if not path_segments[-1]:
        logger.warning(
            'Empty last path segment in path filter: "{0:s}"'.format(path))
        path_segments.pop(-1)

      try:
        find_spec = file_system_searcher.FindSpec(
            location_glob=path_segments, case_sensitive=False)
      except ValueError as exception:
        logger.error((
            'Unable to build find specification for path: "{0:s}" with '
            'error: {1!s}').format(path, exception))
        continue

      find_specs.append(find_spec)

  return find_specs

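# A minimal, self-contained sketch (not plaso code) of the path segment
# handling above: an absolute path filter is split on the path separator,
# the empty root segment is stripped, and a trailing separator leaves an
# empty last segment that is also stripped. The sample paths are
# hypothetical.
def SplitPathFilter(path, path_separator='/'):
  """Splits a path filter into segments without root or trailing entries."""
  path_segments = path.split(path_separator)

  # An absolute path starts with the separator, so the first segment is
  # empty and can be removed.
  path_segments.pop(0)

  # A trailing separator leaves an empty last segment.
  if path_segments and not path_segments[-1]:
    path_segments.pop(-1)

  return path_segments


# Example: both calls yield ['var', 'log', '*.log']
print(SplitPathFilter('/var/log/*.log'))
print(SplitPathFilter('/var/log/*.log/'))
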
def _ProcessArchiveTypes(self, mediator, path_spec, type_indicators):
  """Processes a data stream containing archive types such as: TAR or ZIP.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    path_spec (dfvfs.PathSpec): path specification.
    type_indicators (list[str]): dfVFS archive type indicators found in the
        data stream.
  """
  number_of_type_indicators = len(type_indicators)
  if number_of_type_indicators == 0:
    return

  self.processing_status = definitions.PROCESSING_STATUS_COLLECTING

  if number_of_type_indicators > 1:
    display_name = mediator.GetDisplayName()
    logger.debug((
        'Found multiple format type indicators: {0:s} for '
        'archive file: {1:s}').format(type_indicators, display_name))

  for type_indicator in type_indicators:
    if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TAR:
      archive_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_TAR, location='/',
          parent=path_spec)

    elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_ZIP:
      archive_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_ZIP, location='/',
          parent=path_spec)

    else:
      archive_path_spec = None

      error_message = (
          'unsupported archive format type indicator: {0:s}').format(
              type_indicator)
      mediator.ProduceExtractionError(error_message, path_spec=path_spec)

    if archive_path_spec:
      try:
        path_spec_generator = self._path_spec_extractor.ExtractPathSpecs(
            [archive_path_spec], resolver_context=mediator.resolver_context)

        for generated_path_spec in path_spec_generator:
          if self._abort:
            break

          event_source = event_sources.FileEntryEventSource(
              path_spec=generated_path_spec)
          event_source.file_entry_type = (
              dfvfs_definitions.FILE_ENTRY_TYPE_FILE)
          mediator.ProduceEventSource(event_source)

          self.last_activity_timestamp = time.time()

      except (IOError, errors.MaximumRecursionDepth) as exception:
        error_message = (
            'unable to process archive file with error: {0!s}').format(
                exception)
        # Use the archive path specification since a generated path
        # specification may not be available when the error is raised.
        mediator.ProduceExtractionError(
            error_message, path_spec=archive_path_spec)

def _ProcessFileEntryDataStream(self, mediator, file_entry, data_stream):
  """Processes a specific data stream of a file entry.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    file_entry (dfvfs.FileEntry): file entry containing the data stream.
    data_stream (dfvfs.DataStream): data stream or None if the file entry
        has no data stream.
  """
  display_name = mediator.GetDisplayName()
  data_stream_name = getattr(data_stream, 'name', '') or ''
  logger.debug((
      '[ProcessFileEntryDataStream] processing data stream: "{0:s}" of '
      'file entry: {1:s}').format(data_stream_name, display_name))

  mediator.ClearEventAttributes()

  if data_stream and self._analyzers:
    # Since AnalyzeDataStream generates event attributes it needs to be
    # called before producing events.
    self._AnalyzeDataStream(mediator, file_entry, data_stream.name)

  self._ExtractMetadataFromFileEntry(mediator, file_entry, data_stream)

  # Not every file entry has a data stream. In such cases we want to
  # extract the metadata only.
  if not data_stream:
    return

  # Determine if the content of the file entry should not be extracted.
  skip_content_extraction = self._CanSkipContentExtraction(file_entry)
  if skip_content_extraction:
    display_name = mediator.GetDisplayName()
    logger.debug('Skipping content extraction of: {0:s}'.format(display_name))
    self.processing_status = definitions.PROCESSING_STATUS_IDLE
    return

  path_spec = copy.deepcopy(file_entry.path_spec)
  if data_stream and not data_stream.IsDefault():
    path_spec.data_stream = data_stream.name

  archive_types = []
  compressed_stream_types = []

  if self._process_compressed_streams:
    compressed_stream_types = self._GetCompressedStreamTypes(
        mediator, path_spec)

  if not compressed_stream_types:
    archive_types = self._GetArchiveTypes(mediator, path_spec)

  if archive_types:
    if self._process_archives:
      self._ProcessArchiveTypes(mediator, path_spec, archive_types)

    if dfvfs_definitions.TYPE_INDICATOR_ZIP in archive_types:
      # ZIP files are the base of certain file formats like docx.
      self._ExtractContentFromDataStream(
          mediator, file_entry, data_stream.name)

  elif compressed_stream_types:
    self._ProcessCompressedStreamTypes(
        mediator, path_spec, compressed_stream_types)

  else:
    self._ExtractContentFromDataStream(
        mediator, file_entry, data_stream.name)

def _AnalyzeFileObject(self, mediator, file_object):
  """Processes a file-like object with analyzers.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers and
        other components, such as storage and abort signals.
    file_object (dfvfs.FileIO): file-like object to process.
  """
  maximum_read_size = max([
      analyzer_object.SIZE_LIMIT for analyzer_object in self._analyzers])

  hashers_only = True
  for analyzer_object in self._analyzers:
    if not isinstance(analyzer_object, hashing_analyzer.HashingAnalyzer):
      hashers_only = False
      break

  file_size = file_object.get_size()

  if (hashers_only and self._hasher_file_size_limit and
      file_size > self._hasher_file_size_limit):
    return

  file_object.seek(0, os.SEEK_SET)

  data = file_object.read(maximum_read_size)
  while data:
    if self._abort:
      break

    for analyzer_object in self._analyzers:
      if self._abort:
        break

      if (not analyzer_object.INCREMENTAL_ANALYZER and
          file_size > analyzer_object.SIZE_LIMIT):
        continue

      if (isinstance(analyzer_object, hashing_analyzer.HashingAnalyzer) and
          self._hasher_file_size_limit and
          file_size > self._hasher_file_size_limit):
        continue

      self.processing_status = analyzer_object.PROCESSING_STATUS_HINT

      analyzer_object.Analyze(data)

      self.last_activity_timestamp = time.time()

    data = file_object.read(maximum_read_size)

  display_name = mediator.GetDisplayName()
  for analyzer_object in self._analyzers:
    if self._abort:
      break

    for result in analyzer_object.GetResults():
      logger.debug((
          '[AnalyzeFileObject] attribute {0:s}:{1:s} calculated for '
          'file: {2:s}.').format(
              result.attribute_name, result.attribute_value, display_name))

      mediator.AddEventAttribute(
          result.attribute_name, result.attribute_value)

    analyzer_object.Reset()

  self.processing_status = definitions.PROCESSING_STATUS_RUNNING

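# A minimal, self-contained sketch (not plaso code) of the chunked analysis
# loop above, using hashlib as a stand-in for an incremental hashing
# analyzer: the file-like object is read in fixed-size chunks and each chunk
# is fed to the analyzer, so large files never have to fit in memory. The
# chunk size and input data are hypothetical.
import hashlib
import io

MAXIMUM_READ_SIZE = 1024 * 1024  # 1 MiB per read, like an analyzer SIZE_LIMIT.

file_object = io.BytesIO(b'example file content' * 1000)
sha256_context = hashlib.sha256()

data = file_object.read(MAXIMUM_READ_SIZE)
while data:
  sha256_context.update(data)  # incremental update, one chunk at a time.
  data = file_object.read(MAXIMUM_READ_SIZE)

print('sha256: {0:s}'.format(sha256_context.hexdigest()))
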
def ProcessSources(
    self, source_path_specs, storage_writer, resolver_context,
    processing_configuration, filter_find_specs=None,
    status_update_callback=None):
  """Processes the sources.

  Args:
    source_path_specs (list[dfvfs.PathSpec]): path specifications of the
        sources to process.
    storage_writer (StorageWriter): storage writer for a session storage.
    resolver_context (dfvfs.Context): resolver context.
    processing_configuration (ProcessingConfiguration): processing
        configuration.
    filter_find_specs (Optional[list[dfvfs.FindSpec]]): find specifications
        used in path specification extraction.
    status_update_callback (Optional[function]): callback function for status
        updates.

  Returns:
    ProcessingStatus: processing status.
  """
  parser_mediator = parsers_mediator.ParserMediator(
      storage_writer, self.knowledge_base,
      preferred_year=processing_configuration.preferred_year,
      resolver_context=resolver_context,
      temporary_directory=processing_configuration.temporary_directory)

  parser_mediator.SetEventExtractionConfiguration(
      processing_configuration.event_extraction)

  parser_mediator.SetInputSourceConfiguration(
      processing_configuration.input_source)

  extraction_worker = worker.EventExtractionWorker(
      parser_filter_expression=(
          processing_configuration.parser_filter_expression))

  extraction_worker.SetExtractionConfiguration(
      processing_configuration.extraction)

  self._processing_configuration = processing_configuration
  self._status_update_callback = status_update_callback

  logger.debug('Processing started.')

  parser_mediator.StartProfiling(
      self._processing_configuration.profiling, self._name,
      self._process_information)
  self._StartProfiling(self._processing_configuration.profiling)

  if self._processing_profiler:
    extraction_worker.SetProcessingProfiler(self._processing_profiler)

  if self._serializers_profiler:
    storage_writer.SetSerializersProfiler(self._serializers_profiler)

  if self._storage_profiler:
    storage_writer.SetStorageProfiler(self._storage_profiler)

  storage_writer.Open()
  storage_writer.WriteSessionStart()

  try:
    storage_writer.WritePreprocessingInformation(self.knowledge_base)

    self._ProcessSources(
        source_path_specs, extraction_worker, parser_mediator,
        storage_writer, filter_find_specs=filter_find_specs)

  finally:
    storage_writer.WriteSessionCompletion(aborted=self._abort)
    storage_writer.Close()

    if self._processing_profiler:
      extraction_worker.SetProcessingProfiler(None)

    if self._serializers_profiler:
      storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      storage_writer.SetStorageProfiler(None)

    self._StopProfiling()
    parser_mediator.StopProfiling()

  if self._abort:
    logger.debug('Processing aborted.')
    self._processing_status.aborted = True
  else:
    logger.debug('Processing completed.')

  self._processing_configuration = None
  self._status_update_callback = None

  return self._processing_status

def _ParseFileEntryWithParser(
    self, parser_mediator, parser, file_entry, file_object=None):
  """Parses a file entry with a specific parser.

  Args:
    parser_mediator (ParserMediator): parser mediator.
    parser (BaseParser): parser.
    file_entry (dfvfs.FileEntry): file entry.
    file_object (Optional[file]): file-like object to parse. If not set the
        parser will use the parser mediator to open the file entry's default
        data stream as a file-like object.

  Returns:
    int: parse result, which is _PARSE_RESULT_FAILURE if the file entry could
        not be parsed, _PARSE_RESULT_SUCCESS if the file entry was
        successfully parsed or _PARSE_RESULT_UNSUPPORTED when
        UnableToParseFile was raised.

  Raises:
    TypeError: if parser object is not a supported parser type.
  """
  if not isinstance(parser, (
      parsers_interface.FileEntryParser, parsers_interface.FileObjectParser)):
    raise TypeError('Unsupported parser object type.')

  parser_mediator.ClearParserChain()

  reference_count = (
      parser_mediator.resolver_context.GetFileObjectReferenceCount(
          file_entry.path_spec))

  parser_mediator.SampleStartTiming(parser.NAME)

  try:
    if isinstance(parser, parsers_interface.FileEntryParser):
      parser.Parse(parser_mediator)

    elif isinstance(parser, parsers_interface.FileObjectParser):
      parser.Parse(parser_mediator, file_object)

    result = self._PARSE_RESULT_SUCCESS

  # We catch IOError so we can determine the parser that generated the error.
  except (IOError, dfvfs_errors.BackEndError) as exception:
    display_name = parser_mediator.GetDisplayName(file_entry)
    logger.warning(
        '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
            parser.NAME, display_name, exception))
    result = self._PARSE_RESULT_FAILURE

  except errors.UnableToParseFile as exception:
    display_name = parser_mediator.GetDisplayName(file_entry)
    logger.debug(
        '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
            parser.NAME, display_name, exception))
    result = self._PARSE_RESULT_UNSUPPORTED

  finally:
    parser_mediator.SampleStopTiming(parser.NAME)
    parser_mediator.SampleMemoryUsage(parser.NAME)

  new_reference_count = (
      parser_mediator.resolver_context.GetFileObjectReferenceCount(
          file_entry.path_spec))
  if reference_count != new_reference_count:
    display_name = parser_mediator.GetDisplayName(file_entry)
    logger.warning((
        '[{0:s}] did not explicitly close file-object for file: '
        '{1:s}.').format(parser.NAME, display_name))

  return result

def _ParseFileEntryWithParser(
    self, parser_mediator, parser, file_entry, file_object=None):
  """Parses a file entry with a specific parser.

  Args:
    parser_mediator (ParserMediator): parser mediator.
    parser (BaseParser): parser.
    file_entry (dfvfs.FileEntry): file entry.
    file_object (Optional[file]): file-like object to parse. If not set the
        parser will use the parser mediator to open the file entry's default
        data stream as a file-like object.

  Returns:
    bool: False if the file could not be parsed and UnableToParseFile was
        raised.

  Raises:
    TypeError: if parser object is not a supported parser type.
  """
  if not isinstance(parser, (
      parsers_interface.FileEntryParser, parsers_interface.FileObjectParser)):
    raise TypeError('Unsupported parser object type.')

  parser_mediator.ClearParserChain()

  reference_count = (
      parser_mediator.resolver_context.GetFileObjectReferenceCount(
          file_entry.path_spec))

  if self._parsers_profiler:
    self._parsers_profiler.StartTiming(parser.NAME)

  result = True
  try:
    if isinstance(parser, parsers_interface.FileEntryParser):
      parser.Parse(parser_mediator)
    elif isinstance(parser, parsers_interface.FileObjectParser):
      parser.Parse(parser_mediator, file_object)

  # We catch IOError so we can determine the parser that generated the error.
  except (IOError, dfvfs_errors.BackEndError) as exception:
    display_name = parser_mediator.GetDisplayName(file_entry)
    logger.warning(
        '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
            parser.NAME, display_name, exception))

  except errors.UnableToParseFile as exception:
    display_name = parser_mediator.GetDisplayName(file_entry)
    logger.debug(
        '{0:s} unable to parse file: {1:s} with error: {2!s}'.format(
            parser.NAME, display_name, exception))
    result = False

  finally:
    if self._parsers_profiler:
      self._parsers_profiler.StopTiming(parser.NAME)

    new_reference_count = (
        parser_mediator.resolver_context.GetFileObjectReferenceCount(
            file_entry.path_spec))
    if reference_count != new_reference_count:
      display_name = parser_mediator.GetDisplayName(file_entry)
      logger.warning((
          '[{0:s}] did not explicitly close file-object for file: '
          '{1:s}.').format(parser.NAME, display_name))

  return result
def BuildCollectionFilters(
    self, artifact_definitions_path, custom_artifacts_path,
    knowledge_base_object, artifact_filter_names=None, filter_file_path=None):
  """Builds collection filters from artifacts or filter file if available.

  Args:
    artifact_definitions_path (str): path to artifact definitions file.
    custom_artifacts_path (str): path to custom artifact definitions file.
    knowledge_base_object (KnowledgeBase): knowledge base.
    artifact_filter_names (Optional[list[str]]): names of artifact
        definitions that are used for filtering file system and Windows
        Registry key paths.
    filter_file_path (Optional[str]): path of filter file.

  Raises:
    InvalidFilter: if no valid file system find specifications are built.
  """
  environment_variables = knowledge_base_object.GetEnvironmentVariables()

  if artifact_filter_names:
    logger.debug(
        'building find specification based on artifacts: {0:s}'.format(
            ', '.join(artifact_filter_names)))

    artifacts_registry_object = BaseEngine.BuildArtifactsRegistry(
        artifact_definitions_path, custom_artifacts_path)
    self.collection_filters_helper = (
        artifact_filters.ArtifactDefinitionsFiltersHelper(
            artifacts_registry_object, knowledge_base_object))
    self.collection_filters_helper.BuildFindSpecs(
        artifact_filter_names, environment_variables=environment_variables)

    # If the user selected Windows Registry artifacts we have to ensure
    # the Windows Registry files are parsed.
    if self.collection_filters_helper.registry_find_specs:
      self.collection_filters_helper.BuildFindSpecs(
          self._WINDOWS_REGISTRY_FILES_ARTIFACT_NAMES,
          environment_variables=environment_variables)

    if not self.collection_filters_helper.included_file_system_find_specs:
      raise errors.InvalidFilter(
          'No valid file system find specifications were built from '
          'artifacts.')

  elif filter_file_path:
    logger.debug(
        'building find specification based on filter file: {0:s}'.format(
            filter_file_path))

    filter_file_path_lower = filter_file_path.lower()
    if (filter_file_path_lower.endswith('.yaml') or
        filter_file_path_lower.endswith('.yml')):
      filter_file_object = yaml_filter_file.YAMLFilterFile()
    else:
      filter_file_object = filter_file.FilterFile()

    filter_file_path_filters = filter_file_object.ReadFromFile(
        filter_file_path)

    self.collection_filters_helper = (
        path_filters.PathCollectionFiltersHelper())
    self.collection_filters_helper.BuildFindSpecs(
        filter_file_path_filters, environment_variables=environment_variables)

    if (not self.collection_filters_helper.excluded_file_system_find_specs
        and not
        self.collection_filters_helper.included_file_system_find_specs):
      raise errors.InvalidFilter((
          'No valid file system find specifications were built from filter '
          'file: {0:s}.').format(filter_file_path))
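
# Illustration (not part of plaso): the extension-based selection of a
# filter file reader done in BuildCollectionFilters above, isolated into a
# small helper. The yaml_filter_file and filter_file modules are the same
# ones referenced above; treat the import paths below as assumptions.

from plaso.filters import filter_file
from plaso.filters import yaml_filter_file


def _GetFilterFileObject(filter_file_path):
  """Returns the filter file reader matching the path's extension."""
  if filter_file_path.lower().endswith(('.yaml', '.yml')):
    return yaml_filter_file.YAMLFilterFile()
  return filter_file.FilterFile()
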
def _AnalyzeFileObject(self, mediator, file_object):
  """Processes a file-like object with analyzers.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers
        and other components, such as storage and abort signals.
    file_object (dfvfs.FileIO): file-like object to process.
  """
  maximum_read_size = max([
      analyzer_object.SIZE_LIMIT for analyzer_object in self._analyzers])

  hashers_only = True
  for analyzer_object in self._analyzers:
    if not isinstance(analyzer_object, hashing_analyzer.HashingAnalyzer):
      hashers_only = False
      break

  file_size = file_object.get_size()

  if (hashers_only and self._hasher_file_size_limit and
      file_size > self._hasher_file_size_limit):
    return

  file_object.seek(0, os.SEEK_SET)

  data = file_object.read(maximum_read_size)
  while data:
    if self._abort:
      break

    for analyzer_object in self._analyzers:
      if self._abort:
        break

      if (not analyzer_object.INCREMENTAL_ANALYZER and
          file_size > analyzer_object.SIZE_LIMIT):
        continue

      if (isinstance(analyzer_object, hashing_analyzer.HashingAnalyzer) and
          self._hasher_file_size_limit and
          file_size > self._hasher_file_size_limit):
        continue

      self.processing_status = analyzer_object.PROCESSING_STATUS_HINT

      analyzer_object.Analyze(data)

      self.last_activity_timestamp = time.time()

    data = file_object.read(maximum_read_size)

  display_name = mediator.GetDisplayName()
  for analyzer_object in self._analyzers:
    if self._abort:
      break

    for result in analyzer_object.GetResults():
      logger.debug((
          '[AnalyzeFileObject] attribute {0:s}:{1:s} calculated for '
          'file: {2:s}.').format(
              result.attribute_name, result.attribute_value, display_name))

      mediator.AddEventAttribute(
          result.attribute_name, result.attribute_value)

    analyzer_object.Reset()

  self.processing_status = definitions.PROCESSING_STATUS_RUNNING
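
# Illustration (not part of plaso): the chunked read loop in
# _AnalyzeFileObject above, reduced to a standalone example that feeds an
# incremental analyzer. hashlib stands in for the hashing analyzers; the
# chunk size is an arbitrary assumption.

import hashlib


def _HashFileObjectInChunks(file_object, chunk_size=16 * 1024 * 1024):
  """Returns the SHA-256 hex digest of a file-like object, read in chunks."""
  hasher = hashlib.sha256()

  file_object.seek(0)
  data = file_object.read(chunk_size)
  while data:
    # Incremental analyzers consume one chunk at a time, so arbitrarily
    # large files never need to be held in memory at once.
    hasher.update(data)
    data = file_object.read(chunk_size)

  return hasher.hexdigest()
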
def ProcessSources(
    self, source_configurations, storage_writer, resolver_context,
    processing_configuration, force_parser=False,
    status_update_callback=None):
  """Processes the sources.

  Args:
    source_configurations (list[SourceConfigurationArtifact]): configurations
        of the sources to process.
    storage_writer (StorageWriter): storage writer for a session storage.
    resolver_context (dfvfs.Context): resolver context.
    processing_configuration (ProcessingConfiguration): processing
        configuration.
    force_parser (Optional[bool]): True if a specified parser should be
        forced to be used to extract events.
    status_update_callback (Optional[function]): callback function for status
        updates.

  Returns:
    ProcessingStatus: processing status.
  """
  parser_mediator = self._CreateParserMediator(
      self.knowledge_base, resolver_context, processing_configuration)
  parser_mediator.SetStorageWriter(storage_writer)

  self._extraction_worker = worker.EventExtractionWorker(
      force_parser=force_parser, parser_filter_expression=(
          processing_configuration.parser_filter_expression))

  self._extraction_worker.SetExtractionConfiguration(
      processing_configuration.extraction)

  self._parser_mediator = parser_mediator
  self._processing_configuration = processing_configuration
  self._resolver_context = resolver_context
  self._status_update_callback = status_update_callback
  self._storage_writer = storage_writer

  logger.debug('Processing started.')

  parser_mediator.StartProfiling(
      self._processing_configuration.profiling, self._name,
      self._process_information)
  self._StartProfiling(self._processing_configuration.profiling)

  if self._analyzers_profiler:
    self._extraction_worker.SetAnalyzersProfiler(self._analyzers_profiler)

  if self._processing_profiler:
    self._extraction_worker.SetProcessingProfiler(self._processing_profiler)

  if self._serializers_profiler:
    self._storage_writer.SetSerializersProfiler(self._serializers_profiler)

  if self._storage_profiler:
    self._storage_writer.SetStorageProfiler(self._storage_profiler)

  self._StartStatusUpdateThread()

  self._parsers_counter = collections.Counter({
      parser_count.name: parser_count
      for parser_count in self._storage_writer.GetAttributeContainers(
          'parser_count')})

  try:
    self._ProcessSources(source_configurations, parser_mediator)

  finally:
    # Stop the status update thread after close of the storage writer
    # so we include the storage sync to disk in the status updates.
    self._StopStatusUpdateThread()

    if self._analyzers_profiler:
      self._extraction_worker.SetAnalyzersProfiler(None)

    if self._processing_profiler:
      self._extraction_worker.SetProcessingProfiler(None)

    if self._serializers_profiler:
      self._storage_writer.SetSerializersProfiler(None)

    if self._storage_profiler:
      self._storage_writer.SetStorageProfiler(None)

    self._StopProfiling()
    parser_mediator.StopProfiling()

  for key, value in parser_mediator.parsers_counter.items():
    parser_count = self._parsers_counter.get(key, None)
    if parser_count:
      parser_count.number_of_events += value
      self._storage_writer.UpdateAttributeContainer(parser_count)
    else:
      parser_count = counts.ParserCount(name=key, number_of_events=value)
      self._parsers_counter[key] = parser_count
      self._storage_writer.AddAttributeContainer(parser_count)

  if self._abort:
    logger.debug('Processing aborted.')
    self._processing_status.aborted = True
  else:
    logger.debug('Processing completed.')

  # Update the status view one last time.
  self._UpdateStatus()

  self._extraction_worker = None
  self._file_system_cache = []
  self._parser_mediator = None
  self._processing_configuration = None
  self._resolver_context = None
  self._status_update_callback = None
  self._storage_writer = None

  return self._processing_status
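
# Illustration (not part of plaso): the parser counter merge performed at
# the end of ProcessSources above, reduced to plain dictionaries. Stored
# totals are incremented for known parsers and new entries are created
# otherwise. The helper name and inputs are hypothetical.

import collections


def _MergeParserCounts(stored_counts, session_counts):
  """Merges per-session parser counts into the stored totals."""
  merged = collections.Counter(stored_counts)
  for name, number_of_events in session_counts.items():
    merged[name] += number_of_events
  return merged


# Example: {'filestat': 12, 'winreg': 3} merged with {'filestat': 4}
# yields {'filestat': 16, 'winreg': 3}.
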
def _ProcessFileEntryDataStream(self, mediator, file_entry, data_stream):
  """Processes a specific data stream of a file entry.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers
        and other components, such as storage and abort signals.
    file_entry (dfvfs.FileEntry): file entry containing the data stream.
    data_stream (dfvfs.DataStream): data stream or None if the file entry
        has no data stream.
  """
  display_name = mediator.GetDisplayName()
  data_stream_name = getattr(data_stream, 'name', '') or ''
  logger.debug((
      '[ProcessFileEntryDataStream] processing data stream: "{0:s}" of '
      'file entry: {1:s}').format(data_stream_name, display_name))

  mediator.ClearEventAttributes()

  if data_stream and self._analyzers:
    # Since AnalyzeDataStream generates event attributes it needs to be
    # called before producing events.
    self._AnalyzeDataStream(mediator, file_entry, data_stream.name)

  self._ExtractMetadataFromFileEntry(mediator, file_entry, data_stream)

  # Not every file entry has a data stream. In such cases we want to
  # extract the metadata only.
  if not data_stream:
    return

  # Determine if the content of the file entry should not be extracted.
  skip_content_extraction = self._CanSkipContentExtraction(file_entry)
  if skip_content_extraction:
    display_name = mediator.GetDisplayName()
    logger.debug(
        'Skipping content extraction of: {0:s}'.format(display_name))
    self.processing_status = definitions.PROCESSING_STATUS_IDLE
    return

  path_spec = copy.deepcopy(file_entry.path_spec)
  if data_stream and not data_stream.IsDefault():
    path_spec.data_stream = data_stream.name

  archive_types = []
  compressed_stream_types = []

  if self._process_compressed_streams:
    compressed_stream_types = self._GetCompressedStreamTypes(
        mediator, path_spec)

  if not compressed_stream_types:
    archive_types = self._GetArchiveTypes(mediator, path_spec)

  if archive_types:
    if self._process_archives:
      self._ProcessArchiveTypes(mediator, path_spec, archive_types)

    if dfvfs_definitions.TYPE_INDICATOR_ZIP in archive_types:
      # ZIP files are the base of certain file formats like docx.
      self._ExtractContentFromDataStream(
          mediator, file_entry, data_stream.name)

  elif compressed_stream_types:
    self._ProcessCompressedStreamTypes(
        mediator, path_spec, compressed_stream_types)

  else:
    self._ExtractContentFromDataStream(
        mediator, file_entry, data_stream.name)
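
# Illustration (not part of plaso): the routing decision at the end of
# _ProcessFileEntryDataStream above, expressed as a small pure function.
# The returned labels are hypothetical and only name which branch the
# worker would take for a data stream.


def _SelectDataStreamRoute(archive_types, compressed_stream_types):
  """Returns a label describing how a data stream would be processed."""
  if archive_types:
    # Archive members are expanded into new path specifications; ZIP based
    # formats such as docx are additionally parsed as content.
    return 'archive'
  if compressed_stream_types:
    return 'compressed_stream'
  return 'content'
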
def _ProcessArchiveTypes(self, mediator, path_spec, type_indicators):
  """Processes a data stream containing archive types such as: TAR or ZIP.

  Args:
    mediator (ParserMediator): mediates the interactions between parsers
        and other components, such as storage and abort signals.
    path_spec (dfvfs.PathSpec): path specification.
    type_indicators (list[str]): dfVFS archive type indicators found in
        the data stream.
  """
  number_of_type_indicators = len(type_indicators)
  if number_of_type_indicators == 0:
    return

  self.processing_status = definitions.PROCESSING_STATUS_COLLECTING

  if number_of_type_indicators > 1:
    display_name = mediator.GetDisplayName()
    logger.debug((
        'Found multiple format type indicators: {0!s} for '
        'archive file: {1:s}').format(type_indicators, display_name))

  for type_indicator in type_indicators:
    if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TAR:
      archive_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_TAR, location='/',
          parent=path_spec)

    elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_ZIP:
      archive_path_spec = path_spec_factory.Factory.NewPathSpec(
          dfvfs_definitions.TYPE_INDICATOR_ZIP, location='/',
          parent=path_spec)

    else:
      archive_path_spec = None

      error_message = (
          'unsupported archive format type indicator: {0:s}').format(
              type_indicator)
      mediator.ProduceExtractionError(error_message, path_spec=path_spec)

    if archive_path_spec:
      try:
        path_spec_generator = self._path_spec_extractor.ExtractPathSpecs(
            [archive_path_spec], resolver_context=mediator.resolver_context)

        for generated_path_spec in path_spec_generator:
          if self._abort:
            break

          event_source = event_sources.FileEntryEventSource(
              path_spec=generated_path_spec)
          event_source.file_entry_type = (
              dfvfs_definitions.FILE_ENTRY_TYPE_FILE)
          mediator.ProduceEventSource(event_source)

          self.last_activity_timestamp = time.time()

      except (IOError, errors.MaximumRecursionDepth) as exception:
        error_message = (
            'unable to process archive file with error: {0!s}').format(
                exception)
        # Report against the containing path specification, since the
        # generated path specification is not defined when extraction
        # fails before the first member is yielded.
        mediator.ProduceExtractionError(error_message, path_spec=path_spec)
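
# Illustration (not part of plaso): a standard-library analogue of what
# _ProcessArchiveTypes above delegates to the dfVFS path specification
# extractor, namely enumerating the members of an archive so each one can
# be scheduled for processing. This uses zipfile directly and is not the
# mechanism the worker uses.

import zipfile


def _ListZipMembers(zip_path):
  """Returns the member names of a ZIP archive, skipping directories."""
  with zipfile.ZipFile(zip_path) as zip_file:
    return [
        info.filename for info in zip_file.infolist() if not info.is_dir()]
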