def _AddAttributeContainer(self, container_type, attribute_container):
  """Adds an attribute container to the serialized containers buffer.

  The container is assigned the next sequence number for its type,
  serialized and appended to the in-memory list. Once the buffered data
  grows beyond the maximum buffer size, the list is flushed to the store.

  Args:
    container_type (str): attribute container type.
    attribute_container (AttributeContainer): attribute container.

  Raises:
    IOError: if the attribute container cannot be serialized.
  """
  serialized_containers = self._GetSerializedAttributeContainerList(
      container_type)

  sequence_number = serialized_containers.next_sequence_number + 1
  attribute_container.SetIdentifier(identifiers.SQLTableIdentifier(
      container_type, sequence_number))

  serialized_containers.PushAttributeContainer(
      self._SerializeAttributeContainer(attribute_container))

  # Flush the buffered containers once they exceed the maximum buffer size.
  if serialized_containers.data_size > self._maximum_buffer_size:
    self._WriteSerializedAttributeContainerList(container_type)
def _WriteNewAttributeContainer(self, container):
  """Writes a new attribute container to the store.

  The table for the container type must exist.

  Args:
    container (AttributeContainer): attribute container.

  Raises:
    IOError: when there is an error querying the storage file.
    OSError: when there is an error querying the storage file.
  """
  next_sequence_number = self._GetAttributeContainerNextSequenceNumber(
      container.CONTAINER_TYPE)

  identifier = identifiers.SQLTableIdentifier(
      container.CONTAINER_TYPE, next_sequence_number)
  container.SetIdentifier(identifier)

  schema = self._CONTAINER_SCHEMAS.get(container.CONTAINER_TYPE, {})

  self._UpdateAttributeContainerBeforeSerialize(container)

  if self._use_schema and schema:
    # Schema-based storage: each attribute is stored in its own column.
    # Columns are emitted in sorted attribute-name order so the INSERT
    # column list is deterministic.
    column_names = []
    values = []
    for name, data_type in sorted(schema.items()):
      attribute_value = getattr(container, name, None)
      if attribute_value is not None:
        if data_type == 'bool':
          # SQLite has no native boolean type; store booleans as integers.
          attribute_value = int(attribute_value)

        elif data_type not in self._CONTAINER_SCHEMA_TO_SQLITE_TYPE_MAPPINGS:
          # Types without a direct SQLite mapping are stored serialized.
          # TODO: add compression support
          attribute_value = self._serializer.WriteSerialized(attribute_value)

      # Append even when the value is None so it is stored as NULL and the
      # values stay aligned with column_names.
      column_names.append(name)
      values.append(attribute_value)

  else:
    # Serialized storage: the whole container is stored in the _data column.
    serialized_data = self._SerializeAttributeContainer(container)

    if self.compression_format == definitions.COMPRESSION_FORMAT_ZLIB:
      compressed_data = zlib.compress(serialized_data)
      serialized_data = sqlite3.Binary(compressed_data)
    else:
      compressed_data = ''

    if self._storage_profiler:
      self._storage_profiler.Sample(
          'write_new', 'write', container.CONTAINER_TYPE,
          len(serialized_data), len(compressed_data))

    if container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT:
      # Events also store their timestamp so they can be sorted in SQL.
      column_names = ['_timestamp', '_data']
      values = [container.timestamp, serialized_data]

    else:
      column_names = ['_data']
      values = [serialized_data]

  query = 'INSERT INTO {0:s} ({1:s}) VALUES ({2:s})'.format(
      container.CONTAINER_TYPE, ', '.join(column_names),
      ','.join(['?'] * len(column_names)))

  if self._storage_profiler:
    self._storage_profiler.StartTiming('write_new')

  try:
    self._cursor.execute(query, values)

  except sqlite3.OperationalError as exception:
    raise IOError('Unable to query storage file with error: {0!s}'.format(
        exception))

  finally:
    if self._storage_profiler:
      self._storage_profiler.StopTiming('write_new')

  if (self.storage_type == definitions.STORAGE_TYPE_SESSION and
      container.CONTAINER_TYPE == self._CONTAINER_TYPE_EVENT_SOURCE):
    # Cache the event source for a session store since it will be accessed
    # after write.
    self._CacheAttributeContainerByIndex(container, next_sequence_number - 1)
def _GetAttributeContainersWithFilter(
    self, container_type, column_names=None, filter_expression=None,
    order_by=None):
  """Retrieves a specific type of stored attribute containers.

  Args:
    container_type (str): attribute container type.
    column_names (Optional[list[str]]): names of the columns to retrieve,
        where None defaults to the serialized data column ('_data').
    filter_expression (Optional[str]): expression to filter results by.
    order_by (Optional[str]): name of a column to order the results by.

  Yields:
    AttributeContainer: attribute container.

  Raises:
    IOError: when there is an error querying the storage file.
    OSError: when there is an error querying the storage file.
  """
  if not column_names:
    # column_names is documented as optional, but joining None raises an
    # undocumented TypeError; fall back to the serialized data column.
    column_names = ['_data']

  query = 'SELECT _identifier, {0:s} FROM {1:s}'.format(
      ', '.join(column_names), container_type)
  if filter_expression:
    query = ' WHERE '.join([query, filter_expression])
  if order_by:
    query = ' ORDER BY '.join([query, order_by])

  # Use a local cursor to prevent another query interrupting the generator.
  cursor = self._connection.cursor()

  try:
    cursor.execute(query)
  except sqlite3.OperationalError as exception:
    raise IOError((
        'Unable to query storage file for attribute container: {0:s} with '
        'error: {1!s}').format(container_type, exception))

  if self._storage_profiler:
    self._storage_profiler.StartTiming('get_containers')

  try:
    row = cursor.fetchone()
  finally:
    if self._storage_profiler:
      self._storage_profiler.StopTiming('get_containers')

  while row:
    # Row offset 1: column 0 is the _identifier, attribute columns follow.
    container = self._CreatetAttributeContainerFromRow(
        container_type, column_names, row, 1)

    identifier = identifiers.SQLTableIdentifier(container_type, row[0])
    container.SetIdentifier(identifier)

    self._UpdateAttributeContainerAfterDeserialize(container)

    yield container

    # Time each subsequent fetch as well, since the generator can be
    # resumed long after the previous row was consumed.
    if self._storage_profiler:
      self._storage_profiler.StartTiming('get_containers')

    try:
      row = cursor.fetchone()
    finally:
      if self._storage_profiler:
        self._storage_profiler.StopTiming('get_containers')
def MergeAttributeContainers(self, callback=None, maximum_number_of_containers=0):
  """Reads attribute containers from a task storage file into the writer.

  Args:
    callback (function[StorageWriter, AttributeContainer]): function to call
        after each attribute container is deserialized.
    maximum_number_of_containers (Optional[int]): maximum number of
        containers to merge, where 0 represent no limit.

  Returns:
    bool: True if the entire task storage file has been merged.

  Raises:
    RuntimeError: if the add method for the active attribute container
        type is missing.
    OSError: if the task storage file cannot be deleted.
    ValueError: if the maximum number of containers is a negative value.
  """
  if maximum_number_of_containers < 0:
    raise ValueError('Invalid maximum number of containers')

  # Lazily open the task storage file on the first call; subsequent calls
  # resume from the cursor state left by the previous (partial) merge.
  if not self._cursor:
    self._Open()
    self._ReadStorageMetadata()
    self._container_types = self._GetContainerTypes()

  number_of_containers = 0
  while self._active_cursor or self._container_types:
    if not self._active_cursor:
      self._PrepareForNextContainerType()

    if maximum_number_of_containers == 0:
      rows = self._active_cursor.fetchall()
    else:
      # Only fetch as many rows as remain within the merge limit.
      number_of_rows = maximum_number_of_containers - number_of_containers
      rows = self._active_cursor.fetchmany(size=number_of_rows)

    if not rows:
      # Current container type exhausted; move on to the next type.
      self._active_cursor = None
      continue

    for row in rows:
      identifier = identifiers.SQLTableIdentifier(
          self._active_container_type, row[0])

      if self._compression_format == definitions.COMPRESSION_FORMAT_ZLIB:
        serialized_data = zlib.decompress(row[1])
      else:
        serialized_data = row[1]

      attribute_container = self._DeserializeAttributeContainer(
          self._active_container_type, serialized_data)
      attribute_container.SetIdentifier(identifier)

      if self._active_container_type == self._CONTAINER_TYPE_EVENT_TAG:
        # Map the task-store event row identifier onto an event identifier
        # and strip the transient attribute before handing the tag over.
        event_identifier = identifiers.SQLTableIdentifier(
            self._CONTAINER_TYPE_EVENT,
            attribute_container.event_row_identifier)
        attribute_container.SetEventIdentifier(event_identifier)

        del attribute_container.event_row_identifier

      if callback:
        callback(self._storage_writer, attribute_container)

      self._add_active_container_method(attribute_container)

      number_of_containers += 1

      # Stop mid-merge when the limit is reached; False signals the caller
      # the file is not fully merged yet.
      if (maximum_number_of_containers != 0 and
          number_of_containers >= maximum_number_of_containers):
        return False

  self._Close()

  os.remove(self._path)

  return True
def _GetAttributeContainers(self, container_type, filter_expression=None, order_by=None):
  """Retrieves a specific type of stored attribute containers.

  Args:
    container_type (str): attribute container type.
    filter_expression (Optional[str]): expression to filter results by.
    order_by (Optional[str]): name of a column to order the results by.

  Yields:
    AttributeContainer: attribute container.

  Raises:
    IOError: when there is an error querying the storage file.
    OSError: when there is an error querying the storage file.
  """
  query_parts = ['SELECT _identifier, _data FROM {0:s}'.format(
      container_type)]
  if filter_expression:
    query_parts.append('WHERE {0:s}'.format(filter_expression))
  if order_by:
    query_parts.append('ORDER BY {0:s}'.format(order_by))

  query = ' '.join(query_parts)

  # Use a local cursor to prevent another query interrupting the generator.
  cursor = self._connection.cursor()

  try:
    cursor.execute(query)
  except sqlite3.OperationalError as exception:
    raise IOError(
        'Unable to query storage file with error: {0!s}'.format(
            exception))

  if self._storage_profiler:
    self._storage_profiler.StartTiming('get_containers')

  try:
    row = cursor.fetchone()
  finally:
    if self._storage_profiler:
      self._storage_profiler.StopTiming('get_containers')

  while row:
    stored_data = row[1]

    if self.compression_format == definitions.COMPRESSION_FORMAT_ZLIB:
      serialized_data = zlib.decompress(stored_data)
    else:
      serialized_data = stored_data

    if self._storage_profiler:
      self._storage_profiler.Sample(
          'get_containers', 'read', container_type, len(serialized_data),
          len(stored_data))

    container = self._DeserializeAttributeContainer(
        container_type, serialized_data)
    container.SetIdentifier(
        identifiers.SQLTableIdentifier(container_type, row[0]))

    yield container

    row = cursor.fetchone()
def _GetAttributeContainerByIndex(self, container_type, index):
  """Retrieves a specific attribute container.

  First looks the container up by row in the storage file; when not found,
  falls back to the in-memory serialized containers buffer.

  Args:
    container_type (str): attribute container type.
    index (int): attribute container index.

  Returns:
    AttributeContainer: attribute container or None if not available.

  Raises:
    IOError: when there is an error querying the storage file.
    OSError: when there is an error querying the storage file.
  """
  # SQLite rowid values are 1-based; indexes are 0-based.
  sequence_number = index + 1
  query = 'SELECT _data FROM {0:s} WHERE rowid = {1:d}'.format(
      container_type, sequence_number)

  try:
    self._cursor.execute(query)
  except sqlite3.OperationalError as exception:
    raise IOError(
        'Unable to query storage file with error: {0!s}'.format(
            exception))

  if self._storage_profiler:
    self._storage_profiler.StartTiming('get_container_by_index')

  try:
    row = self._cursor.fetchone()
  finally:
    if self._storage_profiler:
      self._storage_profiler.StopTiming('get_container_by_index')

  if row:
    stored_data = row[0]

    if self.compression_format == definitions.COMPRESSION_FORMAT_ZLIB:
      serialized_data = zlib.decompress(stored_data)
    else:
      serialized_data = stored_data

    if self._storage_profiler:
      self._storage_profiler.Sample(
          'get_container_by_index', 'read', container_type,
          len(serialized_data), len(stored_data))

    container = self._DeserializeAttributeContainer(
        container_type, serialized_data)
    container.SetIdentifier(identifiers.SQLTableIdentifier(
        container_type, sequence_number))
    return container

  # Not yet committed to the database; look in the serialized buffer,
  # indexed relative to the number of stored containers.
  list_index = index - self._GetNumberOfAttributeContainers(container_type)

  serialized_data = self._GetSerializedAttributeContainerByIndex(
      container_type, list_index)
  container = self._DeserializeAttributeContainer(
      container_type, serialized_data)
  if container:
    # The identifier keeps the original (overall) sequence number.
    container.SetIdentifier(identifiers.SQLTableIdentifier(
        container_type, sequence_number))

  return container
def MergeAttributeContainers(
    self, callback=None, maximum_number_of_containers=0):
  """Reads attribute containers from a task storage file into the writer.

  Args:
    callback (function[StorageWriter, AttributeContainer]): function to call
        after each attribute container is deserialized.
    maximum_number_of_containers (Optional[int]): maximum number of
        containers to merge, where 0 represent no limit.

  Returns:
    bool: True if the entire task storage file has been merged.

  Raises:
    OSError: if the task storage file cannot be deleted.
  """
  # Lazily open the task storage file on the first call; subsequent calls
  # resume from the cursor state left by the previous (partial) merge.
  if not self._cursor:
    self._connection = sqlite3.connect(
        self._path,
        detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
    self._cursor = self._connection.cursor()

    self._cursor.execute(self._TABLE_NAMES_QUERY)
    table_names = [row[0] for row in self._cursor.fetchall()]

    # Remove container types not stored in the storage file but keep
    # the container types list in order.
    self._container_types = list(self._CONTAINER_TYPES)
    for name in set(self._CONTAINER_TYPES).difference(table_names):
      self._container_types.remove(name)

  number_of_containers = 0
  while self._active_cursor or self._container_types:
    if not self._active_cursor:
      # Start reading the next container type.
      self._active_container_type = self._container_types.pop(0)

      query = 'SELECT _identifier, _data FROM {0:s}'.format(
          self._active_container_type)
      self._cursor.execute(query)

      self._active_cursor = self._cursor

    if maximum_number_of_containers > 0:
      # Only fetch as many rows as remain within the merge limit.
      number_of_rows = maximum_number_of_containers - number_of_containers
      rows = self._active_cursor.fetchmany(size=number_of_rows)
    else:
      rows = self._active_cursor.fetchall()

    if not rows:
      # Current container type exhausted; move on to the next type.
      self._active_cursor = None
      continue

    for row in rows:
      identifier = identifiers.SQLTableIdentifier(
          self._active_container_type, row[0])

      serialized_data = row[1]

      attribute_container = self._DeserializeAttributeContainer(
          self._active_container_type, serialized_data)
      attribute_container.SetIdentifier(identifier)

      if self._active_container_type == self._CONTAINER_TYPE_EVENT_TAG:
        # Map the task-store event row identifier onto an event identifier
        # and strip the transient attribute before handing the tag over.
        event_identifier = identifiers.SQLTableIdentifier(
            self._CONTAINER_TYPE_EVENT,
            attribute_container.event_row_identifier)
        attribute_container.SetEventIdentifier(event_identifier)

        del attribute_container.event_row_identifier

      if callback:
        callback(self._storage_writer, attribute_container)

      self._AddAttributeContainer(attribute_container)

      number_of_containers += 1

      # Stop mid-merge when the limit is reached; False signals the caller
      # the file is not fully merged yet.
      if (maximum_number_of_containers > 0 and
          number_of_containers >= maximum_number_of_containers):
        return False

  self._connection.close()
  self._connection = None
  self._cursor = None

  os.remove(self._path)

  return True
def MergeAttributeContainers(self, callback=None, maximum_number_of_containers=0):
  """Reads attribute containers from a task storage file into the writer.

  Args:
    callback (function[StorageWriter, AttributeContainer]): function to call
        after each attribute container is deserialized.
    maximum_number_of_containers (Optional[int]): maximum number of
        containers to merge, where 0 represent no limit.

  Returns:
    bool: True if the entire task storage file has been merged.

  Raises:
    RuntimeError: if the add method for the active attribute container
        type is missing.
    OSError: if the task storage file cannot be deleted.
    ValueError: if the maximum number of containers is a negative value.
  """
  if maximum_number_of_containers < 0:
    raise ValueError('Invalid maximum number of containers')

  # Lazily open the task storage file on the first call; subsequent calls
  # resume from the cursor state left by the previous (partial) merge.
  if not self._cursor:
    self._Open()
    self._ReadStorageMetadata()
    self._container_types = self._GetContainerTypes()
    self._deserialization_errors = []

  total_compressed_data_size = 0
  total_serialized_data_size = 0

  number_of_containers = 0
  while self._active_cursor or self._container_types:
    if not self._active_cursor:
      self._PrepareForNextContainerType()

    if self._storage_profiler:
      self._storage_profiler.StartTiming('merge_read')

    try:
      if maximum_number_of_containers == 0:
        rows = self._active_cursor.fetchall()
      else:
        # Only fetch as many rows as remain within the merge limit.
        number_of_rows = maximum_number_of_containers - number_of_containers
        rows = self._active_cursor.fetchmany(size=number_of_rows)

    finally:
      if self._storage_profiler:
        self._storage_profiler.StopTiming('merge_read')

    if not rows:
      # Current container type exhausted; move on to the next type.
      self._active_cursor = None
      continue

    for row in rows:
      identifier = identifiers.SQLTableIdentifier(
          self._active_container_type, row[0])

      if self._compression_format == definitions.COMPRESSION_FORMAT_ZLIB:
        compressed_data = row[1]
        serialized_data = zlib.decompress(compressed_data)
      else:
        compressed_data = ''
        serialized_data = row[1]

      if self._storage_profiler:
        total_compressed_data_size += len(compressed_data)
        total_serialized_data_size += len(serialized_data)

      try:
        attribute_container = self._DeserializeAttributeContainer(
            self._active_container_type, serialized_data)
      except IOError as exception:
        # Record the failed container and continue merging, so one corrupt
        # row does not abort the whole merge.
        # TODO: store this as an extraction warning so this is preserved
        # in the storage file.
        logger.error((
            'Unable to deserialize attribute container with error: '
            '{0!s}').format(exception))

        # NOTE: rebinding identifier to its string form here; safe only
        # because of the continue that follows.
        identifier = identifier.CopyToString()
        self._deserialization_errors.append(identifier)
        continue

      attribute_container.SetIdentifier(identifier)

      if self._active_container_type == self._CONTAINER_TYPE_EVENT_TAG:
        # Map the task-store event row identifier onto an event identifier
        # and strip the transient attribute before handing the tag over.
        row_identifier = getattr(
            attribute_container, '_event_row_identifier', None)
        # TODO: error if row_identifier is None
        event_identifier = identifiers.SQLTableIdentifier(
            self._CONTAINER_TYPE_EVENT, row_identifier)
        attribute_container.SetEventIdentifier(event_identifier)

        delattr(attribute_container, '_event_row_identifier')

      if callback:
        callback(self._storage_writer, attribute_container)

      self._add_active_container_method(
          attribute_container, serialized_data=serialized_data)

      number_of_containers += 1

      # Stop mid-merge when the limit is reached; emit the profiler sample
      # before returning so partial reads are accounted for.
      if (maximum_number_of_containers != 0 and
          number_of_containers >= maximum_number_of_containers):
        if self._storage_profiler:
          self._storage_profiler.Sample(
              'merge_read', 'read', self._active_container_type,
              total_serialized_data_size, total_compressed_data_size)

        return False

  if self._storage_profiler:
    self._storage_profiler.Sample(
        'merge_read', 'read', self._active_container_type,
        total_serialized_data_size, total_compressed_data_size)

  self._Close()

  os.remove(self._path)

  return True
def GetAttributeContainerByIndex(self, container_type, index):
  """Retrieves a specific attribute container.

  Args:
    container_type (str): attribute container type.
    index (int): attribute container index.

  Returns:
    AttributeContainer: attribute container or None if not available.

  Raises:
    IOError: when the store is closed or when there is an error querying
        the storage file.
    OSError: when the store is closed or when there is an error querying
        the storage file.
  """
  # Serve from the cache when possible to avoid a database round-trip.
  cached_container = self._GetCachedAttributeContainer(container_type, index)
  if cached_container:
    return cached_container

  schema = self._GetAttributeContainerSchema(container_type)

  # With a schema each attribute has its own column; otherwise the whole
  # container is stored serialized in the _data column.
  if self._use_schema and schema:
    column_names = sorted(schema.keys())
  else:
    column_names = ['_data']

  # SQLite rowid values are 1-based; indexes are 0-based.
  row_number = index + 1
  query = 'SELECT {0:s} FROM {1:s} WHERE rowid = {2:d}'.format(
      ', '.join(column_names), container_type, row_number)

  try:
    self._cursor.execute(query)
  except sqlite3.OperationalError as exception:
    raise IOError(
        'Unable to query storage file with error: {0!s}'.format(
            exception))

  if self._storage_profiler:
    self._storage_profiler.StartTiming('get_container_by_index')

  try:
    row = self._cursor.fetchone()
  finally:
    if self._storage_profiler:
      self._storage_profiler.StopTiming('get_container_by_index')

  if row is None:
    return None

  # Row offset 0: there is no leading _identifier column in this query.
  container = self._CreatetAttributeContainerFromRow(
      container_type, column_names, row, 0)
  container.SetIdentifier(
      identifiers.SQLTableIdentifier(container_type, row_number))

  self._UpdateAttributeContainerAfterDeserialize(container)

  self._CacheAttributeContainerByIndex(container, index)
  return container