class StrikeConfiguration(object):
    """Represents the configuration for a running Strike instance.

    The configuration names the monitored workspace, the monitor (and its settings) used to watch it, and the
    regular expressions that identify files to ingest along with how to store them. A configuration whose version
    is not the current one is converted from the 1.0 format when the object is created.
    """

    def __init__(self, configuration):
        """Creates a Strike configuration object from the given dictionary.

        The general format is checked for correctness, but the specified workspace(s) are not checked; use
        :meth:`validate` for that. Missing defaults are filled into the stored dictionary and every
        ``new_file_path`` is replaced by its normalized form.

        :param configuration: The Strike configuration
        :type configuration: dict

        :raises InvalidStrikeConfiguration: If the given configuration is invalid
        """

        self._configuration = configuration

        # Convert old versions
        if 'version' in self._configuration and self._configuration['version'] != CURRENT_VERSION:
            self._configuration = self._convert_schema(configuration)

        # Validate the dictionary that will actually be used (the converted one when a conversion happened),
        # not the raw input
        try:
            validate(self._configuration, STRIKE_CONFIGURATION_SCHEMA)
        except ValidationError as ex:
            raise InvalidStrikeConfiguration('Invalid Strike configuration: %s' % unicode(ex))

        self._populate_default_values()
        if self._configuration['version'] != CURRENT_VERSION:
            msg = 'Invalid Strike configuration: %s is an unsupported version number'
            raise InvalidStrikeConfiguration(msg % self._configuration['version'])

        self._file_handler = FileHandler()
        for file_dict in self._configuration['files_to_ingest']:
            self._file_handler.add_rule(self._create_file_rule(file_dict))

    def get_dict(self):
        """Returns the internal dictionary that represents this Strike process configuration.

        :returns: The internal dictionary
        :rtype: dict
        """

        return self._configuration

    def get_monitor(self):
        """Returns the configured monitor for this Strike configuration

        :returns: The configured monitor
        :rtype: :class:`ingest.strike.monitors.monitor.Monitor`
        """

        monitor_type = self._configuration['monitor']['type']
        monitor = factory.get_monitor(monitor_type)
        self.load_monitor_configuration(monitor)
        return monitor

    def get_workspace(self):
        """Returns the monitored workspace name for this Strike configuration

        :returns: The monitored workspace name
        :rtype: string
        """

        return self._configuration['workspace']

    def load_monitor_configuration(self, monitor):
        """Loads the configuration into the given monitor

        If the configured monitor type differs from the type of the given monitor, nothing is loaded and a warning
        is logged instead.

        :param monitor: The monitor to load this configuration into
        :type monitor: :class:`ingest.strike.monitors.monitor.Monitor`
        """

        monitor_dict = self._configuration['monitor']
        monitor_type = monitor_dict['type']
        workspace = self._configuration['workspace']

        # Only load configuration if monitor type is unchanged
        if monitor_type == monitor.monitor_type:
            monitor.setup_workspaces(workspace, self._file_handler)
            monitor.load_configuration(monitor_dict)
        else:
            msg = 'Strike monitor type has been changed from %s to %s. Cannot reload configuration.'
            logger.warning(msg, monitor.monitor_type, monitor_type)

    def validate(self):
        """Validates the Strike configuration

        Checks that the monitor type is known, that the monitored workspace's broker type is supported by the
        monitor, and that every referenced workspace exists and is active.

        :returns: A list of warnings discovered during validation (currently always empty)
        :rtype: list[:class:`ingest.strike.configuration.strike_configuration.ValidationWarning`]

        :raises :class:`ingest.strike.configuration.exceptions.InvalidStrikeConfiguration`: If the configuration is
            invalid.
        """

        warnings = []

        monitor_type = self._configuration['monitor']['type']
        if monitor_type not in factory.get_monitor_types():
            raise InvalidStrikeConfiguration('\'%s\' is an invalid monitor type' % monitor_type)

        # Collect the monitored workspace plus every workspace that a file rule stores into
        monitored_workspace_name = self._configuration['workspace']
        workspace_names = {monitored_workspace_name}
        for rule in self._file_handler.rules:
            if rule.new_workspace:
                workspace_names.add(rule.new_workspace)

        for workspace in Workspace.objects.filter(name__in=workspace_names):
            if workspace.name == monitored_workspace_name:
                broker_type = workspace.get_broker().broker_type
                monitor = factory.get_monitor(monitor_type)
                if broker_type not in monitor.supported_broker_types:
                    msg = 'Monitor type %s does not support broker type %s'
                    raise InvalidStrikeConfiguration(msg % (monitor_type, broker_type))
            if not workspace.is_active:
                raise InvalidStrikeConfiguration('Workspace is not active: %s' % workspace.name)
            workspace_names.remove(workspace.name)

        # Any name still in the set was not returned by the query
        if workspace_names:
            raise InvalidStrikeConfiguration('Unknown workspace name: %s' % workspace_names.pop())

        return warnings

    def _convert_schema(self, configuration):
        """Tries to validate the configuration as version 1.0 and convert it to version 2.0

        The 1.0 ``mount`` and ``transfer_suffix`` fields are replaced by a ``workspace`` name and a ``dir-watcher``
        ``monitor`` entry. A host-broker workspace named after the mount is saved if one with that name does not
        already exist.

        :param configuration: The Strike configuration
        :type configuration: dict
        :returns: The converted configuration
        :rtype: dict

        :raises InvalidStrikeConfiguration: If the mount does not contain a ':' separator
        """

        # Try converting from 1.0
        converted_configuration = StrikeConfiguration_1_0(configuration).get_dict()
        converted_configuration['version'] = CURRENT_VERSION

        mount = converted_configuration['mount']
        mount_path = self._get_mount_path(mount)
        transfer_suffix = converted_configuration['transfer_suffix']
        del converted_configuration['mount']
        del converted_configuration['transfer_suffix']

        auto_workspace_name = 'auto_wksp_for_%s' % mount.replace(':', '_').replace('/', '_')
        auto_workspace_name = auto_workspace_name[:50]  # Truncate to max name length of 50 chars
        title = 'Auto Workspace for %s' % mount
        title = title[:50]  # Truncate to max title length of 50 chars
        try:
            Workspace.objects.get(name=auto_workspace_name)
        except Workspace.DoesNotExist:
            workspace = Workspace()
            workspace.name = auto_workspace_name
            workspace.title = title
            desc = 'This workspace was automatically created for mount %s to support converting Strike from 1.0 to 2.0'
            workspace.description = desc % mount
            workspace.json_config = {'version': '1.0', 'broker': {'type': 'host', 'host_path': mount_path}}
            workspace.save()

        converted_configuration['workspace'] = auto_workspace_name
        converted_configuration['monitor'] = {'type': 'dir-watcher', 'transfer_suffix': transfer_suffix}
        for file_dict in converted_configuration['files_to_ingest']:
            file_dict['new_workspace'] = file_dict['workspace_name']
            file_dict['new_file_path'] = file_dict['workspace_path']
            del file_dict['workspace_name']
            del file_dict['workspace_path']

        return converted_configuration

    def _create_file_rule(self, file_dict):
        """Builds the file rule described by a single ``files_to_ingest`` entry.

        When the entry has a ``new_file_path``, it is replaced in place by its normalized form.

        :param file_dict: One entry of the ``files_to_ingest`` list
        :type file_dict: dict
        :returns: The file rule built from the entry
        :rtype: FileRule

        :raises InvalidStrikeConfiguration: If the file name regex does not compile or new_file_path is absolute
        """

        try:
            regex_pattern = re.compile(file_dict['filename_regex'])
        except re.error:
            raise InvalidStrikeConfiguration('Invalid file name regex: %s' % file_dict['filename_regex'])

        new_workspace = None
        if 'new_workspace' in file_dict:
            new_workspace = file_dict['new_workspace']

        new_file_path = None
        if 'new_file_path' in file_dict:
            if os.path.isabs(file_dict['new_file_path']):
                msg = 'Invalid Strike configuration: new_file_path may not be an absolute path'
                raise InvalidStrikeConfiguration(msg)
            file_dict['new_file_path'] = os.path.normpath(file_dict['new_file_path'])
            new_file_path = file_dict['new_file_path']

        return FileRule(regex_pattern, file_dict['data_types'], new_workspace, new_file_path)

    @staticmethod
    def _get_mount_path(mount):
        """Returns the path portion of a 1.0 mount string, i.e. the text between the first and second ':'.

        :param mount: The mount string from a 1.0 configuration
        :type mount: string
        :returns: The path portion of the mount
        :rtype: string

        :raises InvalidStrikeConfiguration: If the mount does not contain a ':' separator
        """

        mount_parts = mount.split(':')
        if len(mount_parts) < 2:
            # Surface a malformed mount as a configuration error rather than a bare IndexError
            raise InvalidStrikeConfiguration('Invalid Strike configuration: mount is missing a \':\' separator: %s' %
                                             mount)
        return mount_parts[1]

    def _populate_default_values(self):
        """Goes through the configuration and populates any missing values with defaults."""

        if 'version' not in self._configuration:
            self._configuration['version'] = CURRENT_VERSION

        for file_dict in self._configuration['files_to_ingest']:
            if 'data_types' not in file_dict:
                file_dict['data_types'] = []
class ScanConfigurationV6(object):
    """Wraps the dictionary form of a Scan configuration.

    The dictionary names the workspace to scan, the scanner (and its settings), whether scanning is recursive, and
    the ``files_to_ingest`` rules that pair a file name regex with data types and an optional new workspace/path.
    """

    def __init__(self, configuration, do_validate=False):
        """Creates a Scan configuration object from the given dictionary.

        A missing version or a version of '1.0' is replaced by the current schema version. Defaults are filled
        into the given dictionary in place. The specified workspace(s) are not checked.

        :param configuration: The Scan configuration
        :type configuration: dict
        :param do_validate: Whether to validate the configuration against SCAN_CONFIGURATION_SCHEMA
        :type do_validate: bool

        :raises InvalidScanConfiguration: If the given configuration is invalid
        """

        self._configuration = configuration

        # Both "no version" and the old '1.0' version are upgraded to the current schema version
        if 'version' not in self._configuration or self._configuration['version'] == '1.0':
            self._configuration['version'] = SCHEMA_VERSION

        if do_validate:
            try:
                validate(self._configuration, SCAN_CONFIGURATION_SCHEMA)
            except ValidationError as ex:
                raise InvalidScanConfiguration('Invalid Scan configuration: %s' % unicode(ex))

        self._populate_default_values()

        version = self._configuration['version']
        if version not in SCHEMA_VERSIONS:
            msg = 'Invalid Scan configuration: %s is an unsupported version number'
            raise InvalidScanConfiguration(msg % version)

        self._file_handler = FileHandler()
        for entry in self._configuration['files_to_ingest']:
            self._file_handler.add_rule(self._build_file_rule(entry))

    def get_configuration(self):
        """Returns the scan configuration represented by this JSON

        :returns: The scan configuration
        :rtype: :class:`ingest.scan.configuration.scan_configuration.ScanConfiguration`:
        """

        scan_config = ScanConfiguration()
        scan_config.scanner_type = self._configuration['scanner']['type']
        scan_config.scanner_config = self._configuration['scanner']
        scan_config.recursive = self._configuration['recursive']
        scan_config.file_handler = self._file_handler
        scan_config.workspace = self._configuration['workspace']
        scan_config.config_dict = self._configuration
        return scan_config

    def get_dict(self):
        """Returns the internal dictionary that represents this Scan process configuration.

        :returns: The internal dictionary
        :rtype: dict
        """

        return self._configuration

    def _build_file_rule(self, entry):
        """Creates the file rule for one ``files_to_ingest`` entry, normalizing its new_file_path in place.

        :param entry: One entry of the ``files_to_ingest`` list
        :type entry: dict
        :returns: The file rule built from the entry
        :rtype: FileRule

        :raises InvalidScanConfiguration: If the file name regex does not compile or new_file_path is absolute
        """

        try:
            pattern = re.compile(entry['filename_regex'])
        except re.error:
            raise InvalidScanConfiguration('Invalid file name regex: %s' % entry['filename_regex'])

        target_workspace = entry.get('new_workspace')

        target_path = None
        if 'new_file_path' in entry:
            raw_path = entry['new_file_path']
            if os.path.isabs(raw_path):
                raise InvalidScanConfiguration('Invalid Scan configuration: new_file_path may not be an absolute path')
            target_path = os.path.normpath(raw_path)
            entry['new_file_path'] = target_path

        return FileRule(pattern, entry['data_types'], target_workspace, target_path)

    def _convert_schema(self, configuration):
        """Upgrade schema from a previous version

        The given dictionary is modified in place and returned.

        :param configuration: The Scan configuration
        :type configuration: dict
        :returns: The converted configuration
        :rtype: dict
        """

        if configuration.get('version') == '1.0':
            configuration['version'] = SCHEMA_VERSION
        return configuration

    def _populate_default_values(self):
        """Fills in the version, the recursive flag and each entry's data_types when they are missing."""

        if 'version' not in self._configuration:
            self._configuration['version'] = SCHEMA_VERSION

        self._configuration.setdefault('recursive', True)

        for entry in self._configuration['files_to_ingest']:
            entry.setdefault('data_types', [])
class StrikeConfigurationV6(object):
    """Wraps the dictionary form of a Strike configuration.

    The dictionary carries a version, an optional ``monitor`` section (which may hold ``credentials``), and the
    ``files_to_ingest`` rules that pair a file name regex with data types and an optional new workspace/path.
    """

    def __init__(self, configuration, do_validate=False):
        """Creates a Strike configuration object from the given dictionary.

        Version '1.0' is rejected and version '2.0' is rewritten to the current schema version. Defaults are
        filled into the given dictionary in place. The specified workspace(s) are not checked.

        :param configuration: The Strike configuration
        :type configuration: dict
        :param do_validate: Whether to validate the configuration against STRIKE_CONFIGURATION_SCHEMA
        :type do_validate: bool

        :raises InvalidStrikeConfiguration: If the given configuration is invalid
        """

        self._configuration = configuration

        # Handle old versions: 1.0 is refused outright, 2.0 is relabelled as the current schema version
        if 'version' in self._configuration:
            declared_version = self._configuration['version']
            if declared_version == '1.0':
                raise InvalidStrikeConfiguration(
                    'Invalid Strike configuration. Strike configuration version 1.0 is no longer supported')
            if declared_version == '2.0':
                self._configuration['version'] = SCHEMA_VERSION

        if do_validate:
            try:
                validate(self._configuration, STRIKE_CONFIGURATION_SCHEMA)
            except ValidationError as ex:
                raise InvalidStrikeConfiguration('Invalid Strike configuration: %s' % unicode(ex))

        self._populate_default_values()

        version = self._configuration['version']
        if version not in SCHEMA_VERSIONS:
            msg = 'Invalid Strike configuration: %s is an unsupported version number'
            raise InvalidStrikeConfiguration(msg % version)

        self._file_handler = FileHandler()
        for entry in self._configuration['files_to_ingest']:
            self._file_handler.add_rule(self._build_file_rule(entry))

    def get_dict(self):
        """Returns the internal dictionary that represents this Strike process configuration.

        :returns: The internal dictionary
        :rtype: dict
        """

        return self._configuration

    def get_configuration(self):
        """Returns the strike configuration represented by this JSON

        :returns: The strike configuration
        :rtype: :class:`ingest.strike.configuration.strike_configuration.StrikeConfiguration`:
        """

        strike_config = StrikeConfiguration()
        strike_config.configuration = self._configuration
        strike_config.file_handler = self._file_handler
        return strike_config

    def sanitize_credentials(self):
        """Overwrites the monitor credentials' access key id and secret access key with a mask, if present.

        Nothing happens when the configuration has no ``monitor`` section or the section has no ``credentials``.
        """

        if 'monitor' not in self._configuration:
            return
        monitor_dict = self._configuration['monitor']
        if 'credentials' not in monitor_dict:
            return

        credentials = monitor_dict['credentials']
        credentials['access_key_id'] = '************'
        credentials['secret_access_key'] = '************'

    def _build_file_rule(self, entry):
        """Creates the file rule for one ``files_to_ingest`` entry, normalizing its new_file_path in place.

        :param entry: One entry of the ``files_to_ingest`` list
        :type entry: dict
        :returns: The file rule built from the entry
        :rtype: FileRule

        :raises InvalidStrikeConfiguration: If the file name regex does not compile or new_file_path is absolute
        """

        try:
            pattern = re.compile(entry['filename_regex'])
        except re.error:
            raise InvalidStrikeConfiguration('Invalid file name regex: %s' % entry['filename_regex'])

        target_workspace = entry.get('new_workspace')

        target_path = None
        if 'new_file_path' in entry:
            raw_path = entry['new_file_path']
            if os.path.isabs(raw_path):
                raise InvalidStrikeConfiguration(
                    'Invalid Strike configuration: new_file_path may not be an absolute path')
            target_path = os.path.normpath(raw_path)
            entry['new_file_path'] = target_path

        return FileRule(pattern, entry['data_types'], target_workspace, target_path)

    def _populate_default_values(self):
        """Fills in the version and each entry's data_types when they are missing."""

        if 'version' not in self._configuration:
            self._configuration['version'] = SCHEMA_VERSION

        for entry in self._configuration['files_to_ingest']:
            entry.setdefault('data_types', [])
class ScanConfiguration(object):
    """Wraps the dictionary form of a Scan configuration.

    The dictionary names the workspace to scan, the scanner (and its settings), whether scanning is recursive, and
    the ``files_to_ingest`` rules that pair a file name regex with data types and an optional new workspace/path.
    """

    def __init__(self, configuration):
        """Creates a Scan configuration object from the given dictionary.

        The dictionary is validated against SCAN_CONFIGURATION_SCHEMA and defaults are filled into it in place.
        The specified workspace(s) are not checked here; see :meth:`validate`.

        :param configuration: The Scan configuration
        :type configuration: dict

        :raises InvalidScanConfiguration: If the given configuration is invalid
        """

        self._configuration = configuration

        try:
            validate(configuration, SCAN_CONFIGURATION_SCHEMA)
        except ValidationError as ex:
            raise InvalidScanConfiguration('Invalid Scan configuration: %s' % unicode(ex))

        self._populate_default_values()

        version = self._configuration['version']
        if version != CURRENT_VERSION:
            msg = 'Invalid Scan configuration: %s is an unsupported version number'
            raise InvalidScanConfiguration(msg % version)

        self._file_handler = FileHandler()
        for entry in self._configuration['files_to_ingest']:
            self._file_handler.add_rule(self._build_file_rule(entry))

    def get_dict(self):
        """Returns the internal dictionary that represents this Scan process configuration.

        :returns: The internal dictionary
        :rtype: dict
        """

        return self._configuration

    def get_scanner(self):
        """Returns the configured scanner for this Scan configuration

        :returns: The configured scanner
        :rtype: :class:`ingest.scan.scanners.scanner.Scanner`
        """

        scanner = factory.get_scanner(self._configuration['scanner']['type'])
        self.load_scanner_configuration(scanner)
        return scanner

    def get_workspace(self):
        """Returns the workspace name to be scanned for this Scan configuration

        :returns: The workspace name
        :rtype: string
        """

        return self._configuration['workspace']

    def load_scanner_configuration(self, scanner):
        """Loads the configuration into the given scanner

        If the configured scanner type differs from the type of the given scanner, nothing is loaded and a warning
        is logged instead.

        :param scanner: The scanner to load this configuration into
        :type scanner: :class:`ingest.scan.scanners.scanner.Scanner`
        """

        scanner_settings = self._configuration['scanner']
        configured_type = scanner_settings['type']
        workspace_name = self._configuration['workspace']

        # Refuse to reconfigure a scanner of a different type
        if configured_type != scanner.scanner_type:
            msg = 'Scan scanner type has been changed from %s to %s. Cannot reload configuration.'
            logger.warning(msg, scanner.scanner_type, configured_type)
            return

        scanner.setup_workspaces(workspace_name, self._file_handler)
        scanner.load_configuration(scanner_settings)
        scanner.set_recursive(self._configuration['recursive'])

    def validate(self):
        """Validates the Scan configuration

        Checks that the scanner type is known, that the scanned workspace's broker type is supported by the
        scanner, and that every referenced workspace exists and is active.

        :returns: A list of warnings discovered during validation (currently always empty)
        :rtype: list[:class:`ingest.scan.configuration.scan_configuration.ValidationWarning`]

        :raises :class:`ingest.scan.configuration.exceptions.InvalidScanConfiguration`: If the configuration is
            invalid.
        """

        warnings = []

        scanner_type = self._configuration['scanner']['type']
        if scanner_type not in factory.get_scanner_types():
            raise InvalidScanConfiguration('\'%s\' is an invalid scanner' % scanner_type)

        # The scanned workspace plus every workspace that a file rule stores into
        scanned_name = self._configuration['workspace']
        pending_names = {scanned_name}
        pending_names.update(rule.new_workspace for rule in self._file_handler.rules if rule.new_workspace)

        for found in Workspace.objects.filter(name__in=pending_names):
            if found.name == scanned_name:
                broker_type = found.get_broker().broker_type
                scanner = factory.get_scanner(scanner_type)
                if broker_type not in scanner.supported_broker_types:
                    msg = 'Scanner type %s does not support broker type %s'
                    raise InvalidScanConfiguration(msg % (scanner_type, broker_type))
            if not found.is_active:
                raise InvalidScanConfiguration('Workspace is not active: %s' % found.name)
            pending_names.remove(found.name)

        # Any name still pending was not returned by the query
        if pending_names:
            raise InvalidScanConfiguration('Unknown workspace name: %s' % pending_names.pop())

        return warnings

    def _build_file_rule(self, entry):
        """Creates the file rule for one ``files_to_ingest`` entry, normalizing its new_file_path in place.

        :param entry: One entry of the ``files_to_ingest`` list
        :type entry: dict
        :returns: The file rule built from the entry
        :rtype: FileRule

        :raises InvalidScanConfiguration: If the file name regex does not compile or new_file_path is absolute
        """

        try:
            pattern = re.compile(entry['filename_regex'])
        except re.error:
            raise InvalidScanConfiguration('Invalid file name regex: %s' % entry['filename_regex'])

        target_workspace = entry.get('new_workspace')

        target_path = None
        if 'new_file_path' in entry:
            raw_path = entry['new_file_path']
            if os.path.isabs(raw_path):
                raise InvalidScanConfiguration('Invalid Scan configuration: new_file_path may not be an absolute path')
            target_path = os.path.normpath(raw_path)
            entry['new_file_path'] = target_path

        return FileRule(pattern, entry['data_types'], target_workspace, target_path)

    def _convert_schema(self, configuration):
        """Upgrade schema from a previous version

        Not implemented for this class; always raises.

        :param configuration: The Scan configuration
        :type configuration: dict
        :returns: The converted configuration
        :rtype: dict

        :raises NotImplementedError: Always
        """

        raise NotImplementedError

    def _populate_default_values(self):
        """Fills in the version, the recursive flag and each entry's data_types when they are missing."""

        if 'version' not in self._configuration:
            self._configuration['version'] = CURRENT_VERSION

        self._configuration.setdefault('recursive', True)

        for entry in self._configuration['files_to_ingest']:
            entry.setdefault('data_types', [])
class StrikeConfigurationV2(object):
    """Represents the configuration for a running Strike instance.

    The configuration names the monitored workspace, the monitor (and its settings) used to watch it, and the
    regular expressions that identify files to ingest along with how to store them. A configuration whose version
    is not the current one is converted from the 1.0 format when the object is created.
    """

    def __init__(self, configuration, do_validate=False):
        """Creates a Strike configuration object from the given dictionary.

        The general format is checked for correctness when requested, but the specified workspace(s) are not
        checked. Missing defaults are filled into the stored dictionary and every ``new_file_path`` is replaced by
        its normalized form.

        :param configuration: The Strike configuration
        :type configuration: dict
        :param do_validate: Whether to validate the configuration against STRIKE_CONFIGURATION_SCHEMA
        :type do_validate: bool

        :raises InvalidStrikeConfiguration: If the given configuration is invalid
        """

        self._configuration = configuration

        # Convert old versions
        if 'version' in self._configuration and self._configuration['version'] != CURRENT_VERSION:
            self._configuration = self._convert_schema(configuration)

        # Validate the dictionary that will actually be used (the converted one when a conversion happened),
        # not the raw input
        if do_validate:
            try:
                validate(self._configuration, STRIKE_CONFIGURATION_SCHEMA)
            except ValidationError as ex:
                raise InvalidStrikeConfiguration('Invalid Strike configuration: %s' % unicode(ex))

        self._populate_default_values()
        if self._configuration['version'] != CURRENT_VERSION:
            msg = 'Invalid Strike configuration: %s is an unsupported version number'
            raise InvalidStrikeConfiguration(msg % self._configuration['version'])

        self._file_handler = FileHandler()
        for file_dict in self._configuration['files_to_ingest']:
            self._file_handler.add_rule(self._create_file_rule(file_dict))

    def get_dict(self):
        """Returns the internal dictionary that represents this Strike process configuration.

        :returns: The internal dictionary
        :rtype: dict
        """

        return self._configuration

    def get_configuration(self):
        """Returns the strike configuration represented by this JSON

        :returns: The strike configuration
        :rtype: :class:`ingest.strike.configuration.strike_configuration.StrikeConfiguration`:
        """

        config = StrikeConfiguration()
        config.configuration = self._configuration
        config.file_handler = self._file_handler
        return config

    def _convert_schema(self, configuration):
        """Tries to validate the configuration as version 1.0 and convert it to version 2.0

        The 1.0 ``mount`` and ``transfer_suffix`` fields are replaced by a ``workspace`` name and a ``dir-watcher``
        ``monitor`` entry. A host-broker workspace named after the mount is saved if one with that name does not
        already exist.

        :param configuration: The Strike configuration
        :type configuration: dict
        :returns: The converted configuration
        :rtype: dict

        :raises InvalidStrikeConfiguration: If the mount does not contain a ':' separator
        """

        # Try converting from 1.0
        converted_configuration = StrikeConfigurationV1(configuration).get_dict()
        converted_configuration['version'] = CURRENT_VERSION

        mount = converted_configuration['mount']
        mount_path = self._get_mount_path(mount)
        transfer_suffix = converted_configuration['transfer_suffix']
        del converted_configuration['mount']
        del converted_configuration['transfer_suffix']

        auto_workspace_name = 'auto_wksp_for_%s' % mount.replace(':', '_').replace('/', '_')
        auto_workspace_name = auto_workspace_name[:50]  # Truncate to max name length of 50 chars
        title = 'Auto Workspace for %s' % mount
        title = title[:50]  # Truncate to max title length of 50 chars
        try:
            Workspace.objects.get(name=auto_workspace_name)
        except Workspace.DoesNotExist:
            workspace = Workspace()
            workspace.name = auto_workspace_name
            workspace.title = title
            desc = 'This workspace was automatically created for mount %s to support converting Strike from 1.0 to 2.0'
            workspace.description = desc % mount
            workspace.json_config = {'version': '1.0', 'broker': {'type': 'host', 'host_path': mount_path}}
            workspace.save()

        converted_configuration['workspace'] = auto_workspace_name
        converted_configuration['monitor'] = {'type': 'dir-watcher', 'transfer_suffix': transfer_suffix}
        for file_dict in converted_configuration['files_to_ingest']:
            file_dict['new_workspace'] = file_dict['workspace_name']
            file_dict['new_file_path'] = file_dict['workspace_path']
            del file_dict['workspace_name']
            del file_dict['workspace_path']

        return converted_configuration

    def _create_file_rule(self, file_dict):
        """Builds the file rule described by a single ``files_to_ingest`` entry.

        When the entry has a ``new_file_path``, it is replaced in place by its normalized form.

        :param file_dict: One entry of the ``files_to_ingest`` list
        :type file_dict: dict
        :returns: The file rule built from the entry
        :rtype: FileRule

        :raises InvalidStrikeConfiguration: If the file name regex does not compile or new_file_path is absolute
        """

        try:
            regex_pattern = re.compile(file_dict['filename_regex'])
        except re.error:
            raise InvalidStrikeConfiguration('Invalid file name regex: %s' % file_dict['filename_regex'])

        new_workspace = None
        if 'new_workspace' in file_dict:
            new_workspace = file_dict['new_workspace']

        new_file_path = None
        if 'new_file_path' in file_dict:
            if os.path.isabs(file_dict['new_file_path']):
                msg = 'Invalid Strike configuration: new_file_path may not be an absolute path'
                raise InvalidStrikeConfiguration(msg)
            file_dict['new_file_path'] = os.path.normpath(file_dict['new_file_path'])
            new_file_path = file_dict['new_file_path']

        return FileRule(regex_pattern, file_dict['data_types'], new_workspace, new_file_path)

    @staticmethod
    def _get_mount_path(mount):
        """Returns the path portion of a 1.0 mount string, i.e. the text between the first and second ':'.

        :param mount: The mount string from a 1.0 configuration
        :type mount: string
        :returns: The path portion of the mount
        :rtype: string

        :raises InvalidStrikeConfiguration: If the mount does not contain a ':' separator
        """

        mount_parts = mount.split(':')
        if len(mount_parts) < 2:
            # Surface a malformed mount as a configuration error rather than a bare IndexError
            raise InvalidStrikeConfiguration('Invalid Strike configuration: mount is missing a \':\' separator: %s' %
                                             mount)
        return mount_parts[1]

    def _populate_default_values(self):
        """Goes through the configuration and populates any missing values with defaults."""

        if 'version' not in self._configuration:
            self._configuration['version'] = CURRENT_VERSION

        for file_dict in self._configuration['files_to_ingest']:
            if 'data_types' not in file_dict:
                file_dict['data_types'] = []