示例#1
0
class StrikeConfiguration(object):
    """Represents the configuration for a running Strike instance. The configuration names the monitored workspace,
    the monitor (its type plus the monitor's own settings) and the regular expressions used to identify files to
    ingest and how to store them.
    """
    def __init__(self, configuration):
        """Creates a Strike configuration object from the given dictionary. The general format is checked for
        correctness, but the specified workspace(s) are not checked (see :meth:`validate` for that).

        :param configuration: The Strike configuration
        :type configuration: dict
        :raises InvalidStrikeConfiguration: If the given configuration is invalid
        """

        self._configuration = configuration

        # Convert old versions
        if 'version' in self._configuration and self._configuration['version'] != CURRENT_VERSION:
            self._configuration = self._convert_schema(configuration)

        # Validate the dictionary this object actually keeps. After a conversion it is not guaranteed to be the
        # object the caller passed in, so checking ``configuration`` here could validate stale, unconverted data.
        try:
            validate(self._configuration, STRIKE_CONFIGURATION_SCHEMA)
        except ValidationError as ex:
            raise InvalidStrikeConfiguration('Invalid Strike configuration: %s' % unicode(ex))

        self._populate_default_values()
        if self._configuration['version'] != CURRENT_VERSION:
            msg = 'Invalid Strike configuration: %s is an unsupported version number'
            raise InvalidStrikeConfiguration(msg % self._configuration['version'])

        self._file_handler = self._build_file_handler()

    def get_dict(self):
        """Returns the internal dictionary that represents this Strike process configuration.

        The dictionary is returned by reference, not copied.

        :returns: The internal dictionary
        :rtype: dict
        """

        return self._configuration

    def get_monitor(self):
        """Returns the configured monitor for this Strike configuration

        A monitor is requested from the factory for the configured type and then loaded with this configuration.

        :returns: The configured monitor
        :rtype: :class:`ingest.strike.monitors.monitor.Monitor`
        """

        monitor_type = self._configuration['monitor']['type']
        monitor = factory.get_monitor(monitor_type)
        self.load_monitor_configuration(monitor)
        return monitor

    def get_workspace(self):
        """Returns the monitored workspace name for this Strike configuration

        :returns: The monitored workspace name
        :rtype: string
        """

        return self._configuration['workspace']

    def load_monitor_configuration(self, monitor):
        """Loads the configuration into the given monitor

        Nothing is loaded, and a warning is logged, when the monitor's type differs from the configured type.

        :param monitor: The monitor to load this configuration into
        :type monitor: :class:`ingest.strike.monitors.monitor.Monitor`
        """

        monitor_dict = self._configuration['monitor']
        monitor_type = monitor_dict['type']
        workspace = self._configuration['workspace']

        # Only load configuration if monitor type is unchanged
        if monitor_type == monitor.monitor_type:
            monitor.setup_workspaces(workspace, self._file_handler)
            monitor.load_configuration(monitor_dict)
        else:
            msg = 'Strike monitor type has been changed from %s to %s. Cannot reload configuration.'
            logger.warning(msg, monitor.monitor_type, monitor_type)

    def validate(self):
        """Validates the Strike configuration

        Checks that the monitor type is known, that every referenced workspace exists and is active, and that the
        monitor supports the broker type of the monitored workspace.

        :returns: A list of warnings discovered during validation (currently always empty)
        :rtype: list[:class:`ingest.strike.configuration.strike_configuration.ValidationWarning`]

        :raises :class:`ingest.strike.configuration.exceptions.InvalidStrikeConfiguration`: If the configuration is
            invalid.
        """

        warnings = []

        monitor_type = self._configuration['monitor']['type']
        if monitor_type not in factory.get_monitor_types():
            raise InvalidStrikeConfiguration('\'%s\' is an invalid monitor type' % monitor_type)

        # Collect the monitored workspace plus every destination workspace named by a rule
        monitored_workspace_name = self._configuration['workspace']
        workspace_names = {monitored_workspace_name}
        for rule in self._file_handler.rules:
            if rule.new_workspace:
                workspace_names.add(rule.new_workspace)

        for workspace in Workspace.objects.filter(name__in=workspace_names):
            if workspace.name == monitored_workspace_name:
                broker_type = workspace.get_broker().broker_type
                monitor = factory.get_monitor(monitor_type)
                if broker_type not in monitor.supported_broker_types:
                    msg = 'Monitor type %s does not support broker type %s'
                    raise InvalidStrikeConfiguration(msg % (monitor_type, broker_type))
            if not workspace.is_active:
                raise InvalidStrikeConfiguration('Workspace is not active: %s' % workspace.name)
            # Tick off each workspace that was found; whatever remains afterwards is unknown
            workspace_names.remove(workspace.name)

        if workspace_names:
            raise InvalidStrikeConfiguration('Unknown workspace name: %s' % workspace_names.pop())

        return warnings

    def _build_file_handler(self):
        """Builds the file handler holding one rule for each entry of ``files_to_ingest``.

        As a side effect, every ``new_file_path`` in the configuration is replaced by its normalized form.

        :returns: The file handler populated with the configured rules
        :raises InvalidStrikeConfiguration: If a file name regex does not compile or a new_file_path is absolute
        """

        file_handler = FileHandler()
        for file_dict in self._configuration['files_to_ingest']:
            try:
                regex_pattern = re.compile(file_dict['filename_regex'])
            except re.error:
                raise InvalidStrikeConfiguration('Invalid file name regex: %s' % file_dict['filename_regex'])
            new_workspace = file_dict.get('new_workspace')
            new_file_path = None
            if 'new_file_path' in file_dict:
                if os.path.isabs(file_dict['new_file_path']):
                    msg = 'Invalid Strike configuration: new_file_path may not be an absolute path'
                    raise InvalidStrikeConfiguration(msg)
                new_file_path = os.path.normpath(file_dict['new_file_path'])
                file_dict['new_file_path'] = new_file_path
            file_handler.add_rule(FileRule(regex_pattern, file_dict['data_types'], new_workspace, new_file_path))
        return file_handler

    def _convert_schema(self, configuration):
        """Tries to validate the configuration as version 1.0 and convert it to the current version

        The 1.0 ``mount`` and ``transfer_suffix`` fields are replaced by a ``workspace`` and a ``dir-watcher``
        monitor. If no workspace with the generated name exists yet, one is created and saved.

        :param configuration: The Strike configuration
        :type configuration: dict
        :returns: The converted configuration
        :rtype: dict
        :raises InvalidStrikeConfiguration: If the mount does not contain a ':' separator
        """

        # Try converting from 1.0
        converted_configuration = StrikeConfiguration_1_0(configuration).get_dict()
        converted_configuration['version'] = CURRENT_VERSION

        mount = converted_configuration['mount']
        # The path is the piece that follows the first ':' (presumably "host:/path" -- confirm against 1.0 schema)
        mount_parts = mount.split(':')
        if len(mount_parts) < 2:
            raise InvalidStrikeConfiguration('Invalid Strike configuration: mount %s is missing a path' % mount)
        mount_path = mount_parts[1]
        transfer_suffix = converted_configuration['transfer_suffix']
        del converted_configuration['mount']
        del converted_configuration['transfer_suffix']
        auto_workspace_name = 'auto_wksp_for_%s' % mount.replace(':', '_').replace('/', '_')
        auto_workspace_name = auto_workspace_name[:50]  # Truncate to max name length of 50 chars
        title = 'Auto Workspace for %s' % mount
        title = title[:50]  # Truncate to max title length of 50 chars
        try:
            Workspace.objects.get(name=auto_workspace_name)
        except Workspace.DoesNotExist:
            workspace = Workspace()
            workspace.name = auto_workspace_name
            workspace.title = title
            desc = 'This workspace was automatically created for mount %s to support converting Strike from 1.0 to 2.0'
            workspace.description = desc % mount
            workspace.json_config = {
                'version': '1.0',
                'broker': {
                    'type': 'host',
                    'host_path': mount_path,
                },
            }
            workspace.save()

        converted_configuration['workspace'] = auto_workspace_name
        converted_configuration['monitor'] = {
            'type': 'dir-watcher',
            'transfer_suffix': transfer_suffix,
        }
        # Rename the per-file 1.0 keys to their newer names
        for file_dict in converted_configuration['files_to_ingest']:
            file_dict['new_workspace'] = file_dict['workspace_name']
            file_dict['new_file_path'] = file_dict['workspace_path']
            del file_dict['workspace_name']
            del file_dict['workspace_path']

        return converted_configuration

    def _populate_default_values(self):
        """Goes through the configuration and populates any missing values with defaults."""

        if 'version' not in self._configuration:
            self._configuration['version'] = CURRENT_VERSION

        for file_dict in self._configuration['files_to_ingest']:
            if 'data_types' not in file_dict:
                file_dict['data_types'] = []
示例#2
0
class ScanConfigurationV6(object):
    """Wraps the dictionary form of a Scan configuration: the scanner (type plus its settings), the workspace to
    scan, whether to scan recursively and the regular expression rules that identify files to ingest and how to
    store them.
    """
    def __init__(self, configuration, do_validate=False):
        """Creates a Scan configuration object from the given dictionary. The dictionary is checked against the
        schema only when requested; the specified workspace(s) are never checked here.

        :param configuration: The Scan configuration
        :type configuration: dict
        :param do_validate: Whether to validate the configuration against the Scan configuration schema
        :type do_validate: bool
        :raises InvalidScanConfiguration: If the given configuration is invalid
        """

        self._configuration = configuration

        # A missing version and the old '1.0' version are both stamped with the current schema version
        if 'version' not in self._configuration or self._configuration['version'] == '1.0':
            self._configuration['version'] = SCHEMA_VERSION

        if do_validate:
            try:
                validate(self._configuration, SCAN_CONFIGURATION_SCHEMA)
            except ValidationError as ex:
                raise InvalidScanConfiguration('Invalid Scan configuration: %s' % unicode(ex))

        self._populate_default_values()
        version = self._configuration['version']
        if version not in SCHEMA_VERSIONS:
            raise InvalidScanConfiguration('Invalid Scan configuration: %s is an unsupported version number' % version)

        # One rule is registered per files_to_ingest entry
        self._file_handler = FileHandler()
        for entry in self._configuration['files_to_ingest']:
            try:
                pattern = re.compile(entry['filename_regex'])
            except re.error:
                raise InvalidScanConfiguration('Invalid file name regex: %s' % entry['filename_regex'])
            target_workspace = entry.get('new_workspace')
            target_path = None
            if 'new_file_path' in entry:
                if os.path.isabs(entry['new_file_path']):
                    raise InvalidScanConfiguration(
                        'Invalid Scan configuration: new_file_path may not be an absolute path')
                # The normalized path is also written back into the configuration
                target_path = os.path.normpath(entry['new_file_path'])
                entry['new_file_path'] = target_path
            self._file_handler.add_rule(FileRule(pattern, entry['data_types'], target_workspace, target_path))

    def get_configuration(self):
        """Returns the scan configuration represented by this JSON

        :returns: The scan configuration
        :rtype: :class:`ingest.scan.configuration.scan_configuration.ScanConfiguration`:
        """

        raw = self._configuration
        config = ScanConfiguration()

        config.scanner_type = raw['scanner']['type']
        config.scanner_config = raw['scanner']
        config.recursive = raw['recursive']
        config.file_handler = self._file_handler
        config.workspace = raw['workspace']
        config.config_dict = raw

        return config

    def get_dict(self):
        """Returns the internal dictionary that represents this Scan process configuration.

        The dictionary is returned by reference, not copied.

        :returns: The internal dictionary
        :rtype: dict
        """

        return self._configuration

    def _convert_schema(self, configuration):
        """Upgrade schema from a previous version

        The given dictionary is modified in place and returned; only version '1.0' is changed.

        :param configuration: The Scan configuration
        :type configuration: dict
        :returns: The converted configuration
        :rtype: dict
        """

        if 'version' in configuration and configuration['version'] == '1.0':
            configuration['version'] = SCHEMA_VERSION
        return configuration

    def _populate_default_values(self):
        """Fills in defaults for the version, the recursive flag and each file entry's data types."""

        settings = self._configuration

        if 'version' not in settings:
            settings['version'] = SCHEMA_VERSION

        settings.setdefault('recursive', True)

        for entry in settings['files_to_ingest']:
            entry.setdefault('data_types', [])
示例#3
0
class StrikeConfigurationV6(object):
    """Wraps the dictionary form of a Strike configuration: the monitor settings (optionally with credentials) and
    the regular expression rules that identify files to ingest and how to store them.
    """
    def __init__(self, configuration, do_validate=False):
        """Creates a Strike configuration object from the given dictionary. The dictionary is checked against the
        schema only when requested; the specified workspace(s) are never checked here.

        :param configuration: The Strike configuration
        :type configuration: dict
        :param do_validate: Whether to validate the configuration against the Strike configuration schema
        :type do_validate: bool
        :raises InvalidStrikeConfiguration: If the given configuration is invalid
        """

        self._configuration = configuration

        # Version 1.0 is rejected outright; version 2.0 is accepted and stamped with the current schema version
        if 'version' in self._configuration:
            declared_version = self._configuration['version']
            if declared_version == '1.0':
                raise InvalidStrikeConfiguration(
                    'Invalid Strike configuration. Strike configuration version 1.0 is no longer supported'
                )
            if declared_version == '2.0':
                self._configuration['version'] = SCHEMA_VERSION

        if do_validate:
            try:
                validate(configuration, STRIKE_CONFIGURATION_SCHEMA)
            except ValidationError as ex:
                raise InvalidStrikeConfiguration('Invalid Strike configuration: %s' % unicode(ex))

        self._populate_default_values()
        version = self._configuration['version']
        if version not in SCHEMA_VERSIONS:
            raise InvalidStrikeConfiguration(
                'Invalid Strike configuration: %s is an unsupported version number' % version)

        # One rule is registered per files_to_ingest entry
        self._file_handler = FileHandler()
        for entry in self._configuration['files_to_ingest']:
            try:
                pattern = re.compile(entry['filename_regex'])
            except re.error:
                raise InvalidStrikeConfiguration('Invalid file name regex: %s' % entry['filename_regex'])
            target_workspace = entry.get('new_workspace')
            target_path = None
            if 'new_file_path' in entry:
                if os.path.isabs(entry['new_file_path']):
                    raise InvalidStrikeConfiguration(
                        'Invalid Strike configuration: new_file_path may not be an absolute path')
                # The normalized path is also written back into the configuration
                target_path = os.path.normpath(entry['new_file_path'])
                entry['new_file_path'] = target_path
            self._file_handler.add_rule(FileRule(pattern, entry['data_types'], target_workspace, target_path))

    def get_dict(self):
        """Returns the internal dictionary that represents this Strike process configuration.

        The dictionary is returned by reference, not copied.

        :returns: The internal dictionary
        :rtype: dict
        """

        return self._configuration

    def get_configuration(self):
        """Returns the strike configuration represented by this JSON

        :returns: The strike configuration
        :rtype: :class:`ingest.strike.configuration.strike_configuration.StrikeConfiguration`:
        """

        config = StrikeConfiguration()

        config.configuration = self._configuration
        config.file_handler = self._file_handler

        return config

    def sanitize_credentials(self):
        """Masks the credential values in the config when the monitor has a credentials section

        Both ``access_key_id`` and ``secret_access_key`` are overwritten in place with a fixed mask.
        """

        if 'monitor' not in self._configuration:
            return
        monitor = self._configuration['monitor']
        if 'credentials' not in monitor:
            return

        credentials = monitor['credentials']
        for secret_key in ('access_key_id', 'secret_access_key'):
            credentials[secret_key] = '************'

    def _populate_default_values(self):
        """Fills in defaults for the version and for each file entry's data types."""

        settings = self._configuration

        if 'version' not in settings:
            settings['version'] = SCHEMA_VERSION

        for entry in settings['files_to_ingest']:
            entry.setdefault('data_types', [])
示例#4
0
class ScanConfiguration(object):
    """Wraps the dictionary form of a Scan configuration: the scanner (type plus its settings), the workspace to
    scan, whether to scan recursively and the regular expression rules that identify files to ingest and how to
    store them.
    """
    def __init__(self, configuration):
        """Creates a Scan configuration object from the given dictionary. The general format is checked against the
        schema, but the specified workspace(s) are not checked (see :meth:`validate` for that).

        :param configuration: The Scan configuration
        :type configuration: dict
        :raises InvalidScanConfiguration: If the given configuration is invalid
        """

        self._configuration = configuration

        try:
            validate(configuration, SCAN_CONFIGURATION_SCHEMA)
        except ValidationError as ex:
            raise InvalidScanConfiguration('Invalid Scan configuration: %s' % unicode(ex))

        self._populate_default_values()
        version = self._configuration['version']
        if version != CURRENT_VERSION:
            raise InvalidScanConfiguration('Invalid Scan configuration: %s is an unsupported version number' % version)

        # One rule is registered per files_to_ingest entry
        self._file_handler = FileHandler()
        for entry in self._configuration['files_to_ingest']:
            try:
                pattern = re.compile(entry['filename_regex'])
            except re.error:
                raise InvalidScanConfiguration('Invalid file name regex: %s' % entry['filename_regex'])
            target_workspace = entry.get('new_workspace')
            target_path = None
            if 'new_file_path' in entry:
                if os.path.isabs(entry['new_file_path']):
                    raise InvalidScanConfiguration(
                        'Invalid Scan configuration: new_file_path may not be an absolute path')
                # The normalized path is also written back into the configuration
                target_path = os.path.normpath(entry['new_file_path'])
                entry['new_file_path'] = target_path
            self._file_handler.add_rule(FileRule(pattern, entry['data_types'], target_workspace, target_path))

    def get_dict(self):
        """Returns the internal dictionary that represents this Scan process configuration.

        The dictionary is returned by reference, not copied.

        :returns: The internal dictionary
        :rtype: dict
        """

        return self._configuration

    def get_scanner(self):
        """Returns the configured scanner for this Scan configuration

        A scanner is requested from the factory for the configured type and then loaded with this configuration.

        :returns: The configured scanner
        :rtype: :class:`ingest.scan.scanners.scanner.Scanner`
        """

        scanner = factory.get_scanner(self._configuration['scanner']['type'])
        self.load_scanner_configuration(scanner)
        return scanner

    def get_workspace(self):
        """Returns the workspace name to be scanned for this Scan configuration

        :returns: The workspace name
        :rtype: string
        """

        return self._configuration['workspace']

    def load_scanner_configuration(self, scanner):
        """Loads the configuration into the given scanner

        Nothing is loaded, and a warning is logged, when the scanner's type differs from the configured type.

        :param scanner: The scanner to load this configuration into
        :type scanner: :class:`ingest.scan.scanners.scanner.Scanner`
        """

        scanner_dict = self._configuration['scanner']
        configured_type = scanner_dict['type']
        workspace = self._configuration['workspace']

        # A scanner of a different type cannot take this configuration
        if configured_type != scanner.scanner_type:
            msg = 'Scan scanner type has been changed from %s to %s. Cannot reload configuration.'
            logger.warning(msg, scanner.scanner_type, configured_type)
            return

        scanner.setup_workspaces(workspace, self._file_handler)
        scanner.load_configuration(scanner_dict)
        scanner.set_recursive(self._configuration['recursive'])

    def validate(self):
        """Validates the Scan configuration

        Checks that the scanner type is known, that every referenced workspace exists and is active, and that the
        scanner supports the broker type of the scanned workspace.

        :returns: A list of warnings discovered during validation (currently always empty)
        :rtype: list[:class:`ingest.scan.configuration.scan_configuration.ValidationWarning`]

        :raises :class:`ingest.scan.configuration.exceptions.InvalidScanConfiguration`: If the configuration is
            invalid.
        """

        warnings = []

        scanner_type = self._configuration['scanner']['type']
        if scanner_type not in factory.get_scanner_types():
            raise InvalidScanConfiguration('\'%s\' is an invalid scanner' % scanner_type)

        # Collect the scanned workspace plus every destination workspace named by a rule
        scanned_name = self._configuration['workspace']
        workspace_names = {scanned_name}
        workspace_names.update(rule.new_workspace for rule in self._file_handler.rules if rule.new_workspace)

        for workspace in Workspace.objects.filter(name__in=workspace_names):
            if workspace.name == scanned_name:
                broker_type = workspace.get_broker().broker_type
                supported = factory.get_scanner(scanner_type).supported_broker_types
                if broker_type not in supported:
                    raise InvalidScanConfiguration(
                        'Scanner type %s does not support broker type %s' % (scanner_type, broker_type))
            if not workspace.is_active:
                raise InvalidScanConfiguration('Workspace is not active: %s' % workspace.name)
            # Tick off each workspace that was found; whatever remains afterwards is unknown
            workspace_names.remove(workspace.name)

        if workspace_names:
            raise InvalidScanConfiguration('Unknown workspace name: %s' % workspace_names.pop())

        return warnings

    def _convert_schema(self, configuration):
        """Upgrade schema from a previous version

        No conversion is implemented for this class, so calling this always raises.

        :param configuration: The Scan configuration
        :type configuration: dict
        :returns: The converted configuration
        :rtype: dict
        :raises NotImplementedError: Always
        """

        raise NotImplementedError

    def _populate_default_values(self):
        """Fills in defaults for the version, the recursive flag and each file entry's data types."""

        settings = self._configuration

        if 'version' not in settings:
            settings['version'] = CURRENT_VERSION

        settings.setdefault('recursive', True)

        for entry in settings['files_to_ingest']:
            entry.setdefault('data_types', [])
示例#5
0
class StrikeConfigurationV2(object):
    """Represents the configuration for a running Strike instance. The configuration names the monitored workspace,
    the monitor (its type plus the monitor's own settings) and the regular expressions used to identify files to
    ingest and how to store them.
    """

    def __init__(self, configuration, do_validate=False):
        """Creates a Strike configuration object from the given dictionary. The general format is checked for
        correctness only when requested; the specified workspace(s) are never checked here.

        :param configuration: The Strike configuration
        :type configuration: dict
        :param do_validate: Whether to validate the configuration against the Strike configuration schema
        :type do_validate: bool
        :raises InvalidStrikeConfiguration: If the given configuration is invalid
        """

        self._configuration = configuration

        # Convert old versions
        if 'version' in self._configuration and self._configuration['version'] != CURRENT_VERSION:
            self._configuration = self._convert_schema(configuration)

        # Validate the dictionary this object actually keeps. After a conversion it is not guaranteed to be the
        # object the caller passed in, so checking ``configuration`` here could validate stale, unconverted data.
        if do_validate:
            try:
                validate(self._configuration, STRIKE_CONFIGURATION_SCHEMA)
            except ValidationError as ex:
                raise InvalidStrikeConfiguration('Invalid Strike configuration: %s' % unicode(ex))

        self._populate_default_values()
        if self._configuration['version'] != CURRENT_VERSION:
            msg = 'Invalid Strike configuration: %s is an unsupported version number'
            raise InvalidStrikeConfiguration(msg % self._configuration['version'])

        self._file_handler = self._build_file_handler()

    def get_dict(self):
        """Returns the internal dictionary that represents this Strike process configuration.

        The dictionary is returned by reference, not copied.

        :returns: The internal dictionary
        :rtype: dict
        """

        return self._configuration

    def get_configuration(self):
        """Returns the strike configuration represented by this JSON

        :returns: The strike configuration
        :rtype: :class:`ingest.strike.configuration.strike_configuration.StrikeConfiguration`:
        """

        config = StrikeConfiguration()

        config.configuration = self._configuration
        config.file_handler = self._file_handler

        return config

    def _build_file_handler(self):
        """Builds the file handler holding one rule for each entry of ``files_to_ingest``.

        As a side effect, every ``new_file_path`` in the configuration is replaced by its normalized form.

        :returns: The file handler populated with the configured rules
        :raises InvalidStrikeConfiguration: If a file name regex does not compile or a new_file_path is absolute
        """

        file_handler = FileHandler()
        for file_dict in self._configuration['files_to_ingest']:
            try:
                regex_pattern = re.compile(file_dict['filename_regex'])
            except re.error:
                raise InvalidStrikeConfiguration('Invalid file name regex: %s' % file_dict['filename_regex'])
            new_workspace = file_dict.get('new_workspace')
            new_file_path = None
            if 'new_file_path' in file_dict:
                if os.path.isabs(file_dict['new_file_path']):
                    msg = 'Invalid Strike configuration: new_file_path may not be an absolute path'
                    raise InvalidStrikeConfiguration(msg)
                new_file_path = os.path.normpath(file_dict['new_file_path'])
                file_dict['new_file_path'] = new_file_path
            file_handler.add_rule(FileRule(regex_pattern, file_dict['data_types'], new_workspace, new_file_path))
        return file_handler

    def _convert_schema(self, configuration):
        """Tries to validate the configuration as version 1.0 and convert it to the current version

        The 1.0 ``mount`` and ``transfer_suffix`` fields are replaced by a ``workspace`` and a ``dir-watcher``
        monitor. If no workspace with the generated name exists yet, one is created and saved.

        :param configuration: The Strike configuration
        :type configuration: dict
        :returns: The converted configuration
        :rtype: dict
        :raises InvalidStrikeConfiguration: If the mount does not contain a ':' separator
        """

        # Try converting from 1.0
        converted_configuration = StrikeConfigurationV1(configuration).get_dict()
        converted_configuration['version'] = CURRENT_VERSION

        mount = converted_configuration['mount']
        # The path is the piece that follows the first ':' (presumably "host:/path" -- confirm against 1.0 schema)
        mount_parts = mount.split(':')
        if len(mount_parts) < 2:
            raise InvalidStrikeConfiguration('Invalid Strike configuration: mount %s is missing a path' % mount)
        mount_path = mount_parts[1]
        transfer_suffix = converted_configuration['transfer_suffix']
        del converted_configuration['mount']
        del converted_configuration['transfer_suffix']
        auto_workspace_name = 'auto_wksp_for_%s' % mount.replace(':', '_').replace('/', '_')
        auto_workspace_name = auto_workspace_name[:50]  # Truncate to max name length of 50 chars
        title = 'Auto Workspace for %s' % mount
        title = title[:50]  # Truncate to max title length of 50 chars
        try:
            Workspace.objects.get(name=auto_workspace_name)
        except Workspace.DoesNotExist:
            workspace = Workspace()
            workspace.name = auto_workspace_name
            workspace.title = title
            desc = 'This workspace was automatically created for mount %s to support converting Strike from 1.0 to 2.0'
            workspace.description = desc % mount
            workspace.json_config = {'version': '1.0', 'broker': {'type': 'host', 'host_path': mount_path}}
            workspace.save()

        converted_configuration['workspace'] = auto_workspace_name
        converted_configuration['monitor'] = {'type': 'dir-watcher', 'transfer_suffix': transfer_suffix}
        # Rename the per-file 1.0 keys to their newer names
        for file_dict in converted_configuration['files_to_ingest']:
            file_dict['new_workspace'] = file_dict['workspace_name']
            file_dict['new_file_path'] = file_dict['workspace_path']
            del file_dict['workspace_name']
            del file_dict['workspace_path']

        return converted_configuration

    def _populate_default_values(self):
        """Goes through the configuration and populates any missing values with defaults."""

        if 'version' not in self._configuration:
            self._configuration['version'] = CURRENT_VERSION

        for file_dict in self._configuration['files_to_ingest']:
            if 'data_types' not in file_dict:
                file_dict['data_types'] = []