def _add_fields(self, missing_fields: Dict):
        no_fix = []
        properties = {}
        for name, field in missing_fields.items():
            # Figure out the path of the field in the document. If the name is set on the
            # field, it will be duplicated in the path from missing_fields, so drop it.
            prefix = name.split('.')
            if field.name:
                prefix = prefix[:-1]

            # Build the fields and templates for this new mapping
            sub_properties, sub_templates = build_mapping([field], prefix=prefix, allow_refuse_implicit=False)
            properties.update(sub_properties)
            if sub_templates:
                no_fix.append(name)

        # If we have collected any fields that we can't just blindly add because they might
        # conflict with existing mappings (the refuse_all_implicit_mappings rule may be in
        # place), simply raise an exception.
        if no_fix:
            raise ValueError(f"Can't update database mapping for {self.name}, "
                             f"couldn't safely amend mapping for {no_fix}")

        # If we got this far, the missing fields have been described in properties; upload
        # them to the server and we should be able to move on.
        mappings = {"properties": properties}
        for index in self.index_list_full:
            self.with_retries(self.datastore.client.indices.put_mapping, index=index, body=mappings)

        if self.with_retries(self.datastore.client.indices.exists_template, self.name):
            current_template = self.with_retries(self.datastore.client.indices.get_template, self.name)[self.name]
            recursive_update(current_template, {'mappings': mappings})
            self.with_retries(self.datastore.client.indices.put_template, self.name, body=current_template)
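For context, the body passed to put_mapping above is a plain Elasticsearch mapping fragment. A minimal sketch of what such a payload could look like (the field names and types here are hypothetical; the real ones come from build_mapping):

# Hypothetical shape of the mappings body built above.
mappings = {
    "properties": {
        "file.sha256": {"type": "keyword"},
        "file.size": {"type": "long"},
    }
}
# client.indices.put_mapping(index="alert", body=mappings)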
Example #2
def perform_alert_update(datastore, logger, alert):
    alert_id = alert.get('alert_id')

    with Lock(f"alert-update-{alert_id}", 5):
        old_alert = datastore.alert.get(alert_id, as_obj=False)
        if old_alert is None:
            raise KeyError(f"{alert_id} is missing from the alert collection.")

        # Merge fields...
        merged = {
            x: list(
                set(old_alert.get('al', {}).get(x, [])).union(
                    set(alert['al'].get(x, []))))
            for x in AL_FIELDS
        }

        # Sanity check.
        if not all([
                old_alert.get(x, None) == alert.get(x, None)
                for x in config.core.alerter.constant_alert_fields
        ]):
            raise ValueError("Constant alert field changed. (%s, %s)" %
                             (str(old_alert), str(alert)))

        old_alert = recursive_update(old_alert, alert)
        old_alert['al'] = recursive_update(old_alert['al'], merged)

        datastore.alert.save(alert_id, old_alert)

    logger.info(f"Alert {alert_id} has been updated.")
Example #3
def _get_config(static=False, yml_config=None):
    from assemblyline.odm.models.config import Config

    if yml_config is None:
        yml_config = "/etc/assemblyline/config.yml"

    # Initialize a default config
    config = Config().as_primitives()

    # Load modifiers from the yaml config
    if os.path.exists(yml_config):
        with open(yml_config) as yml_fh:
            yml_data = yaml.safe_load(_env_substitute(yml_fh.read()))
            if yml_data:
                config = recursive_update(config, yml_data)

    if not static:
        # TODO: Load a datastore object and load the config changes from the datastore
        # config.update(datastore_changes)
        pass

    if 'AL_LOG_LEVEL' in os.environ:
        config['logging']['log_level'] = os.environ['AL_LOG_LEVEL']

    return Config(config)
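_env_substitute is not shown in this listing. A minimal sketch of the behaviour it presumably implements, expanding ${VAR} references from the environment before the YAML is parsed (the exact syntax and defaulting rules are assumptions):

import os
import re

def _env_substitute(buffer):
    # Hypothetical sketch: replace ${VAR} with the environment variable's
    # value, substituting an empty string when the variable is unset.
    return re.sub(r'\$\{(\w+)\}', lambda m: os.environ.get(m.group(1), ''), buffer)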
Example #4
def get_classification(yml_config=None):
    from assemblyline.common.classification import Classification, InvalidDefinition

    if yml_config is None:
        yml_config = "/etc/assemblyline/classification.yml"

    classification_definition = {}
    default_file = os.path.join(os.path.dirname(__file__), "classification.yml")
    if os.path.exists(default_file):
        with open(default_file) as default_fh:
            default_yml_data = yaml.safe_load(default_fh.read())
            if default_yml_data:
                classification_definition.update(default_yml_data)

    # Load modifiers from the yaml config
    if os.path.exists(yml_config):
        with open(yml_config) as yml_fh:
            yml_data = yaml.safe_load(yml_fh.read())
            if yml_data:
                classification_definition = recursive_update(classification_definition, yml_data)

    if not classification_definition:
        raise InvalidDefinition('Could not find any classification definition to load.')

    return Classification(classification_definition)
Example #5
def get_tag_whitelister(log=None, yml_config=None):
    from assemblyline.common.tagging import TagWhitelister, InvalidWhitelist

    if yml_config is None:
        yml_config = "/etc/assemblyline/tag_whitelist.yml"

    tag_whitelist_data = {}
    default_file = os.path.join(os.path.dirname(__file__), "tag_whitelist.yml")
    if os.path.exists(default_file):
        with open(default_file) as default_fh:
            default_yml_data = yaml.safe_load(default_fh.read())
            if default_yml_data:
                tag_whitelist_data.update(default_yml_data)

    # Load modifiers from the yaml config
    if os.path.exists(yml_config):
        with open(yml_config) as yml_fh:
            yml_data = yaml.safe_load(yml_fh.read())
            if yml_data:
                tag_whitelist_data = recursive_update(tag_whitelist_data, yml_data)

    if not tag_whitelist_data:
        raise InvalidWhitelist('Could not find any tag_whitelist file to load.')

    return TagWhitelister(tag_whitelist_data, log=log)
Example #6
def test_dict_recursive():
    src = {
        "a": {
            "b": {
                "c": 1
            }
        },
        "b": {
            "d": 2
        }
    }
    add = {
        "a": {
            "d": 3,
            "b": {
                "c": 4
            }
        }
    }
    dest = recursive_update(deepcopy(src), add)
    assert dest["a"]["b"]["c"] == 4
    assert dest["a"]["d"] == 3
    assert dest["b"]["d"] == 2

    delta = get_recursive_delta(src, dest)
    assert add == delta
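recursive_update itself is never shown in this listing. A minimal sketch consistent with the test above (nested dicts are merged key by key; anything else, including lists, simply overwrites) might look like this; the real implementation in Assemblyline may differ:

def recursive_update(d, u):
    # Merge u into d in place: recurse into dicts, overwrite everything else.
    for key, value in u.items():
        if isinstance(value, dict) and isinstance(d.get(key), dict):
            d[key] = recursive_update(d[key], value)
        else:
            d[key] = value
    return d

def get_recursive_delta(src, dest):
    # Compute the smallest update u such that recursive_update(src, u) == dest.
    delta = {}
    for key, value in dest.items():
        if isinstance(value, dict) and isinstance(src.get(key), dict):
            sub = get_recursive_delta(src[key], value)
            if sub:
                delta[key] = sub
        elif src.get(key, object()) != value:
            delta[key] = value
    return delta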
Example #7
def perform_alert_update(datastore, logger, alert):
    alert_id = alert.get('alert_id', None)
    if not alert_id:
        raise ValueError(
            f"We could not find the alert ID in the alert: {str(alert)}")

    while True:
        old_alert, version = datastore.alert.get_if_exists(
            alert_id,
            as_obj=False,
            archive_access=config.datastore.ilm.update_archive,
            version=True)
        if old_alert is None:
            raise AlertMissingError(
                f"{alert_id} is missing from the alert collection.")

        # Ensure alert keeps original timestamp
        alert['ts'] = old_alert['ts']

        # Merge fields...
        merged = {
            x: list(
                set(old_alert.get('al', {}).get(x, [])).union(
                    set(alert['al'].get(x, []))))
            for x in AL_FIELDS
        }

        # Sanity check.
        if not all([
                old_alert.get(x, None) == alert.get(x, None)
                for x in config.core.alerter.constant_alert_fields
        ]):
            raise ValueError(
                f"Constant alert field changed. ({str(old_alert)}, {str(alert)})"
            )

        old_alert = recursive_update(old_alert, alert)
        old_alert['al'] = recursive_update(old_alert['al'], merged)
        old_alert['workflows_completed'] = False

        try:
            datastore.alert.save(alert_id, old_alert, version=version)
            logger.info(f"Alert {alert_id} has been updated.")
            return
        except VersionConflictException as vce:
            logger.info(
                f"Retrying update alert due to version conflict: {str(vce)}")
Example #8
    def list_all_services(self,
                          as_obj=True,
                          full=False) -> Union[List[dict], List[Service]]:
        """
        :param as_obj: Return ODM objects rather than dicts
        :param full: If true, retrieve all the fields of the service object; otherwise only
                     the fields returned by search are given.
        """
        items = list(self.ds.service_delta.stream_search("id:*", as_obj=False))

        if full:
            service_data = self.ds.service.multiget(
                [f"{item['id']}_{item['version']}" for item in items],
                as_dictionary=False)
            service_delta = self.ds.service_delta.multiget(
                [item['id'] for item in items], as_dictionary=False)
            services = [
                recursive_update(data.as_primitives(strip_null=True),
                                 delta.as_primitives(strip_null=True))
                for data, delta in zip(service_data, service_delta)
            ]

        else:
            services_versions = {
                item['id']: item
                for item in self.ds.service.stream_search("id:*", as_obj=False)
            }
            services = [
                recursive_update(
                    services_versions[f"{item['id']}_{item['version']}"], item)
                for item in items
                if f"{item['id']}_{item['version']}" in services_versions
            ]

        if as_obj:
            mask = None
            if not full and services:
                mask = services[0].keys()
            return [Service(s, mask=mask) for s in services]
        else:
            return services
Example #9
    def get_pdfid(path, additional_keywords, plugins, deep):
        """Run PDFId code on sample.

        Args:
            path: Original PDF sample path.
            additional_keywords: List of additional keywords to be searched (provided in service configuration).
            plugins: List of PDFId module plugins (provided in service configuration).
            deep: Boolean value of AL submission deep scan value.

        Returns:
            PDFId result and error list.
        """
        options = {
            'verbose': False,
            'plugins': ','.join(plugins),
            'scan': False,
            'csv': False,
            'all': True,
            "extra": False,
            "force": False,
            "disarm": False,
            "minimumscore": 0.0,
            "select": '',
            "nozero": False,
            "output": '',
            "pluginoptions": '',
        }
        try:
            pdfid_result, errors = pdfid.PDFiDMain(path, options,
                                                   additional_keywords, deep)
        except Exception as e:
            raise Exception(f"PDFID failed to run on sample. Error: {e}")

        # Process pdfid_results for service results
        pdfid_result_dict = {}
        for line in pdfid_result:
            if line:
                parts = line.split(',')
                # Fold a line like 'a,b,c,1' into a nested dict: {'a': {'b': {'c': '1'}}}
                value = parts[-1]
                for index in reversed(range(len(parts) - 1)):
                    value = {parts[index]: value}
                if isinstance(value, dict):
                    try:
                        pdfid_result_dict = recursive_update(
                            pdfid_result_dict, value)
                    except Exception:
                        pass

        return pdfid_result_dict, errors
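The folding loop above turns one comma-separated PDFiD output line into a nested dict before it is merged with recursive_update. For instance (the line content is hypothetical):

line = 'keywords,keyword,/JavaScript,1'
parts = line.split(',')
value = parts[-1]
for index in reversed(range(len(parts) - 1)):
    value = {parts[index]: value}
# value == {'keywords': {'keyword': {'/JavaScript': '1'}}}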
Example #10
    def __init__(self, component_name):
        super().__init__(component_name)

        # Load updated values
        if os.path.exists(CONFIG_PATH):
            with open(CONFIG_PATH) as yml_fh:
                self.replay_config = ReplayConfig(
                    recursive_update(
                        ReplayConfig().as_primitives(),
                        yaml.safe_load(env_substitute(yml_fh.read()))))
        else:
            self.replay_config = ReplayConfig()

        # Thread events related to exiting
        self.main_loop_exit = threading.Event()
Example #11
def get_classification() -> Classification:
    classification_yml = '/etc/assemblyline/classification.yml'

    classification_definition = {}

    # TODO: Why is this not using forge?

    if os.path.exists(classification_yml):
        with open(classification_yml) as yml_fh:
            yml_data = yaml.safe_load(yml_fh.read())
            if yml_data:
                classification_definition = recursive_update(classification_definition, yml_data)

    if not classification_definition:
        raise InvalidDefinition("Could not find any classification definition to load.")

    return Classification(classification_definition)
Example #12
    def get_service_with_delta(self, service_name, version=None, as_obj=True):
        svc = self.ds.service_delta.get(service_name)
        if svc is None:
            return svc

        if version is not None:
            svc.version = version

        svc_version_data = self.ds.service.get(f"{service_name}_{svc.version}")
        if svc_version_data is None:
            return svc_version_data

        svc_version_data = recursive_update(
            svc_version_data.as_primitives(strip_null=True),
            svc.as_primitives(strip_null=True))
        if as_obj:
            return Service(svc_version_data)
        else:
            return svc_version_data
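This method and list_all_services above apply the same overlay: the versioned service document provides the defaults and the delta document overrides them, with strip_null keeping unset delta fields from clobbering real values. Roughly, with hypothetical values:

service = {'name': 'Extract', 'version': '4.0.0', 'timeout': 60, 'enabled': True}
delta = {'version': '4.0.0', 'timeout': 120}  # nulls already stripped
effective = recursive_update(dict(service), delta)
# effective == {'name': 'Extract', 'version': '4.0.0', 'timeout': 120, 'enabled': True}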
Example #13
def _get_config(yml_config=None):
    from assemblyline.odm.models.config import Config

    if yml_config is None:
        yml_config = "/etc/assemblyline/config.yml"

    # Initialize a default config
    config = Config().as_primitives()

    # Load modifiers from the yaml config
    if os.path.exists(yml_config):
        with open(yml_config) as yml_fh:
            yml_data = yaml.safe_load(env_substitute(yml_fh.read()))
            if yml_data:
                config = recursive_update(config, yml_data)

    if 'AL_LOG_LEVEL' in os.environ:
        config['logging']['log_level'] = os.environ['AL_LOG_LEVEL']

    return Config(config)
Example #14
def construct_safe(mod, data) -> typing.Tuple[typing.Any, typing.Dict]:
    if not isinstance(data, dict):
        return None, data
    fields = mod.fields()
    clean = {}
    dropped = {}
    for key, value in data.items():
        if key not in fields:
            dropped[key] = value
            continue

        _c, _d = _construct_field(fields[key], value)

        if _c is not None:
            clean[key] = _c
        if _d is not None:
            dropped[key] = _d

    try:
        return mod(clean), dropped
    except ValueError as _:
        return None, recursive_update(dropped, clean)
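_construct_field is not part of this listing. Assuming it validates a single value against an odm field and returns a (clean, dropped) pair mirroring construct_safe itself, a hypothetical sketch:

def _construct_field(field, value):
    # Hypothetical helper: accept the value if the field validates it,
    # otherwise hand it back as dropped data.
    try:
        return field.check(value), None
    except (ValueError, TypeError):
        return None, value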
Example #15
def get_tag_safelist_data(yml_config=None):

    if yml_config is None:
        yml_config = "/etc/assemblyline/tag_safelist.yml"

    tag_safelist_data = {}
    default_file = os.path.join(os.path.dirname(__file__), "tag_safelist.yml")
    if os.path.exists(default_file):
        with open(default_file) as default_fh:
            default_yml_data = yaml.safe_load(default_fh.read())
            if default_yml_data:
                tag_safelist_data.update(default_yml_data)

    # Load modifiers from the yaml config
    if os.path.exists(yml_config):
        with open(yml_config) as yml_fh:
            yml_data = yaml.safe_load(yml_fh.read())
            if yml_data:
                tag_safelist_data = recursive_update(tag_safelist_data,
                                                     yml_data)

    return tag_safelist_data
Example #16
def process_alert_message(counter, datastore, logger, alert_data):
    """
    This is the default process_alert_message function. If the generic alerts are not sufficient
    in your deployment, you can create another method like this one that would follow
    the same structure but with added parts where the comment blocks are located.
    """

    ###############################
    # Additional init goes here
    ###############################

    submitter = alert_data.get('submission', {}).get('params', {}).get('submitter', None)
    user = datastore.user.get(submitter, as_obj=False)
    if user:
        user_classification = user['classification']
    else:
        user_classification = Classification.UNRESTRICTED
    a_type = alert_data.get('submission', {}).get('metadata', {}).pop('type', None)
    a_ts = alert_data.get('submission', {}).get('metadata', {}).pop('ts', None)

    alert = {
        'al': {'score': alert_data['score']},
        'alert_id': generate_alert_id(alert_data),
        'archive_ts': now_as_iso(config.datastore.ilm.days_until_archive * 24 * 60 * 60),
        'metadata': {safe_str(key): value
                     for key, value in alert_data['submission']['metadata'].items()},
        'sid': alert_data['submission']['sid'],
        'ts': a_ts or alert_data['submission']['time'],
        'type': a_type or alert_data['submission']['params']['type']
    }

    if config.core.alerter.alert_ttl:
        alert['expiry_ts'] = now_as_iso(config.core.alerter.alert_ttl * 24 * 60 * 60)

    ###############################
    # Additional alert_data parsing
    # and alert updating goes here
    ###############################

    # Get update parts
    alert_update_p1, alert_update_p2 = get_alert_update_parts(
        counter, datastore, alert_data, logger, user_classification)

    # Update alert with default values
    alert = recursive_update(alert, alert_update_p1)

    # Update alert with computed values
    alert = recursive_update(alert, alert_update_p2)

    return save_alert(datastore, counter, logger, alert,
                      alert_data['submission']['params']['psid'])