def git_clone_repo(source: Dict[str, Any], previous_update=None) -> List:
    name = source['name']
    url = source['uri']
    pattern = source.get('pattern', None)
    key = source.get('private_key', None)
    ssl_ignore = source.get("ssl_ignore_errors", False)
    ca_cert = source.get("ca_cert")

    git_env = {}
    if ssl_ignore:
        git_env['GIT_SSL_NO_VERIFY'] = '1'  # environment values must be strings

    if ca_cert:
        LOGGER.info(f"A CA certificate has been provided with this source.")
        add_cacert(ca_cert)
        git_env['GIT_SSL_CAINFO'] = certifi.where()

    clone_dir = os.path.join(UPDATE_DIR, name)
    if os.path.exists(clone_dir):
        shutil.rmtree(clone_dir)

    if key:
        LOGGER.info(f"key found for {url}")
        # Save the key to a file
        git_ssh_identity_file = os.path.join(tempfile.gettempdir(), 'id_rsa')
        with open(git_ssh_identity_file, 'w') as key_fh:
            key_fh.write(key)
        os.chmod(git_ssh_identity_file, 0o0400)

        git_ssh_cmd = f"ssh -oStrictHostKeyChecking=no -i {git_ssh_identity_file}"
        git_env['GIT_SSH_COMMAND'] = git_ssh_cmd

    repo = Repo.clone_from(url, clone_dir, env=git_env)

    # Check repo last commit
    if previous_update:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)
        # Only the most recent commit needs to be checked; if it predates the
        # last update there is nothing new to collect from this repository.
        latest_commit = next(repo.iter_commits(), None)
        if latest_commit and latest_commit.committed_date < previous_update:
            return []

    sigma_files = []
    for path_in_dir, _, files in os.walk(clone_dir):
        for filename in files:
            # path_in_dir already includes clone_dir, so join it with the filename only
            filepath = os.path.join(path_in_dir, filename)
            if pattern:
                if re.match(pattern, filepath):
                    sigma_files.append(
                        (filepath, get_sha256_for_file(filepath)))
            elif re.match(r'.*\.yml', filepath):
                sigma_files.append(
                    (filepath, get_sha256_for_file(filepath)))

    return sigma_files
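
For context, a minimal hedged sketch of how this clone helper might be driven; the repository URL, pattern and timestamp are placeholders, and UPDATE_DIR, LOGGER and iso_to_epoch are assumed to be defined at module level as used above.

# Hypothetical source entry; the keys mirror what git_clone_repo reads above.
example_source = {
    'name': 'sigma-rules',                                # clone sub-directory under UPDATE_DIR
    'uri': 'https://github.com/example/sigma-rules.git',  # placeholder repository
    'pattern': r'.*/rules/.*\.yml',                       # optional filter on file paths
    'ssl_ignore_errors': False,
    # 'private_key': '<PEM-encoded key>',                 # only needed for SSH remotes
}

# Returns (filepath, sha256) tuples for matching files, or [] when the
# latest commit predates previous_update.
changed_files = git_clone_repo(example_source, previous_update='2022-01-01T00:00:00Z')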
Example No. 2
    def _add_file(
        self,
        path: str,
        name: str,
        description: str,
        classification: Optional[Classification] = None
    ) -> Optional[Dict[str, str]]:
        # Reject empty files
        if os.path.getsize(path) == 0:
            self.log.warning(
                f"Adding empty extracted or supplementary files is not allowed. "
                f"Empty file ({name}) was ignored.")
            return

        # If file classification not provided, then use the default result classification
        if not classification:
            classification = self.service_default_result_classification

        file = dict(
            name=name,
            sha256=get_sha256_for_file(path),
            description=description,
            classification=self._classification.max_classification(
                self.min_classification, classification),
            path=path,
        )

        return file
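
As a hedged illustration only, this is the kind of call site the helper above supports; the path, name and description are placeholders, and the snippet assumes it runs inside the same class that defines _add_file.

# Hypothetical call from within the same class (placeholders throughout).
file_record = self._add_file(
    path='/tmp/decoded_payload.bin',
    name='decoded_payload.bin',
    description='Payload decoded from the submitted document')
if file_record is None:
    # Empty files are rejected with a warning and yield None; otherwise a dict
    # with name, sha256, description, classification and path is returned.
    self.log.info("Nothing to attach for this task.")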
Example No. 3
    def _get_rules_hash(self):
        if not os.path.exists(FILE_UPDATE_DIRECTORY):
            self.log.warning(f"{self.name} rules directory not found")
            return None

        try:
            rules_directory = max([
                os.path.join(FILE_UPDATE_DIRECTORY, d)
                for d in os.listdir(FILE_UPDATE_DIRECTORY)
                if os.path.isdir(os.path.join(FILE_UPDATE_DIRECTORY, d))
                and not d.startswith('.tmp')
            ],
                                  key=os.path.getctime)
        except ValueError:
            self.log.warning(f"No valid {self.name} rules directory found")
            return None

        self.rules_list = [
            str(f) for f in Path(rules_directory).rglob("*")
            if os.path.isfile(str(f))
        ]
        all_sha256s = [get_sha256_for_file(f) for f in self.rules_list]

        self.log.info(
            f"{self.name} will load the following rule files: {self.rules_list}"
        )

        if len(all_sha256s) == 1:
            return all_sha256s[0][:7]

        return hashlib.sha256(' '.join(
            sorted(all_sha256s)).encode('utf-8')).hexdigest()[:7]
Example No. 4
    def download_file(self) -> str:
        file_path = os.path.join(tempfile.gettempdir(), self.sha256)
        if not os.path.exists(file_path):
            raise Exception("File download failed. File not found on local filesystem.")

        received_sha256 = get_sha256_for_file(file_path)
        if received_sha256 != self.sha256:
            raise Exception(f"SHA256 mismatch between requested and "
                            f"downloaded file. {self.sha256} != {received_sha256}")

        return file_path
Example No. 5
    def _gen_rules_hash(self) -> str:
        self.rules_list = [
            str(f) for f in Path(self.rules_directory).rglob("*")
            if os.path.isfile(str(f))
        ]
        all_sha256s = [get_sha256_for_file(f) for f in self.rules_list]

        if len(all_sha256s) == 1:
            return all_sha256s[0][:7]

        return hashlib.sha256(' '.join(
            sorted(all_sha256s)).encode('utf-8')).hexdigest()[:7]
Example No. 6
def filter_downloads(update_directory,
                     pattern,
                     default_pattern=".*") -> List[Tuple[str, str]]:
    f_files = []
    if not pattern:
        # The regex is matched against the full filepath, the bare filename or the directory path, using either the source's pattern or the default
        pattern = default_pattern
    for path_in_dir, subdirs, files in os.walk(update_directory):
        for filename in files:
            filepath = os.path.join(path_in_dir, filename)  # path_in_dir already includes update_directory
            if re.match(pattern, filepath) or re.match(pattern, filename):
                f_files.append((filepath, get_sha256_for_file(filepath)))
        for subdir in subdirs:
            dirpath = f'{os.path.join(path_in_dir, subdir)}/'
            if re.match(pattern, dirpath):
                f_files.append((dirpath,
                                get_sha256_for_file(
                                    make_archive(subdir,
                                                 'tar',
                                                 root_dir=dirpath))))

    return f_files
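
A brief, hedged sketch of how the filter behaves; the directory and pattern below are made up for illustration.

# Hypothetical update directory containing rules/a.yar and rules/README.md.
# With a pattern, only paths (or bare filenames) matching it are kept;
# without one, the default ".*" keeps everything that os.walk finds.
matched = filter_downloads('/tmp/update_dir', pattern=r'.*\.yar$')
for filepath, sha256 in matched:
    print(filepath, sha256)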
Example No. 7
def put_identify_custom_yara_file(**_):
    """
    Save a new version of identify's custom Yara file

    Variables:
    None

    Arguments:
    None

    Data Block:
    <current custom.yara file>

    Result example:
    {"success": True}
    """
    data = request.json.encode('utf-8')

    yara_file = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            yara_file = tmp.name
            tmp.write(data)

        try:
            yara_default_externals = {'mime': '', 'magic': '', 'type': ''}
            yara.compile(filepaths={"default": yara_file},
                         externals=yara_default_externals)
        except Exception as e:
            message = str(e).replace(yara_file, "custom.yara line ")
            return make_api_response(
                {'success': False},
                f"The Yara file you have submitted is invalid: {message}", 400)
    finally:
        if yara_file and os.path.exists(yara_file):
            os.unlink(yara_file)

    with forge.get_cachestore('system', config=config,
                              datastore=STORAGE) as cache:
        if hashlib.sha256(data).hexdigest() == get_sha256_for_file(
                constants.YARA_RULE_PATH):
            cache.delete('custom_yara')
        else:
            cache.save('custom_yara', data, ttl=ADMIN_FILE_TTL, force=True)

    # Notify components watching to reload yara file
    event_sender.send('identify', 'yara')

    return make_api_response({'success': True})
Example No. 8
def put_identify_custom_magic_file(**_):
    """
    Save a new version of identify's custom LibMagic file

    Variables:
    None

    Arguments:
    None

    Data Block:
    <current custom.magic file>

    Result example:
    {"success": True}
    """
    data = request.json.encode('utf-8')

    magic_file = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            magic_file = tmp.name
            tmp.write(data)

        try:
            test = magic.magic_open(magic.MAGIC_CONTINUE + magic.MAGIC_RAW)
            magic.magic_load(test, magic_file)
        except magic.MagicException:
            return make_api_response(
                {'success': False},
                "The magic file you have submitted is invalid.", 400)
    finally:
        if magic_file and os.path.exists(magic_file):
            os.unlink(magic_file)

    with forge.get_cachestore('system', config=config,
                              datastore=STORAGE) as cache:
        if hashlib.sha256(data).hexdigest() == get_sha256_for_file(
                constants.MAGIC_RULE_PATH):
            cache.delete('custom_magic')
        else:
            cache.save('custom_magic', data, ttl=ADMIN_FILE_TTL, force=True)

    # Notify components watching to reload magic file
    event_sender.send('identify', 'magic')

    return make_api_response({'success': True})
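Example No. 9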
def url_download(source: Dict[str, Any], previous_update=None) -> List:
    """
    :param source:
    :param previous_update:
    :return:
    """
    name = source['name']
    uri = source['uri']
    pattern = source.get('pattern', None)
    username = source.get('username', None)
    password = source.get('password', None)
    ca_cert = source.get('ca_cert', None)
    ignore_ssl_errors = source.get('ssl_ignore_errors', False)
    auth = (username, password) if username and password else None

    headers = source.get('headers', None)

    LOGGER.info(
        f"{name} source is configured to {'ignore SSL errors' if ignore_ssl_errors else 'verify SSL'}."
    )
    if ca_cert:
        LOGGER.info(f"A CA certificate has been provided with this source.")
        add_cacert(ca_cert)

    # Create a requests session
    session = requests.Session()
    session.verify = not ignore_ssl_errors

    try:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)

        # Check the response header for the last modified date
        response = session.head(uri, auth=auth, headers=headers)
        last_modified = response.headers.get('Last-Modified', None)
        if last_modified:
            # Convert the last modified time to epoch
            last_modified = time.mktime(
                time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))

            # Compare the last modified time with the last updated time
            if previous_update and last_modified <= previous_update:
                # File has not been modified since last update, do nothing
                return []

        if previous_update:
            previous_update = time.strftime("%a, %d %b %Y %H:%M:%S %Z",
                                            time.gmtime(previous_update))
            if headers:
                headers['If-Modified-Since'] = previous_update
            else:
                headers = {'If-Modified-Since': previous_update}

        response = session.get(uri, auth=auth, headers=headers)

        # Check the response code
        if response.status_code == requests.codes['not_modified']:
            # File has not been modified since last update, do nothing
            return []
        elif response.ok:
            if not os.path.exists(UPDATE_DIR):
                os.makedirs(UPDATE_DIR)

            file_name = os.path.basename(urlparse(uri).path)
            file_path = os.path.join(UPDATE_DIR, file_name)
            with open(file_path, 'wb') as f:
                f.write(response.content)

            rules_files = None
            if file_name.endswith('tar.gz'):
                extract_dir = os.path.join(UPDATE_DIR, name)
                shutil.unpack_archive(file_path, extract_dir=extract_dir)

                rules_files = set()
                for path_in_dir, _, files in os.walk(extract_dir):
                    for filename in files:
                        filepath = os.path.join(path_in_dir, filename)  # path_in_dir already includes extract_dir
                        if pattern:
                            if re.match(pattern, filepath):
                                rules_files.add(filepath)
                        else:
                            rules_files.add(filepath)

            return [(f, get_sha256_for_file(f))
                    for f in rules_files or [file_path]]

    except requests.Timeout:
        # TODO: should we retry?
        pass
    except Exception as e:
        # Catch all other types of exceptions such as ConnectionError, ProxyError, etc.
        LOGGER.info(str(e))
        exit()
        # TODO: Should we exit even if one file fails to download? Or should we continue downloading other files?
    finally:
        # Close the requests session
        session.close()
Example No. 10
def url_download(source: Dict[str, Any],
                 previous_update: Optional[int] = None,
                 logger=None,
                 output_dir: Optional[str] = None) -> List[Tuple[str, str]]:
    """

    :param source:
    :param previous_update:
    :return:
    """
    name = source['name']
    uri = source['uri']
    pattern = source.get('pattern', None)
    username = source.get('username', None)
    password = source.get('password', None)
    ca_cert = source.get('ca_cert', None)
    ignore_ssl_errors = source.get('ssl_ignore_errors', False)
    auth = (username, password) if username and password else None

    proxy = source.get('proxy', None)
    headers_list = source.get('headers', [])
    headers = {header['name']: header['value'] for header in headers_list}

    logger.info(
        f"{name} source is configured to {'ignore SSL errors' if ignore_ssl_errors else 'verify SSL'}."
    )
    if ca_cert:
        logger.info("A CA certificate has been provided with this source.")
        add_cacert(ca_cert)

    # Create a requests session
    session = requests.Session()
    session.verify = not ignore_ssl_errors

    # Let https requests go through proxy
    if proxy:
        os.environ['https_proxy'] = proxy

    try:
        if isinstance(previous_update, str):
            previous_update = iso_to_epoch(previous_update)

        # Check the response header for the last modified date
        response = session.head(uri, auth=auth, headers=headers)
        last_modified = response.headers.get('Last-Modified', None)
        if last_modified:
            # Convert the last modified time to epoch
            last_modified = time.mktime(
                time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))

            # Compare the last modified time with the last updated time
            if previous_update and last_modified <= previous_update and not FORCE_UPDATE:
                # File has not been modified since last update, do nothing
                raise SkipSource()

        if previous_update:
            previous_update = time.strftime("%a, %d %b %Y %H:%M:%S %Z",
                                            time.gmtime(previous_update))
            if headers:
                headers['If-Modified-Since'] = previous_update
            else:
                headers = {'If-Modified-Since': previous_update}

        response = session.get(uri, auth=auth, headers=headers)

        # Check the response code
        if response.status_code == requests.codes[
                'not_modified'] and not FORCE_UPDATE:
            # File has not been modified since last update, do nothing
            raise SkipSource()
        elif response.ok:
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            file_name = os.path.basename(urlparse(uri).path)
            file_path = os.path.join(output_dir, file_name)
            with open(file_path, 'wb') as f:
                for content in response.iter_content(BLOCK_SIZE):
                    f.write(content)

            # Clear proxy setting
            if proxy:
                del os.environ['https_proxy']

            if file_name.endswith('tar.gz') or file_name.endswith('zip'):
                extract_dir = os.path.join(output_dir, name)
                shutil.unpack_archive(file_path, extract_dir=extract_dir)

                return filter_downloads(extract_dir, pattern)
            else:
                return [(file_path, get_sha256_for_file(file_path))]
        else:
            logger.warning(f"Download not successful: {response.content}")
            return []

    except SkipSource:
        # Raise to calling function for handling
        raise
    except Exception as e:
        # Catch all other types of exceptions such as ConnectionError, ProxyError, etc.
        logger.warning(str(e))
        exit()
    finally:
        # Close the requests session
        session.close()
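
A hedged usage sketch of the downloader above; the source values, logger and output directory are placeholders, and SkipSource, FORCE_UPDATE and BLOCK_SIZE are assumed to be defined elsewhere in the module.

import logging

# Hypothetical source entry; the keys mirror what url_download reads above.
example_source = {
    'name': 'example-feed',
    'uri': 'https://rules.example.org/feed.tar.gz',        # placeholder URL
    'pattern': r'.*\.yar$',
    # Headers are supplied as a list of name/value pairs and flattened
    # into a regular dict inside url_download.
    'headers': [{'name': 'X-Api-Key', 'value': 'placeholder'}],
}

try:
    files = url_download(example_source,
                         previous_update=None,
                         logger=logging.getLogger('updater'),
                         output_dir='/tmp/updates/example-feed')
except SkipSource:
    # Raised when the remote content has not changed since previous_update.
    files = []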
Example No. 11
def url_update(test_func=test_file) -> None:
    """
    Using an update configuration file as an input, which contains a list of sources, download all the file(s) which
    have been modified since the last update.
    """
    update_config = {}
    # Load configuration
    if UPDATE_CONFIGURATION_PATH and os.path.exists(UPDATE_CONFIGURATION_PATH):
        with open(UPDATE_CONFIGURATION_PATH, 'r') as yml_fh:
            update_config = yaml.safe_load(yml_fh)
    else:
        LOGGER.warning("Could not find update configuration file.")
        exit(1)

    # Cleanup output path
    if os.path.exists(UPDATE_OUTPUT_PATH):
        if os.path.isdir(UPDATE_OUTPUT_PATH):
            shutil.rmtree(UPDATE_OUTPUT_PATH)
        else:
            os.unlink(UPDATE_OUTPUT_PATH)
    os.makedirs(UPDATE_OUTPUT_PATH)

    # Get sources
    sources = update_config.get('sources', None)
    # Exit if no update sources given
    if not sources:
        exit()

    # Parse updater configuration
    previous_update = update_config.get('previous_update', None)
    previous_hash = update_config.get('previous_hash', None) or {}
    if previous_hash:
        previous_hash = json.loads(previous_hash)
    if isinstance(previous_update, str):
        previous_update = iso_to_epoch(previous_update)

    # Create a requests session
    session = requests.Session()

    files_sha256 = {}

    # Go through each source and download file
    for source in sources:
        uri = source['uri']
        name = source['name']

        if not uri or not name:
            LOGGER.warning(f"Invalid source: {source}")
            continue

        LOGGER.info(f"Downloading file '{name}' from uri '{uri}' ...")

        username = source.get('username', None)
        password = source.get('password', None)
        auth = (username, password) if username and password else None
        ca_cert = source.get('ca_cert', None)
        ignore_ssl_errors = source.get('ssl_ignore_errors', False)

        headers = source.get('headers', None)

        if ca_cert:
            # Add certificate to requests
            cafile = certifi.where()
            with open(cafile, 'a') as ca_editor:
                ca_editor.write(f"\n{ca_cert}")

        session.verify = not ignore_ssl_errors

        try:
            # Check the response header for the last modified date
            response = session.head(uri, auth=auth, headers=headers)
            last_modified = response.headers.get('Last-Modified', None)
            if last_modified:
                # Convert the last modified time to epoch
                last_modified = time.mktime(
                    time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))

                # Compare the last modified time with the last updated time
                if update_config.get(
                        'previous_update',
                        None) and last_modified <= previous_update:
                    # File has not been modified since last update, do nothing
                    LOGGER.info(
                        "File has not changed since last time, Skipping...")
                    continue

            if update_config.get('previous_update', None):
                previous_update = time.strftime("%a, %d %b %Y %H:%M:%S %Z",
                                                time.gmtime(previous_update))
                if headers:
                    headers['If-Modified-Since'] = previous_update
                else:
                    headers = {
                        'If-Modified-Since': previous_update,
                    }

            response = session.get(uri, auth=auth, headers=headers)

            # Check the response code
            if response.status_code == requests.codes['not_modified']:
                # File has not been modified since last update, do nothing
                LOGGER.info(
                    "File has not changed since last time, Skipping...")
                continue
            elif response.ok:
                file_path = os.path.join(UPDATE_OUTPUT_PATH, name)
                with open(file_path, 'wb') as f:
                    f.write(response.content)

                if not test_func(file_path):
                    os.unlink(file_path)
                    LOGGER.warning(
                        f"The downloaded file was invalid. It will not be part of this update..."
                    )
                    continue

                # Append the SHA256 of the file to a list of downloaded files
                sha256 = get_sha256_for_file(file_path)
                if previous_hash.get(name, None) != sha256:
                    files_sha256[name] = sha256
                else:
                    LOGGER.info(
                        "File as the same hash as last time. Skipping...")

                LOGGER.info("File successfully downloaded!")
        except requests.Timeout:
            LOGGER.warning(
                f"Cannot find the file for source {name} with url {uri} - (Timeout)"
            )
            continue
        except Exception as e:
            # Catch all other types of exceptions such as ConnectionError, ProxyError, etc.
            LOGGER.warning(f"Source {name} failed with error: {str(e)}")

    if files_sha256:
        new_hash = deepcopy(previous_hash)
        new_hash.update(files_sha256)

        # Check if the new update hash matches the previous update hash
        if new_hash == previous_hash:
            # Update file(s) not changed, delete the downloaded files and exit
            shutil.rmtree(UPDATE_OUTPUT_PATH, ignore_errors=True)
            exit()

        # Create the response yaml
        with open(os.path.join(UPDATE_OUTPUT_PATH, 'response.yaml'),
                  'w') as yml_fh:
            yaml.safe_dump(dict(hash=json.dumps(new_hash), ), yml_fh)

        LOGGER.info("Service update file(s) successfully downloaded")

    # Close the requests session
    session.close()
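Example No. 12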
def yara_update(updater_type, update_config_path, update_output_path,
                download_directory, externals, cur_logger) -> None:
    """
    Using an update configuration file as an input, which contains a list of sources, download all the file(s).
    """
    # noinspection PyBroadException
    try:
        # Load updater configuration
        update_config = {}
        if update_config_path and os.path.exists(update_config_path):
            with open(update_config_path, 'r') as yml_fh:
                update_config = yaml.safe_load(yml_fh)
        else:
            cur_logger.error(f"Update configuration file doesn't exist: {update_config_path}")
            exit()

        # Exit if no update sources given
        if 'sources' not in update_config.keys() or not update_config['sources']:
            cur_logger.error(f"Update configuration does not contain any source to update from")
            exit()

        # Initialise al_client
        server = update_config['ui_server']
        user = update_config['api_user']
        api_key = update_config['api_key']
        cur_logger.info(f"Connecting to Assemblyline API: {server}...")
        al_client = get_client(server, apikey=(user, api_key), verify=False)
        cur_logger.info(f"Connected!")

        # Parse updater configuration
        previous_update = update_config.get('previous_update', None)
        previous_hash = json.loads(update_config.get('previous_hash', None) or "{}")
        sources = {source['name']: source for source in update_config['sources']}
        files_sha256 = {}
        files_default_classification = {}

        # Create working directory
        updater_working_dir = os.path.join(tempfile.gettempdir(), 'updater_working_dir')
        if os.path.exists(updater_working_dir):
            shutil.rmtree(updater_working_dir)
        os.makedirs(updater_working_dir)

        # Go through each source and download file
        for source_name, source in sources.items():
            os.makedirs(os.path.join(updater_working_dir, source_name))
            # 1. Download signatures
            cur_logger.info(f"Downloading files from: {source['uri']}")
            uri: str = source['uri']

            if uri.endswith('.git'):
                files = git_clone_repo(download_directory, source, cur_logger, previous_update=previous_update)
            else:
                files = [url_download(download_directory, source, cur_logger, previous_update=previous_update)]

            processed_files = set()

            # 2. Aggregate files
            file_name = os.path.join(updater_working_dir, f"{source_name}.yar")
            mode = "w"
            for file in files:
                # File has already been processed before, skip it to avoid duplication of rules
                if file in processed_files:
                    continue

                cur_logger.info(f"Processing file: {file}")

                file_dirname = os.path.dirname(file)
                processed_files.add(os.path.normpath(file))
                with open(file, 'r') as f:
                    f_lines = f.readlines()

                temp_lines = []
                for i, f_line in enumerate(f_lines):
                    if f_line.startswith("include"):
                        lines, processed_files = replace_include(f_line, file_dirname, processed_files, cur_logger)
                        temp_lines.extend(lines)
                    else:
                        temp_lines.append(f_line)

                # guess the type of files that we have in the current file
                guessed_category = guess_category(file)
                parser = Plyara()
                signatures = parser.parse_string("\n".join(temp_lines))

                # Ignore "cuckoo" rules
                if "cuckoo" in parser.imports:
                    parser.imports.remove("cuckoo")

                # Guess category
                if guessed_category:
                    for s in signatures:
                        if 'metadata' not in s:
                            s['metadata'] = []

                        # Do not override category with guessed category if it already exists
                        if any('category' in meta for meta in s['metadata']):
                            continue

                        s['metadata'].append({'category': guessed_category})
                        s['metadata'].append({guessed_category: s.get('rule_name')})

                # Save all rules from source into single file
                with open(file_name, mode) as f:
                    for s in signatures:
                        # Fix imports and remove cuckoo
                        s['imports'] = utils.detect_imports(s)
                        if "cuckoo" not in s['imports']:
                            f.write(utils.rebuild_yara_rule(s))

                if mode == "w":
                    mode = "a"

            # Check if the file is the same as the last run
            if os.path.exists(file_name):
                cache_name = os.path.basename(file_name)
                sha256 = get_sha256_for_file(file_name)
                if sha256 != previous_hash.get(cache_name, None):
                    files_sha256[cache_name] = sha256
                    files_default_classification[cache_name] = source.get('default_classification',
                                                                          classification.UNRESTRICTED)
                else:
                    cur_logger.info(f'File {cache_name} has not changed since last run. Skipping it...')

        if files_sha256:
            cur_logger.info(f"Found new {updater_type.upper()} rules files to process!")

            yara_importer = YaraImporter(updater_type, al_client, logger=cur_logger)

            # Validating and importing the different signatures
            for base_file in files_sha256:
                cur_logger.info(f"Validating output file: {base_file}")
                cur_file = os.path.join(updater_working_dir, base_file)
                source_name = os.path.splitext(os.path.basename(cur_file))[0]
                default_classification = files_default_classification.get(base_file, classification.UNRESTRICTED)

                try:
                    _compile_rules(cur_file, externals, cur_logger)
                    yara_importer.import_file(cur_file, source_name, default_classification=default_classification)
                except Exception as e:
                    raise e
        else:
            cur_logger.info(f'No new {updater_type.upper()} rules files to process...')

        # Check if new signatures have been added
        if al_client.signature.update_available(since=previous_update or '', sig_type=updater_type)['update_available']:
            cur_logger.info("An update is available for download from the datastore")

            if not os.path.exists(update_output_path):
                os.makedirs(update_output_path)

            temp_zip_file = os.path.join(update_output_path, 'temp.zip')
            al_client.signature.download(output=temp_zip_file,
                                         query=f"type:{updater_type} AND (status:NOISY OR status:DEPLOYED)")

            if os.path.exists(temp_zip_file):
                with ZipFile(temp_zip_file, 'r') as zip_f:
                    zip_f.extractall(update_output_path)

                os.remove(temp_zip_file)

            # Create the response yaml
            with open(os.path.join(update_output_path, 'response.yaml'), 'w') as yml_fh:
                yaml.safe_dump(dict(hash=json.dumps(files_sha256)), yml_fh)

            cur_logger.info(f"New ruleset successfully downloaded and ready to use")

        cur_logger.info(f"{updater_type.upper()} updater completed successfully")
    except Exception:
        cur_logger.exception("Updater ended with an exception!")
Example No. 13
    def try_run(self):
        self.status = STATUSES.INITIALIZING

        # Try to load service class
        try:
            service_class = load_module_by_path(SERVICE_PATH)
        except Exception:
            self.log.error("Could not find service in path.")
            raise

        # Load on-disk manifest for bootstrap/registration
        service_manifest = self._load_manifest()

        # Register the service
        registration = self.tasking_client.register_service(service_manifest)

        # Are we just registering?
        if not registration['keep_alive'] or REGISTER_ONLY:
            self.status = STATUSES.STOPPING
            self.stop()
            return

        # Instantiate the service based on the registration results
        self.service_config = registration.get('service_config', {})
        self.service = service_class(config=self.service_config.get('config'))
        self.service_name = self.service_config['name']
        self.service_tool_version = self.service.get_tool_version()
        self.metric_factory = MetricsFactory('service',
                                             Metrics,
                                             name=self.service_name,
                                             export_zero=False,
                                             redis=self.redis)
        file_required = self.service_config.get('file_required', True)

        # Start the service
        self.service.start_service()

        while self.running:
            # Cleanup the working directory
            self._cleanup_working_directory()

            # Get a task
            self.status = STATUSES.WAITING_FOR_TASK
            task, _ = self.tasking_client.get_task(
                self.client_id, self.service_name,
                self.service_config['version'], self.service_tool_version,
                self.metric_factory)

            if not task:
                continue

            # Load Task
            try:
                # Inspect task to ensure submission parameters are given, add defaults where necessary
                params = {
                    x['name']:
                    task['service_config'].get(x['name'], x['default'])
                    for x in service_manifest.get('submission_params', [])
                }
                task['service_config'] = params
                service_task = ServiceTask(task)
                self.log.info(f"[{service_task.sid}] New task received")
            except ValueError as e:
                self.log.error(f"Invalid task received: {str(e)}")
                continue

            # Download file if needed
            if file_required:
                self.status = STATUSES.DOWNLOADING_FILE
                file_path = os.path.join(self.tasking_dir,
                                         service_task.fileinfo.sha256)
                received_file_sha256 = None
                self.log.info(
                    f"[{service_task.sid}] Downloading file: {service_task.fileinfo.sha256}"
                )
                try:
                    self.filestore.download(service_task.fileinfo.sha256,
                                            file_path)
                    received_file_sha256 = get_sha256_for_file(file_path)
                except FileStoreException:
                    self.status = STATUSES.FILE_NOT_FOUND
                    self.log.error(
                        f"[{service_task.sid}] Requested file not found in the system: {service_task.fileinfo.sha256}"
                    )

                # If the file retrieved is different from what we requested, report the error
                if received_file_sha256 and received_file_sha256 != service_task.fileinfo.sha256:
                    self.status = STATUSES.ERROR_FOUND
                    self.log.error(
                        f"[{service_task.sid}] Downloaded ({received_file_sha256}) doesn't match "
                        f"requested ({service_task.fileinfo.sha256})")

            # Process if we're not already in error
            if self.status not in [
                    STATUSES.ERROR_FOUND, STATUSES.FILE_NOT_FOUND
            ]:
                self.status = STATUSES.PROCESSING
                self.service.handle_task(service_task)

                # Check for the response from the service
                result_json = os.path.join(
                    self.tasking_dir,
                    f"{service_task.sid}_{service_task.fileinfo.sha256}_result.json"
                )
                error_json = os.path.join(
                    self.tasking_dir,
                    f"{service_task.sid}_{service_task.fileinfo.sha256}_error.json"
                )
                if os.path.exists(result_json):
                    self.status = STATUSES.RESULT_FOUND
                elif os.path.exists(error_json):
                    self.status = STATUSES.ERROR_FOUND
                else:
                    self.status = STATUSES.ERROR_FOUND
                    error_json = None

            # Handle the service response
            if self.status == STATUSES.RESULT_FOUND:
                self.log.info(
                    f"[{service_task.sid}] Task successfully completed")
                try:
                    self._handle_task_result(result_json, service_task)
                except RuntimeError as re:
                    if is_recoverable_runtime_error(re):
                        self.log.info(
                            f"[{service_task.sid}] Service trying to use a threadpool during shutdown, "
                            "sending recoverable error.")
                        self._handle_task_error(service_task)
                    else:
                        raise
            elif self.status == STATUSES.ERROR_FOUND:
                self.log.info(
                    f"[{service_task.sid}] Task completed with errors")
                self._handle_task_error(service_task,
                                        error_json_path=error_json)
            elif self.status == STATUSES.FILE_NOT_FOUND:
                self.log.info(
                    f"[{service_task.sid}] Task completed with errors due to missing file from filestore"
                )
                self._handle_task_error(service_task,
                                        status="FAIL_NONRECOVERABLE",
                                        error_type="EXCEPTION")