Example #1
class ReplayCreatorWorker(ReplayBase):
    def __init__(self):
        super().__init__("assemblyline.replay_creator.worker")

        if not self.replay_config.creator.alert_input.enabled and \
                not self.replay_config.creator.submission_input.enabled:
            return

        # Initialize filestore object
        self.filestore = FileStore(self.replay_config.creator.output_filestore)

        # Create cache directory
        os.makedirs(self.replay_config.creator.working_directory,
                    exist_ok=True)

        # Load client
        client_config = dict(
            lookback_time=self.replay_config.creator.lookback_time,
            alert_fqs=self.replay_config.creator.alert_input.filter_queries,
            submission_fqs=self.replay_config.creator.submission_input.filter_queries)

        if self.replay_config.creator.client.type == 'direct':
            self.log.info("Using direct database access client")
            self.client = DirectClient(self.log, **client_config)
        elif self.replay_config.creator.client.type == 'api':
            self.log.info(
                f"Using API access client to ({self.replay_config.creator.client.options.host})"
            )
            client_config.update(
                self.replay_config.creator.client.options.as_primitives())
            self.client = APIClient(self.log, **client_config)
        else:
            raise ValueError(
                f'Invalid client type ({self.replay_config.creator.client.type}). '
                'Must be either \'api\' or \'direct\'.')
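The constructor above selects its client with a plain string dispatch on client.type. A minimal sketch of the same pattern in isolation, assuming stub DirectClient/APIClient classes (the real ones live in the replay package and are not shown here):

# Hedged sketch of the client-type dispatch above; DirectClient and
# APIClient are illustrative stand-ins, not the real replay clients.
class DirectClient:
    def __init__(self, log, **config):
        self.config = config

class APIClient:
    def __init__(self, log, **config):
        self.config = config

def build_client(log, client_type, client_config, options=None):
    if client_type == 'direct':
        return DirectClient(log, **client_config)
    if client_type == 'api':
        client_config.update(options or {})
        return APIClient(log, **client_config)
    raise ValueError(f"Invalid client type ({client_type}). "
                     "Must be either 'api' or 'direct'.")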
Example #2
def test_https():
    """
    Test HTTPS FileStore by fetching the assemblyline-base repository
    page from the CybercentreCanada GitHub organization.
    """
    fs = FileStore('https://github.com/CybercentreCanada/')
    assert fs.exists('assemblyline-base') != []
    assert fs.get('assemblyline-base') is not None
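FileStore also works as a context manager, as the ftps and tempdir tests further down show. A hedged sketch of the same read-only HTTPS fetch in that form (the URL and name are the ones from the test above):

from assemblyline.filestore import FileStore

with FileStore('https://github.com/CybercentreCanada/') as fs:
    if fs.exists('assemblyline-base'):
        page = fs.get('assemblyline-base')  # raw bytes of the fetched page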
Example #3
def test_https():
    """
    Test HTTPS FileStore by fetching the assemblyline page from
    CSE's Cyber Centre site.
    """
    fs = FileStore('https://cyber.gc.ca/en/')
    assert fs.exists('assemblyline') != []
    assert fs.get('assemblyline') is not None
Example #4
def test_s3():
    """
    Test Amazon S3 FileStore by fetching a test file from
    the assemblyline-support bucket on Amazon S3.
    """
    fs = FileStore('s3://AKIAIIESFCKMSXUP6KWQ:Uud08qLQ48Cbo9RB7b+H+M97aA2wdR8OXaHXIKwL@'
                   's3.amazonaws.com/?s3_bucket=assemblyline-support&aws_region=us-east-1')
    assert fs.exists('al4_s3_pytest.txt') != []
    assert fs.get('al4_s3_pytest.txt') is not None
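Judging from the URL in test_s3 (inferred from the example, not from FileStore documentation), the s3:// scheme carries the access key and secret in the userinfo part of the URL, with the bucket and region passed as query parameters. A sketch with placeholder credentials:

from assemblyline.filestore import FileStore

# Placeholder credentials and bucket; only the URL layout mirrors the test above.
access_key = 'AKIAEXAMPLE'
secret_key = 'SECRETEXAMPLE'
fs = FileStore(f's3://{access_key}:{secret_key}@s3.amazonaws.com/'
               '?s3_bucket=my-bucket&aws_region=us-east-1')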
Example #5
def test_file():
    """
    Test Local FileStore by fetching this test file from the
    directory that contains it.

    Note: The store root is built from __file__, so the test does
          not depend on where pytest is run from.
    """
    fs = FileStore('file://%s' % os.path.dirname(__file__))
    assert fs.exists(os.path.basename(__file__)) != []
    assert fs.get(os.path.basename(__file__)) is not None
Example #6
class CacheStore(object):
    def __init__(self, component: str, config=None, datastore=None):
        if not component:
            raise ValueError("Cannot instantiate a cachestore without providing a component name.")

        if not COMPONENT_VALIDATOR.match(component):
            raise ValueError("Invalid component name. (Only letters, numbers, underscores and dots allowed)")

        if config is None:
            config = forge.get_config()

        self.component = component
        self.datastore = datastore or forge.get_datastore(config=config)
        self.filestore = FileStore(*config.filestore.cache)
Example #7
def test_file():
    """
    Test Local FileStore by fetching this test file from the
    directory that contains it.

    Note: The store root is built from __file__, so the test does
          not depend on where pytest is run from.
    """
    fs = FileStore('file://%s' % os.path.dirname(__file__))
    assert fs.exists(os.path.basename(__file__)) != []
    assert fs.get(os.path.basename(__file__)) is not None

    with tempfile.TemporaryDirectory() as temp_dir:
        with FileStore('file://' + temp_dir) as fs:
            common_actions(fs)
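The body of common_actions is not included in these examples; judging from test_minio below, a plausible sketch of the round trip it performs would be:

def common_actions(fs):
    # put/get/exists/delete round trip, mirroring test_minio below
    content = b"FILESTORE SMOKE TEST"
    fs.put('smoke_test.txt', content)
    assert fs.exists('smoke_test.txt') != []
    assert fs.get('smoke_test.txt') == content
    fs.delete('smoke_test.txt')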
Example #8
class ExpiryManager(ServerBase):
    def __init__(self, force_ilm=False):
        self.config = forge.get_config()
        if force_ilm:
            self.config.datastore.ilm.enabled = True

        super().__init__('assemblyline.expiry',
                         shutdown_timeout=self.config.core.expiry.sleep_time +
                         5)
        self.datastore = forge.get_datastore(config=self.config,
                                             archive_access=True)
        self.hot_datastore = forge.get_datastore(config=self.config,
                                                 archive_access=False)
        self.filestore = forge.get_filestore(config=self.config)
        self.cachestore = FileStore(*self.config.filestore.cache)
        self.expirable_collections = []
        self.archiveable_collections = []
        self.counter = MetricsFactory('expiry', Metrics)
        self.counter_archive = MetricsFactory('archive', Metrics)

        if self.config.datastore.ilm.enabled:
            self.fs_hashmap = {
                'file': self.archive_filestore_delete,
                'cached_file': self.archive_cachestore_delete
            }
        else:
            self.fs_hashmap = {
                'file': self.filestore_delete,
                'cached_file': self.cachestore_delete
            }

        for name, definition in self.datastore.ds.get_models().items():
            if hasattr(definition, 'archive_ts'):
                self.archiveable_collections.append(
                    getattr(self.datastore, name))
            if hasattr(definition, 'expiry_ts'):
                self.expirable_collections.append(getattr(
                    self.datastore, name))

        if self.config.core.metrics.apm_server.server_url is not None:
            self.log.info(
                f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}"
            )
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(
                server_url=self.config.core.metrics.apm_server.server_url,
                service_name="expiry")
        else:
            self.apm_client = None
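The fs_hashmap above is ordinary dict-based dispatch: each collection name maps to the delete handler appropriate for the ILM setting. A minimal sketch of the pattern with illustrative handlers:

def filestore_delete(sha256, expiry_time):
    print(f"deleting {sha256} from the filestore")

def cachestore_delete(sha256, expiry_time):
    print(f"deleting {sha256} from the cachestore")

fs_hashmap = {'file': filestore_delete, 'cached_file': cachestore_delete}
fs_hashmap['file']('a' * 64, None)  # routed by collection name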
Example #9
def test_ftps(temp_ftps_server):
    """
    Run some operations against an in-process ftp server
    """
    with FileStore(f'ftps://{temp_ftps_server}') as fs:
        assert 'localhost' in str(fs)
        common_actions(fs)
Example #10
def test_https():
    """
    Test HTTPS FileStore by fetching the assemblyline-base repository
    page from the CybercentreCanada GitHub organization.
    """
    fs = FileStore('https://github.com/CybercentreCanada/')
    assert 'github.com' in str(fs)
    httpx_tests(fs)
Example #11
def test_minio():
    """
    Test Minio FileStore by pushing and fetching back content from it.
    """
    content = b"THIS IS A MINIO TEST"

    fs = FileStore('s3://al_storage_key:Ch@ngeTh!sPa33w0rd@localhost:9000/?s3_bucket=test&use_ssl=False')
    assert fs.delete('al4_minio_pytest.txt') is None
    assert fs.put('al4_minio_pytest.txt', content) != []
    assert fs.exists('al4_minio_pytest.txt') != []
    assert fs.get('al4_minio_pytest.txt') == content
    assert fs.delete('al4_minio_pytest.txt') is None
Example #12
def test_azure():
    """
    Azure filestore by downloading a file from our public storage blob
    """
    fs = FileStore("azure://alpytest.blob.core.windows.net/pytest/", connection_attempts=2)
    assert fs.exists('test') != []
    assert fs.get('test') is not None
    with pytest.raises(TransportException):
        fs.put('bob', 'bob')
Example #13
class CacheStore(object):
    def __init__(self, component, config=None, datastore=None):
        if not component:
            raise ValueError("Cannot instanciate a cachestore without providing a component name.")

        if not COMPONENT_VALIDATOR.match(component):
            raise ValueError("Invalid component name. (Only letters, numbers, underscores and dots allowed)")

        if config is None:
            config = forge.get_config()

        self.component = component
        self.datastore = datastore or forge.get_datastore()
        self.filestore = FileStore(*config.filestore.cache)

    def __enter__(self):
        return self

    def __exit__(self, ex_type, exc_val, exc_tb):
        self.filestore.close()

    def save(self, cache_key, data, ttl=DEFAULT_CACHE_LEN):
        if not COMPONENT_VALIDATOR.match(cache_key):
            raise ValueError("Invalid cache_key for cache item. "
                             "(Only letters, numbers, underscores and dots allowed)")

        new_key = f"{self.component}_{cache_key}" if self.component else cache_key

        self.datastore.cached_file.save(new_key, {'expiry_ts': now_as_iso(ttl), 'component': self.component})
        self.filestore.put(new_key, data)

    def get(self, cache_key):
        new_key = f"{self.component}_{cache_key}" if self.component else cache_key

        return self.filestore.get(new_key)

    def delete(self, cache_key, db_delete=True):
        new_key = f"{self.component}_{cache_key}" if self.component else cache_key

        self.filestore.delete(new_key)
        if db_delete:
            self.datastore.cached_file.delete(new_key)
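A hedged usage sketch for the CacheStore above; the component name and payload are illustrative:

with CacheStore('my_service') as cache:
    cache.save('last_result', b'{"score": 100}')  # stored as 'my_service_last_result'
    assert cache.get('last_result') == b'{"score": 100}'
    cache.delete('last_result')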
Example #14
class CacheStore(object):
    def __init__(self, component: str, config=None, datastore=None):
        if not component:
            raise ValueError("Cannot instantiate a cachestore without providing a component name.")

        if not COMPONENT_VALIDATOR.match(component):
            raise ValueError("Invalid component name. (Only letters, numbers, underscores and dots allowed)")

        if config is None:
            config = forge.get_config()

        self.component = component
        self.datastore = datastore or forge.get_datastore(config=config)
        self.filestore = FileStore(*config.filestore.cache)

    def __enter__(self) -> 'CacheStore':
        return self

    def __exit__(self, ex_type, exc_val, exc_tb):
        self.filestore.close()

    def save(self, cache_key: str, data: AnyStr, ttl=DEFAULT_CACHE_LEN, force=False):
        if not COMPONENT_VALIDATOR.match(cache_key):
            raise ValueError("Invalid cache_key for cache item. "
                             "(Only letters, numbers, underscores and dots allowed)")

        new_key = f"{self.component}_{cache_key}" if self.component else cache_key

        self.datastore.cached_file.save(new_key, {'expiry_ts': now_as_iso(ttl), 'component': self.component})
        self.filestore.put(new_key, data, force=force)

    def upload(self, cache_key: str, path: str, ttl=DEFAULT_CACHE_LEN):
        if not COMPONENT_VALIDATOR.match(cache_key):
            raise ValueError("Invalid cache_key for cache item. "
                             "(Only letters, numbers, underscores and dots allowed)")

        new_key = f"{self.component}_{cache_key}" if self.component else cache_key

        self.datastore.cached_file.save(new_key, {'expiry_ts': now_as_iso(ttl), 'component': self.component})
        self.filestore.upload(new_key, path, force=True)

    def touch(self, cache_key: str, ttl=DEFAULT_CACHE_LEN):
        if not COMPONENT_VALIDATOR.match(cache_key):
            raise ValueError("Invalid cache_key for cache item. "
                             "(Only letters, numbers, underscores and dots allowed)")
        if not self.exists(cache_key):
            raise KeyError(cache_key)

        new_key = f"{self.component}_{cache_key}" if self.component else cache_key
        self.datastore.cached_file.save(new_key, {'expiry_ts': now_as_iso(ttl), 'component': self.component})

    def get(self, cache_key: str) -> Optional[bytes]:
        new_key = f"{self.component}_{cache_key}" if self.component else cache_key
        return self.filestore.get(new_key)

    def download(self, cache_key: str, path: str):
        new_key = f"{self.component}_{cache_key}" if self.component else cache_key
        return self.filestore.download(new_key, path)

    def exists(self, cache_key: str):
        new_key = f"{self.component}_{cache_key}" if self.component else cache_key
        return self.filestore.exists(new_key)

    def delete(self, cache_key: str, db_delete=True):
        new_key = f"{self.component}_{cache_key}" if self.component else cache_key

        self.filestore.delete(new_key)
        if db_delete:
            self.datastore.cached_file.delete(new_key)
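Example #14 adds file-based helpers on top of Example #13. A hedged sketch of the upload/touch/download flow, with illustrative paths:

with CacheStore('my_service') as cache:
    cache.upload('model', '/tmp/model.bin')         # push a file into the cache
    cache.touch('model', ttl=3600)                  # extend its expiry
    cache.download('model', '/tmp/model_copy.bin')  # fetch it back to disk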
Example #15
class ReplayCreatorWorker(ReplayBase):
    def __init__(self):
        super().__init__("assemblyline.replay_creator.worker")

        if not self.replay_config.creator.alert_input.enabled and \
                not self.replay_config.creator.submission_input.enabled:
            return

        # Initialize filestore object
        self.filestore = FileStore(self.replay_config.creator.output_filestore)

        # Create cache directory
        os.makedirs(self.replay_config.creator.working_directory,
                    exist_ok=True)

        # Load client
        client_config = dict(
            lookback_time=self.replay_config.creator.lookback_time,
            alert_fqs=self.replay_config.creator.alert_input.filter_queries,
            submission_fqs=self.replay_config.creator.submission_input.filter_queries)

        if self.replay_config.creator.client.type == 'direct':
            self.log.info("Using direct database access client")
            self.client = DirectClient(self.log, **client_config)
        elif self.replay_config.creator.client.type == 'api':
            self.log.info(
                f"Using API access client to ({self.replay_config.creator.client.options.host})"
            )
            client_config.update(
                self.replay_config.creator.client.options.as_primitives())
            self.client = APIClient(self.log, **client_config)
        else:
            raise ValueError(
                f'Invalid client type ({self.replay_config.creator.client.type}). '
                'Must be either \'api\' or \'direct\'.')

    def process_alerts(self, once=False):
        while self.running:
            # Process alerts found
            alert = self.client.get_next_alert()
            if alert:
                self.log.info(f"Processing alert: {alert['alert_id']}")

                # Make sure the working directory exists
                os.makedirs(self.replay_config.creator.working_directory,
                            exist_ok=True)

                # Create the bundle
                bundle_path = os.path.join(
                    self.replay_config.creator.working_directory,
                    f"alert_{alert['alert_id']}.al_bundle")
                self.client.create_alert_bundle(alert['alert_id'], bundle_path)

                # Move the bundle
                self.filestore.upload(bundle_path,
                                      f"alert_{alert['alert_id']}.al_bundle")

                # Remove temp file
                if os.path.exists(bundle_path):
                    os.unlink(bundle_path)

                # Set alert state done
                self.client.set_single_alert_complete(alert['alert_id'])

            if once:
                break

    def process_submissions(self, once=False):
        while self.running:
            # Process submissions found
            submission = self.client.get_next_submission()
            if submission:
                self.log.info(f"Processing submission: {submission['sid']}")

                # Make sure the working directory exists
                os.makedirs(self.replay_config.creator.working_directory,
                            exist_ok=True)

                # Create the bundle
                bundle_path = os.path.join(
                    self.replay_config.creator.working_directory,
                    f"submission_{submission['sid']}.al_bundle")
                self.client.create_submission_bundle(submission['sid'],
                                                     bundle_path)

                # Move the bundle
                self.filestore.upload(
                    bundle_path, f"submission_{submission['sid']}.al_bundle")

                # Remove temp file
                if os.path.exists(bundle_path):
                    os.unlink(bundle_path)

                # Set submission state done
                self.client.set_single_submission_complete(submission['sid'])

            if once:
                break

    def try_run(self):
        threads = {}
        if self.replay_config.creator.alert_input.enabled:
            for ii in range(self.replay_config.creator.alert_input.threads):
                threads[f'Alert process thread #{ii}'] = self.process_alerts

        if self.replay_config.creator.submission_input.enabled:
            for ii in range(
                    self.replay_config.creator.submission_input.threads):
                threads[
                    f'Submission process thread #{ii}'] = self.process_submissions

        if threads:
            self.maintain_threads(threads)
        else:
            self.log.warning("There are no configured input, terminating")
            self.main_loop_exit.set()
            self.stop()
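try_run above hands maintain_threads a dict of thread name to worker callable. maintain_threads belongs to ReplayBase and is not shown here; a minimal sketch of the same pattern with plain threading:

import threading

def run_threads(threads):
    # 'threads' maps a human-readable name to a zero-argument callable,
    # mirroring the dict built in try_run above.
    workers = [threading.Thread(target=fn, name=name)
               for name, fn in threads.items()]
    for t in workers:
        t.start()
    for t in workers:
        t.join()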
Example #16
class ExpiryManager(ServerBase):
    def __init__(self, force_ilm=False):
        self.config = forge.get_config()
        if force_ilm:
            self.config.datastore.ilm.enabled = True

        super().__init__('assemblyline.expiry',
                         shutdown_timeout=self.config.core.expiry.sleep_time +
                         5)
        self.datastore = forge.get_datastore(config=self.config,
                                             archive_access=True)
        self.hot_datastore = forge.get_datastore(config=self.config,
                                                 archive_access=False)
        self.filestore = forge.get_filestore(config=self.config)
        self.cachestore = FileStore(*self.config.filestore.cache)
        self.expirable_collections = []
        self.archiveable_collections = []
        self.counter = MetricsFactory('expiry', Metrics)
        self.counter_archive = MetricsFactory('archive', Metrics)

        if self.config.datastore.ilm.enabled:
            self.fs_hashmap = {
                'file': self.archive_filestore_delete,
                'cached_file': self.archive_cachestore_delete
            }
        else:
            self.fs_hashmap = {
                'file': self.filestore_delete,
                'cached_file': self.cachestore_delete
            }

        for name, definition in self.datastore.ds.get_models().items():
            if hasattr(definition, 'archive_ts'):
                self.archiveable_collections.append(
                    getattr(self.datastore, name))
            if hasattr(definition, 'expiry_ts'):
                self.expirable_collections.append(getattr(
                    self.datastore, name))

        if self.config.core.metrics.apm_server.server_url is not None:
            self.log.info(
                f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}"
            )
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(
                server_url=self.config.core.metrics.apm_server.server_url,
                service_name="expiry")
        else:
            self.apm_client = None

    def stop(self):
        if self.counter:
            self.counter.stop()

        if self.apm_client:
            elasticapm.uninstrument()
        super().stop()

    def filestore_delete(self, sha256, _):
        self.filestore.delete(sha256)

    def archive_filestore_delete(self, sha256, expiry_time):
        # If we are working with an archive, there may be a hot entry that expires later.
        doc = self.hot_datastore.file.get_if_exists(sha256, as_obj=False)
        if doc and doc['expiry_ts'] > expiry_time:
            return
        self.filestore.delete(sha256)

    def cachestore_delete(self, sha256, _):
        self.cachestore.delete(sha256)

    def archive_cachestore_delete(self, sha256, expiry_time):
        doc = self.hot_datastore.cached_file.get_if_exists(sha256,
                                                           as_obj=False)
        if doc and doc['expiry_ts'] > expiry_time:
            return
        self.cachestore.delete(sha256)

    def run_expiry_once(self):
        now = now_as_iso()
        reached_max = False

        # Expire data
        for collection in self.expirable_collections:
            # Call heartbeat pre-dated by 5 minutes. If a collection takes more than
            # 5 minutes to expire, this container could be seen as unhealthy. The
            # downside is that if it gets stuck on something, it will take more than
            # 5 minutes before the container is restarted.
            self.heartbeat(int(time.time() + 5 * 60))

            # Start of expiry transaction
            if self.apm_client:
                self.apm_client.begin_transaction("Delete expired documents")

            if self.config.core.expiry.batch_delete:
                computed_date = epoch_to_iso(
                    dm(f"{now}||-{self.config.core.expiry.delay}h/d").float_timestamp)
            else:
                computed_date = epoch_to_iso(
                    dm(f"{now}||-{self.config.core.expiry.delay}h").float_timestamp)

            delete_query = f"expiry_ts:[* TO {computed_date}]"

            if self.config.core.expiry.delete_storage and collection.name in self.fs_hashmap:
                file_delete = True
                sort = ["expiry_ts asc", "id asc"]
            else:
                file_delete = False
                sort = None

            number_to_delete = collection.search(
                delete_query,
                rows=0,
                as_obj=False,
                use_archive=True,
                sort=sort,
                track_total_hits=EXPIRY_SIZE)['total']

            if self.apm_client:
                elasticapm.label(query=delete_query)
                elasticapm.label(number_to_delete=number_to_delete)

            self.log.info(f"Processing collection: {collection.name}")
            if number_to_delete != 0:
                if file_delete:
                    with elasticapm.capture_span(
                            name='FILESTORE [ThreadPoolExecutor] :: delete()',
                            labels={
                                "num_files": number_to_delete,
                                "query": delete_query
                            }):
                        # Delete associated files
                        with concurrent.futures.ThreadPoolExecutor(
                                self.config.core.expiry.workers,
                                thread_name_prefix="file_delete") as executor:
                            for item in collection.search(
                                    delete_query,
                                    fl='id',
                                    rows=number_to_delete,
                                    sort=sort,
                                    use_archive=True,
                                    as_obj=False)['items']:
                                executor.submit(
                                    self.fs_hashmap[collection.name],
                                    item['id'], computed_date)

                        self.log.info(
                            f'    Deleted associated files from the '
                            f'{"cachestore" if "cache" in collection.name else "filestore"}...'
                        )

                    # Proceed with deletion
                    collection.delete_by_query(
                        delete_query,
                        workers=self.config.core.expiry.workers,
                        sort=sort,
                        max_docs=number_to_delete)

                else:
                    # Proceed with deletion
                    collection.delete_by_query(
                        delete_query, workers=self.config.core.expiry.workers)

                if number_to_delete == EXPIRY_SIZE:
                    reached_max = True

                self.counter.increment(f'{collection.name}',
                                       increment_by=number_to_delete)

                self.log.info(
                    f"    Deleted {number_to_delete} items from the datastore..."
                )

            else:
                self.log.debug("    Nothing to delete in this collection.")

            # End of expiry transaction
            if self.apm_client:
                self.apm_client.end_transaction(collection.name, 'deleted')

        return reached_max

    def run_archive_once(self):
        reached_max = False
        if not self.config.datastore.ilm.enabled:
            return reached_max

        now = now_as_iso()
        # Archive data
        for collection in self.archiveable_collections:
            # Call heartbeat pre-dated by 5 minutes. If a collection takes more than
            # 5 minutes to archive, this container could be seen as unhealthy. The
            # downside is that if it gets stuck on something, it will take more than
            # 5 minutes before the container is restarted.
            self.heartbeat(int(time.time() + 5 * 60))

            # Start of archive transaction
            if self.apm_client:
                self.apm_client.begin_transaction("Archive older documents")

            archive_query = f"archive_ts:[* TO {now}]"
            sort = ["archive_ts asc", "id asc"]

            number_to_archive = collection.search(
                archive_query,
                rows=0,
                as_obj=False,
                use_archive=False,
                sort=sort,
                track_total_hits=ARCHIVE_SIZE)['total']

            if number_to_archive == ARCHIVE_SIZE:
                reached_max = True

            if self.apm_client:
                elasticapm.label(query=archive_query)
                elasticapm.label(number_to_archive=number_to_archive)

            self.log.info(f"Processing collection: {collection.name}")
            if number_to_archive != 0:
                # Proceed with archiving
                if collection.archive(archive_query,
                                      max_docs=number_to_archive,
                                      sort=sort):
                    self.counter_archive.increment(
                        f'{collection.name}', increment_by=number_to_archive)
                    self.log.info(
                        f"    Archived {number_to_archive} documents...")
                else:
                    self.log.warning(
                        f"    Failed to properly archive {number_to_archive} documents..."
                    )

            else:
                self.log.debug("    Nothing to archive in this collection.")

            # End of archive transaction
            if self.apm_client:
                self.apm_client.end_transaction(collection.name, 'archived')

        return reached_max

    def try_run(self):
        while self.running:
            expiry_maxed_out = False
            archive_maxed_out = False
            try:
                expiry_maxed_out = self.run_expiry_once()
            except Exception as e:
                self.log.exception(str(e))

            try:
                archive_maxed_out = self.run_archive_once()
            except Exception as e:
                self.log.exception(str(e))

            if not expiry_maxed_out and not archive_maxed_out:
                self.sleep_with_heartbeat(self.config.core.expiry.sleep_time)
Example #17
def get_filestore(config=None, connection_attempts=None):
    from assemblyline.filestore import FileStore
    if config is None:
        config = get_config()
    return FileStore(*config.filestore.storage, connection_attempts=connection_attempts)
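A hedged usage sketch: with no config argument, get_filestore loads the default config and builds the store from config.filestore.storage.

fs = get_filestore(connection_attempts=3)
print(fs)  # str(fs) describes the configured storage backends, as the tests above rely on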