Пример #1
0
    async def _run_opa_until_terminated(self) -> int:
        """
        Runs the OPA server as a subprocess and returns only once it terminates.

        While the process is alive, its stdout and stderr streams are handed
        to pipe_opa_logs().

        Returns:
            int: the return code of the process (only when it is not positive).

        Raises:
            Exception: if OPA exits with a return code greater than zero.
        """
        logger.info("Running OPA inline: {command}", command=self.command)
        self._process = await asyncio.create_subprocess_shell(
            self.command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            start_new_session=True,
        )

        # waits until the process is up, then runs a callback
        asyncio.create_task(
            wait_until_process_is_up(self._process.pid,
                                     callback=self._run_start_callbacks))

        # asyncio.wait() must be given tasks: passing bare coroutines is
        # deprecated since Python 3.8 and rejected since Python 3.11.
        await asyncio.wait([
            asyncio.create_task(pipe_opa_logs(self._process.stdout)),
            asyncio.create_task(pipe_opa_logs(self._process.stderr)),
        ])

        return_code = await self._process.wait()
        logger.info("OPA exited with return code: {return_code}",
                    return_code=return_code)
        if return_code > 0:  # exception in running opa
            raise Exception(f"OPA exited with return code: {return_code}")
        return return_code
Пример #2
0
    async def stop(self):
        """
        Stops the data updater: disconnects the pub/sub client, cancels the
        subscriber task and finally stops the data fetcher.
        """
        self._stopping = True
        logger.info("Stopping data updater")

        # disconnect from Pub/Sub (skipped while no client has been created yet)
        if self._client is not None:
            try:
                await asyncio.wait_for(self._client.disconnect(), timeout=3)
            except asyncio.TimeoutError:
                logger.debug(
                    "Timeout waiting for DataUpdater pubsub client to disconnect")

        # stop subscriber task
        if self._subscriber_task is not None:
            logger.debug("Cancelling DataUpdater subscriber task")
            self._subscriber_task.cancel()
            try:
                await self._subscriber_task
            except asyncio.CancelledError as exc:
                # the placeholder name has to match the keyword argument,
                # otherwise formatting the message fails with a KeyError
                logger.debug(
                    "DataUpdater subscriber task was force-cancelled: {exc}",
                    exc=exc)
            self._subscriber_task = None
            logger.debug("DataUpdater subscriber task was cancelled")

        # stop the data fetcher
        logger.debug("Stopping data fetcher")
        await self._data_fetcher.stop()
Пример #3
0
async def pipe_opa_logs(stream):
    """
    Reads log lines from an OPA process stream until it is exhausted and
    forwards each of them into the main opal log.

    Nothing is read when the configured format is OpaLogFormat.NONE.
    Lines that are not valid JSON are logged as-is at info level.
    """
    if opal_client_config.INLINE_OPA_LOG_FORMAT == OpaLogFormat.NONE:
        return

    while True:
        raw_line = await stream.readline()
        if not raw_line:
            # an empty read means the stream has ended
            return
        try:
            record = json.loads(raw_line)
            level_value = logging_level_from_string(record.pop("level", "info"))
            level = logging.getLevelName(level_value)
            message = record.pop("msg", None)

            log_format = opal_client_config.INLINE_OPA_LOG_FORMAT
            handled = False
            if log_format == OpaLogFormat.MINIMAL:
                handled = log_event_name(level, message)
            elif log_format == OpaLogFormat.HTTP:
                handled = log_formatted_http_details(level, message, record)

            # the whole record is logged when nothing handled it, and always in FULL format
            if not handled or log_format == OpaLogFormat.FULL:
                log_entire_dict(level, message, record)
        except json.JSONDecodeError:
            logger.info(raw_line)
Пример #4
0
 def trigger_data_update(self, update: DataUpdate):
     """
     Spins off a background task that runs update_policy_data() for `update`.

     An id is generated for the update when it does not carry one.
     """
     # every update needs an id so it can be tracked
     if update.id is None:
         update.id = uuid.uuid4().hex
     logger.info("Triggering data update with id: {id}", update=update, id=update.id)
     update_coro = self.update_policy_data(
         update, policy_store=self._policy_store, data_fetcher=self._data_fetcher)
     # fire and forget: the task is not awaited here
     asyncio.create_task(update_coro)
Пример #5
0
 async def start(self):
     """
     Launches the policy updater by spinning off the subscriber task,
     unless a subscriber task already exists.
     """
     logger.info("Launching policy updater")
     if self._subscriber_task is not None:
         return
     self._subscriber_task = asyncio.create_task(self._subscriber())
Пример #6
0
    async def stop(self):
        """
        Stops the policy updater: disconnects the pub/sub client (if any)
        and cancels the subscriber task (if any).
        """
        self._stopping = True
        logger.info("Stopping policy updater")

        # disconnect from Pub/Sub
        if self._client is not None:
            try:
                await asyncio.wait_for(self._client.disconnect(), timeout=3)
            except asyncio.TimeoutError:
                logger.debug(
                    "Timeout waiting for PolicyUpdater pubsub client to disconnect"
                )

        # stop subscriber task
        if self._subscriber_task is not None:
            logger.debug("Cancelling PolicyUpdater subscriber task")
            self._subscriber_task.cancel()
            try:
                await self._subscriber_task
            except asyncio.CancelledError as exc:
                # the placeholder name has to match the keyword argument,
                # otherwise formatting the message fails with a KeyError
                logger.debug(
                    "PolicyUpdater subscriber task was force-cancelled: {exc}",
                    exc=exc)
            self._subscriber_task = None
            logger.debug("PolicyUpdater subscriber task was cancelled")
Пример #7
0
    async def report_update_results(self, update: DataUpdate,
                                    reports: List[DataEntryReport],
                                    data_fetcher: DataFetcher):
        """
        Sends a report describing a processed data update to its callbacks.

        The callbacks are taken from the update itself, falling back to
        opal_client_config.DEFAULT_UPDATE_CALLBACKS when the update has none.
        Reporting is best effort: failures are logged and never raised.

        Args:
            update (DataUpdate): the update that was processed.
            reports (List[DataEntryReport]): the per-entry reports to send.
            data_fetcher (DataFetcher): fetcher used to call the callback urls.
        """
        try:
            whole_report = DataUpdateReport(update_id=update.id,
                                            reports=reports)

            callbacks = update.callback.callbacks or opal_client_config.DEFAULT_UPDATE_CALLBACKS.callbacks
            urls = []
            for callback in callbacks:
                if isinstance(callback, str):
                    # a bare url is sent with a copy of the default callback config
                    url = callback
                    callback_config = opal_client_config.DEFAULT_UPDATE_CALLBACK_CONFIG.copy(
                    )
                else:
                    # otherwise the callback is a (url, config) pair
                    url, callback_config = callback
                callback_config.data = whole_report.json()
                urls.append((url, callback_config))

            logger.info("Reporting the update to requested callbacks",
                        urls=repr(urls))
            report_results = await data_fetcher.handle_urls(urls)
            # log reports which we failed to send (results are matched to urls by position)
            for (url, config), result in zip(urls, report_results):
                if isinstance(result, Exception):
                    logger.error(
                        "Failed to send report to {url} with config {config}",
                        url=url,
                        config=config,
                        exc_info=result)
        except Exception:
            # a bare `except:` would also swallow task cancellation
            # (CancelledError is a BaseException since Python 3.8)
            logger.exception("Failed to excute report_update_results")
Пример #8
0
 async def get_policy_data_config(self,
                                  url: str = None) -> DataSourceConfig:
     """
     Fetches the data sources configuration over HTTP.

     Args:
         url: the URL to query for the config, defaults to self._data_sources_config_url
     Returns:
         DataSourceConfig: the config parsed from the JSON body of a 200 response
     Raises:
         ClientError: when the response status is not 200.
         Any failure is logged and then re-raised.
     """
     config_url = self._data_sources_config_url if url is None else url
     logger.info("Getting data-sources configuration from '{source}'",
                 source=config_url)
     try:
         async with ClientSession(headers=self._extra_headers) as session:
             response = await session.get(config_url)
             if response.status != 200:
                 error_details = await response.json()
                 raise ClientError(
                     f"Fetch data sources failed with status code {response.status}, error: {error_details}"
                 )
             payload = await response.json()
             return DataSourceConfig.parse_obj(payload)
     except:
         logger.exception(f"Failed to load data sources config")
         raise
Пример #9
0
 async def _on_connect(self, client: PubSubClient, channel: RpcChannel):
     """
     Pub/Sub on_connect callback.

     Runs update_policy() on every connection to the backend, be it the
     first connection or a reconnection after downtime: while the
     connection is alive we are assumed to be in sync with the server,
     and once it is lost we assume we have to start from scratch.
     """
     logger.info("Connected to server")
     await self.update_policy()
Пример #10
0
 async def notify(self, subscription=None, data=None):
     """
     Handles an event notification by triggering its topic on the client.

     When the topic contains TOPIC_SEPARATOR, only its second segment is
     triggered (the first segment holds the app id).
     """
     topic = subscription["topic"]
     logger.info("Received notification of event: {topic}",
                 topic=topic,
                 subscription=subscription,
                 data=data)
     separator = self.TOPIC_SEPARATOR
     if separator in topic:
         # drop the leading app id and keep the segment that follows it
         _app_id, *remaining = topic.split(separator)
         if remaining:
             topic = remaining[0]
     await self.client.trigger_topic(topic=topic, data=data)
Пример #11
0
 async def persist(self):
     """
     Renders the policy template with the current state and writes
     the rendered policy to OPA through the store.
     """
     logger.info("persisting health check policy: ready={ready}, healthy={healthy}", ready=self.ready, healthy=self.healthy)
     template = self._policy_template
     template_values = {
         "ready": self.ready,
         "last_policy_transaction": self.last_policy_transaction,
         "last_data_transaction": self.last_data_transaction,
     }
     rendered_policy = template.format(**template_values)
     return await self._store.set_policy(policy_id=self._policy_id, policy_code=rendered_policy)
Пример #12
0
 async def _update_policy_data_callback(self, data: dict = None, topic=""):
     """
     Pub/Sub callback that triggers data updates.
     Runs when notifications arrive on the policy_data topic,
     i.e: when new roles are added, permissions change, etc.
     """
     reason = "Periodic update" if data is None else data.get("reason", "")
     logger.info("Updating policy data, reason: {reason}", reason=reason)
     # NOTE(review): `data` is parsed even when it is None - confirm that
     # DataUpdate.parse_obj accepts None, otherwise the "Periodic update" path fails here
     parsed_update = DataUpdate.parse_obj(data)
     self.trigger_data_update(parsed_update)
Пример #13
0
    async def get_base_policy_data(self, config_url: str = None, data_fetch_reason="Initial load"):
        """
        Load data into the policy store according to the data source's config provided in the config URL

        Args:
            config_url (str, optional): URL to retrieve data sources config from. Defaults to None ( self._data_sources_config_url).
            data_fetch_reason (str, optional): Reason to log for the update operation. Defaults to "Initial load".
        """
        # pass the reason itself: wrapping it in braces builds a set,
        # which would be logged as e.g. "{'Initial load'}"
        logger.info("Performing data configuration, reason: {reason}", reason=data_fetch_reason)
        sources_config = await self.get_policy_data_config(url=config_url)
        # translate config to a data update
        entries = sources_config.entries
        update = DataUpdate(reason=data_fetch_reason, entries=entries)
        self.trigger_data_update(update)
Пример #14
0
    async def stop(self):
        """
        Stops the opa runner task (terminating OPA) and waits for the task to end.
        """
        self._init_events()
        stop_already_requested = self._should_stop.is_set()
        if not stop_already_requested:
            logger.info("Stopping opa runner")
            self._should_stop.set()
            self._terminate_opa()
            # give the opa process a moment to go down
            await asyncio.sleep(1)

        run_task = self._run_task
        if run_task is not None:
            await run_task
        self._run_task = None
Пример #15
0
    def process_transaction(self, transaction: StoreTransaction):
        """
        Folds a store transaction into the tracked state, which can then be
        persisted as hardcoded policy.

        The transaction is remembered as the last policy or the last data
        transaction, and the matching success counter grows when it succeeded.
        Transactions of neither kind only get logged.
        """
        logger.info("processing store transaction: {transaction}", transaction=transaction.dict())
        if self._is_policy_transaction(transaction):
            self._last_policy_transaction = transaction
            if transaction.success:
                self._num_successful_policy_transactions += 1
            return

        if not self._is_data_transaction(transaction):
            return

        self._last_data_transaction = transaction
        if transaction.success:
            self._num_successful_data_transactions += 1
Пример #16
0
async def update_policy(policy_store: BasePolicyStoreClient,
                        directories: List[str] = None,
                        force_full_update=False):
    """
    Fetches policy code (e.g: rego) from the backend and writes it to the policy store.

    Args:
        policy_store (BasePolicyStoreClient): policy store client that stores the policy code.
        directories (List[str], optional): specific source directories we want, defaults to
            the result of default_subscribed_policy_directories().
        force_full_update (bool, optional): if true, ignore the stored hash and fetch the full policy bundle.
    """
    if directories is None:
        directories = default_subscribed_policy_directories()

    if force_full_update:
        logger.info("full update was forced (ignoring stored hash if exists)")
        base_hash = None
    else:
        base_hash = await policy_store.get_policy_version()

    # without a base hash there is nothing to diff against
    if base_hash is not None:
        logger.info(
            "Refetching policy code (delta bundle), base hash: '{base_hash}'",
            base_hash=base_hash)
    else:
        logger.info("Refetching policy code (full bundle)")

    bundle: Optional[PolicyBundle] = await policy_fetcher.fetch_policy_bundle(
        directories, base_hash=base_hash)
    if not bundle:
        return

    if bundle.old_hash is not None:
        if bundle.deleted_files is not None:
            deleted_files = bundle.deleted_files.dict()
        else:
            deleted_files = None
        logger.info(
            "got policy bundle (delta): '{diff_against_hash}' -> '{commit_hash}'",
            commit_hash=bundle.hash,
            diff_against_hash=bundle.old_hash,
            manifest=bundle.manifest,
            deleted=deleted_files)
    else:
        logger.info("got policy bundle, commit hash: '{commit_hash}'",
                    commit_hash=bundle.hash,
                    manifest=bundle.manifest)

    # The bundle is written to the OPA cache inside a transaction, so that
    # a failing write-op marks the transaction as failed.
    async with policy_store.transaction_context(
            bundle.hash) as store_transaction:
        await store_transaction.set_policies(bundle)
Пример #17
0
    async def _run_start_callbacks(self):
        """
        Runs the matching set of callbacks after the OPA process starts:
        the initial-start callbacks the first time, the restart
        (rehydration) callbacks on every later start.
        """
        # TODO: make policy store expose the /health api of OPA
        await asyncio.sleep(1)

        if self._process_was_never_up_before:
            # no need to rehydrate the first time
            self._process_was_never_up_before = False
            logger.info("Running OPA initial start callbacks")
            callbacks = self._on_opa_initial_start_callbacks
        else:
            logger.info("Running OPA rehydration callbacks")
            callbacks = self._on_opa_restart_callbacks

        # the callbacks run in their own task and are not awaited here
        asyncio.create_task(self._run_callbacks(callbacks))
Пример #18
0
 async def _subscriber(self):
     """
     Coroutine meant to be spun off with create_task: it listens in the
     background for data events and hands them to
     _update_policy_data_callback.
     """
     logger.info("Subscribing to topics: {topics}",
                 topics=self._data_topics)
     client = PubSubClient(
         self._data_topics,
         self._update_policy_data_callback,
         methods_class=TenantAwareRpcEventClientMethods,
         on_connect=[self.on_connect],
         extra_headers=self._extra_headers,
         keep_alive=opal_client_config.KEEP_ALIVE_INTERVAL,
         server_uri=self._server_url)
     # keep the client on the instance so that other methods can reach it
     self._client = client
     async with client:
         await client.wait_until_done()
Пример #19
0
 async def get_policy_data_config(self, url: str = None) -> DataSourceConfig:
     """
     Fetches the data sources configuration over HTTP.

     Args:
         url: the URL to query for the config, Defaults to self._data_sources_config_url
     Returns:
         DataSourceConfig: the data sources config parsed from the JSON response body
     Raises:
         Any failure to fetch or parse the config is logged and re-raised.
     """
     if url is None:
         url = self._data_sources_config_url
     logger.info("Getting data-sources configuration from '{source}'", source=url)
     try:
         async with ClientSession(headers=self._extra_headers) as session:
             async with session.get(url) as res:
                 # the body has to be read while the session is still open:
                 # leaving the outer `async with` closes the session
                 return DataSourceConfig.parse_obj(await res.json())
     except Exception:
         logger.exception("Failed to load data sources config")
         raise
Пример #20
0
 async def _subscriber(self):
     """
     Coroutine meant to be spun off with create_task: it listens in the
     background for policy update events and hands them to the
     _update_policy_callback() callback.
     """
     logger.info("Subscribing to topics: {topics}", topics=self._topics)
     client = PubSubClient(
         topics=self._topics,
         callback=self._update_policy_callback,
         on_connect=[self._on_connect],
         on_disconnect=[self._on_disconnect],
         extra_headers=self._extra_headers,
         keep_alive=opal_client_config.KEEP_ALIVE_INTERVAL,
         server_uri=self._server_url)
     # keep the client on the instance so that other methods can reach it
     self._client = client
     async with client:
         await client.wait_until_done()
Пример #21
0
    async def _update_policy_callback(self, data: dict = None, topic: str = "", **kwargs):
        """
        Pub/Sub callback that triggers policy updates.
        Runs when notifications arrive on the policy topic,
        i.e: when the source repository changes (new commits).

        A topic starting with POLICY_PREFIX updates only the directory named
        by the rest of the topic; any other topic is logged as invalid and
        updates the default subscribed directories.
        """
        if not topic.startswith(POLICY_PREFIX):
            directories = default_subscribed_policy_directories()
            logger.warning("Received policy updated (invalid topic): {topic}", topic=topic)
        else:
            affected_directory = remove_prefix(topic, prefix=POLICY_PREFIX)
            directories = [affected_directory]
            logger.info(
                "Received policy update: affected directories={directories}, new commit hash='{new_hash}'",
                directories=directories,
                topic=topic,
                new_hash=data
            )

        await self.update_policy(directories)
Пример #22
0
async def update_policy_data(update: DataUpdate = None,
                             policy_store: BasePolicyStoreClient = None,
                             data_fetcher=None):
    """
    fetches policy data (policy configuration) from backend and updates it into policy-store (i.e. OPA)

    Args:
        update (DataUpdate, optional): specification of the entries to fetch. Without it
            the fetcher is called with no urls and all fetched data is stored at the root path.
        policy_store (BasePolicyStoreClient, optional): store to save the data into.
            Defaults to the result of DEFAULT_POLICY_STORE_GETTER().
        data_fetcher (optional): fetcher used to get the data. Defaults to a new DataFetcher().
    """
    policy_store = policy_store or DEFAULT_POLICY_STORE_GETTER()
    if data_fetcher is None:
        data_fetcher = DataFetcher()
    # Urls may stay None - fetch_policy_data has a default for None
    urls: Dict[str, FetcherConfig] = None
    # has to be a dict even without an update: it is queried for every fetched url below
    url_to_entry: Dict[str, DataSourceEntry] = {}
    # if we have an actual specification for the update
    if update is not None:
        entries: List[DataSourceEntry] = update.entries
        urls = {entry.url: entry.config for entry in entries}
        url_to_entry = {entry.url: entry for entry in entries}
    # get the data for the update
    logger.info("Fetching policy data", urls=urls)
    policy_data_by_urls = await data_fetcher.fetch_policy_data(urls)
    # save the data from the update
    for url in policy_data_by_urls:
        # get path to store the URL data (default mode (no entry) is "" - i.e. all the data at root)
        entry = url_to_entry.get(url)
        # a missing destination path is not valid - use "" (protect from misconfig)
        policy_store_path = (entry.dst_path or "") if entry is not None else ""
        # fix opa_path (if not empty must start with "/" to be nested under data)
        if policy_store_path != "" and not policy_store_path.startswith("/"):
            policy_store_path = f"/{policy_store_path}"
        policy_data = policy_data_by_urls[url]
        logger.info(
            "Saving fetched data to policy-store: source url='{url}', destination path='{path}'",
            url=url,
            path=policy_store_path or '/')
        await policy_store.set_policy_data(policy_data, path=policy_store_path)
Пример #23
0
 async def _on_disconnect(self, channel: RpcChannel):
     """
     Pub/Sub on_disconnect callback: only logs the disconnection.
     """
     logger.info("Disconnected from server")
Пример #24
0
 def start(self):
     """
     Starts the opa runner by spinning off self._run() as a task,
     which is kept in self._run_task.
     """
     logger.info("Launching opa runner")
     runner_coro = self._run()
     self._run_task = asyncio.create_task(runner_coro)
Пример #25
0
    async def update_policy_data(self, update: DataUpdate = None, policy_store: BasePolicyStoreClient = None, data_fetcher=None):
        """
        fetches policy data (policy configuration) from backend and updates it into policy-store (i.e. OPA)

        Args:
            update (DataUpdate, optional): specification of the entries to fetch. Without it
                the fetcher is called with no urls and no report is sent.
            policy_store (BasePolicyStoreClient, optional): store to save the data into.
                Defaults to the result of DEFAULT_POLICY_STORE_GETTER().
            data_fetcher (optional): fetcher used to get the data. Defaults to a new DataFetcher().

        Raises:
            Re-raises any failure to save fetched data into the policy-store.
        """
        policy_store = policy_store or DEFAULT_POLICY_STORE_GETTER()
        if data_fetcher is None:
            data_fetcher = DataFetcher()
        # types / defaults
        urls: List[Tuple[str, FetcherConfig]] = None
        entries: List[DataSourceEntry] = []
        # track the result of each url in order to report back
        reports: List[DataEntryReport] = []
        # if we have an actual specification for the update
        if update is not None:
            entries = update.entries
            urls = [(entry.url, entry.config) for entry in entries]

        # get the data for the update
        logger.info("Fetching policy data", urls=urls)
        # Urls may be None - handle_urls has a default for None
        policy_data_with_urls = await data_fetcher.handle_urls(urls)
        # `update` is optional, so its id cannot be read unconditionally
        # NOTE(review): confirm transaction_context() accepts None as the transaction id
        transaction_id = update.id if update is not None else None
        # Save the data from the update
        # We wrap our interaction with the policy store with a transaction
        async with policy_store.transaction_context(transaction_id) as store_transaction:
            # for intellisense treat store_transaction as a PolicyStoreClient (which it proxies)
            store_transaction: BasePolicyStoreClient
            for (url, fetch_config, result), entry in itertools.zip_longest(policy_data_with_urls, entries):
                if isinstance(result, Exception):
                    # fetching failed, so there is nothing to save for this entry
                    reports.append(DataEntryReport(entry=entry, fetched=False, saved=False))
                    continue

                # a missing entry or destination path means "" - i.e. all the data at root
                policy_store_path = (entry.dst_path or "") if entry is not None else ""
                # fix opa_path (if not empty must start with "/" to be nested under data)
                if policy_store_path != "" and not policy_store_path.startswith("/"):
                    policy_store_path = f"/{policy_store_path}"
                policy_data = result
                # Create a report on the data-fetching
                report = DataEntryReport(entry=entry, hash=self.calc_hash(policy_data), fetched=True)
                logger.info(
                    "Saving fetched data to policy-store: source url='{url}', destination path='{path}'",
                    url=url,
                    path=policy_store_path or '/'
                )
                try:
                    await store_transaction.set_policy_data(policy_data, path=policy_store_path)
                    # No exception - we were able to save to the policy-store
                    report.saved = True
                except BaseException:
                    logger.exception("Failed to save data update to policy-store")
                    # we failed to save to policy-store
                    report.saved = False
                    # re-raise so the context manager will be aware of the failure
                    raise
                finally:
                    # the report is kept for the entry whether or not saving worked
                    reports.append(report)
        # should we send a report to defined callbackers?
        # (without an update there is neither an id nor callbacks to report to)
        if self._should_send_reports and update is not None:
            # spin off reporting (no need to wait on it)
            asyncio.create_task(self.report_update_results(update, reports, data_fetcher))
Пример #26
0
 def _terminate_opa(self):
     """
     Calls terminate() on the process handle kept in self._process.
     """
     logger.info("Stopping OPA")
     opa_process = self._process
     opa_process.terminate()
Пример #27
0
 async def start(self):
     """
     Launches the data updater: spins off the subscriber task and starts
     the data fetcher. Does neither when a subscriber task already exists.
     """
     logger.info("Launching data updater")
     if self._subscriber_task is not None:
         return
     self._subscriber_task = asyncio.create_task(self._subscriber())
     await self._data_fetcher.start()
Пример #28
0
 async def on_disconnect(self, channel: RpcChannel):
     """
     Disconnect callback: only logs the disconnection.
     """
     logger.info("Disconnected from server")
Пример #29
0
 def trigger_data_update(self, update: DataUpdate):
     """
     Spins off a background task that runs update_policy_data() for
     `update` with this instance's policy store and data fetcher.
     """
     logger.info("Triggering data fetch and update", update=update)
     update_coro = update_policy_data(update,
                                      policy_store=self._policy_store,
                                      data_fetcher=self._data_fetcher)
     # fire and forget: the task is not awaited here
     asyncio.create_task(update_coro)