Пример #1
0
    def do_verify(self):
        """Verify key derivation on the agent with a randomly generated challenge.

        Requests ``/keys/verify`` from the agent with a fresh challenge and
        compares the HMAC in the reply against one computed locally from
        ``self.K``. The outcome is only logged; nothing is returned.

        A non-200 reply is logged and the request is repeated after
        ``tenant.retry_interval`` seconds, with no upper bound. If the request
        itself raises while the last response seen was a 503/504, it is retried
        up to ``tenant.max_retries`` times before the process exits.

        Raises:
            Exception: whatever the request raised, when no 503/504 response
                is available to justify a retry.
            SystemExit: when the retry budget is exhausted.
        """
        challenge = TPM_Utilities.random_password(20)
        numtries = 0
        # Bound before the loop so the except-handler below can inspect it even
        # when the very first request raises before any response was received.
        response = None
        while True:
            try:
                cloudagent_base_url = (f'{self.agent_ip}:{self.agent_port}')
                do_verify = RequestsClient(cloudagent_base_url,
                                           tls_enabled=False)
                response = do_verify.get(
                    (f'/keys/verify?challenge={challenge}'),
                    cert=self.cert,
                    verify=False)
            except Exception:
                # Only retry when a previous response indicated the agent was
                # unavailable; otherwise surface the original error untouched.
                if response is not None and response.status_code in (503, 504):
                    numtries += 1
                    maxr = config.getint('tenant', 'max_retries')
                    if numtries >= maxr:
                        logger.error(
                            f"Cannot establish connection to agent on {self.agent_ip} with port {self.agent_port}"
                        )
                        sys.exit()
                    retry = config.getfloat('tenant', 'retry_interval')
                    logger.info(
                        f"Verifier connection to agent at {self.agent_ip} refused {numtries}/{maxr} times, trying again in {retry} seconds..."
                    )
                    time.sleep(retry)
                    continue

                raise
            response_body = response.json()
            if response.status_code == 200:
                if "results" not in response_body or 'hmac' not in response_body[
                        'results']:
                    logger.critical(
                        f"Error: unexpected http response body from Cloud Agent: {response.status_code}"
                    )
                    break
                mac = response_body['results']['hmac']

                # Expected value: HMAC of the challenge keyed with self.K.
                ex_mac = crypto.do_hmac(self.K, challenge)

                if mac == ex_mac:
                    logger.info("Key derivation successful")
                else:
                    logger.error("Key derivation failed")
            else:
                # Any other status is treated as "not ready yet": log, wait and
                # ask again (this path has no retry limit).
                keylime_logging.log_http_response(logger, logging.ERROR,
                                                  response_body)
                retry = config.getfloat('tenant', 'retry_interval')
                logger.warning(
                    f"Key derivation not yet complete...trying again in {retry} seconds...Ctrl-C to stop"
                )
                time.sleep(retry)
                continue
            break
Пример #2
0
    def worker_webhook(tosend, url):
        """POST a revocation event to a webhook, retrying on failure.

        Makes up to ``cloud_verifier.max_retries`` attempts. An attempt
        succeeds when the server answers 200 or 202; any other status or a
        ``requests`` exception is logged at debug level and followed by a
        sleep whose length comes from ``retry.retry_time``. Nothing is
        returned and exhausting the attempts is not reported to the caller.

        Args:
            tosend: JSON-serialisable payload sent as the request body.
            url: webhook endpoint to POST to.
        """
        interval = config.getfloat('cloud_verifier', 'retry_interval')
        exponential_backoff = config.getboolean('cloud_verifier',
                                                'exponential_backoff')
        # The context manager closes the session (and its pooled connections)
        # on every exit path instead of leaving that to garbage collection.
        with requests.session() as session:
            logger.info("Sending revocation event via webhook...")
            for i in range(config.getint('cloud_verifier', 'max_retries')):
                next_retry = retry.retry_time(exponential_backoff, interval, i,
                                              logger)
                try:
                    response = session.post(url, json=tosend, timeout=5)
                    if response.status_code in [200, 202]:
                        break

                    logger.debug(
                        "Unable to publish revocation message %d times via webhook, "
                        "trying again in %d seconds. "
                        "Server returned status code: %s", i, next_retry,
                        response.status_code)
                except requests.exceptions.RequestException as e:
                    logger.debug(
                        "Unable to publish revocation message %d times via webhook, "
                        "trying again in %d seconds: %s", i, next_retry, e)

                time.sleep(next_retry)
 def worker(tosend):
     """Publish a revocation event to listening nodes over a 0mq PUB socket.

     The payload is JSON-encoded and sent on the verifier's ipc endpoint.
     A failed send is logged at debug level and retried after
     ``cloud_verifier.retry_interval`` seconds, for at most
     ``cloud_verifier.max_retries`` attempts. The socket is closed at the end.

     Args:
         tosend: JSON-serialisable revocation message.
     """
     ctx = zmq.Context()
     publisher = ctx.socket(zmq.PUB)
     publisher.connect("ipc:///tmp/keylime.verifier.ipc")
     # give the connection 0.2 seconds to come up before publishing
     time.sleep(0.2)
     # hand the event over to 0mq
     logger.info("Sending revocation event to listening nodes...")
     max_attempts = config.getint('cloud_verifier', 'max_retries')
     for attempt in range(max_attempts):
         try:
             publisher.send_string(json.dumps(tosend))
         except Exception as err:
             pause = config.getfloat('cloud_verifier', 'retry_interval')
             logger.debug("Unable to publish revocation message %d times, trying again in %f seconds: %s" % (
                 attempt, pause, err))
             time.sleep(pause)
         else:
             # sent successfully, no further attempts needed
             break
     publisher.close()
Пример #4
0
    def __run(self, cmd, expectedcode=tpm_abstract.AbstractTPM.EXIT_SUCESS, raiseOnError=True, lock=True, outputpaths=None):
        """Execute a TPM command, retrying while the TPM reports an I/O error.

        Args:
            cmd: command handed to ``cmd_exec.run``.
            expectedcode: exit code regarded as success.
            raiseOnError: raise when the final exit code differs from
                ``expectedcode``.
            lock: hold ``self.tpmutilLock`` around each execution.
            outputpaths: a single path or a list of paths passed through to
                ``cmd_exec.run``; a bare string is wrapped in a list.

        Returns:
            The dict produced by ``cmd_exec.run``, or the canned stub result
            when TPM stubbing is active and yields one.

        Raises:
            Exception: if ``raiseOnError`` is set and the command finished
                with an unexpected exit code.
        """
        env = _get_cmd_env()

        # Accept a lone path string for backwards compatibility: wrap it in a list
        if isinstance(outputpaths, str):
            outputpaths = [outputpaths]

        # Serve a canned answer when the TPM is stubbed out
        fprt = tpm1.__fingerprint(cmd)
        if config.STUB_TPM and config.TPM_CANNED_VALUES is not None:
            stub = _stub_command(fprt, lock, outputpaths)
            if stub:
                return stub

        # cmd_exec.run never raises here; the exit code is judged below
        run_kwargs = dict(cmd=cmd, expectedcode=expectedcode,
                          raiseOnError=False, outputpaths=outputpaths, env=env)

        attempts = 0
        while True:
            if not lock:
                retDict = cmd_exec.run(**run_kwargs)
            else:
                with self.tpmutilLock:
                    retDict = cmd_exec.run(**run_kwargs)

            code = retDict['code']
            retout = retDict['retout']

            # anything other than a TPM I/O error ends the retry loop
            if code != tpm_abstract.AbstractTPM.TPM_IO_ERR:
                break

            attempts += 1
            maxr = config.getint('cloud_agent', 'max_retries')
            if attempts >= maxr:
                logger.error("TPM appears to be in use by another application.  Keylime is incompatible with other TPM TSS applications like trousers/tpm-tools. Please uninstall or disable.")
                break
            retry = config.getfloat('cloud_agent', 'retry_interval')
            logger.info("Failed to call TPM %d/%d times, trying again in %f seconds..." % (attempts, maxr, retry))
            time.sleep(retry)

        # A failed call is fatal only when the caller asked for it
        if raiseOnError and code != expectedcode:
            raise Exception("Command: %s returned %d, expected %d, output %s" % (cmd, code, expectedcode, retout))

        # Record metrics when the call ran locked or the lock is currently held
        if lock or self.tpmutilLock.locked():
            _output_metrics(fprt, cmd, retDict, outputpaths)

        return retDict
Пример #5
0
    def worker_webhook(tosend, url):
        """POST a revocation event to a webhook, retrying at a fixed interval.

        Makes up to ``cloud_verifier.max_retries`` attempts, sleeping
        ``cloud_verifier.retry_interval`` seconds after each failed one. An
        attempt succeeds when the server answers 200 or 202; other statuses
        and ``requests`` exceptions are logged at debug level. Nothing is
        returned and exhausting the attempts is not reported to the caller.

        Args:
            tosend: JSON-serialisable payload sent as the request body.
            url: webhook endpoint to POST to.
        """
        retry_interval = config.getfloat('cloud_verifier', 'retry_interval')
        # The context manager closes the session (and its pooled connections)
        # on every exit path instead of leaving that to garbage collection.
        with requests.session() as session:
            logger.info("Sending revocation event via webhook...")
            for i in range(config.getint('cloud_verifier', 'max_retries')):
                try:
                    # Without a timeout a stalled endpoint would block this
                    # worker forever and the retry logic would never run.
                    response = session.post(url, json=tosend, timeout=5)
                    if response.status_code in [200, 202]:
                        break

                    logger.debug(
                        f"Unable to publish revocation message {i} times via webhook, "
                        f"trying again in {retry_interval} seconds. "
                        f"Server returned status code: {response.status_code}")
                except requests.exceptions.RequestException as e:
                    logger.debug(
                        f"Unable to publish revocation message {i} times via webhook, "
                        f"trying again in {retry_interval} seconds: {e} ")

                time.sleep(retry_interval)
Пример #6
0
 def worker(tosend):
     """Publish a revocation event to listening nodes over a 0mq PUB socket.

     The payload is JSON-encoded and sent on the ipc endpoint at
     ``_SOCKET_PATH``. A failed send is logged at debug level and retried
     after the delay returned by ``retry.retry_time``, for at most
     ``cloud_verifier.max_retries`` attempts. The socket is closed at the end.

     Args:
         tosend: JSON-serialisable revocation message.
     """
     ctx = zmq.Context()
     publisher = ctx.socket(zmq.PUB)
     publisher.connect(f"ipc://{_SOCKET_PATH}")
     # give the connection 0.2 seconds to come up before publishing
     time.sleep(0.2)
     # hand the event over to 0mq
     logger.info("Sending revocation event to listening nodes...")
     max_attempts = config.getint('cloud_verifier', 'max_retries')
     for attempt in range(max_attempts):
         try:
             publisher.send_string(json.dumps(tosend))
         except Exception as err:
             base_interval = config.getfloat('cloud_verifier', 'retry_interval')
             use_backoff = config.getboolean(
                 'cloud_verifier', 'exponential_backoff')
             pause = retry.retry_time(use_backoff, base_interval, attempt,
                                      logger)
             logger.debug(
                 "Unable to publish revocation message %d times, trying again in %f seconds: %s",
                 attempt, pause, err)
             time.sleep(pause)
         else:
             # sent successfully, no further attempts needed
             break
     publisher.close()
Пример #7
0
async def process_agent(agent, new_operational_state):
    """Advance one agent through the verifier's polling state machine.

    The agent's current state (``agent['operational_state']``) together with
    ``new_operational_state`` selects the transition: fetching a quote,
    providing V, scheduling the next quote, retrying after a communication
    failure, or stopping on failure/termination. Agent data is written back
    to the database along the way.

    Any exception raised while processing is logged and swallowed, so this
    coroutine does not propagate errors to its caller.

    Args:
        agent: agent data as a dict, or a DB object that is converted with
            ``_from_db_obj``.
        new_operational_state: the state the agent should move to.
    """
    # Convert to dict if the agent arg is a db object
    if not isinstance(agent, dict):
        agent = _from_db_obj(agent)

    session = get_session()
    try:
        main_agent_operational_state = agent['operational_state']
        try:
            stored_agent = session.query(VerfierMain).filter_by(
                agent_id=str(agent['agent_id'])).first()
        except SQLAlchemyError as e:
            logger.error('SQLAlchemy Error: %s', e)
            # The stored state decides everything below; without it there is
            # nothing sensible to do, so stop instead of hitting a NameError.
            return

        # .first() yields None when no row matches (e.g. the agent was removed
        # in the meantime); bail out rather than dereferencing None.
        if stored_agent is None:
            logger.error("Agent %s not found in the database, cannot process state change", agent['agent_id'])
            return

        # if the user terminated this agent
        if stored_agent.operational_state == states.TERMINATED:
            logger.warning("Agent %s terminated by user.", agent['agent_id'])
            if agent['pending_event'] is not None:
                tornado.ioloop.IOLoop.current().remove_timeout(
                    agent['pending_event'])
            session.query(VerfierMain).filter_by(
                agent_id=agent['agent_id']).delete()
            session.commit()
            return

        # if the user tells us to stop polling because the tenant quote check failed
        if stored_agent.operational_state == states.TENANT_FAILED:
            logger.warning("Agent %s has failed tenant quote. Stopping polling",  agent['agent_id'])
            if agent['pending_event'] is not None:
                tornado.ioloop.IOLoop.current().remove_timeout(
                    agent['pending_event'])
            return

        # If failed during processing, log regardless and drop it on the floor
        # The administration application (tenant) can GET the status and act accordingly (delete/retry/etc).
        if new_operational_state in (states.FAILED, states.INVALID_QUOTE):
            agent['operational_state'] = new_operational_state

            # issue notification for invalid quotes
            if new_operational_state == states.INVALID_QUOTE:
                cloud_verifier_common.notify_error(agent)

            if agent['pending_event'] is not None:
                tornado.ioloop.IOLoop.current().remove_timeout(
                    agent['pending_event'])
            # NOTE: this strips the non-DB keys from the caller's dict itself
            # (no copy is made on this path, unlike the update further down).
            for key in exclude_db:
                if key in agent:
                    del agent[key]
            session.query(VerfierMain).filter_by(
                agent_id=agent['agent_id']).update(agent)
            session.commit()

            logger.warning("Agent %s failed, stopping polling", agent['agent_id'])
            return

        # propagate all state, but remove non-DB keys first (using exclude_db)
        try:
            agent_db = dict(agent)
            for key in exclude_db:
                if key in agent_db:
                    del agent_db[key]

            session.query(VerfierMain).filter_by(
                agent_id=agent_db['agent_id']).update(agent_db)
            session.commit()
        except SQLAlchemyError as e:
            # best effort: a failed write is logged and processing continues
            logger.error('SQLAlchemy Error: %s', e)

        # if new, get a quote
        if (main_agent_operational_state == states.START and
                new_operational_state == states.GET_QUOTE):
            agent['num_retries'] = 0
            agent['operational_state'] = states.GET_QUOTE
            await invoke_get_quote(agent, True)
            return

        if (main_agent_operational_state == states.GET_QUOTE and
                new_operational_state == states.PROVIDE_V):
            agent['num_retries'] = 0
            agent['operational_state'] = states.PROVIDE_V
            await invoke_provide_v(agent)
            return

        if (main_agent_operational_state in (states.PROVIDE_V, states.GET_QUOTE) and
                new_operational_state == states.GET_QUOTE):
            agent['num_retries'] = 0
            interval = config.getfloat('cloud_verifier', 'quote_interval')
            agent['operational_state'] = states.GET_QUOTE
            if interval == 0:
                await invoke_get_quote(agent, False)
            else:
                logger.debug("Setting up callback to check again in %f seconds", interval)
                # set up a call back to check again
                cb = functools.partial(invoke_get_quote, agent, False)
                pending = tornado.ioloop.IOLoop.current().call_later(interval, cb)
                # remembered so the timeout can be cancelled on stop/failure
                agent['pending_event'] = pending
            return

        maxr = config.getint('cloud_verifier', 'max_retries')
        retry = config.getfloat('cloud_verifier', 'retry_interval')
        if (main_agent_operational_state == states.GET_QUOTE and
                new_operational_state == states.GET_QUOTE_RETRY):
            if agent['num_retries'] >= maxr:
                logger.warning("Agent %s was not reachable for quote in %d tries, setting state to FAILED", agent['agent_id'], maxr)
                if agent['first_verified']:  # only notify on previously good agents
                    cloud_verifier_common.notify_error(
                        agent, msgtype='comm_error')
                else:
                    logger.debug("Communication error for new agent. No notification will be sent")
                await process_agent(agent, states.FAILED)
            else:
                agent['operational_state'] = states.GET_QUOTE
                cb = functools.partial(invoke_get_quote, agent, True)
                agent['num_retries'] += 1
                logger.info("Connection to %s refused after %d/%d tries, trying again in %f seconds", agent['ip'], agent['num_retries'], maxr, retry)
                tornado.ioloop.IOLoop.current().call_later(retry, cb)
            return

        if (main_agent_operational_state == states.PROVIDE_V and
                new_operational_state == states.PROVIDE_V_RETRY):
            if agent['num_retries'] >= maxr:
                logger.warning("Agent %s was not reachable to provide v in %d tries, setting state to FAILED", agent['agent_id'], maxr)
                cloud_verifier_common.notify_error(
                    agent, msgtype='comm_error')
                await process_agent(agent, states.FAILED)
            else:
                agent['operational_state'] = states.PROVIDE_V
                cb = functools.partial(invoke_provide_v, agent)
                agent['num_retries'] += 1
                logger.info("Connection to %s refused after %d/%d tries, trying again in %f seconds", agent['ip'], agent['num_retries'], maxr, retry)
                tornado.ioloop.IOLoop.current().call_later(retry, cb)
            return
        # every supported transition returns above; reaching this is a bug
        raise Exception("nothing should ever fall out of this!")

    except Exception as e:
        logger.error("Polling thread error: %s", e)
        logger.exception(e)
Пример #8
0
    def do_quote(self):
        """Obtain and validate a TPM quote from the agent, then deliver U.

        A fresh nonce is generated and the agent's ``/quotes/identity``
        endpoint is queried. The reply's hash, encryption and signing
        algorithms are checked against the tenant's accepted lists and the
        quote is validated. On success ``self.U`` is RSA-encrypted with the
        agent's public key and POSTed to ``/keys/ukey`` together with the
        auth tag (and the payload, if one is set).

        If anything fails after the quote was fetched, ``self.do_cvstop()``
        is called before the error is re-raised.

        Raises:
            UserError: unexpected status or body, an unaccepted algorithm, an
                invalid quote, or a failed POST of the encrypted U.
            SystemExit: when the agent stays unreachable for
                ``tenant.max_retries`` attempts, or the POST returns 503/504.
        """
        self.nonce = TPM_Utilities.random_password(20)

        numtries = 0
        response = None
        # Note: We need a specific retry handler (perhaps in common), no point having localised unless we have to.
        while True:
            try:
                params = '/quotes/identity?nonce=%s' % (self.nonce)
                cloudagent_base_url = f'{self.agent_ip}:{self.agent_port}'
                do_quote = RequestsClient(cloudagent_base_url, tls_enabled=False)
                response = do_quote.get(
                    params,
                    cert=self.cert
                )
                response_body = response.json()

            except Exception:
                # response is still None when the request itself failed; in
                # that case re-raise the real error instead of tripping over
                # an AttributeError on None.
                if response is not None and response.status_code in (503, 504):
                    numtries += 1
                    maxr = config.getint('tenant', 'max_retries')
                    if numtries >= maxr:
                        logger.error("Tenant cannot establish connection to agent on %s with port %s", self.agent_ip, self.agent_port)
                        sys.exit()
                    retry = config.getfloat('tenant', 'retry_interval')
                    logger.info("Tenant connection to agent at %s refused %s/%s times, trying again in %s seconds...",
                        self.agent_ip, numtries, maxr, retry)
                    time.sleep(retry)
                    continue

                raise
            break

        try:
            if response is not None and response.status_code != 200:
                raise UserError(
                    "Status command response: %d Unexpected response from Cloud Agent." % response.status_code)

            if "results" not in response_body:
                raise UserError(
                    "Error: unexpected http response body from Cloud Agent: %s" % str(response.status_code))

            quote = response_body["results"]["quote"]
            logger.debug("Agent_quote received quote: %s", quote)

            public_key = response_body["results"]["pubkey"]
            logger.debug("Agent_quote received public key: %s", public_key)

            # Ensure hash_alg is in accept_tpm_hash_algs list
            hash_alg = response_body["results"]["hash_alg"]
            logger.debug("Agent_quote received hash algorithm: %s", hash_alg)
            if not algorithms.is_accepted(hash_alg, config.get('tenant', 'accept_tpm_hash_algs').split(',')):
                raise UserError(
                    "TPM Quote is using an unaccepted hash algorithm: %s" % hash_alg)

            # Ensure enc_alg is in accept_tpm_encryption_algs list
            enc_alg = response_body["results"]["enc_alg"]
            logger.debug("Agent_quote received encryption algorithm: %s", enc_alg)
            if not algorithms.is_accepted(enc_alg, config.get('tenant', 'accept_tpm_encryption_algs').split(',')):
                raise UserError(
                    "TPM Quote is using an unaccepted encryption algorithm: %s" % enc_alg)

            # Ensure sign_alg is in accept_tpm_signing_algs list
            sign_alg = response_body["results"]["sign_alg"]
            logger.debug("Agent_quote received signing algorithm: %s", sign_alg)
            if not algorithms.is_accepted(sign_alg, config.get('tenant', 'accept_tpm_signing_algs').split(',')):
                raise UserError(
                    "TPM Quote is using an unaccepted signing algorithm: %s" % sign_alg)

            if not self.validate_tpm_quote(public_key, quote, hash_alg):
                raise UserError(
                    "TPM Quote from cloud agent is invalid for nonce: %s" % self.nonce)

            logger.info("Quote from %s validated", self.agent_ip)

            # encrypt U with the public key
            encrypted_U = crypto.rsa_encrypt(
                crypto.rsa_import_pubkey(public_key), self.U)

            b64_encrypted_u = base64.b64encode(encrypted_U)
            logger.debug("b64_encrypted_u: %s", b64_encrypted_u.decode('utf-8'))
            data = {
                'encrypted_key': b64_encrypted_u,
                'auth_tag': self.auth_tag
            }

            if self.payload is not None:
                data['payload'] = self.payload

            u_json_message = json.dumps(data)

            # post encrypted U back to CloudAgent
            params = '/keys/ukey'
            cloudagent_base_url = (
                f'{self.agent_ip}:{self.agent_port}'
            )

            post_ukey = RequestsClient(cloudagent_base_url, tls_enabled=False)
            response = post_ukey.post(
                params,
                data=u_json_message
            )

            if response.status_code == 503:
                logger.error("Cannot connect to Agent at %s with Port %s. Connection refused.", self.agent_ip, self.agent_port)
                sys.exit()
            elif response.status_code == 504:
                logger.error("Verifier at %s with Port %s timed out.", self.verifier_ip, self.verifier_port)
                sys.exit()

            if response.status_code != 200:
                # NOTE(review): response_body still holds the body of the
                # earlier quote GET, not of this POST - confirm that this is
                # what should be logged here.
                keylime_logging.log_http_response(
                    logger, logging.ERROR, response_body)
                raise UserError(
                    "Posting of Encrypted U to the Cloud Agent failed with response code %d" % response.status_code)
        except Exception:
            # tell the verifier to stop before propagating the failure
            self.do_cvstop()
            raise