示例#1
0
    def find_primary(self, timeout=3):
        """
        Locate the current primary by polling the joined nodes.

        Each joined node is asked for ``/node/primary_info`` in turn; the
        whole sweep is repeated every 0.1s until one node answers with a
        non-None primary or ``timeout`` seconds have elapsed.

        Returns a ``(primary_node, view)`` tuple, where ``primary_node`` is
        looked up with ``self._get_node_by_id``.

        Raises ``PrimaryNotFound`` if no primary was reported before the
        deadline.
        """
        found_id = None
        current_view = None

        deadline = time.time() + timeout
        while time.time() < deadline:
            for candidate in self.get_joined_nodes():
                with candidate.client() as client:
                    try:
                        response = client.get("/node/primary_info")
                        if response.status_code != 200:
                            # Any non-200 answer must be the "Primary unknown"
                            # error; move on to the next node.
                            assert "Primary unknown" in response.body, response
                            LOG.warning("Primary unknown. Retrying...")
                            continue
                        found_id = response.body["primary_id"]
                        current_view = response.body["current_view"]
                        break
                    except CCFConnectionException:
                        LOG.warning(
                            f"Could not successful connect to node {candidate.node_id}. Retrying..."
                        )
            if found_id is not None:
                break
            time.sleep(0.1)

        if found_id is None:
            raise PrimaryNotFound
        return (self._get_node_by_id(found_id), current_view)
示例#2
0
文件: recovery.py 项目: microsoft/CCF
def find_recovery_tx_seqno(node):
    """
    Return the seqno of the first ledger transaction, in ledger iteration
    order, that sets the service status to "Open".

    The node's ``/node/state`` response supplies ``last_recovered_seqno``.
    Ledger chunks that end before that seqno are skipped entirely.

    Returns None if the node does not report ``last_recovered_seqno`` or if
    no matching transaction is found in the scanned chunks.
    """
    with node.client() as c:
        r = c.get("/node/state").body.json()
        if "last_recovered_seqno" not in r:
            return None
        min_recovery_seqno = r["last_recovered_seqno"]

    ledger = ccf.ledger.Ledger(node.remote.ledger_paths(), committed_only=False)
    for chunk in ledger:
        _, chunk_end_seqno = chunk.get_seqnos()
        if chunk_end_seqno < min_recovery_seqno:
            continue
        # NOTE(review): transactions inside a retained chunk are not filtered
        # against min_recovery_seqno individually - confirm this is intended.
        for tx in chunk:
            # Fetch the public domain once and reuse it for tables and seqno.
            public_domain = tx.get_public_domain()
            tables = public_domain.get_tables()
            if ccf.ledger.SERVICE_INFO_TABLE_NAME not in tables:
                continue
            service_info = json.loads(
                tables[ccf.ledger.SERVICE_INFO_TABLE_NAME][
                    ccf.ledger.WELL_KNOWN_SINGLETON_TABLE_KEY
                ]
            )
            if service_info["status"] == "Open":
                return public_domain.get_seqno()
    return None
示例#3
0
    def wait_for_all_nodes_to_commit(self, primary=None, tx_id=None, timeout=10):
        """
        Wait for all nodes to have joined the network and committed all transactions
        executed on the primary.

        Either ``tx_id`` or ``primary`` must be given. When ``tx_id`` is None,
        ``primary`` is polled on ``/node/network/nodes/self`` until it returns
        a valid TxID, which is then used as the target. Every joined node is
        then polled on ``/node/local_tx`` until all report the target as
        committed. Both phases share one deadline of ``timeout`` seconds.

        Raises:
            ValueError: if neither ``primary`` nor ``tx_id`` is specified.
            AssertionError: if no valid TxID could be read from the primary,
                or if not every joined node reported the transaction as
                committed before the deadline.
            RuntimeError: if a node reports the transaction as invalid.
        """
        if not (primary or tx_id):
            raise ValueError("Either a valid TxID or primary node should be specified")

        end_time = time.time() + timeout

        # If no TxID is specified, retrieve latest readable one
        if tx_id is None:
            while time.time() < end_time:
                with primary.client() as c:
                    resp = c.get(
                        "/node/network/nodes/self"
                    )  # Well-known read-only endpoint
                    tx_id = TxID(resp.view, resp.seqno)
                    if tx_id.valid():
                        break
                time.sleep(0.1)
            # tx_id is still None if the deadline passed before the first
            # poll; report that through the assertion rather than an
            # AttributeError on None.
            assert (
                tx_id is not None and tx_id.valid()
            ), f"Primary {primary.node_id} has not made any progress yet ({tx_id})"

        caught_up_nodes = []
        logs = {}
        while time.time() < end_time:
            caught_up_nodes = []
            for node in self.get_joined_nodes():
                with node.client() as c:
                    # Keep only the log lines of the latest attempt per node
                    logs[node.node_id] = []
                    resp = c.get(
                        f"/node/local_tx?transaction_id={tx_id}",
                        log_capture=logs[node.node_id],
                    )
                    if resp.status_code != 200:
                        # Node may not have joined the network yet, try again
                        break
                    status = TxStatus(resp.body.json()["status"])
                    if status == TxStatus.Committed:
                        caught_up_nodes.append(node)
                    elif status == TxStatus.Invalid:
                        flush_info(logs[node.node_id], None, 0)
                        raise RuntimeError(
                            f"Node {node.node_id} reports transaction ID {tx_id} is invalid and will never be committed"
                        )
                    # Any other status: not committed yet, poll again later

            if len(caught_up_nodes) == len(self.get_joined_nodes()):
                break
            time.sleep(0.1)

        for lines in logs.values():
            flush_info(lines, None, 0)
        assert len(caught_up_nodes) == len(
            self.get_joined_nodes()
        ), f"Only {len(caught_up_nodes)} (out of {len(self.get_joined_nodes())}) nodes have joined the network"
示例#4
0
 def _wait_for_app_open(self, node, timeout=3):
     end_time = time.time() + timeout
     while time.time() < end_time:
         # As an operator, query a well-known /app endpoint to find out
         # if the app has been opened to users
         with node.client() as c:
             r = c.get("/app/commit")
             if not (r.status_code == http.HTTPStatus.NOT_FOUND.value):
                 return
             time.sleep(0.1)
     raise TimeoutError(f"Application frontend was not open after {timeout}s")
示例#5
0
 def wait_for_commit_proof(self, node, seqno, timeout=3):
     """
     Wait until ``seqno`` has a commit proof on ``node``.

     The node's ``/node/commit`` is polled every 0.1s. Once the reported
     seqno reaches ``seqno``, a write request is issued as an active
     consortium member and the method waits for that write to commit.

     Returns True on success.

     Raises TimeoutError if the node's commit does not reach ``seqno``
     within ``timeout`` seconds, and AssertionError if the write request
     does not return 200 OK.
     """
     deadline = time.time() + timeout
     while time.time() < deadline:
         with node.client() as c:
             commit_resp = c.get("/node/commit")
             committed = TxID.from_str(commit_resp.body.json()["transaction_id"])
             if committed.seqno >= seqno:
                 member_id = self.consortium.get_any_active_member().local_id
                 with node.client(member_id) as member_client:
                     # update_state_digest serves as a convenient,
                     # app-agnostic write transaction.
                     write_resp = member_client.post("/gov/ack/update_state_digest")
                     assert (
                         write_resp.status_code == http.HTTPStatus.OK.value
                     ), f"Error ack/update_state_digest: {write_resp}"
                     c.wait_for_commit(write_resp)
                     return True
         time.sleep(0.1)
     raise TimeoutError(f"seqno {seqno} did not have commit proof after {timeout}s")
示例#6
0
文件: network.py 项目: eddyashton/CCF
 def wait_for_node_commit_sync(self, timeout=3):
     """
     Wait for commit level to get in sync on all nodes. This is expected to
     happen once CFTR has been established, in the absence of new transactions.

     Every joined node is polled on ``/node/commit`` every 0.1s until all
     report the same ``transaction_id`` or ``timeout`` seconds have elapsed.
     If the nodes still disagree at the deadline, each node's
     ``/node/consensus`` response is pretty-printed before failing.

     An empty set of joined nodes counts as in sync. If the deadline has
     already passed on entry, no node is polled and the check passes.

     Raises AssertionError if a node does not answer 200 OK or if the nodes
     report different commit values at the deadline.
     """
     # Bound before the loop so the checks below are well-defined even if
     # the loop body never runs.
     commits = []
     end_time = time.time() + timeout
     while time.time() < end_time:
         commits = []
         for node in self.get_joined_nodes():
             with node.client() as c:
                 r = c.get("/node/commit")
                 assert r.status_code == http.HTTPStatus.OK.value
                 body = r.body.json()
                 commits.append(body["transaction_id"])
         # commits[:1] rather than [commits[0]] so an empty list is safe
         if commits[:1] * len(commits) == commits:
             break
         time.sleep(0.1)
     expected = commits[:1] * len(commits)
     if expected != commits:
         # Dump consensus state of every node to help diagnose the mismatch
         for node in self.get_joined_nodes():
             with node.client() as c:
                 r = c.get("/node/consensus")
                 pprint.pprint(r.body.json())
     assert expected == commits, f"Multiple commit values: {commits}"
示例#7
0
 def wait_for_state(self, node, state, timeout=3):
     """
     Poll ``/node/state`` on ``node`` until it reports ``state``.

     The node is queried every 0.1s for up to ``timeout`` seconds; refused
     connections are ignored and retried. When the awaited state is
     "partOfNetwork", ``self.status`` is set to ``ServiceStatus.OPEN``.

     Raises TimeoutError if the state is not observed before the deadline.
     """
     deadline = time.time() + timeout
     reached = False
     while time.time() < deadline:
         try:
             with node.client(connection_timeout=timeout) as client:
                 response = client.get("/node/state")
                 reached = response.body.json()["state"] == state
         except ConnectionRefusedError:
             # Node is not accepting connections yet; retry after the sleep
             reached = False
         if reached:
             break
         time.sleep(0.1)

     if not reached:
         raise TimeoutError(
             f"Timed out waiting for state {state} on node {node.node_id}")
     if state == "partOfNetwork":
         self.status = ServiceStatus.OPEN
示例#8
0
    def wait_for_all_nodes_to_catch_up(self, primary, timeout=10):
        """
        Wait for all nodes to have joined the network and globally replicated
        all transactions globally executed on the primary (including transactions
        which added the nodes).

        The ``view``/``seqno`` reported by ``primary`` on ``/node/commit`` are
        sampled first, polling until the seqno is non-zero. Every joined node
        is then polled on ``/node/local_tx`` until all report that transaction
        as committed. Both phases share one deadline of ``timeout`` seconds.

        Raises:
            AssertionError: if the primary's seqno is still 0 at the deadline,
                or if not every joined node reported the transaction as
                committed before the deadline.
            RuntimeError: if a node reports the transaction as invalid.
        """
        end_time = time.time() + timeout

        # Bound before the loop so that an already-expired deadline fails the
        # assertion below instead of raising UnboundLocalError.
        seqno = 0
        view = None
        while time.time() < end_time:
            with primary.client() as c:
                resp = c.get("/node/commit")
                body = resp.body.json()
                seqno = body["seqno"]
                view = body["view"]
                if seqno != 0:
                    break
            time.sleep(0.1)
        assert (
            seqno != 0
        ), f"Primary {primary.node_id} has not made any progress yet (view: {view}, seqno: {seqno})"

        caught_up_nodes = []
        while time.time() < end_time:
            caught_up_nodes = []
            for node in self.get_joined_nodes():
                with node.client() as c:
                    # NOTE(review): the result of this request is discarded;
                    # confirm whether the call is still needed.
                    c.get("/node/commit")
                    resp = c.get(f"/node/local_tx?view={view}&seqno={seqno}")
                    if resp.status_code != 200:
                        # Node may not have joined the network yet, try again
                        break
                    status = TxStatus(resp.body.json()["status"])
                    if status == TxStatus.Committed:
                        caught_up_nodes.append(node)
                    elif status == TxStatus.Invalid:
                        raise RuntimeError(
                            f"Node {node.node_id} reports transaction ID {view}.{seqno} is invalid and will never be committed"
                        )
                    # Any other status: not committed yet, poll again later

            if len(caught_up_nodes) == len(self.get_joined_nodes()):
                break
            time.sleep(0.1)
        assert len(caught_up_nodes) == len(
            self.get_joined_nodes()
        ), f"Only {len(caught_up_nodes)} (out of {len(self.get_joined_nodes())}) nodes have joined the network"
示例#9
0
 def wait_for_node_commit_sync(self, timeout=3):
     """
     Wait for commit level to get in sync on all nodes. This is expected to
     happen once CFTR has been established, in the absence of new transactions.

     Every joined node is polled on ``/node/commit`` every 0.1s, and its
     commit point is recorded as ``"<view>.<seqno>"``. Polling stops when
     all nodes report the same value or ``timeout`` seconds have elapsed.

     An empty set of joined nodes counts as in sync. If the deadline has
     already passed on entry, no node is polled and the check passes.

     Raises AssertionError if the nodes report different commit values at
     the deadline.
     """
     # Bound before the loop so the checks below are well-defined even if
     # the loop body never runs.
     commits = []
     end_time = time.time() + timeout
     while time.time() < end_time:
         commits = []
         for node in self.get_joined_nodes():
             with node.client() as c:
                 r = c.get("/node/commit")
                 commits.append(f"{r.view}.{r.seqno}")
         # commits[:1] rather than [commits[0]] so an empty list is safe
         if commits[:1] * len(commits) == commits:
             break
         time.sleep(0.1)
     expected = commits[:1] * len(commits)
     assert expected == commits, f"{commits} != {expected}"
示例#10
0
文件: network.py 项目: eddyashton/CCF
    def find_primary(self, nodes=None, timeout=3, log_capture=None):
        """
        Locate the current primary by polling ``/node/network``.

        The nodes asked are ``nodes`` if given (and non-empty), otherwise all
        joined nodes. The sweep is repeated every 0.1s until one node reports
        a non-None ``primary_id`` or ``timeout`` seconds have elapsed. The log
        lines captured for the most recent request are handed to
        ``flush_info`` together with ``log_capture`` before returning or
        raising.

        Returns a ``(primary_node, view)`` tuple, where ``primary_node`` is
        looked up with ``self._get_node_by_service_id``.

        Raises ``PrimaryNotFound`` if no primary was reported before the
        deadline.
        """
        found_id = None
        current_view = None
        request_logs = []

        candidates = nodes or self.get_joined_nodes()
        deadline = time.time() + timeout
        while time.time() < deadline:
            for candidate in candidates:
                with candidate.client() as client:
                    try:
                        # Only the latest request's log lines are kept
                        request_logs = []
                        response = client.get(
                            "/node/network", timeout=1, log_capture=request_logs
                        )
                        assert (
                            response.status_code == http.HTTPStatus.OK.value
                        ), response

                        payload = response.body.json()
                        current_view = payload["current_view"]
                        found_id = payload["primary_id"]
                        if found_id is not None:
                            break

                    except Exception:
                        # Any failure (including the assertion above) is
                        # logged and the next node is tried.
                        LOG.warning(
                            f"Could not successfully connect to node {candidate.local_node_id}. Retrying..."
                        )

            if found_id is not None:
                break
            time.sleep(0.1)

        # Captured lines are flushed on both the success and failure paths
        flush_info(request_logs, log_capture, 0)
        if found_id is None:
            raise PrimaryNotFound

        return (self._get_node_by_service_id(found_id), current_view)
示例#11
0
文件: network.py 项目: eddyashton/CCF
    def wait_for_primary_unanimity(
            self,
            timeout_multiplier=DEFAULT_TIMEOUT_MULTIPLIER,
            min_view=None):
        """
        Wait until every joined node reports the same primary and return it.

        Each joined node is asked individually through ``find_primary``. An
        answer is only counted when ``min_view`` is None or the reported view
        is strictly greater than ``min_view``. Polling repeats every 0.1s for
        up to ``self.observed_election_duration * timeout_multiplier``
        seconds.

        If agreement is not reached, the log lines captured during the last
        polling round are flushed and each node's ``/node/consensus`` response
        is pretty-printed before failing.

        Returns the agreed primary node.

        Raises AssertionError if the joined nodes do not all report the same
        primary before the deadline.
        """
        timeout = self.observed_election_duration * timeout_multiplier
        LOG.info(
            f"Waiting up to {timeout}s for all nodes to agree on the primary")
        start_time = time.time()
        end_time = start_time + timeout

        def unanimous(found):
            # One answer per joined node, and all answers identical
            return (len(self.get_joined_nodes()) == len(found)
                    and len(set(found)) <= 1)

        primaries = []
        # Bound before the loop so the failure path below can always flush it
        logs = []
        while time.time() < end_time:
            primaries = []
            # Shared by all nodes within a round, so that a failure reports
            # the whole last round rather than only the last node asked.
            # Presumably find_primary appends its captured lines to
            # log_capture - confirm against flush_info.
            logs = []
            for node in self.get_joined_nodes():
                try:
                    primary, view = self.find_primary(nodes=[node],
                                                      log_capture=logs)
                    if min_view is None or view > min_view:
                        primaries.append(primary)
                except PrimaryNotFound:
                    # This node does not know the primary yet; it is simply
                    # not counted, which keeps the round non-unanimous.
                    pass
            # Stop checking once all primaries are the same
            if unanimous(primaries):
                break
            time.sleep(0.1)
        all_good = unanimous(primaries)
        if not all_good:
            flush_info(logs)
            for node in self.get_joined_nodes():
                with node.client() as c:
                    r = c.get("/node/consensus")
                    pprint.pprint(r.body.json())
        assert all_good, f"Multiple primaries: {primaries}"
        delay = time.time() - start_time
        # NOTE(review): with no joined nodes, primaries is empty here and the
        # indexing below raises IndexError - confirm callers never hit this.
        LOG.info(
            f"Primary unanimity after {delay}s: {primaries[0].local_node_id} ({primaries[0].node_id})"
        )
        return primaries[0]
示例#12
0
    def find_primary(self, timeout=3, log_capture=None):
        """
        Locate the current primary by polling ``/node/network`` on the joined
        nodes.

        The sweep over all joined nodes is repeated every 0.1s until one node
        reports a non-None ``primary_id`` or ``timeout`` seconds have elapsed.
        The log lines captured for the most recent request are handed to
        ``flush_info`` together with ``log_capture`` before returning or
        raising.

        Returns a ``(primary_node, view)`` tuple, where ``primary_node`` is
        looked up with ``self._get_node_by_id``.

        Raises ``PrimaryNotFound`` if no primary was reported before the
        deadline, or if the answering node reported a view change in
        progress.
        """
        found_id = None
        current_view = None
        request_logs = []

        deadline = time.time() + timeout
        while time.time() < deadline:
            for candidate in self.get_joined_nodes():
                with candidate.client() as client:
                    try:
                        # Only the latest request's log lines are kept
                        request_logs = []
                        response = client.get(
                            "/node/network", log_capture=request_logs
                        )
                        assert response.status_code == 200, response
                        payload = response.body.json()
                        found_id = payload["primary_id"]
                        current_view = payload["current_view"]
                        view_change_in_progress = payload[
                            "view_change_in_progress"]
                        if found_id is not None:
                            break

                    except CCFConnectionException:
                        LOG.warning(
                            f"Could not successfully connect to node {candidate.node_id}. Retrying..."
                        )
            if found_id is not None:
                break
            time.sleep(0.1)

        # Captured lines are flushed on both the success and failure paths
        flush_info(request_logs, log_capture, 0)
        # view_change_in_progress is only read when found_id is set, in which
        # case the same response has already assigned it.
        if found_id is None or view_change_in_progress:
            raise PrimaryNotFound

        return (self._get_node_by_id(found_id), current_view)