Example #1
def init_lg_repro_drop_data(drop: dict):
    """
    Creates and appends per-drop reproducibility information at the logical graph stage.
    :param drop:
    :return: The same drop with appended reproducibility information
    """
    level = rflag_caster(drop["reprodata"]["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)
    if level == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            data = accumulate_lg_drop_data(drop, rmode)
            merkletree = MerkleTree(data.items(), common_hash)
            data["merkleroot"] = merkletree.merkle_root
            drop["reprodata"][rmode.name]["lg_data"] = data
            drop["reprodata"][rmode.name]["lg_parenthashes"] = {}
    else:
        data = accumulate_lg_drop_data(drop, level)
        merkletree = MerkleTree(data.items(), common_hash)
        data["merkleroot"] = merkletree.merkle_root
        drop["reprodata"]["lg_data"] = data
        drop["reprodata"]["lg_parenthashes"] = {}
    return drop
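For orientation, a minimal sketch (not taken from the project) of the per-drop "reprodata" entry this function produces when a single rmode is requested; the key names come from the code above, the values are placeholders, and fields added earlier at the LGT stage are omitted:

per_drop_reprodata = {
    "rmode": "1",  # stringified ReproducibilityFlags value (placeholder)
    "lg_data": {
        # ... fields gathered by accumulate_lg_drop_data(drop, level) ...
        "merkleroot": "<MerkleTree(data.items(), common_hash).merkle_root>",
    },
    "lg_parenthashes": {},  # populated later when the logical blockdag is built
}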
Example #2
def accumulate_pgt_unroll_drop_data(drop: dict):
    """
    Accumulates relevant reproducibility fields for a single drop at the physical template level.
    :param drop:
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    """
    data = {}
    rmode = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(rmode):
        logger.warning('Requested reproducibility mode %s not yet implemented',
                       str(rmode))
        rmode = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(rmode.value)
    if rmode == ReproducibilityFlags.NOTHING:
        return data
    if rmode == ReproducibilityFlags.REPRODUCE:
        data['type'] = drop['type']
        if drop['type'] == 'plain':
            data['storage'] = drop['storage']
        return data
    if rmode.value >= ReproducibilityFlags.RERUN.value:
        data['type'] = drop['type']
        if data['type'] == 'plain':
            data['storage'] = drop['storage']
        else:
            # WARNING: Added to differentiate between subtle component differences.
            data['dt'] = drop['dt']
    if rmode == ReproducibilityFlags.RECOMPUTE or rmode == ReproducibilityFlags.REPLICATE_COMP:
        data['rank'] = drop['rank']

    return data
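As a rough, hypothetical illustration of the return shapes (the field names are taken from the function above; the drop contents are invented):

# NOTHING                    -> {}
# REPRODUCE                  -> {"type": ...} plus {"storage": ...} for 'plain' drops
# RERUN and stronger         -> {"type": ...} plus {"storage": ...} ('plain') or {"dt": ...} (otherwise)
# RECOMPUTE / REPLICATE_COMP -> the above plus {"rank": ...}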
Example #3
def init_pg_repro_data(physical_graph: list):
    """
    Handles adding reproducibility data at the physical graph level.
    :param physical_graph: The physical graph data structure (a list of drops + reprodata dictionary)
    :return: The same physical graph with new information appended
    """
    reprodata = physical_graph.pop()
    if "rmode" not in reprodata:
        physical_graph.append(reprodata)
        return physical_graph
    level = rflag_caster(reprodata["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
    if level == ReproducibilityFlags.NOTHING:
        physical_graph.append(reprodata)
        return physical_graph
    for drop in physical_graph:
        init_pg_repro_drop_data(drop)
    if level == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            leaves, _ = build_blockdag(physical_graph, "pg", rmode)
            reprodata[rmode.name]["signature"] = agglomerate_leaves(leaves)
    else:
        leaves, _ = build_blockdag(physical_graph, "pg")
        reprodata["signature"] = agglomerate_leaves(leaves)
    physical_graph.append(reprodata)
    logger.info("Reproducibility data finished at PG level")
    return physical_graph
Example #4
def init_lg_repro_data(logical_graph: dict):
    """
    Handles adding reproducibility data at the logical graph level.
    Also builds the logical data blockdag over the entire structure.
    :param logical_graph: The logical graph data structure (a JSON object (a dict))
    :return: The same lgt object with new information appended
    """
    if "reprodata" not in logical_graph:
        return logical_graph
    level = rflag_caster(logical_graph["reprodata"]["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
    if level == ReproducibilityFlags.NOTHING:
        return logical_graph
    for drop in logical_graph.get("nodeDataArray", []):
        init_lg_repro_drop_data(drop)
    if level == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            if rmode.name not in logical_graph["reprodata"]:
                logical_graph["reprodata"][rmode.name] = {}
            leaves, _ = lg_build_blockdag(logical_graph, rmode)
            logical_graph["reprodata"][
                rmode.name]["signature"] = agglomerate_leaves(leaves)
    else:
        leaves, _ = lg_build_blockdag(logical_graph)
        logical_graph["reprodata"]["signature"] = agglomerate_leaves(leaves)
    logger.info("Reproducibility data finished at LG level")
    return logical_graph
Example #5
def accumulate_pgt_unroll_drop_data(drop: dict):
    """
    Accumulates relevant reproducibility fields for a single drop at the physical template level.
    :param drop:
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    """
    if drop.get("reprodata") is None:
        drop["reprodata"] = {
            "rmode": str(REPRO_DEFAULT.value),
            "lg_blockhash": None
        }
    if drop["reprodata"].get("rmode") is None:
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)
    else:
        level = rflag_caster(drop["reprodata"]["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)
    if drop.get("type") is None:
        return {}
    drop_type = drop["type"]
    if level == ReproducibilityFlags.ALL:
        data = {}
        for rmode in ALL_RMODES:
            pgt_fields = pgt_unroll_block_fields(drop_type, rmode)
            data[rmode.name] = extract_fields(drop, pgt_fields)
    else:
        pgt_fields = pgt_unroll_block_fields(drop_type, level)
        data = extract_fields(drop, pgt_fields)
    return data
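When the graph-wide mode is ALL, the returned dictionary is nested one level deeper, keyed by rmode name; a hypothetical shape (key names follow the code above, contents are invented):

# {
#     "RERUN": {... fields from pgt_unroll_block_fields(drop_type, RERUN) ...},
#     "REPRODUCE": {...},
#     # ... one entry per rmode in ALL_RMODES ...
# }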
Example #6
def init_lgt_repro_drop_data(drop: dict, level: ReproducibilityFlags):
    """
    Creates and appends per-drop reproducibility information at the logical template stage.
    :param drop:
    :param level:
    :return: The same drop with appended reproducibility information.
    """
    # Catch pre-set per-drop rmode
    if "reprodata" in drop.keys():
        if "rmode" in drop["reprodata"].keys():
            level = rflag_caster(drop["reprodata"]["rmode"])
    else:
        drop["reprodata"] = {"rmode": str(level.value)}
    if level == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            data = accumulate_lgt_drop_data(drop, rmode)
            merkletree = MerkleTree(data.items(), common_hash)
            data["merkleroot"] = merkletree.merkle_root
            drop["reprodata"][rmode.name] = {
                "rmode": str(rmode.value),
                "lgt_data": data,
                "lg_parenthashes": {},
            }
    else:
        data = accumulate_lgt_drop_data(drop, level)
        merkletree = MerkleTree(data.items(), common_hash)
        data["merkleroot"] = merkletree.merkle_root
        drop["reprodata"] = {
            "rmode": str(level.value),
            "lgt_data": data,
            "lg_parenthashes": {},
        }
    return drop
Example #7
def accumulate_pgt_partition_drop_data(drop: dict):
    """
    Accumulates the unroll-level drop data together with the fields added at the partition level.
    :param drop:
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    """
    if drop.get("reprodata") is None:
        drop["reprodata"] = {
            "rmode": str(REPRO_DEFAULT.value),
            "lg_blockhash": None
        }
    if drop["reprodata"].get("rmode") is None:
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)
    else:
        level = rflag_caster(drop["reprodata"]["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)
    if level == ReproducibilityFlags.ALL:
        data = {}
        unroll_data = accumulate_pgt_unroll_drop_data(drop)
        for rmode in ALL_RMODES:
            pgt_fields = pgt_partition_block_fields(rmode)
            data[rmode.name] = extract_fields(drop, pgt_fields)
            unroll_data[rmode.name].update(data[rmode.name])
        return unroll_data
    else:
        pgt_fields = pgt_partition_block_fields(level)
        data = extract_fields(drop, pgt_fields)
        return_data = accumulate_pgt_unroll_drop_data(drop)
        return_data.update(data)
        return return_data
Example #8
def init_runtime_repro_data(runtime_graph: dict, reprodata: dict):
    """
    Adds reproducibility data at the runtime level to graph-wide values.
    :param runtime_graph: The runtime graph (a dictionary of drop descriptions)
    :param reprodata: The graph-wide reproducibility data dictionary
    :return: The same runtime graph with graph-wide signature information appended
    """
    if reprodata is None:
        return runtime_graph
    level = rflag_caster(reprodata["rmode"])
    if not rmode_supported(level):
        # TODO: Logging needs sessionID at this stage
        # logger.warning("Requested reproducibility mode %s not yet implemented", str(rmode))
        level = REPRO_DEFAULT
        reprodata["rmode"] = str(level.value)
    for drop in runtime_graph.values():
        init_rg_repro_drop_data(drop)
    if level == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            leaves, _ = build_blockdag(list(runtime_graph.values()), "rg",
                                       rmode)
            reprodata[rmode.name]["signature"] = agglomerate_leaves(leaves)
    else:
        leaves, _ = build_blockdag(list(runtime_graph.values()), "rg")
        reprodata["signature"] = agglomerate_leaves(leaves)
    runtime_graph["reprodata"] = reprodata
    # logger.info("Reproducibility data finished at runtime level")
    return runtime_graph
Example #9
def init_lgt_repro_data(logical_graph_template: dict, rmode: str):
    """
    Creates and appends graph-wide reproducibility data at the logical template stage.
    Currently, this is basically a stub that adds the requested flag to the graph.
    Later, this will contain significantly more information.
    :param logical_graph_template: The logical graph data structure (a JSON object (a dict))
    :param rmode: One of several values 0-5 defined in constants.py
    :return: The same lgt object with new information appended
    """
    rmode = rflag_caster(rmode)
    if not rmode_supported(rmode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(rmode))
        rmode = REPRO_DEFAULT
    if rmode == ReproducibilityFlags.NOTHING:
        return logical_graph_template
    reprodata = {
        "rmode": str(rmode.value),
        "meta_data": accumulate_meta_data()
    }
    meta_tree = MerkleTree(reprodata.items(), common_hash)
    reprodata["merkleroot"] = meta_tree.merkle_root
    for drop in logical_graph_template.get("nodeDataArray", []):
        init_lgt_repro_drop_data(drop, rmode)
    logical_graph_template["reprodata"] = reprodata
    logger.info("Reproducibility data finished at LGT level")
    return logical_graph_template
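A minimal sketch, with placeholder values, of the graph-wide "reprodata" entry this function attaches to the logical graph template:

lgt_reprodata = {
    "rmode": "1",  # stringified flag value (placeholder)
    "meta_data": {},  # whatever accumulate_meta_data() returns
    "merkleroot": "<merkle root computed over the two items above>",
}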
Example #10
def init_rg_repro_drop_data(drop: dict):
    """
    Creates and appends per-drop reproducibility information at the runtime graph stage.
    :param drop:
    :return: The same drop with appended reproducibility information
    """
    level = rflag_caster(drop["reprodata"]["rmode"])
    if level == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            drop["reprodata"][rmode.name]["rg_parenthashes"] = {}
    elif level != ReproducibilityFlags.NOTHING:
        drop["reprodata"]["rg_parenthashes"] = {}
    return drop
Example #11
def accumulate_pg_drop_data(drop: dict):
    """
    Accumulate relevant reproducibility fields for a single drop at the physical graph level.
    :param drop:
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    """
    rmode = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(rmode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(rmode))
        rmode = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(rmode.value)
    data = {}
    if rmode == ReproducibilityFlags.REPLICATE_COMP or rmode == ReproducibilityFlags.RECOMPUTE:
        data['node'] = drop['node']
        data['island'] = drop['island']
    return data
Example #12
def init_lg_repro_drop_data(drop: dict):
    """
    Creates and appends per-drop reproducibility information at the logical graph stage.
    :param drop:
    :return: The same drop with appended reproducibility information
    """
    rmode = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(rmode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(rmode))
        rmode = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(rmode.value)
    data = accumulate_lg_drop_data(drop, rmode)
    merkletree = MerkleTree(data.items(), common_hash)
    data['merkleroot'] = merkletree.merkle_root
    drop['reprodata']['lg_data'] = data
    drop['reprodata']['lg_parenthashes'] = {}
    return drop
Example #13
def accumulate_pgt_partition_drop_data(drop: dict):
    """
    Accumulates the unroll-level drop data together with the fields added at the partition level.
    :param drop:
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    """
    rmode = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(rmode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(rmode))
        rmode = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(rmode.value)
    data = accumulate_pgt_unroll_drop_data(drop)
    # This is the only piece of new information added at the partition level
    # It is only pertinent to recomputation and computational replication
    if rmode == ReproducibilityFlags.REPLICATE_COMP or rmode == ReproducibilityFlags.RECOMPUTE:
        data['node'] = drop['node'][1:]
        data['island'] = drop['island'][1:]
    return data
Example #14
def init_runtime_repro_data(rg: dict, reprodata: dict):
    """
    Adds reproducibility data at the runtime level to graph-wide values.
    :param rg: The runtime graph (a dictionary of drop descriptions)
    :param reprodata: The graph-wide reproducibility data dictionary
    :return: The same runtime graph with graph-wide signature information appended
    """
    rmode = rflag_caster(reprodata['rmode'])
    if not rmode_supported(rmode):
        # TODO: Logging needs sessionID at this stage
        # logger.warning("Requested reproducibility mode %s not yet implemented", str(rmode))
        rmode = REPRO_DEFAULT
        reprodata['rmode'] = str(rmode.value)
    for drop_id, drop in rg.items():
        init_rg_repro_drop_data(drop)
    leaves, visited = build_blockdag(list(rg.values()), 'rg')
    reprodata['signature'] = agglomerate_leaves(leaves)
    rg['reprodata'] = reprodata
    # logger.info("Reproducibility data finished at runtime level")
    return rg
Example #15
def init_pg_repro_data(pg: list):
    """
    Handles adding reproducibility data at the physical graph level.
    :param pg: The physical graph data structure (a list of drops + reprodata dictionary)
    :return: The same pg object with new information appended
    """
    reprodata = pg.pop()
    rmode = rflag_caster(reprodata['rmode'])
    if not rmode_supported(rmode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(rmode))
        rmode = REPRO_DEFAULT
        reprodata['rmode'] = str(rmode.value)
    for drop in pg:
        init_pg_repro_drop_data(drop)
    leaves, visited = build_blockdag(pg, 'pg')
    reprodata['signature'] = agglomerate_leaves(leaves)
    pg.append(reprodata)
    logger.info("Reproducibility data finished at PG level")
    return pg
Example #16
def init_pg_repro_drop_data(drop: dict):
    """
    Creates and appends per-drop reproducibility information at the physical graph stage.
    :param drop: The drop description
    :return: The same drop with appended reproducibility information
    """
    level = rflag_caster(drop["reprodata"]["rmode"])
    data = accumulate_pg_drop_data(drop)
    if level == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            merkletree = MerkleTree(data[rmode.name].items(), common_hash)
            data[rmode.name]["merkleroot"] = merkletree.merkle_root
            drop["reprodata"][rmode.name]["pg_parenthashes"] = {}
            drop["reprodata"][rmode.name]["pg_data"] = data[rmode.name]
    else:
        merkletree = MerkleTree(data.items(), common_hash)
        data["merkleroot"] = merkletree.merkle_root
        #  Separated so chaining can happen on independent elements (or both later)
        drop["reprodata"]["pg_parenthashes"] = {}
        drop["reprodata"]["pg_data"] = data
    return drop
Example #17
def accumulate_pg_drop_data(drop: dict):
    """
    Accumulate relevant reproducibility fields for a single drop at the physical graph level.
    :param drop:
    :return: A dictionary containing accumulated reproducibility data for a given drop.
    """
    level = rflag_caster(drop["reprodata"]["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)
    if level == ReproducibilityFlags.ALL:
        data = {}
        for rmode in ALL_RMODES:
            pg_fields = pg_block_fields(rmode)
            data[rmode.name] = extract_fields(drop, pg_fields)
    else:
        pg_fields = pg_block_fields(level)
        data = extract_fields(drop, pg_fields)
    return data
Example #18
def append_pgt_repro_data(drop: dict, data: dict):
    """
    Adds provided data dictionary to drop description at PGT level.
    :param drop: The drop description
    :param data: The data to be added - arbitrary dictionary
    :return:
    """
    level = rflag_caster(drop["reprodata"]["rmode"])
    if level == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            merkletree = MerkleTree(data[rmode.name].items(), common_hash)
            data[rmode.name]["merkleroot"] = merkletree.merkle_root
            drop["reprodata"][rmode.name]["pgt_parenthashes"] = {}
            drop["reprodata"][rmode.name]["pgt_data"] = data[rmode.name]
    else:
        merkletree = MerkleTree(data.items(), common_hash)
        data["merkleroot"] = merkletree.merkle_root
        #  Separated so chaining can happen on independent elements (or both later)
        drop["reprodata"]["pgt_parenthashes"] = {}
        drop["reprodata"]["pgt_data"] = data
    return drop
Example #19
def process_single(data):
    """
    Processes reprodata containing a single signature.
    Builds a small dictionary mapping the 'rmode' to the signature
    """
    return {rflag_caster(data.get("rmode")).value: data.get("signature")}
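A hypothetical call, assuming rflag_caster maps the string "1" to a flag whose .value is 1:

# process_single({"rmode": "1", "signature": "abc123"})  ->  {1: "abc123"}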
Example #20
def lg_build_blockdag(logical_graph: dict, level=None):
    """
    Uses Kahn's algorithm to topologically sort a logical graph dictionary.
    Exploits that a DAG contains at least one node with in-degree 0.
    Processes drops in-order.
    O(V + E) time complexity.
    :param logical_graph: The logical graph description (template or actual)
    :param level: The single rmode layer to build when the graph-wide mode is ALL (None otherwise)
    :return: The list of leaf block hashes and the list of visited drop ids (in order).
    """
    dropset = {}  # Also contains in-degree information
    neighbourset = {}
    roots = []
    leaves = []
    visited = []
    queue = collections.deque()
    # TODO: Deal with MKN/Scatter Input drops
    for drop in logical_graph.get("nodeDataArray", []):
        did = int(drop["key"])
        dropset[did] = [drop, 0, 0]
        neighbourset[did] = []

    for edge in logical_graph.get("linkDataArray", []):
        src = int(edge["from"])
        dest = int(edge["to"])
        dropset[dest][1] += 1
        dropset[src][2] += 1
        neighbourset[src].append(dest)

    #  did == 'drop id'
    for did, drop in dropset.items():
        if drop[1] == 0:
            queue.append(did)
            roots.append(did)
        if not neighbourset[did]:  # Leaf node
            leaves.append(did)

    while queue:
        did = queue.pop()
        # Process
        if "reprodata" not in dropset[did][0]:
            continue
        build_lg_block_data(dropset[did][0], level)
        visited.append(did)
        rmode = rflag_caster(dropset[did][0]["reprodata"]["rmode"])
        if rmode == ReproducibilityFlags.ALL:
            rmode = level  # Only building one layer at a time.
        for neighbour in neighbourset[did]:
            dropset[neighbour][1] -= 1
            parenthash = {}
            if rmode != ReproducibilityFlags.NOTHING:
                if rmode == ReproducibilityFlags.REPRODUCE:
                    if (dropset[did][0]["category"] in STORAGE_TYPES
                            and (dropset[did][1] == 0 or dropset[did][2] == 0)
                            and (did in roots or did in leaves)):
                        # Add my new hash to the parent-hash list
                        if did not in parenthash:
                            if level is None:
                                parenthash[did] = dropset[did][0]["reprodata"][
                                    "lg_blockhash"]
                            else:
                                parenthash[did] = dropset[did][0]["reprodata"][
                                    level.name]["lg_blockhash"]
                        # parenthash.append(dropset[did][0]['reprodata']['lg_blockhash'])
                    else:
                        # Add my parenthashes to the parent-hash list
                        if level is None:
                            parenthash.update(dropset[did][0]["reprodata"]
                                              ["lg_parenthashes"])
                        else:
                            parenthash.update(dropset[did][0]["reprodata"][
                                level.name]["lg_parenthashes"])
                        # parenthash.extend(dropset[did][0]['reprodata']['lg_parenthashes'])
                if rmode != ReproducibilityFlags.REPRODUCE:  # Non-compressing behaviour
                    if level is None:
                        parenthash[did] = dropset[did][0]["reprodata"][
                            "lg_blockhash"]
                    else:
                        parenthash[did] = dropset[did][0]["reprodata"][
                            level.name]["lg_blockhash"]
                    # parenthash.append(dropset[did][0]['reprodata']['lg_blockhash'])
                #  Add our new hash to the parent-hash list
                # We deal with duplicates later
                if level is None:
                    dropset[neighbour][0]["reprodata"][
                        "lg_parenthashes"].update(parenthash)
                else:
                    dropset[neighbour][0]["reprodata"][
                        level.name]["lg_parenthashes"].update(parenthash)
            if dropset[neighbour][1] == 0:  # Add drops at the DAG-frontier
                queue.append(neighbour)

    if len(visited) != len(dropset):
        logger.warning("Untraversed graph")

    logger.info("BlockDAG Generated at LG/T level")

    for i, leaf in enumerate(leaves):
        if level is None:
            # WARNING: Remove once dealt with MKN Nodes
            if "reprodata" in dropset[leaf][0]:
                leaves[i] = dropset[leaf][0]["reprodata"].get(
                    "lg_blockhash", "")
        else:
            if "reprodata" in dropset[leaf][0]:
                leaves[i] = dropset[leaf][0]["reprodata"][level.name].get(
                    "lg_blockhash", "")
    return leaves, visited
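The traversal above is a standard Kahn topological sort with per-drop reproducibility bookkeeping layered on top. A self-contained sketch of just the sorting core (hypothetical node ids, no reproducibility data):

import collections

def kahn_order(nodes, edges):
    """Return nodes in topological order; edges is a list of (src, dest) pairs."""
    indegree = {node: 0 for node in nodes}
    neighbours = {node: [] for node in nodes}
    for src, dest in edges:
        indegree[dest] += 1
        neighbours[src].append(dest)
    queue = collections.deque(node for node in nodes if indegree[node] == 0)
    order = []
    while queue:
        node = queue.pop()
        order.append(node)
        for neighbour in neighbours[node]:
            indegree[neighbour] -= 1
            if indegree[neighbour] == 0:  # the neighbour has joined the DAG frontier
                queue.append(neighbour)
    return order

# kahn_order([1, 2, 3], [(1, 2), (1, 3), (2, 3)])  ->  [1, 2, 3]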
Example #21
def lg_build_blockdag(lg: dict):
    """
    Uses Kahn's algorithm to topologically sort a logical graph dictionary.
    Exploits that a DAG contains at least one node with in-degree 0.
    Processes drops in-order.
    O(V + E) time complexity.
    :param lg: The logical graph data structure (a JSON object (a dict))
    :return: The list of leaf block hashes and the list of visited drop ids (in order).
    """
    from collections import deque
    dropset = {}  # Also contains in-degree information
    neighbourset = {}
    leaves = []
    visited = []
    q = deque()
    for drop in lg['nodeDataArray']:
        did = int(drop['key'])
        dropset[did] = [drop, 0, 0]
        neighbourset[did] = []

    for edge in lg['linkDataArray']:
        src = int(edge['from'])
        dest = int(edge['to'])
        dropset[dest][1] += 1
        dropset[src][2] += 1
        neighbourset[src].append(dest)

    #  did == 'drop id'
    for did in dropset:
        if dropset[did][1] == 0:
            q.append(did)
        if not neighbourset[did]:  # Leaf node
            leaves.append(did)

    while q:
        did = q.pop()
        # Process
        build_lg_block_data(dropset[did][0])
        visited.append(did)
        rmode = rflag_caster(dropset[did][0]['reprodata']['rmode']).value
        for n in neighbourset[did]:
            dropset[n][1] -= 1
            parenthash = {}
            if rmode == ReproducibilityFlags.REPRODUCE.value:
                if dropset[did][0]['categoryType'] == Categories.DATA \
                        and (dropset[did][1] == 0 or dropset[did][2] == 0):
                    # Add my new hash to the parent-hash list
                    if did not in parenthash.keys():
                        parenthash[did] = dropset[did][0]['reprodata'][
                            'lg_blockhash']
                    # parenthash.append(dropset[did][0]['reprodata']['lg_blockhash'])
                else:
                    # Add my parenthashes to the parent-hash list
                    parenthash.update(
                        dropset[did][0]['reprodata']['lg_parenthashes'])
                    # parenthash.extend(dropset[did][0]['reprodata']['lg_parenthashes'])
            if rmode != ReproducibilityFlags.REPRODUCE.value:  # Non-compressing behaviour
                parenthash[did] = dropset[did][0]['reprodata']['lg_blockhash']
                # parenthash.append(dropset[did][0]['reprodata']['lg_blockhash'])
            #  Add our new hash to the parent-hash list
            dropset[n][0]['reprodata']['lg_parenthashes'].update(
                parenthash)  # We deal with duplicates later
            if dropset[n][1] == 0:  # Add drops at the DAG-frontier
                q.append(n)

    if len(visited) != len(dropset):
        raise Exception("Untraversed graph")

    logger.info("BlockDAG Generated at LG/T level")

    for i in range(len(leaves)):
        leaf = leaves[i]
        leaves[i] = dropset[leaf][0]['reprodata']['lg_blockhash']
    return leaves, visited
Example #22
def build_blockdag(drops: list, abstraction: str = "pgt", level=None):
    """
    Uses Kahn's algorithm to topologically sort a list of drop descriptions.
    Exploits that a DAG contains at least one node with in-degree 0.
    Processes drops in-order.
    O(V + E) time complexity.
    :param drops: The list of drops
    :param abstraction: The level of graph abstraction: 'pgt', 'pg' or 'rg'
    :param level: The single rmode layer to build when the graph-wide mode is ALL (None otherwise)
    :return: The list of leaf block hashes and the list of visited drop oids (in order).
    """
    blockstr = "pgt"
    parentstr = "pgt_parenthashes"
    block_builder = build_pgt_block_data
    if abstraction == "pg":
        blockstr = "pg"
        parentstr = "pg_parenthashes"
        block_builder = build_pg_block_data
    if abstraction == "rg":
        blockstr = "rg"
        parentstr = "rg_parenthashes"
        block_builder = build_rg_block_data

    dropset = {}
    neighbourset = {}
    roots = []
    leaves = []
    visited = []
    queue = collections.deque()
    for drop in drops:
        did = drop["oid"]
        dropset[did] = [drop, 0, 0]
    for drop in drops:
        did = drop["oid"]
        neighbourset[did] = []
        if "outputs" in drop:
            # Assumes the model where all edges are defined from source to destination.
            # This may not always be the case.
            for dest in drop["outputs"]:
                if isinstance(dest, dict):
                    dest = next(iter(dest))
                dropset[dest][1] += 1
                dropset[did][2] += 1
                neighbourset[did].append(dest)
        if ("consumers" in drop
            ):  # There may be some bizarre scenario when a drop has both
            for dest in drop["consumers"]:
                if isinstance(dest, dict):
                    dest = next(iter(dest))
                dropset[dest][1] += 1
                dropset[did][2] += 1
                # TODO: Appending may not be correct behaviour
                neighbourset[did].append(dest)
    for did, drop_val in dropset.items():
        if drop_val[1] == 0:
            queue.append(did)
            roots.append(did)
        if not neighbourset[did]:  # Leaf node
            leaves.append(did)
    while queue:
        did = queue.pop()
        block_builder(dropset[did][0], level)
        visited.append(did)
        rmode = rflag_caster(dropset[did][0]["reprodata"]["rmode"])
        if rmode == ReproducibilityFlags.ALL:
            rmode = level
        for neighbour in neighbourset[did]:
            dropset[neighbour][1] -= 1
            parenthash = {}
            if rmode != ReproducibilityFlags.NOTHING:
                if rmode == ReproducibilityFlags.REPRODUCE:
                    # WARNING: Hack! may break later, proceed with caution
                    if level is None:
                        category = dropset[did][0]["reprodata"]["lgt_data"][
                            "category"]
                    else:
                        category = dropset[did][0]["reprodata"][
                            rmode.name]["lgt_data"]["category"]
                    if (category in STORAGE_TYPES
                            and (dropset[did][1] == 0 or dropset[did][2] == 0)
                            and (did in roots or did in leaves)):
                        # Add my new hash to the parent-hash list
                        if did not in parenthash:
                            if level is None:
                                parenthash[did] = dropset[did][0]["reprodata"][
                                    blockstr + "_blockhash"]
                            else:
                                parenthash[did] = dropset[did][0]["reprodata"][
                                    level.name][blockstr + "_blockhash"]
                        # parenthash.append(dropset[did][0]['reprodata'] \
                        # [blockstr + "_blockhash"])
                    else:
                        # Add my parenthashes to the parent-hash list
                        if level is None:
                            parenthash.update(
                                dropset[did][0]["reprodata"][parentstr])
                        else:
                            parenthash.update(dropset[did][0]["reprodata"][
                                level.name][parentstr])
                if rmode != ReproducibilityFlags.REPRODUCE:
                    if level is None:
                        parenthash[did] = dropset[did][0]["reprodata"][
                            blockstr + "_blockhash"]
                    else:
                        parenthash[did] = dropset[did][0]["reprodata"][
                            level.name][blockstr + "_blockhash"]
                # Add our new hash to the parent-hash list if on the critical path
                if rmode == ReproducibilityFlags.RERUN:
                    if "iid" in dropset[did][0].keys():
                        if (dropset[did][0]["iid"] == "0/0"
                            ):  # TODO: This is probably wrong
                            if level is None:
                                dropset[neighbour][0]["reprodata"][
                                    parentstr].update(parenthash)
                            else:
                                dropset[neighbour][0]["reprodata"][
                                    level.name][parentstr].update(parenthash)
                    else:
                        if level is None:
                            dropset[neighbour][0]["reprodata"][
                                parentstr].update(parenthash)
                        else:
                            dropset[neighbour][0]["reprodata"][
                                level.name][parentstr].update(parenthash)
                else:
                    if level is None:
                        dropset[neighbour][0]["reprodata"][parentstr].update(
                            parenthash)
                    else:
                        dropset[neighbour][0]["reprodata"][
                            level.name][parentstr].update(parenthash)
            if dropset[neighbour][1] == 0:
                queue.append(neighbour)

    if len(visited) != len(dropset):
        logger.warning("Not a DAG")

    for i, leaf in enumerate(leaves):
        if level is None:
            leaves[i] = dropset[leaf][0]["reprodata"][blockstr + "_blockhash"]
        else:
            leaves[i] = dropset[leaf][0]["reprodata"][level.name][blockstr + "_blockhash"]
    return leaves, visited
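The three string comparisons at the top of build_blockdag amount to a lookup table; a minimal sketch of the same selection written as a dict (assuming the build_*_block_data callables referenced above are in scope):

_ABSTRACTIONS = {
    "pgt": ("pgt", "pgt_parenthashes", build_pgt_block_data),
    "pg": ("pg", "pg_parenthashes", build_pg_block_data),
    "rg": ("rg", "rg_parenthashes", build_rg_block_data),
}
blockstr, parentstr, block_builder = _ABSTRACTIONS.get(abstraction, _ABSTRACTIONS["pgt"])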