def accumulate_pgt_partition_drop_data(drop: dict):
    """
    Accumulates the reproducibility fields of a single drop at the partition
    stage by combining the unroll-stage data with the partition-stage fields.

    If the drop has no ``reprodata`` entry, or no ``rmode`` inside it, the
    default mode is written into the drop. An unsupported mode is logged and
    replaced by the default as well.

    :param drop: The drop dictionary; its ``reprodata`` entry may be created
        or updated in place.
    :return: A dictionary of accumulated fields. When the mode is ``ALL`` the
        dictionary is keyed by mode name, each value holding that mode's fields.
    """
    if drop.get("reprodata") is None:
        drop["reprodata"] = {
            "rmode": str(REPRO_DEFAULT.value),
            "lg_blockhash": None,
        }
    if drop["reprodata"].get("rmode") is None:
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)
    else:
        level = rflag_caster(drop["reprodata"]["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(level.value)

    if level == ReproducibilityFlags.ALL:
        unroll_data = accumulate_pgt_unroll_drop_data(drop)
        for rmode in ALL_RMODES:
            partition_data = extract_fields(drop,
                                            pgt_partition_block_fields(rmode))
            # The unroll stage can hand back a dictionary without per-mode
            # entries (e.g. an empty one), so create the entry on demand
            # instead of failing with a KeyError.
            unroll_data.setdefault(rmode.name, {}).update(partition_data)
        return unroll_data

    partition_data = extract_fields(drop, pgt_partition_block_fields(level))
    return_data = accumulate_pgt_unroll_drop_data(drop)
    return_data.update(partition_data)
    return return_data
def init_runtime_repro_data(runtime_graph: dict, reprodata: dict):
    """
    Attaches graph-wide reproducibility data to a runtime graph.

    Every drop in the graph is passed through ``init_rg_repro_drop_data``, a
    block DAG is built over the drops and the agglomerated leaves are stored
    as the signature (one per mode when the mode is ``ALL``).

    :param runtime_graph: The runtime graph; receives a ``reprodata`` key.
    :param reprodata: The graph-wide reproducibility dictionary, or None.
    :return: The same runtime graph (untouched when ``reprodata`` is None).
    """
    if reprodata is None:
        return runtime_graph

    mode = rflag_caster(reprodata["rmode"])
    if not rmode_supported(mode):
        # TODO: Logging needs sessionID at this stage, so no warning is
        # emitted here when falling back to the default mode.
        mode = REPRO_DEFAULT
        reprodata["rmode"] = str(mode.value)

    for entry in runtime_graph.values():
        init_rg_repro_drop_data(entry)

    if mode != ReproducibilityFlags.ALL:
        leaves, _ = build_blockdag(list(runtime_graph.values()), "rg")
        reprodata["signature"] = agglomerate_leaves(leaves)
    else:
        for rmode in ALL_RMODES:
            leaves, _ = build_blockdag(list(runtime_graph.values()), "rg",
                                       rmode)
            reprodata[rmode.name]["signature"] = agglomerate_leaves(leaves)

    runtime_graph["reprodata"] = reprodata
    return runtime_graph
def accumulate_pgt_unroll_drop_data(drop: dict):
    """
    Collects the reproducibility fields of one drop at the physical graph
    template (unroll) stage.

    An unsupported mode is logged, replaced by the default and written back
    into the drop's ``reprodata``.

    :param drop: The drop dictionary; ``drop['reprodata']['rmode']`` is read.
    :return: A dictionary of the fields relevant to the drop's mode (empty for
        ``NOTHING``).
    """
    mode = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(mode):
        logger.warning('Requested reproducibility mode %s not yet implemented',
                       str(mode))
        mode = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(mode.value)

    summary = {}
    if mode == ReproducibilityFlags.NOTHING:
        return summary

    if mode == ReproducibilityFlags.REPRODUCE:
        summary['type'] = drop['type']
        if drop['type'] == 'plain':
            summary['storage'] = drop['storage']
        return summary

    if mode.value >= ReproducibilityFlags.RERUN.value:
        drop_type = drop['type']
        summary['type'] = drop_type
        # Non-plain drops record 'dt' instead of 'storage'.
        # WARNING: 'dt' was added to differentiate between subtle component
        # differences.
        extra_key = 'storage' if drop_type == 'plain' else 'dt'
        summary[extra_key] = drop[extra_key]

    if mode in (ReproducibilityFlags.RECOMPUTE,
                ReproducibilityFlags.REPLICATE_COMP):
        summary['rank'] = drop['rank']
    return summary
def accumulate_lg_drop_data(drop: dict, level: ReproducibilityFlags):
    """
    Collects the reproducibility fields of one drop at the logical graph stage.

    The drop's ``fields`` and ``applicationArgs`` lists are flattened into a
    single name -> value mapping (application arguments win on a name clash),
    from which the fields selected by ``lg_block_fields`` are extracted.

    :param drop: The drop dictionary.
    :param level: The reproducibility mode to accumulate for.
    :return: A dictionary containing accumulated reproducibility data for the
        given drop.
    :raises NotImplementedError: if ``level`` is not a supported mode.
    """
    if not rmode_supported(level):
        raise NotImplementedError(
            f"Reproducibility level {level.name} not yet supported")

    def _flatten(entries):
        # dict(entry) copies each entry, so popping 'name' never touches the
        # drop's own data.
        flat = {}
        for entry in map(dict, entries):
            key = entry.pop("name")
            flat[key] = entry["value"]
        return flat

    drop_category = drop.get("category", "")
    merged = _flatten(drop.get("fields", {}))
    app_args = _flatten(drop.get("applicationArgs", {}))
    merged.update(app_args)

    wanted = lg_block_fields(drop_category, level, app_args.keys())
    return extract_fields(merged, wanted)
def init_pg_repro_data(physical_graph: list):
    """
    Handles adding reproducibility data at the physical graph (PG) level.

    The last element of the list is treated as the graph-wide reprodata
    dictionary. It is popped, updated with the signature(s) computed from the
    block DAG over the remaining drops, and appended again.

    :param physical_graph: The physical graph data structure (a list of drops
        followed by a reprodata dictionary)
    :return: The same pg object with new information appended. An empty list,
        a trailing element without ``rmode``, or mode ``NOTHING`` leaves the
        graph unchanged.
    """
    if not physical_graph:
        # Nothing to pop: an empty graph carries no reprodata entry.
        return physical_graph

    reprodata = physical_graph.pop()
    if "rmode" not in reprodata:
        physical_graph.append(reprodata)
        return physical_graph

    level = rflag_caster(reprodata["rmode"])
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
    if level == ReproducibilityFlags.NOTHING:
        physical_graph.append(reprodata)
        return physical_graph

    for drop in physical_graph:
        init_pg_repro_drop_data(drop)

    if level == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            leaves, _ = build_blockdag(physical_graph, "pg", rmode)
            # Create the per-mode entry on demand, as init_lg_repro_data does,
            # rather than failing with a KeyError when it is missing.
            reprodata.setdefault(rmode.name, {})["signature"] = \
                agglomerate_leaves(leaves)
    else:
        leaves, _ = build_blockdag(physical_graph, "pg")
        reprodata["signature"] = agglomerate_leaves(leaves)

    physical_graph.append(reprodata)
    logger.info("Reproducibility data finished at PG level")
    return physical_graph
def init_lgt_repro_data(logical_graph_template: dict, rmode: str):
    """
    Creates graph-wide reproducibility data at the logical graph template
    stage and attaches it to the template under ``reprodata``.

    The graph-wide entry holds the mode, the result of
    ``accumulate_meta_data()`` and a merkle root over those two items. Every
    node in ``nodeDataArray`` is passed to ``init_lgt_repro_drop_data``.

    :param logical_graph_template: The logical graph template (a dict).
    :param rmode: The requested reproducibility mode, in the form accepted by
        ``rflag_caster``.
    :return: The same template object; unchanged when the mode is ``NOTHING``.
    """
    level = rflag_caster(rmode)
    if not rmode_supported(level):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(level))
        level = REPRO_DEFAULT
    if level == ReproducibilityFlags.NOTHING:
        return logical_graph_template

    graph_reprodata = {
        "rmode": str(level.value),
        "meta_data": accumulate_meta_data(),
    }
    # The root is computed before 'merkleroot' itself is inserted.
    graph_reprodata["merkleroot"] = MerkleTree(graph_reprodata.items(),
                                               common_hash).merkle_root

    for node in logical_graph_template.get("nodeDataArray", []):
        init_lgt_repro_drop_data(node, level)

    logical_graph_template["reprodata"] = graph_reprodata
    logger.info("Reproducibility data finished at LGT level")
    return logical_graph_template
def init_lg_repro_data(logical_graph: dict):
    """
    Adds reproducibility data at the logical graph stage and builds the
    logical block DAG over the whole graph.

    :param logical_graph: The logical graph data structure (a dict).
    :return: The same logical graph object. It is returned unchanged when it
        has no ``reprodata`` entry or its mode is ``NOTHING``.
    """
    if "reprodata" not in logical_graph:
        return logical_graph

    mode = rflag_caster(logical_graph["reprodata"]["rmode"])
    if not rmode_supported(mode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(mode))
        mode = REPRO_DEFAULT
    if mode == ReproducibilityFlags.NOTHING:
        return logical_graph

    for node in logical_graph.get("nodeDataArray", []):
        init_lg_repro_drop_data(node)

    if mode != ReproducibilityFlags.ALL:
        leaves, _ = lg_build_blockdag(logical_graph)
        logical_graph["reprodata"]["signature"] = agglomerate_leaves(leaves)
    else:
        for rmode in ALL_RMODES:
            # Make sure a per-mode entry exists before the DAG is built.
            logical_graph["reprodata"].setdefault(rmode.name, {})
            leaves, _ = lg_build_blockdag(logical_graph, rmode)
            signature = agglomerate_leaves(leaves)
            logical_graph["reprodata"][rmode.name]["signature"] = signature

    logger.info("Reproducibility data finished at LG level")
    return logical_graph
def init_lg_repro_drop_data(drop: dict):
    """
    Creates per-drop reproducibility information at the logical graph stage
    and stores it in the drop's ``reprodata``.

    For mode ``ALL`` the data is stored per mode under
    ``drop["reprodata"][<mode name>]``; otherwise it is stored directly under
    ``drop["reprodata"]``.

    :param drop: The drop dictionary; ``drop["reprodata"]["rmode"]`` is read.
    :return: The same drop with appended reproducibility information.
    """
    mode = rflag_caster(drop["reprodata"]["rmode"])
    if not rmode_supported(mode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(mode))
        mode = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(mode.value)

    def _lg_block(for_mode):
        # Accumulate the fields and add the merkle root computed over them.
        block = accumulate_lg_drop_data(drop, for_mode)
        block["merkleroot"] = MerkleTree(block.items(),
                                         common_hash).merkle_root
        return block

    if mode == ReproducibilityFlags.ALL:
        for rmode in ALL_RMODES:
            block = _lg_block(rmode)
            per_mode = drop["reprodata"][rmode.name]
            per_mode["lg_data"] = block
            per_mode["lg_parenthashes"] = {}
        return drop

    block = _lg_block(mode)
    drop["reprodata"]["lg_data"] = block
    drop["reprodata"]["lg_parenthashes"] = {}
    return drop
def accumulate_lgt_drop_data(drop: dict, level: ReproducibilityFlags):
    """
    Collects the reproducibility fields of one drop at the logical graph
    template stage.

    :param drop: The drop dictionary.
    :param level: The reproducibility mode to accumulate for.
    :return: A dictionary containing accumulated reproducibility data for the
        given drop (empty for ``NOTHING``).
    :raises NotImplementedError: if ``level`` is not a supported mode.
    """
    if level == ReproducibilityFlags.NOTHING:
        return {}

    category_type = drop['categoryType']
    category = drop['category']

    if not rmode_supported(level):
        raise NotImplementedError(
            "Reproducibility level %s not yet supported" % level.name)

    # REPRODUCE only needs the category information, so it is handled before
    # the broader value comparison below.
    if level == ReproducibilityFlags.REPRODUCE:
        return {
            'category_type': category_type,
            'category': category,
        }

    if level.value >= ReproducibilityFlags.RERUN.value:
        return {
            'category_type': category_type,
            'category': category,
            'numInputPorts': len(drop['inputPorts']),
            'numOutputPorts': len(drop['outputPorts']),
            'streaming': drop['streaming'],
        }
    return {}
def accumulate_pgt_unroll_drop_data(drop: dict):
    """
    Collects the reproducibility fields of one drop at the physical graph
    template (unroll) stage.

    A missing ``reprodata`` entry or missing ``rmode`` is filled in with the
    default mode; an unsupported mode is logged and replaced by the default.

    :param drop: The drop dictionary; its ``reprodata`` may be created or
        updated in place.
    :return: A dictionary containing accumulated reproducibility data for the
        given drop. It is empty when the drop has no ``type``, and keyed by
        mode name when the mode is ``ALL``.
    """
    if drop.get("reprodata") is None:
        drop["reprodata"] = {
            "rmode": str(REPRO_DEFAULT.value),
            "lg_blockhash": None,
        }

    stored_mode = drop["reprodata"].get("rmode")
    if stored_mode is None:
        mode = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(mode.value)
    else:
        mode = rflag_caster(stored_mode)

    if not rmode_supported(mode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(mode))
        mode = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(mode.value)

    drop_type = drop.get("type")
    if drop_type is None:
        return {}

    if mode == ReproducibilityFlags.ALL:
        return {
            rmode.name: extract_fields(
                drop, pgt_unroll_block_fields(drop_type, rmode))
            for rmode in ALL_RMODES
        }
    return extract_fields(drop, pgt_unroll_block_fields(drop_type, mode))
def accumulate_lgt_drop_data(drop: dict, level: ReproducibilityFlags):
    """
    Collects the reproducibility fields of one drop at the logical graph
    template stage.

    :param drop: The drop dictionary.
    :param level: The reproducibility mode to accumulate for.
    :return: The fields selected by ``lgt_block_fields(level)``, extracted
        from the drop.
    :raises NotImplementedError: if ``level`` is not a supported mode.
    """
    if rmode_supported(level):
        return extract_fields(drop, lgt_block_fields(level))
    raise NotImplementedError(
        f"Reproducibility level {level.name} not yet supported")
def accumulate_pg_drop_data(drop: dict):
    """
    Collects the reproducibility fields of one drop at the physical graph
    stage.

    An unsupported mode is logged, replaced by the default and written back
    into the drop's ``reprodata``.

    :param drop: The drop dictionary; ``drop['reprodata']['rmode']`` is read.
    :return: A dictionary with the drop's ``node`` and ``island`` for the
        ``REPLICATE_COMP`` and ``RECOMPUTE`` modes, otherwise an empty one.
    """
    mode = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(mode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(mode))
        mode = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(mode.value)

    placement_modes = (ReproducibilityFlags.REPLICATE_COMP,
                       ReproducibilityFlags.RECOMPUTE)
    if mode not in placement_modes:
        return {}
    return {'node': drop['node'], 'island': drop['island']}
def init_lg_repro_drop_data(drop: dict):
    """
    Creates per-drop reproducibility information at the logical graph stage
    and stores it in the drop's ``reprodata``.

    :param drop: The drop dictionary; ``drop['reprodata']['rmode']`` is read.
    :return: The same drop, with ``lg_data`` (including its merkle root) and
        an empty ``lg_parenthashes`` added to its ``reprodata``.
    """
    mode = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(mode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(mode))
        mode = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(mode.value)

    lg_block = accumulate_lg_drop_data(drop, mode)
    # The root is computed before 'merkleroot' itself is inserted.
    lg_block['merkleroot'] = MerkleTree(lg_block.items(),
                                        common_hash).merkle_root

    repro_entry = drop['reprodata']
    repro_entry['lg_data'] = lg_block
    repro_entry['lg_parenthashes'] = {}
    return drop
def accumulate_pgt_partition_drop_data(drop: dict):
    """
    Collects the reproducibility fields of one drop at the partition stage:
    the unroll-stage data plus, for some modes, the drop's placement.

    :param drop: The drop dictionary; ``drop['reprodata']['rmode']`` is read.
    :return: The unroll-stage dictionary, extended with ``node`` and ``island``
        for the ``REPLICATE_COMP`` and ``RECOMPUTE`` modes.
    """
    mode = rflag_caster(drop['reprodata']['rmode'])
    if not rmode_supported(mode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(mode))
        mode = REPRO_DEFAULT
        drop['reprodata']['rmode'] = str(mode.value)

    accumulated = accumulate_pgt_unroll_drop_data(drop)

    # Placement is the only new information at the partition level, and it is
    # only pertinent to repetition and computational replication.
    if mode in (ReproducibilityFlags.REPLICATE_COMP,
                ReproducibilityFlags.RECOMPUTE):
        # The [1:] slice skips the first element of each value.
        for key in ('node', 'island'):
            accumulated[key] = drop[key][1:]
    return accumulated
def init_runtime_repro_data(rg: dict, reprodata: dict):
    """
    Attaches graph-wide reproducibility data to a runtime graph.

    :param rg: The runtime graph (a dict of drops); receives a ``reprodata``
        key.
    :param reprodata: The graph-wide reproducibility dictionary; receives the
        ``signature``.
    :return: The same runtime graph.
    """
    mode = rflag_caster(reprodata['rmode'])
    if not rmode_supported(mode):
        # TODO: Logging needs sessionID at this stage, so no warning is
        # emitted here when falling back to the default mode.
        mode = REPRO_DEFAULT
        reprodata['rmode'] = str(mode.value)

    for entry in rg.values():
        init_rg_repro_drop_data(entry)

    leaves, _ = build_blockdag(list(rg.values()), 'rg')
    reprodata['signature'] = agglomerate_leaves(leaves)
    rg['reprodata'] = reprodata
    return rg
def init_pg_repro_data(pg: list):
    """
    Adds reproducibility data at the physical graph stage.

    The last element of the list is taken to be the graph-wide reprodata
    dictionary: it is popped, given a ``signature`` and appended again.

    :param pg: The physical graph (a list of drops followed by a reprodata
        dictionary).
    :return: The same pg object with new information appended.
    """
    graph_reprodata = pg.pop()

    mode = rflag_caster(graph_reprodata['rmode'])
    if not rmode_supported(mode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(mode))
        mode = REPRO_DEFAULT
        graph_reprodata['rmode'] = str(mode.value)

    for entry in pg:
        init_pg_repro_drop_data(entry)

    leaves, _ = build_blockdag(pg, 'pg')
    graph_reprodata['signature'] = agglomerate_leaves(leaves)

    pg.append(graph_reprodata)
    logger.info("Reproducibility data finished at PG level")
    return pg
def accumulate_pg_drop_data(drop: dict):
    """
    Collects the reproducibility fields of one drop at the physical graph
    stage.

    An unsupported mode is logged, replaced by the default and written back
    into the drop's ``reprodata``.

    :param drop: The drop dictionary; ``drop["reprodata"]["rmode"]`` is read.
    :return: The fields selected by ``pg_block_fields``, extracted from the
        drop. The result is keyed by mode name when the mode is ``ALL``.
    """
    mode = rflag_caster(drop["reprodata"]["rmode"])
    if not rmode_supported(mode):
        logger.warning("Requested reproducibility mode %s not yet implemented",
                       str(mode))
        mode = REPRO_DEFAULT
        drop["reprodata"]["rmode"] = str(mode.value)

    if mode != ReproducibilityFlags.ALL:
        return extract_fields(drop, pg_block_fields(mode))
    return {
        rmode.name: extract_fields(drop, pg_block_fields(rmode))
        for rmode in ALL_RMODES
    }
def accumulate_lg_drop_data(drop: dict, level: ReproducibilityFlags):
    """
    Collects the reproducibility fields of one drop at the logical graph stage.

    Which fields are copied depends on the mode and on the drop's
    ``categoryType`` / ``category``. ``NOTHING``, ``RERUN`` and ``REPRODUCE``
    copy nothing here.

    :param drop: The drop dictionary; ``categoryType``, ``category`` and
        ``fields`` are read.
    :param level: The reproducibility mode to accumulate for.
    :return: A dictionary containing accumulated reproducibility data for the
        given drop.
    :raises NotImplementedError: if ``level`` is not a supported mode.
    """
    collected = {}
    if level == ReproducibilityFlags.NOTHING:
        return collected

    category_type = drop['categoryType']
    category = drop['category']

    # Flatten the field list into name -> value; dict(entry) works on a copy
    # so the drop's own entries are not modified by the pop.
    fields = {}
    for entry in map(dict, drop['fields']):
        name = entry.pop('name')
        fields[name] = entry['value']

    if not rmode_supported(level):
        raise NotImplementedError(
            "Reproducibility level %s not yet supported" % level.name)

    def take(*names):
        # Copy the named fields into the result, in the order given.
        for field_name in names:
            collected[field_name] = fields[field_name]

    detailed_levels = (
        ReproducibilityFlags.REPEAT,
        ReproducibilityFlags.REPLICATE_COMP,
        ReproducibilityFlags.RECOMPUTE,
        ReproducibilityFlags.REPLICATE_TOTAL,
    )
    if level != ReproducibilityFlags.RERUN and level in detailed_levels:
        if category_type == 'Application':
            take('execution_time', 'num_cpus')
            if category == Categories.BASH_SHELL_APP:
                collected['command'] = fields['Arg01']
            elif category == Categories.DYNLIB_APP:
                # TODO: Deal with DYNLIB_PROC
                take('libpath')
            elif category == Categories.MPI:
                take('num_of_procs')
            elif category == Categories.DOCKER:
                take('image', 'command', 'user', 'ensureUserAndSwitch',
                     'removeContainer', 'additionalBindings')
            elif category == Categories.COMPONENT:
                take('appclass')
        elif category_type == Categories.DATA:
            take('data_volume')
            if category == Categories.MEMORY:
                pass  # nothing beyond data_volume
            elif category == Categories.FILE:
                take('check_filepath_exists')
            # S3, NGAS, JSON and NULL add nothing further.
        elif category_type == 'Group':
            collected['exitAppName'] = drop['exitAppName']
            if category == Categories.GROUP_BY:
                take('group_key', 'group_axis')
            elif category == Categories.GATHER:
                take('num_of_inputs', 'gather_axis')
            elif category == Categories.SCATTER:
                take('num_of_copies', 'scatter_axis')
            elif category == Categories.LOOP:
                take('num_of_iter')
        # 'Control' and 'Other' category types add nothing.

    if level in (ReproducibilityFlags.RECOMPUTE,
                 ReproducibilityFlags.REPLICATE_COMP):
        if category_type == Categories.DATA and category == Categories.FILE:
            take('filepath', 'dirname')
    return collected