def local_main():
    current_dir = os.path.dirname(os.path.realpath(__file__))
    data_path = os.path.join(current_dir, "data")
    for pid in {"1", "2"}:
        # define name for the workflow
        workflow_name = "aspirin-local-test-" + pid
        # configure conclave
        conclave_config = CodeGenConfig(workflow_name, int(pid))
        conclave_config.all_pids = [int(pid)]
        sharemind_conf = SharemindCodeGenConfig("/mnt/shared", use_docker=False, use_hdfs=False)
        conclave_config.with_sharemind_config(sharemind_conf)
        # point conclave to the directory where the generated code should be stored/read from
        conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
        # point conclave to directory where data is to be read from...
        conclave_config.input_path = data_path
        # and written to
        conclave_config.output_path = data_path
        suffix = "left" if pid == "1" else "right"
        job_queue = generate_code(lambda: protocol_local(suffix, int(pid)), conclave_config,
                                  ["sharemind"], ["python"], apply_optimizations=False)
        dispatch_jobs(job_queue, conclave_config)

    # combine the per-party results with the opened MPC result
    res_mpc = read_rel(data_path + "/" + "actual_mpc_open.csv")
    res_left = read_rel(data_path + "/" + "actual_left.csv")
    res_right = read_rel(data_path + "/" + "actual_right.csv")
    assert len(res_mpc) == 1
    assert len(res_left) == 1
    assert len(res_right) == 1
    res = [[res_mpc[0][0] + res_left[0][0] + res_right[0][0]]]
    write_rel(data_path, "actual_open.csv", res, "1")
def main_mpc(pid: str, mpc_backend: str):
    # define name for the workflow
    workflow_name = "real-aspirin-partitioned-" + pid
    # configure conclave
    conclave_config = CodeGenConfig(workflow_name, int(pid)) \
        .with_default_mpc_config(mpc_backend)
    current_dir = os.path.dirname(os.path.realpath(__file__))
    # point conclave to the directory where the generated code should be stored/read from
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    # point conclave to directory where data is to be read from...
    conclave_config.input_path = os.path.join(current_dir, "data")
    # and written to
    conclave_config.output_path = os.path.join(current_dir, "data")
    job_queue = generate_code(lambda: protocol_mpc(conclave_config.all_pids), conclave_config,
                              [mpc_backend], ["python"], apply_optimizations=True)
    dispatch_jobs(job_queue, conclave_config)
def generate_code(protocol: callable, conclave_config: CodeGenConfig, mpc_frameworks: list,
                  local_frameworks: list, apply_optimizations: bool = True):
    """
    Applies optimization rewrite passes to protocol, partitions resulting condag, and generates
    backend-specific code for each sub-condag.

    :param protocol: protocol to compile
    :param conclave_config: conclave configuration
    :param mpc_frameworks: available mpc backend frameworks
    :param local_frameworks: available local-processing backend frameworks
    :param apply_optimizations: flag indicating if optimization rewrite passes should be applied to condag
    :return: queue of job objects to be executed by dispatcher
    """
    # currently only allow one local and one mpc framework
    assert len(mpc_frameworks) == 1 and len(local_frameworks) == 1
    # set up code gen config object
    if isinstance(conclave_config, CodeGenConfig):
        cfg = conclave_config
    else:
        cfg = CodeGenConfig.from_dict(conclave_config)
    # build the condag from the protocol
    dag = condag.OpDag(protocol())
    # only apply optimizations if required
    if apply_optimizations:
        dag = comp.rewrite_dag(dag)
    # partition into sub-dags that will run in specific frameworks
    mapping = part.heupart(dag, mpc_frameworks, local_frameworks)
    # for each sub-condag run code gen and add resulting job to job queue
    job_queue = []
    for job_num, (framework, sub_dag, stored_with) in enumerate(mapping):
        print(job_num, framework)
        if framework == "sharemind":
            name = "{}-sharemind-job-{}".format(cfg.name, job_num)
            job = SharemindCodeGen(cfg, sub_dag, cfg.pid).generate(name, cfg.output_path)
            job_queue.append(job)
        elif framework == "spark":
            name = "{}-spark-job-{}".format(cfg.name, job_num)
            job = SparkCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
            job_queue.append(job)
        elif framework == "python":
            name = "{}-python-job-{}".format(cfg.name, job_num)
            job = PythonCodeGen(cfg, sub_dag).generate(name, cfg.output_path)
            job_queue.append(job)
        else:
            raise Exception("Unknown framework: " + framework)
        # TODO: this probably doesn't belong here
        if conclave_config.pid not in stored_with:
            job.skip = True
    return job_queue
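# Usage sketch (illustrative only): how an entry point might drive generate_code
# and dispatch_jobs. The workflow name, paths, and the protocol_mpc helper are
# assumptions modeled on the run_local/run_mpc entry points in this repository,
# not a fixed API.
def example_run(pid: str, mpc_backend: str = "sharemind"):
    cfg = CodeGenConfig("example-workflow-" + pid, int(pid))
    cfg.all_pids = [1, 2]
    cfg.code_path = os.path.join("/mnt/shared", "example-workflow-" + pid)
    cfg.input_path = "/mnt/shared/data"
    cfg.output_path = "/mnt/shared/data"
    # compile the protocol into per-framework jobs, then execute them
    job_queue = generate_code(lambda: protocol_mpc(cfg.all_pids), cfg,
                              [mpc_backend], ["python"], apply_optimizations=True)
    dispatch_jobs(job_queue, cfg)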
def check_workflow(self, dag, name, use_leaky_ops=True):
    self.maxDiff = None
    expected_rootdir = \
        "{}/sharemind_expected".format(os.path.dirname(os.path.realpath(__file__)))
    sm_cfg = SharemindCodeGenConfig()
    cfg = CodeGenConfig('cfg').with_sharemind_config(sm_cfg)
    cfg.use_leaky_ops = use_leaky_ops
    cg = SharemindCodeGen(cfg, dag, 1)
    actual = cg._generate('code', '/tmp')[1]['miner']
    with open(expected_rootdir + '/{}'.format(name), 'r') as f_specific, open(
            expected_rootdir + '/{}'.format("base"), 'r') as f_base:
        expected_base = f_base.read()
        expected_specific = f_specific.read()
    expected = expected_base + expected_specific
    self.assertEqual(expected, actual)
def check_workflow(self, dag, name):
    expected_rootdir = \
        "{}/sharemind_expected".format(os.path.dirname(os.path.realpath(__file__)))
    sm_cfg = SharemindCodeGenConfig()
    cfg = CodeGenConfig('cfg').with_sharemind_config(sm_cfg)
    cg = SharemindCodeGen(cfg, dag, 1)
    actual = cg._generate('code', '/tmp')[1]['miner']
    with open(expected_rootdir + '/{}'.format(name), 'r') as f:
        expected = f.read()
    self.assertEqual(expected, actual)
def run_local(pid: str, data_root: str):
    workflow_name = "aspirin-local-join-" + pid + "-" + data_root
    conclave_config = CodeGenConfig(workflow_name, int(pid))
    conclave_config.all_pids = [int(pid)]
    sharemind_conf = SharemindCodeGenConfig("/mnt/shared", use_docker=False, use_hdfs=False)
    conclave_config.with_sharemind_config(sharemind_conf)
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    conclave_config.input_path = os.path.join("/mnt/shared", data_root)
    conclave_config.output_path = os.path.join("/mnt/shared", data_root)
    suffix = "left" if pid == "1" else "right"
    job_queue = generate_code(lambda: protocol_local(suffix, int(pid)), conclave_config,
                              ["sharemind"], ["python"], apply_optimizations=False)
    dispatch_jobs(job_queue, conclave_config)
def run_mpc(pid: str, data_root: str, mpc_backend: str):
    workflow_name = "aspirin-mpc-join-" + pid + "-" + data_root
    conclave_config = CodeGenConfig(workflow_name, int(pid))
    conclave_config.use_leaky_ops = False
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    conclave_config.input_path = os.path.join("/mnt/shared", data_root)
    conclave_config.output_path = os.path.join("/mnt/shared", data_root)
    job_queue = generate_code(lambda: protocol_mpc(conclave_config.all_pids), conclave_config,
                              [mpc_backend], ["python"], apply_optimizations=True)
    dispatch_jobs(job_queue, conclave_config)
    right_cols = [defCol("c", "INTEGER", [1]), defCol("d", "INTEGER", [1])]
    right = cc.create("right", right_cols, {1})
    joined = cc.join(left, right, "joined", ["a"], ["c"])
    cc.aggregate(joined, "expected", ["b"], "d", "sum", "total")
    return {left, right}


if __name__ == "__main__":
    pid = sys.argv[1]
    # define name for the workflow
    workflow_name = "simple-oblivious-test-" + pid
    # configure conclave
    conclave_config = CodeGenConfig(workflow_name, int(pid))
    conclave_config.all_pids = [1]
    sharemind_conf = SharemindCodeGenConfig("/mnt/shared", use_docker=False, use_hdfs=False)
    conclave_config.with_sharemind_config(sharemind_conf)
    current_dir = os.path.dirname(os.path.realpath(__file__))
    # point conclave to the directory where the generated code should be stored/read from
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    # point conclave to directory where data is to be read from...
    conclave_config.input_path = os.path.join(current_dir, "data")
    # and written to
    conclave_config.output_path = os.path.join(current_dir, "data")
    job_queue = generate_code(protocol, conclave_config, ["sharemind"], ["python"])
    dispatch_jobs(job_queue, conclave_config)
    heart_patients = cc.cc_filter(aspirin, "heart_patients", diag_col_diags, "==", scalar=1)
    cc.collect(cc.distinct_count(heart_patients, "actual", pid_col_meds), 1)
    return {left_medication, left_diagnosis, right_medication, right_diagnosis}


if __name__ == "__main__":
    pid = sys.argv[1]
    # define name for the workflow
    workflow_name = "real-aspirin-test-" + pid
    # configure conclave
    mpc_backend = sys.argv[2]
    conclave_config = CodeGenConfig(workflow_name, int(pid)) \
        .with_default_mpc_config(mpc_backend)
    current_dir = os.path.dirname(os.path.realpath(__file__))
    # point conclave to the directory where the generated code should be stored/read from
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    # point conclave to directory where data is to be read from...
    conclave_config.input_path = os.path.join(current_dir, "data")
    # and written to
    conclave_config.output_path = os.path.join(current_dir, "data")
    job_queue = generate_code(lambda: protocol(conclave_config.all_pids), conclave_config,
                              [mpc_backend], ["python"], apply_optimizations=True)
    dispatch_jobs(job_queue, conclave_config)
def heupart(dag: Dag, mpc_frameworks: list, local_frameworks: list):
    """ Non-exhaustive partition. Returns best partition with respect to certain heuristics. """

    def get_stored_with(node: OpNode):
        """ Returns stored_with set of out_rel or in_rel of a node, depending on its type. """
        if isinstance(node, Open):
            return node.get_in_rel().stored_with
        elif isinstance(node, Create):
            return get_stored_with(next(iter(node.children)))
        else:
            return node.out_rel.stored_with

    def is_correct_mode(node: OpNode, available: set, stored_with: set):
        """ Verifies that node is stored with same set of parties passed to this function. """
        if get_stored_with(node) != stored_with:
            return False
        # otherwise check parents
        return node.parents.issubset(available) or not (node.parents or available)

    def can_partition(dag: Dag, stored_with: set, top_available: set):
        """ Returns whether the Dag passed to it can be partitioned. """
        # copy so we don't overwrite global available nodes in this pass
        available = deepcopy(top_available)
        ordered = dag.top_sort()
        unavailable = set()
        for node in ordered:
            if node in unavailable and get_stored_with(node) == stored_with:
                for parent in node.parents:
                    if parent in available and not isinstance(parent, Persist):
                        return False
            if is_correct_mode(node, available, stored_with):
                available.add(node)
            else:
                # mark all descendants as unavailable
                descendants = Dag(set([node])).get_all_nodes()
                unavailable = unavailable.union(descendants)
        return True

    def disconnect_at_roots(current_dag: Dag, available: set, new_roots: list):
        previous_parents = set()
        create_op_lookup = dict()
        for root in new_roots:
            for parent in copy(root.parents):
                if parent in available:
                    create_op = None
                    if parent not in previous_parents:
                        create_op = Create(deepcopy(parent.out_rel))
                        # create op is in same mode as root
                        create_op.is_mpc = root.is_mpc
                        previous_parents.add(parent)
                        create_op_lookup[parent.out_rel.name] = create_op
                    else:
                        create_op = create_op_lookup[parent.out_rel.name]
                    # unlink root from parent
                    parent.children.remove(root)
                    # insert create op between parent and root
                    root.replace_parent(parent, create_op)
                    # connect create op with root
                    create_op.children.add(root)
                    # keep track of parents we have already visited
                    previous_parents.add(parent)
                    create_op_lookup[create_op.out_rel.name] = create_op
            if root in current_dag.roots:
                current_dag.roots.remove(root)
        parent_roots = set().union(*[root.parents for root in new_roots])
        for root in new_roots:
            if isinstance(root, Create):
                parent_roots.add(root)
        return OpDag(set(parent_roots)), available

    def find_new_roots(current_dag: Dag, available: set, stored_with: set):
        # need topological ordering
        ordered = current_dag.top_sort()
        # roots of the next subdag, i.e., where the current subdag will end
        new_roots = []
        # traverse current condag until all boundary nodes are hit
        for node in ordered:
            if is_correct_mode(node, available, stored_with):
                available.add(node)
            elif (not node.parents) or (node.parents & available):
                if node not in new_roots:
                    new_roots.append(node)
        # roots of the next subdag
        return new_roots

    def next_partition(nextdag, available, holding_parties):
        # roots of the next subdag
        new_roots = find_new_roots(nextdag, available, holding_parties)
        # disconnect current dags at new root nodes and return the disconnected
        # bottom condag
        return disconnect_at_roots(nextdag, available, new_roots)

    def _merge_dags(left_dag, right_dag):
        # TODO: should go inside dagutils, once dagutils exists
        # to merge, we only need to combine roots
        roots = left_dag.roots.union(right_dag.roots)
        return OpDag(roots)
    def next_holding_ps(nextdag, available):
        roots = nextdag.roots
        for root in sorted(roots, key=lambda node: node.out_rel.name):
            holding_ps = get_stored_with(root)
            if can_partition(nextdag, holding_ps, available):
                return holding_ps, root.is_mpc
        raise Exception("Found no roots to partition on")

    def merge_neighbor_dags(mapping):
        updated_mapping = []
        prev_fmwk, prev_subdag, prev_stored_with = None, None, None
        for fmwk, subdag, stored_with in mapping:
            # we can merge neighboring subdags if they're mapped to the same
            # framework and are stored by same parties
            if fmwk == prev_fmwk and stored_with == prev_stored_with:
                # merge dags together
                merged_dag = _merge_dags(prev_subdag, subdag)
                # pop previous subdag
                updated_mapping = updated_mapping[:-1]
                updated_mapping.append((fmwk, merged_dag, stored_with))
            else:
                # can't merge, so just add subdag to result
                updated_mapping.append((fmwk, subdag, stored_with))
            # keep track of previous values
            prev_fmwk = fmwk
            prev_subdag = subdag
            prev_stored_with = stored_with
        return updated_mapping

    assert len(mpc_frameworks) == 1 and len(local_frameworks) == 1
    nextdag = dag
    mapping = []
    available = set()
    iterations = 0
    iteration_limit = 100
    local_fmwk = local_frameworks[0]
    mpc_fmwk = mpc_frameworks[0]
    while nextdag.roots:
        if iterations > iteration_limit:
            raise Exception("Reached iteration limit while partitioning")
        # find holding set and mpc mode of next valid partition
        holding_ps, mpcmode = next_holding_ps(nextdag, available)
        # select framework
        fmwk = mpc_fmwk if mpcmode else local_fmwk
        # store mapping
        mapping.append((fmwk, nextdag, holding_ps))
        # partition next subdag
        nextdag, available = next_partition(nextdag, available, holding_ps)
        # increment iteration count
        iterations += 1
    for fmwk, subdag, stored_with in mapping:
        print(ScotchCodeGen(CodeGenConfig(), subdag)._generate(0, 0))
    merged = merge_neighbor_dags(mapping)
    return merged
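# Illustrative sketch only: heupart returns a list of (framework, sub_dag, stored_with)
# tuples, which generate_code above walks to emit one backend job per sub-dag.
# Assumptions: a protocol() helper that returns the workflow's input nodes (as in the
# demos below) and the comp/condag imports used by generate_code's module.
def show_partition(protocol: callable):
    dag = comp.rewrite_dag(condag.OpDag(protocol()))
    mapping = heupart(dag, ["sharemind"], ["python"])
    for framework, sub_dag, stored_with in mapping:
        # print which backend runs each sub-dag and which parties hold its data
        print(framework, sorted(node.out_rel.name for node in sub_dag.roots), stored_with)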
def main():
    pid = sys.argv[1]
    data_root = sys.argv[2]
    mpc_backend = sys.argv[3]
    # define name for the workflow
    workflow_name = "aspirin-large-join-" + pid + "-" + data_root
    # configure conclave
    conclave_config = CodeGenConfig(workflow_name, int(pid))
    if mpc_backend == "sharemind":
        sharemind_conf = SharemindCodeGenConfig("/mnt/shared", use_docker=True, use_hdfs=False)
        conclave_config.with_sharemind_config(sharemind_conf)
    elif mpc_backend == "obliv-c":
        conclave_config.all_pids = [1, 2]
        net_conf = [
            {"host": "ca-spark-node-0", "port": 8001},
            {"host": "cb-spark-node-0", "port": 8002}
        ]
        net = NetworkConfig(net_conf, int(pid))
        conclave_config.with_network_config(net)
        oc_conf = OblivcConfig("/obliv-c/bin/oblivcc", "ca-spark-node-0:9000")
        conclave_config.with_oc_config(oc_conf)
    else:
        raise Exception("Unknown MPC backend {}".format(mpc_backend))
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    conclave_config.input_path = os.path.join("/mnt/shared", data_root)
    conclave_config.output_path = os.path.join("/mnt/shared", data_root)
    job_queue = generate_code(protocol, conclave_config, [mpc_backend], ["python"],
                              apply_optimizations=True)
    dispatch_jobs(job_queue, conclave_config)
    company1_cols = [
        defCol("c", "INTEGER", 1, 3),
        defCol("d", "INTEGER", 3)
    ]
    company1 = cc.create("company1", company1_cols, {3})
    companies = cc.concat([company0, company1], "companies")
    joined = cc.join(govreg, companies, "joined", ["a"], ["c"])
    actual = cc.aggregate(joined, "actual", ["b"], "d", "sum", "total")
    cc.collect(actual, 1)
    return {govreg, company0, company1}


if __name__ == "__main__":
    pid = sys.argv[1]
    data_root = sys.argv[2]
    workflow_name = "ssn-benchmark" + pid + "-" + data_root
    conclave_config = CodeGenConfig(workflow_name, int(pid))
    sharemind_conf = SharemindCodeGenConfig("/mnt/shared", use_docker=True, use_hdfs=False)
    conclave_config.with_sharemind_config(sharemind_conf)
    conclave_config.use_leaky_ops = True
    current_dir = os.path.dirname(os.path.realpath(__file__))
    conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
    conclave_config.input_path = os.path.join("/mnt/shared", data_root)
    conclave_config.output_path = os.path.join("/mnt/shared", data_root)
    job_queue = generate_code(protocol, conclave_config, ["sharemind"], ["python"],
                              apply_optimizations=True)
    dispatch_jobs(job_queue, conclave_config)
    squared = lang.multiply(input_relation, "squared", "column_b", ["column_b", "column_b"])
    # sum group by column_a on column_b and rename group-over column to summed
    lang.aggregate(squared, "aggregated", ["column_a"], "column_b", "+", "summed")
    # leaf nodes are automatically written to file, so aggregated will be written to ./data/aggregated.csv
    # return all input relations
    return {input_relation}


if __name__ == "__main__":
    # define name for the workflow
    workflow_name = "python-demo"
    # configure conclave
    conclave_config = CodeGenConfig(workflow_name)
    # need the absolute path to current directory
    current_dir = os.path.dirname(os.path.realpath(__file__))
    # point conclave to the directory where the generated code should be stored/read from
    conclave_config.code_path = os.path.join(current_dir, workflow_name)
    # point conclave to directory where data is to be read from...
    conclave_config.input_path = os.path.join(current_dir, "data")
    # and written to
    conclave_config.output_path = os.path.join(current_dir, "data")
    # define this party's unique ID (in this demo there is only one party)
    conclave_config.pid = 1
    # define all parties involved in this workflow
    conclave_config.all_pids = [1]
    # compile and execute protocol, specifying available mpc and local processing backends
    generate_and_dispatch(protocol, conclave_config, ["sharemind"], ["python"])