Exemplo n.º 1
0
def setup(conf: Dict):
    pid = conf["pid"]
    hdfs_node_name = conf["spark"]["hdfs"]["node_name"]
    hdfs_root = conf["spark"]["hdfs"]["root"]
    spark_master_url = conf["spark"]["master_url"]

    workflow_name = conf["workflow_name"]

    sm_config = SharemindCodeGenConfig(conf["code_path"])
    spark_config = SparkConfig(spark_master_url)

    conclave_config = CodeGenConfig(workflow_name) \
        .with_sharemind_config(sm_config) \
        .with_spark_config(spark_config)

    conclave_config.code_path = conf["code_path"] + workflow_name
    conclave_config.input_path = "hdfs://{}/{}/{}".format(
        hdfs_node_name, hdfs_root, conf["name"])
    conclave_config.output_path = "hdfs://{}/{}/{}".format(
        hdfs_node_name, hdfs_root, conf["name"])
    conclave_config.pid = pid
    conclave_config.name = workflow_name
    conclave_config.all_pids = [int(p) for p in conf["all_pids"]]

    network_config = NetworkConfig(conf["sharemind"]["parties"], pid)

    conclave_config.with_network_config(network_config)

    return conclave_config
Exemplo n.º 2
0
def join(namenode, root, f_size, master_url):
    @dag_only
    def protocol():

        colsInA = [
            defCol('a', 'INTEGER', [1]),
            defCol('b', 'INTEGER', [1]),
        ]

        colsInB = [
            defCol('a', 'INTEGER', [1]),
            defCol('c', 'INTEGER', [1]),
        ]

        in1 = sal.create("in1", colsInA, set([1]))
        in2 = sal.create("in2", colsInB, set([1]))
        join1 = sal.join(in1, in2, 'join1', ['a'], ['a'])

        return set([in1, in2])

    dag = protocol()
    config = CodeGenConfig('join_spark_{}'.format(f_size))

    config.code_path = "/mnt/shared/" + config.name
    config.input_path = "hdfs://{}/{}/{}" \
        .format(namenode, root, f_size)
    config.output_path = "hdfs://{}/{}/join_sp{}" \
        .format(namenode, root, f_size)

    cg = spark.SparkCodeGen(config, dag)
    job = cg.generate(config.name, config.output_path)
    job_queue = [job]

    dis.dispatch_all(master_url, None, job_queue)
Exemplo n.º 3
0
def scalar_div(namenode, root, f_size, master_url):
    @dag_only
    def protocol():

        colsInA = [
            defCol('a', 'INTEGER', [1]),
            defCol('b', 'INTEGER', [1]),
        ]

        in1 = sal.create("in1", colsInA, set([1]))
        div1 = sal.divide(in1, 'div1', 'a', ['a', 5])

        return set([in1])

    dag = protocol()
    config = CodeGenConfig('scalar_div_spark_{}'.format(f_size))

    config.code_path = "/mnt/shared/" + config.name
    config.input_path = "hdfs://{}/{}/{}" \
        .format(namenode, root, f_size)
    config.output_path = "hdfs://{}/{}/scalar_div_sp{}" \
        .format(namenode, root, f_size)

    cg = spark.SparkCodeGen(config, dag)
    job = cg.generate(config.name, config.output_path)
    job_queue = [job]

    dis.dispatch_all(master_url, None, job_queue)
Exemplo n.º 4
0
def project(namenode, root, f_size, master_url):

    @dag_only
    def protocol():

        colsInA = [
            defCol('a', 'INTEGER', [1]),
            defCol('b', 'INTEGER', [1]),
            defCol('c', 'INTEGER', [1]),
            defCol('d', 'INTEGER', [1])
        ]

        in1 = sal.create("in1", colsInA, set([1]))

        cols = ([column.name for column in in1.out_rel.columns])
        shuffle(cols)

        proja = sal.project(in1, "proja", cols)

        return set([in1])

    dag = protocol()
    config = CodeGenConfig('project_spark_{}'.format(f_size))

    config.code_path = "/mnt/shared/" + config.name
    config.input_path = "hdfs://{}/{}/{}" \
        .format(namenode, root, f_size)
    config.output_path = "hdfs://{}/{}/project_sp{}" \
        .format(namenode, root, f_size)

    cg = spark.SparkCodeGen(config, dag)
    job = cg.generate(config.name, config.output_path)
    job_queue = [job]

    dis.dispatch_all(master_url, None, job_queue)
Exemplo n.º 5
0
def no_hdfs():

    pid = int(sys.argv[1])
    num_tuples = sys.argv[2]

    workflow_name = "sharemind_join_{}_{}".format(num_tuples, pid)
    sm_cg_config = SharemindCodeGenConfig(
        workflow_name, "/mnt/shared", use_hdfs=False, use_docker=True)
    codegen_config = CodeGenConfig(
        workflow_name).with_sharemind_config(sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared/join/" + num_tuples
    codegen_config.output_path = "/mnt/shared/join/" + num_tuples

    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {"host": "ca-spark-node-0", "port": 9001},
            2: {"host": "cb-spark-node-0", "port": 9002},
            3: {"host": "cc-spark-node-0", "port": 9003}
        }
    }
    sm_peer = conclave.net.setup_peer(sharemind_config)

    join(pid, codegen_config, sm_peer, num_tuples)
Exemplo n.º 6
0
def party_proc(pid):

    sharemind_home = "/home/sharemind/Sharemind-SDK/sharemind/client"
    spark_master = "local"

    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {
                "host": "localhost",
                "port": 9001
            },
            2: {
                "host": "localhost",
                "port": 9002
            },
            3: {
                "host": "localhost",
                "port": 9003
            }
        }
    }
    peer = conclave.net.setup_peer(sharemind_config)

    codegen_config = CodeGenConfig()

    job = SharemindCodeGen(codegen_config, join(),
                           pid).generate("job-" + str(pid), sharemind_home)
    job_queue = [job]
    conclave.dispatch.dispatch_all(spark_master, peer, job_queue)
Exemplo n.º 7
0
    def test_ssn(self):
        def protocol():
            govreg_cols = [
                defCol("a", "INTEGER", [1]),
                defCol("b", "INTEGER", [1])
            ]
            govreg = cc.create("a_govreg", govreg_cols, {1})
            govreg_dummy = cc.project(govreg, "govreg_dummy", ["a", "b"])

            company0_cols = [
                defCol("c", "INTEGER", [1], [2]),
                defCol("d", "INTEGER", [2])
            ]
            company0 = cc.create("company0", company0_cols, {2})
            company0_dummy = cc.project(company0, "company0_dummy", ["c", "d"])

            company1_cols = [
                defCol("c", "INTEGER", [1], [3]),
                defCol("d", "INTEGER", [3])
            ]
            company1 = cc.create("company1", company1_cols, {3})
            company1_dummy = cc.project(company1, "company1_dummy", ["c", "d"])

            companies = cc.concat([company0_dummy, company1_dummy],
                                  "companies")

            joined = cc.join(govreg_dummy, companies, "joined", ["a"], ["c"])
            res = cc.aggregate(joined, "actual", ["b"], "d", "sum", "total")
            cc.collect(res, 1)

            return {govreg, company0, company1}

        dag = rewrite_dag(ccdag.OpDag(protocol()), use_leaky_ops=True)
        actual = ScotchCodeGen(CodeGenConfig(), dag)._generate(0, 0)
        self.check_workflow(actual, "ssn_leaky")
Exemplo n.º 8
0
def generate(dag, name):

    cfg = CodeGenConfig('cfg')
    cg = SparkCodeGen(cfg, dag)

    actual = cg._generate('code', '/tmp')[1]

    with open('/tmp/' + name + '.py', 'w') as out:
        out.write(actual)
Exemplo n.º 9
0
    def check_workflow(self, dag, name):
        expected_rootdir = \
            "{}/spark_expected".format(os.path.dirname(os.path.realpath(__file__)))

        cfg = CodeGenConfig('cfg')
        cg = SparkCodeGen(cfg, dag)

        actual = cg._generate('code', '/tmp')[1]

        with open(expected_rootdir + '/{}'.format(name), 'r') as f:
            expected = f.read()

        self.assertEqual(expected, actual)
Exemplo n.º 10
0
    def check_workflow(self, dag, name: str):
        expected_rootdir = \
            "{}/python_expected".format(os.path.dirname(os.path.realpath(__file__)))

        cfg = CodeGenConfig('cfg')
        cg = PythonCodeGen(cfg, dag)

        actual = cg._generate('code', '/tmp')[1]

        with open(expected_rootdir + '/{}'.format(name), 'r') as f_specific, open(
                expected_rootdir + '/{}'.format("base"), 'r') as f_base:
            expected_base = f_base.read()
            expected_specific = f_specific.read()
            expected = expected_base + expected_specific
        
        self.assertEqual(expected, actual)
Exemplo n.º 11
0
    def test_hybrid_agg_opt(self):
        def protocol():
            cols_in_1 = [
                defCol("a", "INTEGER", [1]),
                defCol("b", "INTEGER", [1])
            ]
            in_1 = cc.create("in_1", cols_in_1, {1})
            cols_in_2 = [
                defCol("a", "INTEGER", [1], [2]),
                defCol("b", "INTEGER", [2])
            ]
            in_2 = cc.create("in_2", cols_in_2, {2})
            cc.collect(
                cc.aggregate(cc.concat([in_1, in_2], "rel"), "agg", ["a"], "b",
                             "sum", "total_b"), 1)
            return {in_1, in_2}

        dag = rewrite_dag(ccdag.OpDag(protocol()), use_leaky_ops=True)
        actual = ScotchCodeGen(CodeGenConfig(), dag)._generate(0, 0)
        self.check_workflow(actual, "hybrid_agg_leaky")
Exemplo n.º 12
0
    def test_public_join(self):
        def protocol():
            left_one_cols = [
                defCol("a", "INTEGER", 1, 2, 3),
                defCol("b", "INTEGER", 1)
            ]
            left_one = cc.create("left_one", left_one_cols, {1})

            right_one_cols = [
                defCol("c", "INTEGER", 1, 2, 3),
                defCol("d", "INTEGER", 1)
            ]
            right_one = cc.create("right_one", right_one_cols, {1})

            left_two_cols = [
                defCol("a", "INTEGER", 1, 2, 3),
                defCol("b", "INTEGER", 2)
            ]
            left_two = cc.create("left_two", left_two_cols, {2})

            right_two_cols = [
                defCol("c", "INTEGER", 1, 2, 3),
                defCol("d", "INTEGER", 2)
            ]
            right_two = cc.create("right_two", right_two_cols, {2})

            left = cc.concat([left_one, left_two], "left")
            right = cc.concat([right_one, right_two], "right")

            joined = cc.join(left, right, "joined", ["a"], ["c"])
            cc.collect(joined, 1)

            return {left_one, left_two, right_one, right_two}

        dag = rewrite_dag(ccdag.OpDag(protocol()))
        actual = ScotchCodeGen(CodeGenConfig(), dag)._generate(0, 0)
        self.check_workflow(actual, 'public_join')
Exemplo n.º 13
0
    def test_hybrid_join_party_two_opt(self):
        def protocol():
            # define inputs
            cols_in_1 = [
                defCol("a", "INTEGER", [1], [2]),
                defCol("b", "INTEGER", [1]),
            ]
            in_1 = cc.create("in_1", cols_in_1, {1})

            cols_in_2 = [
                defCol("c", "INTEGER", [2]),
                defCol("d", "INTEGER", [2])
            ]
            in_2 = cc.create("in_2", cols_in_2, {2})

            result = cc.join(in_1, in_2, "result", ["a"], ["c"])

            cc.collect(result, 1)
            # create dag
            return {in_1, in_2}

        dag = rewrite_dag(ccdag.OpDag(protocol()), use_leaky_ops=True)
        actual = ScotchCodeGen(CodeGenConfig(), dag)._generate(0, 0)
        self.check_workflow(actual, 'hybrid_join_leaky_party_two')
Exemplo n.º 14
0
    if len(sys.argv) < 5:
        print(
            "usage: taxi.py <party ID> <HDFS master node:port> <HDFS root dir> <Spark master url>"
        )
        sys.exit(1)

    pid = int(sys.argv[1])
    hdfs_namenode = sys.argv[2]
    hdfs_root = sys.argv[3]
    spark_master_url = sys.argv[4]

    workflow_name = "job-" + str(pid)
    sm_config = SharemindCodeGenConfig("/mnt/shared")
    spark_config = SparkConfig(spark_master_url)
    conclave_config = CodeGenConfig(workflow_name) \
        .with_sharemind_config(sm_config) \
        .with_spark_config(spark_config)
    conclave_config.code_path = "/mnt/shared/" + workflow_name
    conclave_config.input_path = "hdfs://{}/{}/taxi".format(
        hdfs_namenode, hdfs_root)
    conclave_config.output_path = "hdfs://{}/{}/taxi".format(
        hdfs_namenode, hdfs_root)
    conclave_config.pid = pid
    conclave_config.name = workflow_name
    network_config = {
        "pid": pid,
        "parties": {
            1: {
                "host": "ca-spark-node-0",
                "port": 9001
            },
Exemplo n.º 15
0
def testHybridJoinWorkflow():
    def protocol():

        # define inputs
        colsIn1 = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", colsIn1, set([1]))
        proj1 = sal.project(in1, "proj1", ["a", "b"])

        colsIn2 = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2])
        ]
        in2 = sal.create("in2", colsIn2, set([2]))
        proj2 = sal.project(in2, "proj2", ["c", "d"])

        res = sal.join(proj1, proj2, "res", ["a"], ["c"])

        # open result to party 1
        sal.collect(res, 1)

        # return roots of dag
        return set([in1, in2])

    pid = int(sys.argv[1])
    size = sys.argv[2]

    workflow_name = "hybrid-join-" + str(pid)
    sm_cg_config = SharemindCodeGenConfig(workflow_name,
                                          "/mnt/shared",
                                          use_hdfs=False,
                                          use_docker=True)
    codegen_config = CodeGenConfig(workflow_name).with_sharemind_config(
        sm_cg_config)
    codegen_config.pid = pid
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared/hybridjoin/" + size
    codegen_config.output_path = "/mnt/shared/hybridjoin/" + size

    jobqueue = generate_code(protocol, codegen_config, ["sharemind"],
                             ["python"])
    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {
                "host": "localhost",
                "port": 9001
            },
            2: {
                "host": "localhost",
                "port": 9002
            },
            3: {
                "host": "localhost",
                "port": 9003
            }
        }
    }
    sm_peer = setup_peer(sharemind_config)
    conclave.dispatch.dispatch_all(None, sm_peer, jobqueue)
Exemplo n.º 16
0
def testPublicJoinWorkflow():

    @dag_only
    def protocol():

        # define inputs
        colsInA = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", colsInA, set([1]))
        in1.isMPC = False

        proja = sal.project(in1, "proja", ["a", "b"])
        proja.isMPC = False
        proja.out_rel.storedWith = set([1])

        colsInB = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2])
        ]
        in2 = sal.create("in2", colsInB, set([2]))
        in2.isMPC = False

        projb = sal.project(in2, "projb", ["c", "d"])
        projb.isMPC = False
        projb.out_rel.storedWith = set([2])

        clA = sal._close(proja, "clA", set([1, 2, 3]))
        clA.isMPC = True
        clB = sal._close(projb, "clB", set([1, 2, 3]))
        clB.isMPC = True

        persistedA = sal._persist(clA, "persistedA")
        persistedA.isMPC = True
        persistedB = sal._persist(clB, "persistedB")
        persistedB.isMPC = True

        keysaclosed = sal.project(clA, "keysaclosed", ["a"])
        keysaclosed.out_rel.storedWith = set([1, 2, 3])
        keysaclosed.isMPC = True
        keysbclosed = sal.project(clB, "keysbclosed", ["c"])
        keysbclosed.isMPC = True
        keysbclosed.out_rel.storedWith = set([1, 2, 3])

        keysa = sal._open(keysaclosed, "keysa", 1)
        keysa.isMPC = True
        keysb = sal._open(keysbclosed, "keysb", 1)
        keysb.isMPC = True

        indexedA = sal.index(keysa, "indexedA", "indexA")
        indexedA.isMPC = False
        indexedA.out_rel.storedWith = set([1])
        indexedB = sal.index(keysb, "indexedB", "indexB")
        indexedB.isMPC = False
        indexedB.out_rel.storedWith = set([1])

        joinedindeces = sal.join(
            indexedA, indexedB, "joinedindeces", ["a"], ["c"])
        joinedindeces.isMPC = False
        joinedindeces.out_rel.storedWith = set([1])

        indecesonly = sal.project(
            joinedindeces, "indecesonly", ["indexA", "indexB"])
        indecesonly.isMPC = False
        indecesonly.out_rel.storedWith = set([1])

        indecesclosed = sal._close(
            indecesonly, "indecesclosed", set([1, 2, 3]))
        indecesclosed.isMPC = True

        joined = sal._index_join(persistedA, persistedB, "joined", [
                                 "a"], ["c"], indecesclosed)
        joined.out_rel.storedWith = set([1, 2, 3])
        joined.isMPC = True

        sal._open(joined, "opened", 1)

        # create condag
        return set([in1, in2])

    pid = int(sys.argv[1])
    size = sys.argv[2]

    workflow_name = "public-join-" + str(pid)
    sm_cg_config = SharemindCodeGenConfig(
        workflow_name, "/mnt/shared", use_hdfs=False, use_docker=True)
    codegen_config = CodeGenConfig(
        workflow_name).with_sharemind_config(sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared/hybridjoin/" + size
    codegen_config.output_path = "/mnt/shared/hybridjoin/" + size

    dag = protocol()
    mapping = part.heupart(dag, ["sharemind"], ["python"])
    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        if fmwk == "sharemind":
            job = SharemindCodeGen(codegen_config, subdag, pid).generate(
                "sharemind-" + str(idx), None)
        else:
            job = PythonCodeGen(codegen_config, subdag).generate(
                "python-" + str(idx), None)
        # TODO: this probably doesn't belong here
        if not pid in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {"host": "ca-spark-node-0", "port": 9001},
            2: {"host": "cb-spark-node-0", "port": 9002},
            3: {"host": "cc-spark-node-0", "port": 9003}
        }
    }
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)
Exemplo n.º 17
0
    agg = sal.aggregate(joined, "agg", ["b"], "d", "+", "total")

    opened = sal._open(agg, "opened", 1)
    return set([in1, in2, in3])


if __name__ == "__main__":

    pid = int(sys.argv[1])

    workflow_name = "sharemind-ssn-" + str(pid)
    sm_cg_config = SharemindCodeGenConfig(workflow_name,
                                          "/mnt/shared",
                                          use_hdfs=False,
                                          use_docker=True)
    codegen_config = CodeGenConfig(workflow_name).with_sharemind_config(
        sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared/ssn-data"
    codegen_config.output_path = "/mnt/shared/ssn-data"

    job = SharemindCodeGen(codegen_config, protocol(),
                           pid).generate("sharemind-0", "")
    job_queue = [job]

    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {
                "host": "ca-spark-node-0",
                "port": 9001
            },
Exemplo n.º 18
0
def testHybridAggWorkflow():
    @dag_only
    def protocol():

        # define inputs
        colsInA = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", colsInA, set([1]))
        in1.isMPC = False

        proja = sal.project(in1, "proja", ["a", "b"])
        proja.isMPC = False
        proja.out_rel.storedWith = set([1])

        # define inputs
        colsInB = [
            defCol("a", "INTEGER", [2]),
            defCol("b", "INTEGER", [2]),
        ]
        in2 = sal.create("in2", colsInB, set([2]))
        in2.isMPC = False

        projb = sal.project(in2, "projb", ["a", "b"])
        projb.isMPC = False
        projb.out_rel.storedWith = set([2])

        # define inputs
        colsInC = [
            defCol("a", "INTEGER", [3]),
            defCol("b", "INTEGER", [3]),
        ]
        in3 = sal.create("in3", colsInC, set([3]))
        in3.isMPC = False

        projc = sal.project(in3, "projc", ["a", "b"])
        projc.isMPC = False
        projc.out_rel.storedWith = set([3])

        clA = sal._close(proja, "clA", set([1, 2, 3]))
        clA.isMPC = True

        clB = sal._close(projb, "clB", set([1, 2, 3]))
        clB.isMPC = True

        clC = sal._close(projc, "clC", set([1, 2, 3]))
        clC.isMPC = True

        comb = sal.concat([clA, clB, clC], "comb")
        comb.out_rel.storedWith = set([1, 2, 3])
        comb.isMPC = True

        shuffled = sal.shuffle(comb, "shuffled")
        shuffled.out_rel.storedWith = set([1, 2, 3])
        shuffled.isMPC = True

        persisted = sal._persist(shuffled, "persisted")
        persisted.out_rel.storedWith = set([1, 2, 3])
        persisted.isMPC = True

        keysclosed = sal.project(shuffled, "keysclosed", ["a"])
        keysclosed.out_rel.storedWith = set([1, 2, 3])
        keysclosed.isMPC = True

        keys = sal._open(keysclosed, "keys", 1)
        keys.isMPC = True

        indexed = sal.index(keys, "indexed", "rowIndex")
        indexed.isMPC = False
        indexed.out_rel.storedWith = set([1])

        sortedByKey = sal.sort_by(indexed, "sortedByKey", "a")
        sortedByKey.isMPC = False
        sortedByKey.out_rel.storedWith = set([1])

        eqFlags = sal._comp_neighs(sortedByKey, "eqFlags", "a")
        eqFlags.isMPC = False
        eqFlags.out_rel.storedWith = set([1])

        # TODO: hack to get keys stored
        # need to fix later!
        sortedByKey = sal.project(sortedByKey, "sortedByKey",
                                  ["rowIndex", "a"])
        sortedByKey.isMPC = False
        sortedByKey.out_rel.storedWith = set([1])

        closedEqFlags = sal._close(eqFlags, "closedEqFlags", set([1, 2, 3]))
        closedEqFlags.isMPC = True
        closedSortedByKey = sal._close(sortedByKey, "closedSortedByKey",
                                       set([1, 2, 3]))
        closedSortedByKey.isMPC = True

        agg = sal.index_aggregate(persisted, "agg", ["a"], "b", "+", "b",
                                  closedEqFlags, closedSortedByKey)
        agg.out_rel.storedWith = set([1, 2, 3])
        agg.isMPC = True

        sal._open(agg, "opened", 1)

        # create condag
        return set([in1, in2, in3])

    pid = int(sys.argv[1])
    size = sys.argv[2]

    workflow_name = "hybrid-agg-" + str(pid)
    sm_cg_config = SharemindCodeGenConfig(workflow_name,
                                          "/mnt/shared",
                                          use_hdfs=False,
                                          use_docker=True)
    codegen_config = CodeGenConfig(workflow_name).with_sharemind_config(
        sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared/" + size
    codegen_config.output_path = "/mnt/shared/" + size

    dag = protocol()

    mapping = part.heupart(dag, ["sharemind"], ["python"])
    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        if fmwk == "sharemind":
            job = SharemindCodeGen(codegen_config, subdag,
                                   pid).generate("sharemind-" + str(idx), None)
        else:
            job = PythonCodeGen(codegen_config,
                                subdag).generate("python-" + str(idx), None)
        # TODO: this probably doesn't belong here
        if not pid in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = {
        "pid": pid,
        "parties": {
            1: {
                "host": "ca-spark-node-0",
                "port": 9001
            },
            2: {
                "host": "cb-spark-node-0",
                "port": 9002
            },
            3: {
                "host": "cc-spark-node-0",
                "port": 9003
            }
        }
    }
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)
Exemplo n.º 19
0
                                     'wghtd_unit_p', '+', 'avg_unit_p')

    # merge in avg_unit_p
    final_join = sal.join(total_units, total_unit_wghts, 'final_join',
                          ['store_code_uc', 'upc', 'week_end'],
                          ['store_code_uc', 'upc', 'week_end'])

    selected_cols = sal.project(final_join, 'selected_cols', [
        'store_code_uc', 'upc', 'week_end', 'q', 'avg_unit_p', 'retailer_code',
        'store_zip3'
    ])

    opened = sal.collect(selected_cols, 1)

    return set([create])


if __name__ == "__main__":

    dag = protocol()

    config = CodeGenConfig("nielsen-local")

    vg = viz.VizCodeGen(config, dag)
    vg.generate("local_workflow", "/tmp")

    cg = spark.SparkCodeGen(config, dag)
    cg.generate("local_workflow", "/tmp")

    print("Spark code generated in {}".format(config.code_path))
Exemplo n.º 20
0
        defCol("b", "INTEGER", [1]),
        defCol("c", "INTEGER", [1]),
        defCol("d", "INTEGER", [1])
    ]

    in1 = sal.create("in1", colsIn1, set([1]))

    in2 = sal.create("in2", colsIn1, set([1]))

    return [in1, in2]


@dag_only
def agg():

    in1 = setup()[0]

    agg = sal.aggregate(in1, "agg", ["a", "b"], "c", "sum", "agg1")

    out = sal.collect(agg, 1)

    return set([in1])


if __name__ == "__main__":

    dag_agg = agg()
    cfg_agg = CodeGenConfig('agg')
    cg_agg = SparkCodeGen(cfg_agg, dag_agg)
    cg_agg.generate('agg', '/tmp')
Exemplo n.º 21
0
def main():
    pid = sys.argv[1]
    data_root = sys.argv[2]
    backend = sys.argv[3]
    workflow_name = "hhi-benchmark-" + pid
    if backend == "python":
        sharemind_conf = SharemindCodeGenConfig("/mnt/shared",
                                                use_docker=True,
                                                use_hdfs=False)
        conclave_config = CodeGenConfig(workflow_name, int(pid))
        conclave_config.with_sharemind_config(sharemind_conf)
        conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
        conclave_config.input_path = os.path.join("/mnt/shared", data_root)
        conclave_config.output_path = os.path.join("/mnt/shared", data_root)
        generate_and_dispatch(protocol,
                              conclave_config, ["sharemind"], ["python"],
                              apply_optimizations=True)
    elif backend == "spark":
        sharemind_conf = SharemindCodeGenConfig("/mnt/shared",
                                                use_docker=True,
                                                use_hdfs=True)
        conclave_config = CodeGenConfig(workflow_name, int(pid))

        host = conclave_config.network_config["parties"][int(pid)]["host"]
        # Update this if your spark master and HDFS namenode are mapped to a different host than your Conclave node
        spark_master_url = "spark://{}:7077".format(host)
        hdfs_namenode = "{}:9000".format(host)
        spark_config = SparkConfig(spark_master_url)

        conclave_config \
            .with_sharemind_config(sharemind_conf) \
            .with_spark_config(spark_config)

        conclave_config.code_path = os.path.join("/mnt/shared", workflow_name)
        conclave_config.input_path = "hdfs://{}/{}".format(
            hdfs_namenode, data_root)
        conclave_config.output_path = "hdfs://{}/{}".format(
            hdfs_namenode, data_root)
        generate_and_dispatch(protocol,
                              conclave_config, ["sharemind"], ["spark"],
                              apply_optimizations=True)
    else:
        raise Exception("Unknown backend {}".format(backend))
Exemplo n.º 22
0
def testPublicJoinWorkflow():

    @dag_only
    def protocol():

        # define inputs
        colsInA = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", colsInA, set([1]))
        in1.isMPC = False

        proja = sal.project(in1, "proja", ["a", "b"])
        proja.isMPC = False
        proja.out_rel.storedWith = set([1])

        colsInB = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2])
        ]
        in2 = sal.create("in2", colsInB, set([2]))
        in2.isMPC = False

        projb = sal.project(in2, "projb", ["c", "d"])
        projb.isMPC = False
        projb.out_rel.storedWith = set([2])

        clA = sal._close(proja, "clA", set([1, 2, 3]))
        clA.isMPC = True
        clB = sal._close(projb, "clB", set([1, 2, 3]))
        clB.isMPC = True

        persistedA = sal._persist(clA, "persistedA")
        persistedB = sal._persist(clB, "persistedB")

        keysaclosed = sal.project(clA, "keysaclosed", ["a"])
        keysaclosed.out_rel.storedWith = set([1, 2, 3])
        keysaclosed.isMPC = True
        keysbclosed = sal.project(clB, "keysbclosed", ["c"])
        keysbclosed.isMPC = True
        keysbclosed.out_rel.storedWith = set([1, 2, 3])

        keysa = sal._open(keysaclosed, "keysa", 1)
        keysa.isMPC = True
        keysb = sal._open(keysbclosed, "keysb", 1)
        keysb.isMPC = True

        indexedA = sal.index(keysa, "indexedA", "indexA")
        indexedA.isMPC = False
        indexedA.out_rel.storedWith = set([1])
        indexedB = sal.index(keysb, "indexedB", "indexB")
        indexedB.isMPC = False
        indexedB.out_rel.storedWith = set([1])

        joinedindeces = sal.join(
            indexedA, indexedB, "joinedindeces", ["a"], ["c"])
        joinedindeces.isMPC = False
        joinedindeces.out_rel.storedWith = set([1])

        indecesonly = sal.project(
            joinedindeces, "indecesonly", ["indexA", "indexB"])
        indecesonly.isMPC = False
        indecesonly.out_rel.storedWith = set([1])

        indecesclosed = sal._close(
            indecesonly, "indecesclosed", set([1, 2, 3]))
        indecesclosed.isMPC = True

        joined = sal._index_join(persistedA, persistedB, "joined",
                                 ["a"], ["c"], indecesclosed)
        joined.isMPC = True

        sal._open(joined, "opened", 1)

        # create condag
        return set([in1, in2])

    pid = int(sys.argv[1])
    workflow_name = "hybrid-join-" + str(pid)
    sm_cg_config = SharemindCodeGenConfig(
        workflow_name, "/mnt/shared", use_hdfs=False)
    codegen_config = CodeGenConfig(
        workflow_name).with_sharemind_config(sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared"
    codegen_config.output_path = "/mnt/shared"

    exampleutils.generate_data(pid, codegen_config.output_path)

    dag = protocol()
    mapping = part.heupart(dag, ["sharemind"], ["python"])
    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        if fmwk == "sharemind":
            job = SharemindCodeGen(codegen_config, subdag, pid).generate(
                "sharemind-" + str(idx), None)
        else:
            job = PythonCodeGen(codegen_config, subdag).generate(
                "python-" + str(idx), None)
        # TODO: this probably doesn't belong here
        if not pid in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = exampleutils.get_sharemind_config(pid, True)
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)
    if pid == 1:
        expected = ['', '2,200,2001', '3,300,3001', '4,400,4001', '42,42,1001', '5,500,5001',
                    '6,600,6001', '7,700,7001', '7,800,7001', '7,900,7001', '8,1000,8001', '9,1100,9001']
        exampleutils.check_res(expected, "/mnt/shared/opened.csv")
        print("Success")
Exemplo n.º 23
0
 def wrap():
     code = sharemind.SharemindCodeGen(CodeGenConfig(),
                                       f())._generate(None, None)
     return code
Exemplo n.º 24
0
 def wrap():
     code = scotch.ScotchCodeGen(CodeGenConfig(), f())._generate(None, None)
     return code
Exemplo n.º 25
0
def testHybridJoinWorkflow():

    def hybrid_join():

        # define inputs
        colsInA = [
            defCol("a", "INTEGER", [1]),
            defCol("b", "INTEGER", [1]),
        ]
        in1 = sal.create("in1", colsInA, set([1]))
        in1.isMPC = False

        proja = sal.project(in1, "proja", ["a", "b"])
        proja.isMPC = False
        proja.out_rel.storedWith = set([1])

        colsInB = [
            defCol("c", "INTEGER", [1], [2]),
            defCol("d", "INTEGER", [2])
        ]
        in2 = sal.create("in2", colsInB, set([2]))
        in2.isMPC = False

        projb = sal.project(in2, "projb", ["c", "d"])
        projb.isMPC = False
        projb.out_rel.storedWith = set([2])

        clA = sal._close(proja, "clA", set([1, 2, 3]))
        clA.isMPC = True
        clB = sal._close(projb, "clB", set([1, 2, 3]))
        clB.isMPC = True

        shuffledA = sal.shuffle(clA, "shuffledA")
        shuffledA.isMPC = True
        persistedA = sal._persist(shuffledA, "persistedA")
        persistedA.isMPC = True
        shuffledB = sal.shuffle(clB, "shuffledB")
        shuffledB.isMPC = True
        persistedB = sal._persist(shuffledB, "persistedB")
        persistedB.isMPC = True

        keysaclosed = sal.project(shuffledA, "keysaclosed", ["a"])
        keysaclosed.out_rel.storedWith = set([1, 2, 3])
        keysaclosed.isMPC = True
        keysbclosed = sal.project(shuffledB, "keysbclosed", ["c"])
        keysbclosed.isMPC = True
        keysbclosed.out_rel.storedWith = set([1, 2, 3])

        keysa = sal._open(keysaclosed, "keysa", 1)
        keysa.isMPC = True
        keysb = sal._open(keysbclosed, "keysb", 1)
        keysb.isMPC = True

        indexedA = sal.index(keysa, "indexedA", "indexA")
        indexedA.isMPC = False
        indexedA.out_rel.storedWith = set([1])
        indexedB = sal.index(keysb, "indexedB", "indexB")
        indexedB.isMPC = False
        indexedB.out_rel.storedWith = set([1])

        joinedindeces = sal.join(
            indexedA, indexedB, "joinedindeces", ["a"], ["c"])
        joinedindeces.isMPC = False
        joinedindeces.out_rel.storedWith = set([1])

        indecesonly = sal.project(
            joinedindeces, "indecesonly", ["indexA", "indexB"])
        indecesonly.isMPC = False
        indecesonly.out_rel.storedWith = set([1])

        indecesclosed = sal._close(
            indecesonly, "indecesclosed", set([1, 2, 3]))
        indecesclosed.isMPC = True

        joined = sal._index_join(persistedA, persistedB, "joined", [
                                 "a"], ["c"], indecesclosed)
        joined.isMPC = True

        return joined, set([in1, in2])

    def hybrid_agg(in1):

        shuffled = sal.shuffle(in1, "shuffled")
        shuffled.out_rel.storedWith = set([1, 2, 3])
        shuffled.isMPC = True

        persisted = sal._persist(shuffled, "persisted")
        persisted.out_rel.storedWith = set([1, 2, 3])
        persisted.isMPC = True
        
        keysclosed = sal.project(shuffled, "keysclosed", ["b"])
        keysclosed.out_rel.storedWith = set([1, 2, 3])
        keysclosed.isMPC = True
        
        keys = sal._open(keysclosed, "keys", 1)
        keys.isMPC = True
        
        indexed = sal.index(keys, "indexed", "rowIndex")
        indexed.isMPC = False
        indexed.out_rel.storedWith = set([1])
        
        distinctKeys = sal.distinct(keys, "distinctKeys", ["b"])
        distinctKeys.isMPC = False
        distinctKeys.out_rel.storedWith = set([1])

        # TODO: hack to get keys stored
        # need to fix later!
        fakeDistinctKeys = sal.distinct(keys, "distinctKeys", ["b"])
        fakeDistinctKeys.isMPC = False
        fakeDistinctKeys.out_rel.storedWith = set([1])

        indexedDistinct = sal.index(distinctKeys, "indexedDistinct", "keyIndex")
        indexedDistinct.isMPC = False
        indexedDistinct.out_rel.storedWith = set([1])

        joinedindeces = sal.join(
            indexed, indexedDistinct, "joinedindeces", ["b"], ["b"])
        joinedindeces.isMPC = False
        joinedindeces.out_rel.storedWith = set([1])

        # TODO: could project row indeces away too
        indecesonly = sal.project(
            joinedindeces, "indecesonly", ["rowIndex", "keyIndex"])
        indecesonly.isMPC = False
        indecesonly.out_rel.storedWith = set([1])

        closedDistinct = sal._close(distinctKeys, "closedDistinct", set([1, 2, 3]))
        closedDistinct.isMPC = True
        closedLookup = sal._close(indecesonly, "closedLookup", set([1, 2, 3]))
        closedLookup.isMPC = True

        agg = sal.index_aggregate(persisted, "agg", ["b"], "d", "+", "d", closedLookup, closedDistinct)
        agg.isMPC = True
        sal._open(agg, "aggopened", 1)

    def protocol():

        joinedres, inputs = hybrid_join()
        hybrid_agg(joinedres)
        return saldag.OpDag(inputs)

    pid = int(sys.argv[1])
    workflow_name = "ssn-" + str(pid)
    sm_cg_config = SharemindCodeGenConfig(
        workflow_name, "/mnt/shared", use_hdfs=False, use_docker=False)
    codegen_config = CodeGenConfig(
        workflow_name).with_sharemind_config(sm_cg_config)
    codegen_config.code_path = "/mnt/shared/" + workflow_name
    codegen_config.input_path = "/mnt/shared"
    codegen_config.output_path = "/mnt/shared"

    exampleutils.generate_ssn_data(pid, codegen_config.output_path)

    dag = protocol()
    mapping = part.heupart(dag, ["sharemind"], ["python"])
    job_queue = []
    for idx, (fmwk, subdag, storedWith) in enumerate(mapping):
        if fmwk == "sharemind":
            job = SharemindCodeGen(codegen_config, subdag, pid).generate(
                "sharemind-" + str(idx), None)
        else:
            job = PythonCodeGen(codegen_config, subdag).generate(
                "python-" + str(idx), None)
        # TODO: this probably doesn't belong here
        if not pid in storedWith:
            job.skip = True
        job_queue.append(job)

    sharemind_config = exampleutils.get_sharemind_config(pid, True)
    sm_peer = setup_peer(sharemind_config)
    dispatch_all(None, sm_peer, job_queue)
    if pid == 1:
        expected = ['', '1,30', '2,50', '3,30']
        exampleutils.check_res(expected, "/mnt/shared/aggopened.csv")
        print("Success")
Exemplo n.º 26
0
def setup(conf: dict):
    # GENERAL
    pid = int(conf["user_config"]["pid"])
    workflow_name = conf["user_config"]["workflow_name"]
    all_pids = conf["user_config"]['all_pids']
    use_leaky = conf["user_config"]["leaky_ops"]
    use_floats = conf["user_config"]["use_floats"]

    conclave_config = CodeGenConfig(workflow_name)

    # SPARK
    try:
        spark_avail = conf["backends"]["spark"]["available"]
        if spark_avail:
            spark_master_url = conf["backends"]["spark"]["master_url"]
            spark_config = SparkConfig(spark_master_url)
            conclave_config.with_spark_config(spark_config)
    except KeyError:
        pass

    # OBLIV-C
    try:
        oc_avail = conf["backends"]["oblivc"]["available"]
        if oc_avail:
            oc_path = conf["backends"]["oblivc"]["oc_path"]
            ip_port = conf["backends"]["oblivc"]["ip_port"]
            oc_config = OblivcConfig(oc_path, ip_port)
            conclave_config.with_oc_config(oc_config)
    except KeyError:
        pass

    # JIFF
    try:
        jiff_avail = conf["backends"]["jiff"]["available"]
        if jiff_avail:
            jiff_path = conf["backends"]["jiff"]["jiff_path"]
            party_count = len(all_pids)
            server_ip = conf["backends"]["jiff"]["server_ip"]
            server_pid = conf["backends"]["jiff"]["server_pid"]
            server_port = conf["backends"]["jiff"]["server_port"]
            jiff_config = JiffConfig(jiff_path, party_count, server_ip,
                                     server_port, server_pid)
            conclave_config.with_jiff_config(jiff_config)
    except KeyError:
        pass

    # NET
    hosts = conf["net"]["parties"]
    net_config = NetworkConfig(hosts, pid)
    conclave_config.with_network_config(net_config)

    conclave_config.pid = pid
    conclave_config.all_pids = all_pids
    conclave_config.name = workflow_name
    conclave_config.use_leaky_ops = use_leaky
    conclave_config.use_floats = use_floats

    conclave_config.input_path = conf["user_config"]["paths"]["input_path"]
    conclave_config.code_path = conf["user_config"]["paths"]["input_path"]
    conclave_config.output_path = conf["user_config"]["paths"]["input_path"]

    return conclave_config
Exemplo n.º 27
0
    sec_4_result = sal.join(
        wghtd_p_final, wghtd_p_sum, 'sec_4_result',
        ['store_zip3', 'retailer_code', 'brand_code_bu', 'week_end'],
        ['store_zip3', 'retailer_code', 'brand_code_bu', 'week_end'])

    # TODO: filter out sec_4_result rows where 'store_zip3' cell is empty

    final = sal.project(sec_4_result, 'final', [
        'store_zip3', 'retailer_code', 'week_end', 'brand_code_bu',
        'brand_descr_bu', 'q', 'p'
    ])

    opened = sal.collect(final, 1)

    return set([concatenated_DFs, temp_UPC_brandBU_crspnd])


if __name__ == "__main__":

    dag = protocol()

    config = CodeGenConfig("nielsen-main")

    viz = viz.VizCodeGen(config, dag)
    viz.generate("main_workflow", "/tmp")

    cg = spark.SparkCodeGen(config, dag)
    cg.generate("main_workflow", "/tmp")

    print("Spark code generated in {}".format(config.code_path))