def protocol():
    """Build a join-then-sum workflow over one "govreg" and two company inputs.

    Creates the relations "a_govreg", "company0" and "company1", sends each
    through a projection that keeps all of its columns, concatenates the two
    company projections, joins the result with the "a_govreg" projection
    using key columns ["a"] and ["c"], sums "d" grouped by "b" into "total",
    and passes the aggregate to cc.collect with 1 as the second argument.

    Returns:
        The set of the three created input nodes.
    """
    regulator = cc.create(
        "a_govreg",
        [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
        {1},
    )
    regulator_proj = cc.project(regulator, "govreg_dummy", ["a", "b"])

    first_company = cc.create(
        "company0",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    first_company_proj = cc.project(
        first_company, "company0_dummy", ["c", "d"])

    second_company = cc.create(
        "company1",
        [defCol("c", "INTEGER", [1], [3]), defCol("d", "INTEGER", [3])],
        {3},
    )
    second_company_proj = cc.project(
        second_company, "company1_dummy", ["c", "d"])

    all_companies = cc.concat(
        [first_company_proj, second_company_proj], "companies")

    matched = cc.join(regulator_proj, all_companies, "joined", ["a"], ["c"])
    totals = cc.aggregate(matched, "actual", ["b"], "d", "sum", "total")
    cc.collect(totals, 1)

    return {regulator, first_company, second_company}
示例#2
0
文件: real.py 项目: yangzpag/conclave
def protocol_local(suffix: str, pid: int):
    """Build the workflow that counts distinct ids over medication/diagnosis rows.

    Creates "<suffix>_medication" (columns "0".."7"), "<suffix>_diagnosis"
    (columns "8".."20") and "a_<suffix>_shared_pids" (column "0"). Both
    tables are projected down to three columns, filtered against the
    shared-pids relation with use_not_in=True, and joined on columns "0" and
    "8". Three filters follow (column "18" < column "7", column "4" == 1,
    column "16" == 1) and the distinct values of column "0" are counted into
    "actual_<suffix>".

    Args:
        suffix: Text used to build the relation names.
        pid: Value passed to defCol and, wrapped in a set, to cc.create.

    Returns:
        The set holding the medication and diagnosis input nodes.
    """
    meds_width, diags_width = 8, 13

    meds_pid, meds_code, meds_date = "0", "4", "7"
    diags_pid, diags_code, diags_date = "8", "16", "18"

    # NOTE(review): pid is handed to defCol as a bare int here, whereas other
    # call sites in this file pass a list such as [1] -- confirm defCol
    # accepts both forms.
    meds_schema = [
        defCol(str(idx), "INTEGER", pid) for idx in range(meds_width)
    ]
    medication = cc.create(suffix + "_medication", meds_schema, {pid})
    # diagnosis column names continue numbering after the medication columns
    diags_schema = [
        defCol(str(idx), "INTEGER", pid)
        for idx in range(meds_width, meds_width + diags_width)
    ]
    diagnosis = cc.create(suffix + "_diagnosis", diags_schema, {pid})

    shared = cc.create(
        "a_{}_shared_pids".format(suffix),
        [defCol(meds_pid, "INTEGER", pid)],
        {pid},
    )

    # only keep relevant columns
    meds_slim = cc.project(
        medication, "medication_proj", [meds_pid, meds_code, meds_date])
    meds_local = cc.filter_by(
        meds_slim, "medication_mine", meds_pid, shared, use_not_in=True)

    diags_slim = cc.project(
        diagnosis, "diagnosis_proj", [diags_pid, diags_code, diags_date])
    diags_local = cc.filter_by(
        diags_slim, "diagnosis_mine", diags_pid, shared, use_not_in=True)

    matched = cc.join(
        meds_local, diags_local, "joined", [meds_pid], [diags_pid])

    diag_first = cc.cc_filter(
        matched, "cases", diags_date, "<", other_col_name=meds_date)
    on_aspirin = cc.cc_filter(
        diag_first, "aspirin", meds_code, "==", scalar=1)
    heart = cc.cc_filter(
        on_aspirin, "heart_patients", diags_code, "==", scalar=1)

    cc.distinct_count(heart, "actual_" + suffix, meds_pid)

    return {medication, diagnosis}
        def protocol():
            """Build a two-input concat / project / aggregate workflow.

            Creates "in_1" and "in_2" (columns "a" and "b"), concatenates
            them, applies two projections that keep both columns, sums "b"
            grouped by "a" into "total_b", projects once more and collects
            the result with 1 as the second argument.

            Returns:
                The set of the two input nodes.
            """
            # define inputs
            party_one = sal.create(
                "in_1",
                [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
                {1},
            )
            party_two = sal.create(
                "in_2",
                [defCol("a", "INTEGER", [2]), defCol("b", "INTEGER", [2])],
                {2},
            )

            # combine parties' inputs into one relation
            combined = sal.concat([party_one, party_two], "rel")

            # specify the workflow
            first_proj = sal.project(combined, "proj_a", ["a", "b"])
            second_proj = sal.project(first_proj, "proj_b", ["a", "b"])
            summed = sal.aggregate(
                second_proj, "agg", ["a"], "b", "+", "total_b")
            final_proj = sal.project(summed, "proj_c", ["a", "total_b"])

            sal.collect(final_proj, 1)

            # return root nodes
            return {party_one, party_two}
示例#4
0
    def protocol():
        """Build a workflow that joins two aggregated inputs and appends a third.

        Columns are declared as (type, set) tuples without names; the
        workflow refers to them by generated names such as "inA_0".
        "inA" and "inB" are each aggregated and projected, the projections
        are joined, the join is concatenated after "inC", and the result is
        collected with 3 as the second argument.

        Returns:
            The set of the three input nodes.
        """
        # define inputs
        source_a = sal.create(
            "inA", [("INTEGER", {1}), ("INTEGER", {1})], {1})

        source_b = sal.create(
            "inB", [("INTEGER", {2}), ("INTEGER", {2})], {2})

        source_c = sal.create(
            "inC",
            [("INTEGER", {3}), ("INTEGER", {3}), ("INTEGER", {3})],
            {3},
        )

        # specify the workflow
        summed_a = sal.aggregate(source_a, "aggA", "inA_0", "inA_1", "+")
        kept_a = sal.project(summed_a, "projA", ["aggA_0", "aggA_1"])

        summed_b = sal.aggregate(source_b, "aggB", "inB_0", "inB_1", "+")
        kept_b = sal.project(summed_b, "projB", ["aggB_0", "aggB_1"])

        matched = sal.join(kept_a, kept_b, "joined", "projA_0", "projB_0")
        combined = sal.concat([source_c, matched], "comb")
        sal.collect(combined, 3)

        # hand back the input nodes
        return {source_a, source_b, source_c}
示例#5
0
    def _rewrite_hybrid_join(self, node: saldag.HybridJoin):
        """Replace a HybridJoin node with an expanded sub-DAG of operators.

        Both parents of ``node`` are shuffled and persisted, their key
        columns are projected and opened, the opened keys are indexed and
        joined with ``is_mpc = False``, the resulting index pairs are closed
        again, and an index-join over the persisted relations becomes the
        new leaf. Finally every child of ``node`` is re-pointed at that leaf.

        NOTE(review): the key columns ("a" and "c"), the argument 1 passed
        to ``_open`` and the set {1, 2} passed to ``_close`` are hard-coded
        here rather than read from ``node`` -- confirm this is intended.

        Args:
            node: The hybrid join node to expand; its parents' child lists
                and its children's parent links are modified in place.
        """
        # TODO
        # NOTE(review): `suffix` is assigned but never read in this method.
        suffix = "rand"

        # in left parents' children, replace self with first primitive operator
        # in expanded subdag
        shuffled_a = sal.shuffle(node.left_parent, "shuffled_a")
        shuffled_a.is_mpc = True
        node.left_parent.children.remove(node)

        # same for right parent
        shuffled_b = sal.shuffle(node.right_parent, "shuffled_b")
        shuffled_b.is_mpc = True
        node.right_parent.children.remove(node)

        # persist both shuffled relations; they feed the final index join
        persisted_b = sal._persist(shuffled_b, "persisted_b")
        persisted_b.is_mpc = True
        persisted_a = sal._persist(shuffled_a, "persisted_a")
        persisted_a.is_mpc = True

        # project out only the key column of each shuffled relation
        keys_a_closed = sal.project(shuffled_a, "keys_a_closed", ["a"])
        keys_a_closed.is_mpc = True
        keys_b_closed = sal.project(shuffled_b, "keys_b_closed", ["c"])
        keys_b_closed.is_mpc = True

        # open the key columns (last argument 1)
        keys_a = sal._open(keys_a_closed, "keys_a", 1)
        keys_a.is_mpc = True
        keys_b = sal._open(keys_b_closed, "keys_b", 1)
        keys_b.is_mpc = True

        # from here until the close below, nodes are flagged is_mpc = False
        indexed_a = sal.index(keys_a, "indexed_a", "index_a")
        indexed_a.is_mpc = False

        indexed_b = sal.index(keys_b, "indexed_b", "index_b")
        indexed_b.is_mpc = False

        joined_indices = sal.join(indexed_a, indexed_b, "joined_indices",
                                  ["a"], ["c"])
        joined_indices.is_mpc = False

        # keep only the two index columns of the joined keys
        indices_only = sal.project(joined_indices, "indices_only",
                                   ["index_a", "index_b"])
        indices_only.is_mpc = False

        # TODO: update stored_with to use union of parent out_rel stored_with sets
        indices_closed = sal._close(indices_only, "indices_closed", set([1,
                                                                         2]))
        indices_closed.is_mpc = True

        # join the persisted relations using the closed index pairs
        joined = sal._index_join(persisted_a, persisted_b, "joined", ["a"],
                                 ["c"], indices_closed)
        joined.is_mpc = True

        # replace self with leaf of expanded subdag in each child node
        for child in node.get_sorted_children():
            child.replace_parent(node, joined)
        # add former children to children of leaf
        # (this assigns node's own children container, not a copy)
        joined.children = node.children
示例#6
0
    def hybrid_agg(in1):
        """Expand an aggregation over ``in1`` into a hand-built operator chain.

        Shuffles and persists the input, opens its "b" column, builds row
        and distinct-key indexes on nodes flagged isMPC = False, closes the
        distinct keys and the (rowIndex, keyIndex) lookup, and feeds both
        into an index_aggregate over the persisted relation before opening
        the result.

        NOTE(review): this block sets ``isMPC`` / ``storedWith`` whereas
        other blocks in this file set ``is_mpc`` / ``stored_with`` --
        confirm which attribute names the node classes actually read.

        Args:
            in1: The node to aggregate.
        """
        shuffled_rel = sal.shuffle(in1, "shuffled")
        shuffled_rel.out_rel.storedWith = {1, 2, 3}
        shuffled_rel.isMPC = True

        persisted_rel = sal._persist(shuffled_rel, "persisted")
        persisted_rel.out_rel.storedWith = {1, 2, 3}
        persisted_rel.isMPC = True

        closed_keys = sal.project(shuffled_rel, "keysclosed", ["b"])
        closed_keys.out_rel.storedWith = {1, 2, 3}
        closed_keys.isMPC = True

        open_keys = sal._open(closed_keys, "keys", 1)
        open_keys.isMPC = True

        row_indexed = sal.index(open_keys, "indexed", "rowIndex")
        row_indexed.isMPC = False
        row_indexed.out_rel.storedWith = {1}

        unique_keys = sal.distinct(open_keys, "distinctKeys", ["b"])
        unique_keys.isMPC = False
        unique_keys.out_rel.storedWith = {1}

        # TODO: hack to get keys stored
        # need to fix later!
        stored_unique_keys = sal.project(unique_keys, "distinctKeys", ["b"])
        stored_unique_keys.isMPC = False
        stored_unique_keys.out_rel.storedWith = {1}

        key_indexed = sal.index(unique_keys, "indexedDistinct", "keyIndex")
        key_indexed.isMPC = False
        key_indexed.out_rel.storedWith = {1}

        index_pairs = sal.join(
            row_indexed, key_indexed, "joinedindeces", ["b"], ["b"])
        index_pairs.isMPC = False
        index_pairs.out_rel.storedWith = {1}

        # TODO: could project row indeces away too
        lookup = sal.project(
            index_pairs, "indecesonly", ["rowIndex", "keyIndex"])
        lookup.isMPC = False
        lookup.out_rel.storedWith = {1}

        closed_unique = sal._close(unique_keys, "closedDistinct", {1, 2, 3})
        closed_unique.isMPC = True
        closed_lookup = sal._close(lookup, "closedLookup", {1, 2, 3})
        closed_lookup.isMPC = True

        aggregated = sal.index_aggregate(
            persisted_rel, "agg", ["b"], "d", "+", "d",
            closed_lookup, closed_unique)
        aggregated.isMPC = True
        sal._open(aggregated, "aggopened", 1)
示例#7
0
文件: real.py 项目: yangzpag/conclave
def protocol():
    """Build a two-input join followed by a grouped sum.

    Creates "left" (columns "a", "b") and "right" (columns "c", "d"),
    projects each onto all of its columns, joins them using key columns
    ["a"] and ["c"], sums "d" grouped by "b" into "total" and collects the
    aggregate with 1 as the second argument.

    Returns:
        The set of the two input nodes.
    """
    left = cc.create(
        "left",
        [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
        {1},
    )
    left_proj = cc.project(left, "left_dummy", ["a", "b"])

    right = cc.create(
        "right",
        [defCol("c", "INTEGER", [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    right_proj = cc.project(right, "right_dummy", ["c", "d"])

    matched = cc.join(left_proj, right_proj, "joined", ["a"], ["c"])
    totals = cc.aggregate(matched, "actual", ["b"], "d", "sum", "total")
    cc.collect(totals, 1)

    return {left, right}
示例#8
0
        def protocol():
            """Build the market-share-squared ("hhi") workflow over three inputs.

            Creates "yellow1".."yellow3" (columns "companyID", "price"),
            concatenates them, sums "price" per "companyID", rescales the
            sums, computes a total, divides each scaled sum by that total,
            squares the quotient and sums the squares into "hhi". A final
            projection precedes the collect call.

            Returns:
                The set of the three input nodes.
            """
            sources = []
            for rel_name, party in (("yellow1", 1), ("yellow2", 2),
                                    ("yellow3", 3)):
                schema = [
                    defCol("companyID", "INTEGER", [party]),
                    defCol("price", "INTEGER", [party]),
                ]
                sources.append(cc.create(rel_name, schema, {party}))

            merged = cc.concat(list(sources), "cab_data")

            picked = cc.project(
                merged, "selected_input", ["companyID", "price"])
            revenue = cc.aggregate(
                picked, "local_rev", ["companyID"], "price", "sum",
                "local_rev")
            revenue_small = cc.divide(
                revenue, "scaled_down", "local_rev", ["local_rev", 1000])
            # multiplying by 0 overwrites every companyID with 0
            zero_keyed = cc.multiply(
                revenue_small, "first_val_blank", "companyID",
                ["companyID", 0])
            revenue_scaled = cc.multiply(
                zero_keyed, "local_rev_scaled", "local_rev",
                ["local_rev", 100])
            revenue_total = cc.aggregate(
                zero_keyed, "total_rev", ["companyID"], "local_rev", "sum",
                "global_rev")
            with_total = cc.join(
                revenue_scaled, revenue_total, "local_total_rev",
                ["companyID"], ["companyID"])
            share = cc.divide(
                with_total, "market_share", "local_rev",
                ["local_rev", "global_rev"])
            share_squared = cc.multiply(
                share, "market_share_squared", "local_rev",
                ["local_rev", "local_rev", 1])
            hhi = cc.aggregate(
                share_squared, "hhi", ["companyID"], "local_rev", "sum",
                "hhi")
            # dummy projection to force non-mpc subdag
            hhi_proj = cc.project(hhi, "hhi_only", ["companyID", "hhi"])

            cc.collect(hhi_proj, 1)

            # return root nodes
            return set(sources)
示例#9
0
        def protocol():
            """Build a DAG with hand-set is_mpc flags and an explicit shuffle.

            Creates "in_1", "in_2" and "beforeOthers", projects the first
            two, closes all three to {1, 2, 3}, concatenates the closed
            relations, and separately shuffles the closed "in_1" branch and
            opens it.

            Returns:
                A saldag.OpDag built from the set of the three input nodes.
            """
            # define inputs
            first_in = sal.create(
                "in_1",
                [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
                {1},
            )
            first_in.is_mpc = False

            first_proj = sal.project(first_in, "proj_a", ["a", "b"])
            first_proj.is_mpc = False
            first_proj.out_rel.stored_with = {1}

            second_in = sal.create(
                "in_2",
                [defCol("c", "INTEGER", [1], [2]),
                 defCol("d", "INTEGER", [2])],
                {2},
            )
            second_in.is_mpc = False

            second_proj = sal.project(second_in, "proj_b", ["c", "d"])
            second_proj.is_mpc = False
            second_proj.out_rel.stored_with = {2}

            third_in = sal.create(
                "beforeOthers",
                [defCol("c", "INTEGER", [1], [3]),
                 defCol("d", "INTEGER", [3])],
                {1, 2, 3},
            )
            third_in.is_mpc = True

            closed_first = sal._close(first_proj, "cl_a", {1, 2, 3})
            closed_first.is_mpc = True
            closed_second = sal._close(second_proj, "cl_b", {1, 2, 3})
            closed_second.is_mpc = True
            closed_third = sal._close(third_in, "cl_c", {1, 2, 3})
            closed_third.is_mpc = True

            # the concatenation is only added to the DAG; nothing below reads it
            merged_closed = sal.concat(
                [closed_first, closed_second, closed_third], "a")
            merged_closed.is_mpc = True
            merged_closed.out_rel.stored_with = {1, 2, 3}

            shuffled = sal.shuffle(closed_first, "shuffled_a")
            shuffled.is_mpc = True
            sal._open(shuffled, "ssn_opened", 1)

            return saldag.OpDag({first_in, second_in, third_in})
示例#10
0
        def protocol():
            """Build a three-input concat / project / aggregate workflow.

            Creates "in_1", "in_2" and "in_3" (columns "a", "b"),
            concatenates them, projects both columns, sums "b" grouped by
            "a" into "total_b" and collects the result with 1 as the second
            argument.

            Returns:
                The set of the three input nodes.
            """
            # define inputs
            sources = []
            for rel_name, party in (("in_1", 1), ("in_2", 2), ("in_3", 3)):
                schema = [
                    defCol("a", "INTEGER", [party]),
                    defCol("b", "INTEGER", [party]),
                ]
                sources.append(sal.create(rel_name, schema, {party}))

            # combine parties' inputs into one relation
            combined = sal.concat(list(sources), "rel")
            kept = sal.project(combined, "proj", ["a", "b"])
            summed = sal.aggregate(kept, "agg", ["a"], "b", "+", "total_b")

            sal.collect(summed, 1)

            # return root nodes
            return set(sources)
示例#11
0
def proj():
    """Project the relation from setup() onto column "a" and open it.

    Returns:
        The inputs object returned by setup(), unchanged.
    """
    roots, relation = setup()
    only_a = sal.project(relation, "res", ["a"])
    sal._open(only_a, "opened", 1)
    return roots
示例#12
0
        def protocol():
            """Build a workflow that filters rows by comparing two columns.

            Creates "in_1" and "in_2" (columns "a", "b", "c"), concatenates
            them, projects onto ["c", "b"], applies an "==" filter between
            column "c" and column "b", and collects the result with 1 as the
            second argument.

            Returns:
                The set of the two input nodes.
            """
            # define inputs
            sources = []
            for rel_name, party in (("in_1", 1), ("in_2", 2)):
                schema = [
                    defCol(col_name, "INTEGER", [party])
                    for col_name in ("a", "b", "c")
                ]
                sources.append(cc.create(rel_name, schema, {party}))

            # combine parties' inputs into one relation
            combined = cc.concat(list(sources), "rel")

            narrowed = cc.project(combined, "projected", ["c", "b"])

            # specify the workflow
            equal_rows = cc.cc_filter(
                narrowed, "filtered", "c", "==", other_col_name="b")

            cc.collect(equal_rows, 1)

            # return root nodes
            return set(sources)
示例#13
0
        def protocol():
            """Build a three-input concat / project / sum workflow.

            Creates "in_1", "in_2" and "in_3" (columns "a", "b"),
            concatenates them, projects both columns, sums "b" grouped by
            "a" into "total_b" and collects the result with 1 as the second
            argument.

            Returns:
                The set of the three input nodes.
            """
            # define inputs
            first = cc.create(
                "in_1",
                [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
                {1},
            )
            second = cc.create(
                "in_2",
                [defCol("a", "INTEGER", [2]), defCol("b", "INTEGER", [2])],
                {2},
            )
            third = cc.create(
                "in_3",
                [defCol("a", "INTEGER", [3]), defCol("b", "INTEGER", [3])],
                {3},
            )

            # combine parties' inputs into one relation
            combined = cc.concat([first, second, third], "rel")
            kept = cc.project(combined, "proj", ["a", "b"])
            summed = cc.aggregate(kept, "agg", ["a"], "b", "sum", "total_b")

            cc.collect(summed, 1)

            # return root nodes
            return {first, second, third}
示例#14
0
        def protocol():
            """Build a DAG with hand-set is_mpc flags and an explicit shuffle.

            Creates "in_1", "in_2" and "beforeOthers", projects the first
            two, closes all three to {1, 2, 3}, concatenates the closed
            relations, and separately shuffles the closed "in_1" branch and
            opens it.

            Returns:
                A ccdag.OpDag built from the set of the three input nodes.
            """
            # define inputs
            first_in = cc.create(
                "in_1",
                [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
                {1},
            )
            first_in.is_mpc = False

            first_proj = cc.project(first_in, "proj_a", ["a", "b"])
            first_proj.is_mpc = False
            first_proj.out_rel.stored_with = {1}

            second_in = cc.create(
                "in_2",
                [defCol("c", "INTEGER", [1], [2]),
                 defCol("d", "INTEGER", [2])],
                {2},
            )
            second_in.is_mpc = False

            second_proj = cc.project(second_in, "proj_b", ["c", "d"])
            second_proj.is_mpc = False
            second_proj.out_rel.stored_with = {2}

            third_in = cc.create(
                "beforeOthers",
                [defCol("c", "INTEGER", [1], [3]),
                 defCol("d", "INTEGER", [3])],
                {1, 2, 3},
            )
            third_in.is_mpc = True

            closed_first = cc._close(first_proj, "cl_a", {1, 2, 3})
            closed_first.is_mpc = True
            closed_second = cc._close(second_proj, "cl_b", {1, 2, 3})
            closed_second.is_mpc = True
            closed_third = cc._close(third_in, "cl_c", {1, 2, 3})
            closed_third.is_mpc = True

            # the concatenation is only added to the DAG; nothing below reads it
            merged_closed = cc.concat(
                [closed_first, closed_second, closed_third], "a")
            merged_closed.is_mpc = True
            merged_closed.out_rel.stored_with = {1, 2, 3}

            shuffled = cc.shuffle(closed_first, "shuffled_a")
            shuffled.is_mpc = True
            cc._open(shuffled, "ssn_opened", 1)

            return ccdag.OpDag({first_in, second_in, third_in})
示例#15
0
        def protocol():
            """Project the relation from setup() onto its columns in reverse order.

            The projection is then opened with 1 as the last argument.

            Returns:
                The inputs object returned by setup(), unchanged.
            """
            roots, relation = setup()
            # collect the column names, then flip their order
            names = [col.name for col in relation.out_rel.columns]
            names.reverse()
            flipped = sal.project(relation, "proja", names)

            sal._open(flipped, "opened", 1)

            return roots
示例#16
0
        def protocol():
            """Project the first input from setup() onto "a", "b" and collect it.

            Returns:
                A one-element set holding that first input node.
            """
            first_input = setup()[0]

            kept = sal.project(first_input, "proj", ["a", "b"])
            sal.collect(kept, 1)

            return {first_input}
示例#17
0
        def protocol():
            """Project the first input from setup() onto "a", "b" and collect it.

            Returns:
                A one-element set holding that first input node.
            """
            first_input = setup()[0]

            kept = cc.project(first_input, "proj", ["a", "b"])
            cc.collect(kept, 1)

            return {first_input}
示例#18
0
        def hybrid_agg(in1):
            """Expand an aggregation over ``in1`` into a hand-built operator chain.

            Shuffles and persists the input, opens its "b" column, then on
            nodes flagged is_mpc = False indexes the keys, sorts them by
            "b" and compares neighbouring rows. The comparison flags and the
            sorted (rowIndex, b) relation are closed and passed to an
            index_aggregate over the persisted relation, whose result is
            opened.

            Args:
                in1: The node to aggregate.
            """
            shuffled_rel = sal.shuffle(in1, "shuffled")
            shuffled_rel.out_rel.stored_with = {1, 2, 3}
            shuffled_rel.is_mpc = True

            persisted_rel = sal._persist(shuffled_rel, "persisted")
            persisted_rel.out_rel.stored_with = {1, 2, 3}
            persisted_rel.is_mpc = True

            closed_keys = sal.project(shuffled_rel, "keys_closed", ["b"])
            closed_keys.out_rel.stored_with = {1, 2, 3}
            closed_keys.is_mpc = True

            open_keys = sal._open(closed_keys, "keys", 1)
            open_keys.is_mpc = True

            row_indexed = sal.index(open_keys, "indexed", "rowIndex")
            row_indexed.is_mpc = False
            row_indexed.out_rel.stored_with = {1}

            key_sorted = sal.sort_by(row_indexed, "sorted_by_key", "b")
            key_sorted.is_mpc = False
            key_sorted.out_rel.stored_with = {1}

            neighbour_flags = sal._comp_neighs(key_sorted, "eq_flags", "b")
            neighbour_flags.is_mpc = False
            neighbour_flags.out_rel.stored_with = {1}

            # TODO: should be a persist op
            key_sorted_stored = sal.project(
                key_sorted, "sorted_by_key_stored", ["rowIndex", "b"])
            key_sorted_stored.is_mpc = False
            key_sorted_stored.out_rel.stored_with = {1}

            closed_flags = sal._close(
                neighbour_flags, "closed_eq_flags", {1, 2, 3})
            closed_flags.is_mpc = True
            closed_sorted = sal._close(
                key_sorted_stored, "closed_sorted_by_key", {1, 2, 3})
            closed_sorted.is_mpc = True

            aggregated = sal.index_aggregate(
                persisted_rel, "agg", ["b"], "d", "+", "d",
                closed_flags, closed_sorted)
            aggregated.is_mpc = True
            sal._open(aggregated, "ssnopened", 1)
示例#19
0
    def hybrid_agg(in1):
        """Expand an aggregation over ``in1`` into a hand-built operator chain.

        Shuffles and persists the input, opens its "b" column, then on nodes
        flagged isMPC = False indexes the keys, sorts them by "b" and
        compares neighbouring rows. The comparison flags and the sorted
        (rowIndex, b) relation are closed and passed to an index_aggregate
        over the persisted relation, whose result is opened.

        NOTE(review): this block sets ``isMPC`` / ``storedWith`` whereas
        other blocks in this file set ``is_mpc`` / ``stored_with`` --
        confirm which attribute names the node classes actually read.

        Args:
            in1: The node to aggregate.
        """
        shuffled_rel = sal.shuffle(in1, "shuffled")
        shuffled_rel.out_rel.storedWith = {1, 2, 3}
        shuffled_rel.isMPC = True

        persisted_rel = sal._persist(shuffled_rel, "persisted")
        persisted_rel.out_rel.storedWith = {1, 2, 3}
        persisted_rel.isMPC = True

        closed_keys = sal.project(shuffled_rel, "keysclosed", ["b"])
        closed_keys.out_rel.storedWith = {1, 2, 3}
        closed_keys.isMPC = True

        open_keys = sal._open(closed_keys, "keys", 1)
        open_keys.isMPC = True

        row_indexed = sal.index(open_keys, "indexed", "rowIndex")
        row_indexed.isMPC = False
        row_indexed.out_rel.storedWith = {1}

        key_sorted = sal.sort_by(row_indexed, "sortedByKey", "b")
        key_sorted.isMPC = False
        key_sorted.out_rel.storedWith = {1}

        neighbour_flags = sal._comp_neighs(key_sorted, "eqFlags", "b")
        neighbour_flags.isMPC = False
        neighbour_flags.out_rel.storedWith = {1}

        # TODO: should be a persist op
        key_sorted_stored = sal.project(
            key_sorted, "sortedByKeyStored", ["rowIndex", "b"])
        key_sorted_stored.isMPC = False
        key_sorted_stored.out_rel.storedWith = {1}

        closed_flags = sal._close(neighbour_flags, "closedEqFlags", {1, 2, 3})
        closed_flags.isMPC = True
        closed_sorted = sal._close(
            key_sorted_stored, "closedSortedByKey", {1, 2, 3})
        closed_sorted.isMPC = True

        aggregated = sal.index_aggregate(
            persisted_rel, "agg", ["b"], "d", "+", "d",
            closed_flags, closed_sorted)
        aggregated.isMPC = True
        sal._open(aggregated, "ssnopened", 1)
示例#20
0
        def protocol():
            """Build a two-input join workflow.

            Creates "in_a" (columns "a", "b") and "in_b" (columns "c",
            "d"), projects each onto all of its columns, joins the
            projections using key columns ["a"] and ["c"], and collects the
            join with 1 as the second argument.

            Returns:
                The set of the two input nodes.
            """
            left_in = cc.create(
                "in_a",
                [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
                {1},
            )
            left_proj = cc.project(left_in, "proj_a", ["a", "b"])

            right_in = cc.create(
                "in_b",
                [defCol("c", "INTEGER", [1], [2]),
                 defCol("d", "INTEGER", [2])],
                {2},
            )
            right_proj = cc.project(right_in, "proj_b", ["c", "d"])

            matched = cc.join(left_proj, right_proj, "joined", ["a"], ["c"])
            cc.collect(matched, 1)

            return {left_in, right_in}
示例#21
0
def protocol():
    """Build the market-share-squared ("hhi") workflow over three inputs.

    Creates "green1".."green3" (columns "companyID", "price"), concatenates
    them, sums "price" per "companyID", rescales the sums, computes a total,
    divides each scaled sum by that total, squares the quotient, sums the
    squares into "hhi" and collects the result with 1 as the second
    argument.

    Returns:
        The set of the three input nodes.
    """
    sources = []
    for rel_name, party in (("green1", 1), ("green2", 2), ("green3", 3)):
        schema = [
            defCol("companyID", "INTEGER", [party]),
            defCol("price", "INTEGER", [party]),
        ]
        sources.append(sal.create(rel_name, schema, {party}))

    merged = sal.concat(list(sources), "cab_data")

    picked = sal.project(merged, "selected_input", ["companyID", "price"])

    revenue = sal.aggregate(
        picked, "local_rev", ["companyID"], "price", "+", "local_rev")

    revenue_small = sal.divide(
        revenue, "scaled_down", "local_rev", ["local_rev", 1000])

    # multiplying by 0 overwrites every companyID with 0
    zero_keyed = sal.multiply(
        revenue_small, "first_val_blank", "companyID", ["companyID", 0])

    revenue_scaled = sal.multiply(
        zero_keyed, "local_rev_scaled", "local_rev", ["local_rev", 100])

    revenue_total = sal.aggregate(
        zero_keyed, "total_rev", ["companyID"], "local_rev", "+",
        "global_rev")

    with_total = sal.join(
        revenue_scaled, revenue_total, "local_total_rev",
        ["companyID"], ["companyID"])

    share = sal.divide(
        with_total, "market_share", "local_rev",
        ["local_rev", "global_rev"])

    share_squared = sal.multiply(
        share, "market_share_squared", "local_rev",
        ["local_rev", "local_rev", 1])

    hhi = sal.aggregate(
        share_squared, "hhi", ["companyID"], "local_rev", "+", "hhi")

    sal.collect(hhi, 1)

    # return root nodes
    return set(sources)
示例#22
0
        def protocol():
            """Build a two-input join workflow.

            Creates "in_a" (columns "a", "b") and "in_b" (columns "c",
            "d"), projects each onto all of its columns, joins the
            projections using key columns ["a"] and ["c"], and collects the
            join with 1 as the second argument.

            Returns:
                The set of the two input nodes.
            """
            left_in = sal.create(
                "in_a",
                [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
                {1},
            )
            left_proj = sal.project(left_in, "proj_a", ["a", "b"])

            right_in = sal.create(
                "in_b",
                [defCol("c", "INTEGER", [1], [2]),
                 defCol("d", "INTEGER", [2])],
                {2},
            )
            right_proj = sal.project(right_in, "proj_b", ["c", "d"])

            matched = sal.join(left_proj, right_proj, "joined", ["a"], ["c"])
            sal.collect(matched, 1)

            return {left_in, right_in}
示例#23
0
        def protocol():
            """Build a multiply / project / join / sum workflow on two inputs.

            Takes the first two entries returned by setup(): the first gets
            a multiply into column "a" with operands ["b", "c"], the second
            is projected onto ["a", "b"]. The results are joined on
            ["a", "b"], "c" is summed per ("a", "b") into "agg_1", and the
            aggregate is collected with 1 as the second argument.

            Returns:
                The set of the two input nodes used.
            """
            sources = setup()
            left_src = sources[0]
            right_src = sources[1]

            product = sal.multiply(left_src, "mult", "a", ["b", "c"])
            right_proj = sal.project(right_src, "proj_2", ["a", "b"])
            matched = sal.join(
                product, right_proj, "join", ["a", "b"], ["a", "b"])
            summed = sal.aggregate(
                matched, "agg", ["a", "b"], "c", "sum", "agg_1")
            sal.collect(summed, 1)

            return {left_src, right_src}
示例#24
0
def protocol():
    """Build the per-(store, product, week) unit-price workflow on "movement".

    Creates the single input "movement", derives 'unit_price' as
    'price' / 'prmult', sums 'units' per (store_code_uc, upc, week_end)
    into 'q', joins that back onto the priced rows, computes
    'units' * 'unit_price' / 'q' as 'wghtd_unit_p', sums it into
    'avg_unit_p', joins again, projects the output columns and collects the
    result with 1 as the second argument.

    Returns:
        A one-element set holding the "movement" input node.
    """
    group_key = ('store_code_uc', 'upc', 'week_end')

    schema = [
        defCol(col_name, col_type, [1])
        for col_name, col_type in (
            ("store_code_uc", "STRING"),
            ("upc", "STRING"),
            ("week_end", "STRING"),
            ("units", "INTEGER"),
            ("prmult", "INTEGER"),
            ("price", "FLOAT"),
            ("retailer_code", "STRING"),
            ("store_zip3", "STRING"),
        )
    ]
    movement = sal.create("movement", schema, {1})

    # divides 'price' by 'prmult' to compute unit price.
    priced = sal.divide(
        movement, "w_unit_p", 'unit_price', ['price', 'prmult'])

    # aggregate multiple entries for the same (store, product, week) combination
    units_sum = sal.aggregate(
        priced, 'sum_units', list(group_key), 'units', '+', 'q')

    # add 'unit_price' to each row keyed by (store, product, week)
    with_units = sal.join(
        priced, units_sum, 'total_units', list(group_key), list(group_key))

    # computed weighted unit price (multiply aggregate units sold by their per-unit price)
    weighted = sal.multiply(
        with_units, 'wghtd_total', 'wghtd_unit_p', ['units', 'unit_price'])

    # compute some kind of weighted per-unit price by dividing by 'q' (total units sold)
    weighted_final = sal.divide(
        weighted, 'wghtd_total_final', 'wghtd_unit_p', ['wghtd_unit_p', 'q'])

    weight_totals = sal.aggregate(
        weighted_final, 'total_unit_wghts', list(group_key),
        'wghtd_unit_p', '+', 'avg_unit_p')

    # merge in avg_unit_p
    merged = sal.join(
        with_units, weight_totals, 'final_join',
        list(group_key), list(group_key))

    output_cols = [
        'store_code_uc', 'upc', 'week_end', 'q', 'avg_unit_p',
        'retailer_code', 'store_zip3',
    ]
    chosen = sal.project(merged, 'selected_cols', output_cols)

    sal.collect(chosen, 1)

    return {movement}
示例#25
0
        def protocol():
            """Build a divide / multiply / join / sum workflow on two inputs.

            Takes the first two entries returned by setup(): the first is
            divided (column "a", operands ["a", "b"]) and projected onto
            ["a", "b"], the second is multiplied the same way. The results
            are joined on ["a", "b"], "c" is summed per ("a", "b") into
            "agg_1", and the aggregate is collected with 1 as the second
            argument.

            Returns:
                The set of the two input nodes used.
            """
            sources = setup()
            left_src = sources[0]
            right_src = sources[1]

            quotient = cc.divide(left_src, "div", "a", ["a", "b"])
            product = cc.multiply(right_src, "mult", "a", ["a", "b"])
            left_proj = cc.project(quotient, "proj", ["a", "b"])
            matched = cc.join(
                left_proj, product, "join", ["a", "b"], ["a", "b"])
            summed = cc.aggregate(
                matched, "agg", ["a", "b"], "c", "sum", "agg_1")
            cc.collect(summed, 1)

            return {left_src, right_src}
示例#26
0
def protocol():
    """Build a two-input join workflow whose result relation is "actual".

    Creates "left" (columns "a", "b") and "right" (columns "c", "d"),
    projects each onto all of its columns, joins the projections using key
    columns ["a"] and ["c"], and collects the join with 1 as the second
    argument.

    Returns:
        The set of the two input nodes.
    """
    # define inputs
    left_in = cc.create(
        "left",
        [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
        {1},
    )
    left_proj = cc.project(left_in, "zzz_left_dummy", ["a", "b"])

    right_in = cc.create(
        "right",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    right_proj = cc.project(right_in, "right_dummy", ["c", "d"])

    matched = cc.join(left_proj, right_proj, "actual", ["a"], ["c"])

    cc.collect(matched, 1)
    # hand back the input nodes
    return {left_in, right_in}
示例#27
0
        def protocol():
            """Build a divide / multiply / join / sum workflow on two inputs.

            Takes the first two entries returned by setup(): the first is
            divided (column "a", operands ["a", "b"]) and projected onto
            ["a", "b"], the second is multiplied the same way. The results
            are joined on ["a", "b"], "c" is summed per ("a", "b") into
            "agg_1", and the aggregate is collected with 1 as the second
            argument.

            Returns:
                The set of the two input nodes used.
            """
            sources = setup()
            left_src = sources[0]
            right_src = sources[1]

            quotient = sal.divide(left_src, "div", "a", ["a", "b"])
            product = sal.multiply(right_src, "mult", "a", ["a", "b"])
            left_proj = sal.project(quotient, "proj", ["a", "b"])
            matched = sal.join(
                left_proj, product, "join", ["a", "b"], ["a", "b"])
            summed = sal.aggregate(
                matched, "agg", ["a", "b"], "c", "sum", "agg_1")
            sal.collect(summed, 1)

            return {left_src, right_src}
示例#28
0
def protocol():
    """Build a join-then-sum workflow with explicit close and open steps.

    Creates "govreg", "company0" and "company1", closes each to {1, 2, 3},
    projects the closed "govreg", concatenates the two closed company
    relations, joins using key columns ["a"] and ["c"], sums "d" grouped by
    "b" into "total" and opens the aggregate with 1 as the last argument.

    Returns:
        The set of the three input nodes.
    """
    # define inputs
    regulator = sal.create(
        "govreg",
        [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
        {1},
    )
    first_company = sal.create(
        "company0",
        [defCol("c", "INTEGER", [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    second_company = sal.create(
        "company1",
        [defCol("c", "INTEGER", [3]), defCol("d", "INTEGER", [3])],
        {3},
    )

    closed_regulator = sal._close(regulator, "cl1", {1, 2, 3})
    left_side = sal.project(closed_regulator, "projA", ["a", "b"])
    closed_first = sal._close(first_company, "cl2", {1, 2, 3})
    closed_second = sal._close(second_company, "cl3", {1, 2, 3})
    right_side = sal.concat([closed_first, closed_second], "right_rel")
    # NOTE(review): "projB" is added to the DAG but the join below consumes
    # right_side directly, unlike the left side where the projection is
    # used -- confirm whether the join was meant to take this projection.
    sal.project(right_side, "projB", ["c", "d"])

    matched = sal.join(left_side, right_side, "joined", ["a"], ["c"])
    totals = sal.aggregate(matched, "agg", ["b"], "d", "+", "total")

    sal._open(totals, "opened", 1)
    return {regulator, first_company, second_company}
示例#29
0
def protocol():
    """Build the market-share-squared ("hhi") workflow with explicit close/open.

    Creates "in1".."in3" (columns "companyID", "price"), closes each to
    {1, 2, 3}, concatenates the closed relations, sums "price" per
    "companyID", rescales the sums, computes a total, divides each scaled
    sum by that total, squares the quotient, sums the squares into "hhi"
    and opens the result with 1 as the last argument.

    Returns:
        The set of the three input nodes.
    """
    # define inputs
    sources = []
    for rel_name, party in (("in1", 1), ("in2", 2), ("in3", 3)):
        schema = [
            defCol("companyID", "INTEGER", [party]),
            defCol("price", "INTEGER", [party]),
        ]
        sources.append(sal.create(rel_name, schema, {party}))

    # all inputs are created before any of them is closed
    closed = [
        sal._close(node, closed_name, {1, 2, 3})
        for node, closed_name in zip(sources, ("cl1", "cl2", "cl3"))
    ]
    merged = sal.concat(closed, "cab_data")

    picked = sal.project(merged, "selected_input", ["companyID", "price"])
    revenue = sal.aggregate(
        picked, "local_rev", ["companyID"], "price", "+", "local_rev")
    revenue_small = sal.divide(
        revenue, "scaled_down", "local_rev", ["local_rev", 1000])
    # multiplying by 0 overwrites every companyID with 0
    zero_keyed = sal.multiply(
        revenue_small, "first_val_blank", "companyID", ["companyID", 0])
    revenue_scaled = sal.multiply(
        zero_keyed, "local_rev_scaled", "local_rev", ["local_rev", 100])
    revenue_total = sal.aggregate(
        zero_keyed, "total_rev", ["companyID"], "local_rev", "+",
        "global_rev")
    with_total = sal.join(
        revenue_scaled, revenue_total, "local_total_rev",
        ["companyID"], ["companyID"])
    share = sal.divide(
        with_total, "market_share", "local_rev",
        ["local_rev", "global_rev"])
    share_squared = sal.multiply(
        share, "market_share_squared", "local_rev",
        ["local_rev", "local_rev", 1])
    hhi = sal.aggregate(
        share_squared, "hhi", ["companyID"], "local_rev", "+", "hhi")

    sal._open(hhi, "hhi_opened", 1)

    # return root nodes
    return set(sources)
示例#30
0
    def protocol():
        """Build a two-input join workflow whose result relation is "res".

        Creates "in1" (columns "a", "b") and "in2" (columns "c", "d"),
        projects each onto all of its columns, joins the projections using
        key columns ["a"] and ["c"], and opens the result to party 1.

        Returns:
            The set of the two input nodes.
        """
        # define inputs
        left_in = sal.create(
            "in1",
            [defCol("a", "INTEGER", [1]), defCol("b", "INTEGER", [1])],
            {1},
        )
        left_proj = sal.project(left_in, "proj1", ["a", "b"])

        right_in = sal.create(
            "in2",
            [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
            {2},
        )
        right_proj = sal.project(right_in, "proj2", ["c", "d"])

        matched = sal.join(left_proj, right_proj, "res", ["a"], ["c"])

        # open result to party 1
        sal.collect(matched, 1)

        # return roots of dag
        return {left_in, right_in}