def protocol():
    """Build the regulator/companies join-and-aggregate workflow.

    Creates the "a_govreg" relation and two company relations, sends each
    through a pass-through projection, concatenates the company projections,
    joins the regulator projection to them on a == c, aggregates "d" by "b"
    with "sum" into "total", and passes the result to ``cc.collect`` with
    target 1.

    Returns:
        The set of the three root ``create`` nodes.
    """
    # regulator side
    regulator = cc.create(
        "a_govreg",
        [defCol(name, "INTEGER", [1]) for name in ("a", "b")],
        {1},
    )
    regulator_proj = cc.project(regulator, "govreg_dummy", ["a", "b"])

    # first company
    first_company = cc.create(
        "company0",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    first_company_proj = cc.project(first_company, "company0_dummy", ["c", "d"])

    # second company
    second_company = cc.create(
        "company1",
        [defCol("c", "INTEGER", [1], [3]), defCol("d", "INTEGER", [3])],
        {3},
    )
    second_company_proj = cc.project(second_company, "company1_dummy", ["c", "d"])

    all_companies = cc.concat([first_company_proj, second_company_proj], "companies")
    matched = cc.join(regulator_proj, all_companies, "joined", ["a"], ["c"])
    totals = cc.aggregate(matched, "actual", ["b"], "d", "sum", "total")
    cc.collect(totals, 1)

    return {regulator, first_company, second_company}
def protocol_local(suffix: str, pid: int):
    """Build the local medication/diagnosis workflow for one party.

    Columns are named by their stringified index: the medication relation
    uses "0".."7" and the diagnosis relation uses "8".."20". Both relations
    are projected to (pid, code, date), filtered against the shared-pids
    relation with ``use_not_in=True``, joined on the patient id, and then
    narrowed by three filters before a distinct count over the patient id.

    Args:
        suffix: Prefix/suffix used when naming the created relations.
        pid: Party id passed to ``defCol`` and used as the owner set ``{pid}``.

    Returns:
        The set containing the medication and diagnosis ``create`` nodes.
    """
    meds_pid, meds_code, meds_date = "0", "4", "7"
    diags_pid, diags_code, diags_date = "8", "16", "18"
    med_width = 8
    diag_width = 13

    med_schema = [defCol(str(idx), "INTEGER", pid) for idx in range(med_width)]
    medication = cc.create(suffix + "_medication", med_schema, {pid})

    diag_schema = [
        defCol(str(idx), "INTEGER", pid)
        for idx in range(med_width, med_width + diag_width)
    ]
    diagnosis = cc.create(suffix + "_diagnosis", diag_schema, {pid})

    shared_pids = cc.create(
        "a_{}_shared_pids".format(suffix),
        [defCol(meds_pid, "INTEGER", pid)],
        {pid},
    )

    # only keep relevant columns
    meds_slim = cc.project(medication, "medication_proj", [meds_pid, meds_code, meds_date])
    meds_local = cc.filter_by(meds_slim, "medication_mine", meds_pid, shared_pids, use_not_in=True)

    diags_slim = cc.project(diagnosis, "diagnosis_proj", [diags_pid, diags_code, diags_date])
    diags_local = cc.filter_by(diags_slim, "diagnosis_mine", diags_pid, shared_pids, use_not_in=True)

    matched = cc.join(meds_local, diags_local, "joined", [meds_pid], [diags_pid])

    # diagnosis date < medication date, then medication code 1, then diagnosis code 1
    ordered_cases = cc.cc_filter(matched, "cases", diags_date, "<", other_col_name=meds_date)
    on_aspirin = cc.cc_filter(ordered_cases, "aspirin", meds_code, "==", scalar=1)
    with_heart_diag = cc.cc_filter(on_aspirin, "heart_patients", diags_code, "==", scalar=1)

    cc.distinct_count(with_heart_diag, "actual_" + suffix, meds_pid)

    return {medication, diagnosis}
def protocol():
    """Build a two-party concat / project / aggregate workflow.

    Each party contributes a relation with integer columns "a" and "b".
    The relations are concatenated, projected twice onto ("a", "b"),
    aggregated over "b" grouped by "a" with "+" into "total_b", projected
    onto ("a", "total_b"), and passed to ``sal.collect`` with target 1.

    Returns:
        The set of root ``create`` nodes.
    """
    # define inputs
    first_schema = [defCol(name, "INTEGER", [1]) for name in ("a", "b")]
    in_1 = sal.create("in_1", first_schema, {1})

    second_schema = [defCol(name, "INTEGER", [2]) for name in ("a", "b")]
    in_2 = sal.create("in_2", second_schema, {2})

    # combine parties' inputs into one relation
    combined = sal.concat([in_1, in_2], "rel")

    # specify the workflow
    first_proj = sal.project(combined, "proj_a", ["a", "b"])
    second_proj = sal.project(first_proj, "proj_b", ["a", "b"])
    summed = sal.aggregate(second_proj, "agg", ["a"], "b", "+", "total_b")
    final_proj = sal.project(summed, "proj_c", ["a", "total_b"])
    sal.collect(final_proj, 1)

    # return root nodes
    return {in_1, in_2}
def protocol():
    """Build a three-party workflow using tuple-style column definitions.

    Relations inA and inB are each aggregated and projected, then joined on
    their first columns; the join result is concatenated after inC and
    passed to ``sal.collect`` with target 3.

    Returns:
        The set of the three root ``create`` nodes.
    """
    # define inputs
    inA = sal.create("inA", [("INTEGER", {1}), ("INTEGER", {1})], {1})
    inB = sal.create("inB", [("INTEGER", {2}), ("INTEGER", {2})], {2})
    inC = sal.create(
        "inC",
        [("INTEGER", {3}), ("INTEGER", {3}), ("INTEGER", {3})],
        {3},
    )

    # specify the workflow
    summed_a = sal.aggregate(inA, "aggA", "inA_0", "inA_1", "+")
    slim_a = sal.project(summed_a, "projA", ["aggA_0", "aggA_1"])

    summed_b = sal.aggregate(inB, "aggB", "inB_0", "inB_1", "+")
    slim_b = sal.project(summed_b, "projB", ["aggB_0", "aggB_1"])

    matched = sal.join(slim_a, slim_b, "joined", "projA_0", "projB_0")
    combined = sal.concat([inC, matched], "comb")
    sal.collect(combined, 3)

    # hand back the root nodes
    return {inA, inB, inC}
def _rewrite_hybrid_join(self, node: saldag.HybridJoin):
    """Replace a hybrid join node with an expanded sub-DAG of primitive ops.

    The expansion shuffles and persists both parents, projects and opens the
    key columns, indexes and joins the opened keys outside of MPC, closes
    the resulting index pairs, and finally performs an index join over the
    persisted relations. The children of ``node`` are re-parented onto the
    final index-join node.

    Args:
        node: The hybrid join node to expand. It is detached from both of
            its parents' children collections.

    NOTE(review): the key columns ("a" / "c"), the opening target (1) and
    the party set passed to ``_close`` ({1, 2}) are hard-coded here rather
    than derived from ``node`` -- confirm whether that is intentional.
    """
    # TODO: generated relation names are fixed strings; a previously unused
    # local (suffix = "rand") suggested a naming suffix was planned.

    # in left parent's children, replace self with the first primitive
    # operator of the expanded subdag
    shuffled_a = sal.shuffle(node.left_parent, "shuffled_a")
    shuffled_a.is_mpc = True
    node.left_parent.children.remove(node)

    # same for right parent
    shuffled_b = sal.shuffle(node.right_parent, "shuffled_b")
    shuffled_b.is_mpc = True
    node.right_parent.children.remove(node)

    persisted_b = sal._persist(shuffled_b, "persisted_b")
    persisted_b.is_mpc = True

    persisted_a = sal._persist(shuffled_a, "persisted_a")
    persisted_a.is_mpc = True

    # extract the key columns while still under MPC
    keys_a_closed = sal.project(shuffled_a, "keys_a_closed", ["a"])
    keys_a_closed.is_mpc = True

    keys_b_closed = sal.project(shuffled_b, "keys_b_closed", ["c"])
    keys_b_closed.is_mpc = True

    keys_a = sal._open(keys_a_closed, "keys_a", 1)
    keys_a.is_mpc = True

    keys_b = sal._open(keys_b_closed, "keys_b", 1)
    keys_b.is_mpc = True

    # everything from here up to the close is flagged as non-MPC
    indexed_a = sal.index(keys_a, "indexed_a", "index_a")
    indexed_a.is_mpc = False

    indexed_b = sal.index(keys_b, "indexed_b", "index_b")
    indexed_b.is_mpc = False

    joined_indices = sal.join(indexed_a, indexed_b, "joined_indices", ["a"], ["c"])
    joined_indices.is_mpc = False

    indices_only = sal.project(joined_indices, "indices_only", ["index_a", "index_b"])
    indices_only.is_mpc = False

    # TODO: update stored_with to use union of parent out_rel stored_with sets
    indices_closed = sal._close(indices_only, "indices_closed", {1, 2})
    indices_closed.is_mpc = True

    joined = sal._index_join(
        persisted_a, persisted_b, "joined", ["a"], ["c"], indices_closed)
    joined.is_mpc = True

    # replace self with leaf of expanded subdag in each child node
    for child in node.get_sorted_children():
        child.replace_parent(node, joined)

    # add former children to children of leaf
    joined.children = node.children
def hybrid_agg(in1):
    """Expand a hybrid aggregation over ``in1`` using a distinct-keys lookup.

    The input is shuffled and persisted, its "b" column is projected and
    opened to 1, and the opened keys are indexed and de-duplicated. The
    row-index/key-index pairs and the distinct keys are closed again and fed
    to ``sal.index_aggregate`` together with the persisted relation; the
    aggregate is finally opened to 1.

    Args:
        in1: Parent node of the expanded aggregation.
    """
    shuffled = sal.shuffle(in1, "shuffled")
    shuffled.out_rel.storedWith = {1, 2, 3}
    shuffled.isMPC = True

    persisted = sal._persist(shuffled, "persisted")
    persisted.out_rel.storedWith = {1, 2, 3}
    persisted.isMPC = True

    closed_keys = sal.project(shuffled, "keysclosed", ["b"])
    closed_keys.out_rel.storedWith = {1, 2, 3}
    closed_keys.isMPC = True

    open_keys = sal._open(closed_keys, "keys", 1)
    open_keys.isMPC = True

    row_indexed = sal.index(open_keys, "indexed", "rowIndex")
    row_indexed.isMPC = False
    row_indexed.out_rel.storedWith = {1}

    unique_keys = sal.distinct(open_keys, "distinctKeys", ["b"])
    unique_keys.isMPC = False
    unique_keys.out_rel.storedWith = {1}

    # TODO: hack to get keys stored
    # need to fix later!
    # The node is only needed for its effect on the DAG; nothing reads it.
    stored_unique_keys = sal.project(unique_keys, "distinctKeys", ["b"])
    stored_unique_keys.isMPC = False
    stored_unique_keys.out_rel.storedWith = {1}

    key_indexed = sal.index(unique_keys, "indexedDistinct", "keyIndex")
    key_indexed.isMPC = False
    key_indexed.out_rel.storedWith = {1}

    index_pairs = sal.join(row_indexed, key_indexed, "joinedindeces", ["b"], ["b"])
    index_pairs.isMPC = False
    index_pairs.out_rel.storedWith = {1}

    # TODO: could project row indeces away too
    lookup = sal.project(index_pairs, "indecesonly", ["rowIndex", "keyIndex"])
    lookup.isMPC = False
    lookup.out_rel.storedWith = {1}

    closed_unique = sal._close(unique_keys, "closedDistinct", {1, 2, 3})
    closed_unique.isMPC = True

    closed_lookup = sal._close(lookup, "closedLookup", {1, 2, 3})
    closed_lookup.isMPC = True

    agg = sal.index_aggregate(
        persisted, "agg", ["b"], "d", "+", "d", closed_lookup, closed_unique)
    agg.isMPC = True

    sal._open(agg, "aggopened", 1)
def protocol():
    """Build a two-party join followed by a grouped sum.

    "left" (columns a, b) and "right" (columns c, d) are each passed through
    a projection, joined on a == c, aggregated over "d" grouped by "b" with
    "sum" into "total", and passed to ``cc.collect`` with target 1.

    Returns:
        The set of the two root ``create`` nodes.
    """
    left = cc.create(
        "left",
        [defCol(name, "INTEGER", [1]) for name in ("a", "b")],
        {1},
    )
    left_proj = cc.project(left, "left_dummy", ["a", "b"])

    right = cc.create(
        "right",
        [defCol(name, "INTEGER", [2]) for name in ("c", "d")],
        {2},
    )
    right_proj = cc.project(right, "right_dummy", ["c", "d"])

    matched = cc.join(left_proj, right_proj, "joined", ["a"], ["c"])
    totals = cc.aggregate(matched, "actual", ["b"], "d", "sum", "total")
    cc.collect(totals, 1)

    return {left, right}
def protocol():
    """Build the three-party market-concentration (HHI) workflow.

    Three "yellowN" relations with columns (companyID, price) are
    concatenated and reduced to per-company revenue. Revenue is scaled,
    the companyID column is multiplied by 0 so that the next aggregation
    yields a single global total, and each company's share is computed,
    squared, and summed into "hhi". A final projection precedes the collect.

    Returns:
        The set of the three root ``create`` nodes.
    """
    inputs = []
    for party in (1, 2, 3):
        schema = [
            defCol("companyID", "INTEGER", [party]),
            defCol("price", "INTEGER", [party]),
        ]
        inputs.append(cc.create("yellow{}".format(party), schema, {party}))

    trips = cc.concat(inputs, "cab_data")
    relevant = cc.project(trips, "selected_input", ["companyID", "price"])

    # revenue per company, scaled down by 1000
    per_company = cc.aggregate(
        relevant, "local_rev", ["companyID"], "price", "sum", "local_rev")
    downscaled = cc.divide(
        per_company, "scaled_down", "local_rev", ["local_rev", 1000])

    # multiply companyID by 0 so every row carries the same key
    keyless = cc.multiply(
        downscaled, "first_val_blank", "companyID", ["companyID", 0])
    per_company_scaled = cc.multiply(
        keyless, "local_rev_scaled", "local_rev", ["local_rev", 100])
    overall = cc.aggregate(
        keyless, "total_rev", ["companyID"], "local_rev", "sum", "global_rev")

    with_total = cc.join(
        per_company_scaled, overall, "local_total_rev",
        ["companyID"], ["companyID"])
    share = cc.divide(
        with_total, "market_share", "local_rev", ["local_rev", "global_rev"])
    share_sq = cc.multiply(
        share, "market_share_squared", "local_rev",
        ["local_rev", "local_rev", 1])
    hhi = cc.aggregate(
        share_sq, "hhi", ["companyID"], "local_rev", "sum", "hhi")

    # dummy projection to force non-mpc subdag
    hhi_proj = cc.project(hhi, "hhi_only", ["companyID", "hhi"])
    cc.collect(hhi_proj, 1)

    # return root nodes
    return set(inputs)
def protocol():
    """Build a hand-annotated close / concat / shuffle DAG.

    Three inputs are created with their ``is_mpc`` flags and ``stored_with``
    sets assigned explicitly, closed to parties {1, 2, 3} and concatenated.
    The closed first input is additionally shuffled and opened to 1.

    Returns:
        A ``saldag.OpDag`` rooted at the three ``create`` nodes.
    """
    # define inputs
    in_1 = sal.create(
        "in_1",
        [defCol(name, "INTEGER", [1]) for name in ("a", "b")],
        {1},
    )
    in_1.is_mpc = False

    proj_a = sal.project(in_1, "proj_a", ["a", "b"])
    proj_a.is_mpc = False
    proj_a.out_rel.stored_with = {1}

    in_2 = sal.create(
        "in_2",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    in_2.is_mpc = False

    proj_b = sal.project(in_2, "proj_b", ["c", "d"])
    proj_b.is_mpc = False
    proj_b.out_rel.stored_with = {2}

    in_3 = sal.create(
        "beforeOthers",
        [defCol("c", "INTEGER", [1], [3]), defCol("d", "INTEGER", [3])],
        {1, 2, 3},
    )
    in_3.is_mpc = True

    # close every branch to all three parties
    closed_a = sal._close(proj_a, "cl_a", {1, 2, 3})
    closed_a.is_mpc = True
    closed_b = sal._close(proj_b, "cl_b", {1, 2, 3})
    closed_b.is_mpc = True
    closed_c = sal._close(in_3, "cl_c", {1, 2, 3})
    closed_c.is_mpc = True

    merged = sal.concat([closed_a, closed_b, closed_c], "a")
    merged.is_mpc = True
    merged.out_rel.stored_with = {1, 2, 3}

    shuffled = sal.shuffle(closed_a, "shuffled_a")
    shuffled.is_mpc = True
    sal._open(shuffled, "ssn_opened", 1)

    return saldag.OpDag({in_1, in_2, in_3})
def protocol():
    """Build a three-party concat / project / aggregate workflow.

    Each party supplies a relation "in_N" with integer columns "a" and "b".
    The relations are concatenated, projected onto ("a", "b"), aggregated
    over "b" grouped by "a" with "+" into "total_b", and passed to
    ``sal.collect`` with target 1.

    Returns:
        The set of the three root ``create`` nodes.
    """
    # define inputs
    inputs = []
    for party in (1, 2, 3):
        schema = [defCol(name, "INTEGER", [party]) for name in ("a", "b")]
        inputs.append(sal.create("in_{}".format(party), schema, {party}))

    # combine parties' inputs into one relation
    combined = sal.concat(inputs, "rel")
    slim = sal.project(combined, "proj", ["a", "b"])
    summed = sal.aggregate(slim, "agg", ["a"], "b", "+", "total_b")
    sal.collect(summed, 1)

    # return root nodes
    return set(inputs)
def proj():
    """Project the relation from ``setup()`` onto column "a" and open it to 1.

    Returns:
        The inputs returned by ``setup()``.
    """
    roots, relation = setup()
    only_a = sal.project(relation, "res", ["a"])
    sal._open(only_a, "opened", 1)
    return roots
def protocol():
    """Build a two-party workflow that filters rows where c == b.

    Each party supplies a relation with integer columns "a", "b", "c". The
    relations are concatenated, projected onto ("c", "b"), filtered by
    comparing column "c" with column "b" using "==", and passed to
    ``cc.collect`` with target 1.

    Returns:
        The set of the two root ``create`` nodes.
    """
    # define inputs
    column_names = ("a", "b", "c")

    in_1 = cc.create(
        "in_1", [defCol(name, "INTEGER", [1]) for name in column_names], {1})
    in_2 = cc.create(
        "in_2", [defCol(name, "INTEGER", [2]) for name in column_names], {2})

    # combine parties' inputs into one relation
    combined = cc.concat([in_1, in_2], "rel")
    slim = cc.project(combined, "projected", ["c", "b"])

    # specify the workflow
    matching = cc.cc_filter(slim, "filtered", "c", "==", other_col_name="b")
    cc.collect(matching, 1)

    # return root nodes
    return {in_1, in_2}
def protocol():
    """Build a three-party concat / project / sum workflow.

    Each party supplies a relation "in_N" with integer columns "a" and "b".
    The relations are concatenated, projected onto ("a", "b"), aggregated
    over "b" grouped by "a" with "sum" into "total_b", and passed to
    ``cc.collect`` with target 1.

    Returns:
        The set of the three root ``create`` nodes.
    """
    # define inputs
    inputs = []
    for party in (1, 2, 3):
        schema = [defCol(name, "INTEGER", [party]) for name in ("a", "b")]
        inputs.append(cc.create("in_{}".format(party), schema, {party}))

    # combine parties' inputs into one relation
    combined = cc.concat(inputs, "rel")
    slim = cc.project(combined, "proj", ["a", "b"])
    summed = cc.aggregate(slim, "agg", ["a"], "b", "sum", "total_b")
    cc.collect(summed, 1)

    # return root nodes
    return set(inputs)
def protocol():
    """Build a hand-annotated close / concat / shuffle DAG.

    Three inputs are created with their ``is_mpc`` flags and ``stored_with``
    sets assigned explicitly, closed to parties {1, 2, 3} and concatenated.
    The closed first input is additionally shuffled and opened to 1.

    Returns:
        A ``ccdag.OpDag`` rooted at the three ``create`` nodes.
    """
    # define inputs
    in_1 = cc.create(
        "in_1",
        [defCol(name, "INTEGER", [1]) for name in ("a", "b")],
        {1},
    )
    in_1.is_mpc = False

    proj_a = cc.project(in_1, "proj_a", ["a", "b"])
    proj_a.is_mpc = False
    proj_a.out_rel.stored_with = {1}

    in_2 = cc.create(
        "in_2",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    in_2.is_mpc = False

    proj_b = cc.project(in_2, "proj_b", ["c", "d"])
    proj_b.is_mpc = False
    proj_b.out_rel.stored_with = {2}

    in_3 = cc.create(
        "beforeOthers",
        [defCol("c", "INTEGER", [1], [3]), defCol("d", "INTEGER", [3])],
        {1, 2, 3},
    )
    in_3.is_mpc = True

    # close every branch to all three parties
    closed_a = cc._close(proj_a, "cl_a", {1, 2, 3})
    closed_a.is_mpc = True
    closed_b = cc._close(proj_b, "cl_b", {1, 2, 3})
    closed_b.is_mpc = True
    closed_c = cc._close(in_3, "cl_c", {1, 2, 3})
    closed_c.is_mpc = True

    merged = cc.concat([closed_a, closed_b, closed_c], "a")
    merged.is_mpc = True
    merged.out_rel.stored_with = {1, 2, 3}

    shuffled = cc.shuffle(closed_a, "shuffled_a")
    shuffled.is_mpc = True
    cc._open(shuffled, "ssn_opened", 1)

    return ccdag.OpDag({in_1, in_2, in_3})
def protocol():
    """Project the ``setup()`` relation onto its columns in reverse order.

    The projection is named "proja" and is opened to 1.

    Returns:
        The inputs returned by ``setup()``.
    """
    roots, relation = setup()

    # collect the column names, then flip their order
    reversed_names = [col.name for col in relation.out_rel.columns]
    reversed_names.reverse()

    flipped = sal.project(relation, "proja", reversed_names)
    sal._open(flipped, "opened", 1)
    return roots
def protocol():
    """Project the first ``setup()`` input onto ("a", "b") and collect it.

    Returns:
        A one-element set holding the first input node.
    """
    in_1 = setup()[0]
    slim = sal.project(in_1, "proj", ["a", "b"])
    sal.collect(slim, 1)
    return {in_1}
def protocol():
    """Project the first ``setup()`` input onto ("a", "b") and collect it.

    Returns:
        A one-element set holding the first input node.
    """
    in_1 = setup()[0]
    slim = cc.project(in_1, "proj", ["a", "b"])
    cc.collect(slim, 1)
    return {in_1}
def hybrid_agg(in1):
    """Expand a hybrid aggregation over ``in1`` using sorted keys and flags.

    The input is shuffled and persisted, its "b" column is projected and
    opened to 1, and the opened keys are indexed and sorted by "b". Neighbour
    comparison flags and the sorted (rowIndex, b) pairs are closed again and
    fed to ``sal.index_aggregate`` with the persisted relation; the aggregate
    is finally opened to 1.

    Args:
        in1: Parent node of the expanded aggregation.
    """
    shuffled = sal.shuffle(in1, "shuffled")
    shuffled.out_rel.stored_with = {1, 2, 3}
    shuffled.is_mpc = True

    persisted = sal._persist(shuffled, "persisted")
    persisted.out_rel.stored_with = {1, 2, 3}
    persisted.is_mpc = True

    closed_keys = sal.project(shuffled, "keys_closed", ["b"])
    closed_keys.out_rel.stored_with = {1, 2, 3}
    closed_keys.is_mpc = True

    open_keys = sal._open(closed_keys, "keys", 1)
    open_keys.is_mpc = True

    row_indexed = sal.index(open_keys, "indexed", "rowIndex")
    row_indexed.is_mpc = False
    row_indexed.out_rel.stored_with = {1}

    key_sorted = sal.sort_by(row_indexed, "sorted_by_key", "b")
    key_sorted.is_mpc = False
    key_sorted.out_rel.stored_with = {1}

    neighbour_flags = sal._comp_neighs(key_sorted, "eq_flags", "b")
    neighbour_flags.is_mpc = False
    neighbour_flags.out_rel.stored_with = {1}

    # TODO: should be a persist op
    key_sorted_stored = sal.project(
        key_sorted, "sorted_by_key_stored", ["rowIndex", "b"])
    key_sorted_stored.is_mpc = False
    key_sorted_stored.out_rel.stored_with = {1}

    closed_flags = sal._close(neighbour_flags, "closed_eq_flags", {1, 2, 3})
    closed_flags.is_mpc = True

    closed_sorted = sal._close(
        key_sorted_stored, "closed_sorted_by_key", {1, 2, 3})
    closed_sorted.is_mpc = True

    agg = sal.index_aggregate(
        persisted, "agg", ["b"], "d", "+", "d", closed_flags, closed_sorted)
    agg.is_mpc = True

    sal._open(agg, "ssnopened", 1)
def hybrid_agg(in1):
    """Expand a hybrid aggregation over ``in1`` using sorted keys and flags.

    The input is shuffled and persisted, its "b" column is projected and
    opened to 1, and the opened keys are indexed and sorted by "b". Neighbour
    comparison flags and the sorted (rowIndex, b) pairs are closed again and
    fed to ``sal.index_aggregate`` with the persisted relation; the aggregate
    is finally opened to 1.

    Args:
        in1: Parent node of the expanded aggregation.

    NOTE(review): this variant sets ``isMPC`` / ``storedWith`` (camelCase)
    on the nodes -- confirm these match the attribute names the operator
    and relation classes actually read.
    """
    shuffled = sal.shuffle(in1, "shuffled")
    shuffled.out_rel.storedWith = set([1, 2, 3])
    shuffled.isMPC = True

    persisted = sal._persist(shuffled, "persisted")
    persisted.out_rel.storedWith = set([1, 2, 3])
    persisted.isMPC = True

    closed_keys = sal.project(shuffled, "keysclosed", ["b"])
    closed_keys.out_rel.storedWith = set([1, 2, 3])
    closed_keys.isMPC = True

    open_keys = sal._open(closed_keys, "keys", 1)
    open_keys.isMPC = True

    row_indexed = sal.index(open_keys, "indexed", "rowIndex")
    row_indexed.isMPC = False
    row_indexed.out_rel.storedWith = set([1])

    key_sorted = sal.sort_by(row_indexed, "sortedByKey", "b")
    key_sorted.isMPC = False
    key_sorted.out_rel.storedWith = set([1])

    neighbour_flags = sal._comp_neighs(key_sorted, "eqFlags", "b")
    neighbour_flags.isMPC = False
    neighbour_flags.out_rel.storedWith = set([1])

    # TODO: should be a persist op
    key_sorted_stored = sal.project(
        key_sorted, "sortedByKeyStored", ["rowIndex", "b"])
    key_sorted_stored.isMPC = False
    key_sorted_stored.out_rel.storedWith = set([1])

    closed_flags = sal._close(neighbour_flags, "closedEqFlags", set([1, 2, 3]))
    closed_flags.isMPC = True

    closed_sorted = sal._close(
        key_sorted_stored, "closedSortedByKey", set([1, 2, 3]))
    closed_sorted.isMPC = True

    agg = sal.index_aggregate(
        persisted, "agg", ["b"], "d", "+", "d", closed_flags, closed_sorted)
    agg.isMPC = True

    sal._open(agg, "ssnopened", 1)
def protocol():
    """Build a two-party join on a == c and collect it with target 1.

    "in_a" (columns a, b) and "in_b" (columns c, d) are each passed through a
    projection before being joined.

    Returns:
        The set of the two root ``create`` nodes.
    """
    in_a = cc.create(
        "in_a",
        [defCol(name, "INTEGER", [1]) for name in ("a", "b")],
        {1},
    )
    left_side = cc.project(in_a, "proj_a", ["a", "b"])

    in_b = cc.create(
        "in_b",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    right_side = cc.project(in_b, "proj_b", ["c", "d"])

    matched = cc.join(left_side, right_side, "joined", ["a"], ["c"])
    cc.collect(matched, 1)

    return {in_a, in_b}
def protocol():
    """Build the three-party market-concentration (HHI) workflow.

    Three "greenN" relations with columns (companyID, price) are
    concatenated and reduced to per-company revenue. Revenue is scaled,
    the companyID column is multiplied by 0 so that the next aggregation
    yields a single global total, and each company's share is computed,
    squared, and summed into "hhi", which is collected with target 1.

    Returns:
        The set of the three root ``create`` nodes.
    """
    inputs = []
    for party in (1, 2, 3):
        schema = [
            defCol("companyID", "INTEGER", [party]),
            defCol("price", "INTEGER", [party]),
        ]
        inputs.append(sal.create("green{}".format(party), schema, {party}))

    trips = sal.concat(inputs, "cab_data")
    relevant = sal.project(trips, "selected_input", ["companyID", "price"])

    # revenue per company, scaled down by 1000
    per_company = sal.aggregate(
        relevant, "local_rev", ["companyID"], "price", "+", "local_rev")
    downscaled = sal.divide(
        per_company, "scaled_down", "local_rev", ["local_rev", 1000])

    # multiply companyID by 0 so every row carries the same key
    keyless = sal.multiply(
        downscaled, "first_val_blank", "companyID", ["companyID", 0])
    per_company_scaled = sal.multiply(
        keyless, "local_rev_scaled", "local_rev", ["local_rev", 100])
    overall = sal.aggregate(
        keyless, "total_rev", ["companyID"], "local_rev", "+", "global_rev")

    with_total = sal.join(
        per_company_scaled, overall, "local_total_rev",
        ["companyID"], ["companyID"])
    share = sal.divide(
        with_total, "market_share", "local_rev", ["local_rev", "global_rev"])
    share_sq = sal.multiply(
        share, "market_share_squared", "local_rev",
        ["local_rev", "local_rev", 1])
    hhi = sal.aggregate(
        share_sq, "hhi", ["companyID"], "local_rev", "+", "hhi")

    sal.collect(hhi, 1)

    # return root nodes
    return set(inputs)
def protocol():
    """Build a two-party join on a == c and collect it with target 1.

    "in_a" (columns a, b) and "in_b" (columns c, d) are each passed through a
    projection before being joined.

    Returns:
        The set of the two root ``create`` nodes.
    """
    in_a = sal.create(
        "in_a",
        [defCol(name, "INTEGER", [1]) for name in ("a", "b")],
        {1},
    )
    left_side = sal.project(in_a, "proj_a", ["a", "b"])

    in_b = sal.create(
        "in_b",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    right_side = sal.project(in_b, "proj_b", ["c", "d"])

    matched = sal.join(left_side, right_side, "joined", ["a"], ["c"])
    sal.collect(matched, 1)

    return {in_a, in_b}
def protocol():
    """Build a multiply / project / join / aggregate workflow on two inputs.

    The first ``setup()`` input gets column "a" computed by multiplying "b"
    and "c"; the second is projected onto ("a", "b"). Both are joined on
    (a, b), aggregated over "c" grouped by (a, b) with "sum" into "agg_1",
    and collected with target 1.

    Returns:
        The set of the two input nodes.
    """
    created = setup()
    in_1 = created[0]
    in_2 = created[1]

    product = sal.multiply(in_1, "mult", "a", ["b", "c"])
    slim = sal.project(in_2, "proj_2", ["a", "b"])
    matched = sal.join(product, slim, "join", ["a", "b"], ["a", "b"])
    summed = sal.aggregate(matched, "agg", ["a", "b"], "c", "sum", "agg_1")
    sal.collect(summed, 1)

    return {in_1, in_2}
def protocol():
    """Build the single-party "movement" weighted unit price workflow.

    Starting from the "movement" relation, the workflow derives a unit price,
    totals the units per (store, product, week), weights each row's unit
    price by its share of the total, sums those weights into "avg_unit_p",
    and collects the selected columns with target 1.

    Returns:
        A one-element set holding the root ``create`` node.
    """
    # (column name, column type) pairs of the input relation
    movement_schema = (
        ("store_code_uc", "STRING"),
        ("upc", "STRING"),
        ("week_end", "STRING"),
        ("units", "INTEGER"),
        ("prmult", "INTEGER"),
        ("price", "FLOAT"),
        ("retailer_code", "STRING"),
        ("store_zip3", "STRING"),
    )
    # grouping / join key: one (store, product, week) combination
    group_key = ('store_code_uc', 'upc', 'week_end')

    movement = sal.create(
        "movement",
        [defCol(name, kind, [1]) for name, kind in movement_schema],
        {1},
    )

    # divides 'price' by 'prmult' to compute unit price.
    with_unit_price = sal.divide(
        movement, "w_unit_p", 'unit_price', ['price', 'prmult'])

    # aggregate multiple entries for the same (store, product, week) combination
    units_per_key = sal.aggregate(
        with_unit_price, 'sum_units', list(group_key), 'units', '+', 'q')

    # add 'unit_price' to each row keyed by (store, product, week)
    with_totals = sal.join(
        with_unit_price, units_per_key, 'total_units',
        list(group_key), list(group_key))

    # computed weighted unit price (multiply aggregate units sold by their per-unit price)
    weighted = sal.multiply(
        with_totals, 'wghtd_total', 'wghtd_unit_p', ['units', 'unit_price'])

    # compute some kind of weighted per-unit price by dividing by 'q' (total units sold)
    weighted_per_unit = sal.divide(
        weighted, 'wghtd_total_final', 'wghtd_unit_p', ['wghtd_unit_p', 'q'])

    avg_per_key = sal.aggregate(
        weighted_per_unit, 'total_unit_wghts', list(group_key),
        'wghtd_unit_p', '+', 'avg_unit_p')

    # merge in avg_unit_p
    merged = sal.join(
        with_totals, avg_per_key, 'final_join',
        list(group_key), list(group_key))

    output = sal.project(merged, 'selected_cols', [
        'store_code_uc', 'upc', 'week_end', 'q', 'avg_unit_p',
        'retailer_code', 'store_zip3',
    ])

    sal.collect(output, 1)

    return {movement}
def protocol():
    """Build a divide / multiply / project / join / aggregate workflow.

    The first ``setup()`` input has "a" divided by "b" and is projected onto
    ("a", "b"); the second has "a" multiplied by "b". Both are joined on
    (a, b), aggregated over "c" grouped by (a, b) with "sum" into "agg_1",
    and collected with target 1.

    Returns:
        The set of the two input nodes.
    """
    created = setup()
    in_1 = created[0]
    in_2 = created[1]

    quotient = cc.divide(in_1, "div", "a", ["a", "b"])
    product = cc.multiply(in_2, "mult", "a", ["a", "b"])
    slim = cc.project(quotient, "proj", ["a", "b"])
    matched = cc.join(slim, product, "join", ["a", "b"], ["a", "b"])
    summed = cc.aggregate(matched, "agg", ["a", "b"], "c", "sum", "agg_1")
    cc.collect(summed, 1)

    return {in_1, in_2}
def protocol():
    """Build a two-party join named "actual" and collect it with target 1.

    "left" (columns a, b) and "right" (columns c, d) are each passed through
    a projection and then joined on a == c.

    Returns:
        The set of the two root ``create`` nodes.
    """
    # define inputs
    left = cc.create(
        "left",
        [defCol(name, "INTEGER", [1]) for name in ("a", "b")],
        {1},
    )
    left_proj = cc.project(left, "zzz_left_dummy", ["a", "b"])

    right = cc.create(
        "right",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    right_proj = cc.project(right, "right_dummy", ["c", "d"])

    matched = cc.join(left_proj, right_proj, "actual", ["a"], ["c"])
    cc.collect(matched, 1)

    # hand back the root nodes
    return {left, right}
def protocol():
    """Build a divide / multiply / project / join / aggregate workflow.

    The first ``setup()`` input has "a" divided by "b" and is projected onto
    ("a", "b"); the second has "a" multiplied by "b". Both are joined on
    (a, b), aggregated over "c" grouped by (a, b) with "sum" into "agg_1",
    and collected with target 1.

    Returns:
        The set of the two input nodes.
    """
    created = setup()
    in_1 = created[0]
    in_2 = created[1]

    quotient = sal.divide(in_1, "div", "a", ["a", "b"])
    product = sal.multiply(in_2, "mult", "a", ["a", "b"])
    slim = sal.project(quotient, "proj", ["a", "b"])
    matched = sal.join(slim, product, "join", ["a", "b"], ["a", "b"])
    summed = sal.aggregate(matched, "agg", ["a", "b"], "c", "sum", "agg_1")
    sal.collect(summed, 1)

    return {in_1, in_2}
def protocol():
    """Build the regulator/companies workflow with explicit close/open ops.

    The regulator relation and both company relations are closed to parties
    {1, 2, 3}. The closed company relations are concatenated and joined to
    the projected regulator relation on a == c; "d" is aggregated by "b"
    with "+" into "total" and the result is opened to 1.

    Returns:
        The set of the three root ``create`` nodes.
    """
    # define inputs
    regulator = sal.create(
        "govreg",
        [defCol(name, "INTEGER", [1]) for name in ("a", "b")],
        {1},
    )
    first_company = sal.create(
        "company0",
        [defCol(name, "INTEGER", [2]) for name in ("c", "d")],
        {2},
    )
    second_company = sal.create(
        "company1",
        [defCol(name, "INTEGER", [3]) for name in ("c", "d")],
        {3},
    )

    closed_regulator = sal._close(regulator, "cl1", {1, 2, 3})
    regulator_proj = sal.project(closed_regulator, "projA", ["a", "b"])

    closed_first = sal._close(first_company, "cl2", {1, 2, 3})
    closed_second = sal._close(second_company, "cl3", {1, 2, 3})
    companies = sal.concat([closed_first, closed_second], "right_rel")

    # NOTE(review): this projection is added to the DAG but the join below
    # consumes the concat directly -- confirm that is intended.
    sal.project(companies, "projB", ["c", "d"])

    matched = sal.join(regulator_proj, companies, "joined", ["a"], ["c"])
    totals = sal.aggregate(matched, "agg", ["b"], "d", "+", "total")

    sal._open(totals, "opened", 1)

    return {regulator, first_company, second_company}
def protocol():
    """Build the three-party HHI workflow with explicit close/open ops.

    Three "inN" relations with columns (companyID, price) are closed to
    parties {1, 2, 3}, concatenated, and reduced to per-company revenue.
    Revenue is scaled, the companyID column is multiplied by 0 so that the
    next aggregation yields a single global total, and each company's share
    is computed, squared, and summed into "hhi", which is opened to 1.

    Returns:
        The set of the three root ``create`` nodes.
    """
    # define inputs
    inputs = []
    for party in (1, 2, 3):
        schema = [
            defCol("companyID", "INTEGER", [party]),
            defCol("price", "INTEGER", [party]),
        ]
        inputs.append(sal.create("in{}".format(party), schema, {party}))

    closed_inputs = [
        sal._close(relation, "cl{}".format(position), {1, 2, 3})
        for position, relation in enumerate(inputs, start=1)
    ]

    trips = sal.concat(closed_inputs, "cab_data")
    relevant = sal.project(trips, "selected_input", ["companyID", "price"])

    # revenue per company, scaled down by 1000
    per_company = sal.aggregate(
        relevant, "local_rev", ["companyID"], "price", "+", "local_rev")
    downscaled = sal.divide(
        per_company, "scaled_down", "local_rev", ["local_rev", 1000])

    # multiply companyID by 0 so every row carries the same key
    keyless = sal.multiply(
        downscaled, "first_val_blank", "companyID", ["companyID", 0])
    per_company_scaled = sal.multiply(
        keyless, "local_rev_scaled", "local_rev", ["local_rev", 100])
    overall = sal.aggregate(
        keyless, "total_rev", ["companyID"], "local_rev", "+", "global_rev")

    with_total = sal.join(
        per_company_scaled, overall, "local_total_rev",
        ["companyID"], ["companyID"])
    share = sal.divide(
        with_total, "market_share", "local_rev", ["local_rev", "global_rev"])
    share_sq = sal.multiply(
        share, "market_share_squared", "local_rev",
        ["local_rev", "local_rev", 1])
    hhi = sal.aggregate(
        share_sq, "hhi", ["companyID"], "local_rev", "+", "hhi")

    sal._open(hhi, "hhi_opened", 1)

    # return root nodes
    return set(inputs)
def protocol():
    """Build a two-party join named "res" and collect it with target 1.

    "in1" (columns a, b) and "in2" (columns c, d) are each passed through a
    projection and then joined on a == c.

    Returns:
        The set of the two root ``create`` nodes.
    """
    # define inputs
    in1 = sal.create(
        "in1",
        [defCol(name, "INTEGER", [1]) for name in ("a", "b")],
        {1},
    )
    left_side = sal.project(in1, "proj1", ["a", "b"])

    in2 = sal.create(
        "in2",
        [defCol("c", "INTEGER", [1], [2]), defCol("d", "INTEGER", [2])],
        {2},
    )
    right_side = sal.project(in2, "proj2", ["c", "d"])

    matched = sal.join(left_side, right_side, "res", ["a"], ["c"])

    # open result to party 1
    sal.collect(matched, 1)

    # return roots of dag
    return {in1, in2}