def ramyKnowledge01(params):
    """Ask the raw database for knowledge about one randomly chosen value.

    A raw column that is not the uid column and has at least two public
    values is drawn at random, one of its public values is drawn at random,
    and every raw-table row carrying that value is requested through
    ``askKnowledge``. The resulting scores are printed and handed to
    ``finishGdaAttack``.

    Args:
        params: Attack parameters, passed unchanged to ``gdaAttack`` and
            ``finishGdaAttack``.

    Returns:
        None.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)

    # Attack setup
    attack = gdaAttack(params)
    print("\nInfo >>> Attack Class created\n")
    table = attack.getAttackTableName()
    print("### Table ###")
    pp.pprint(table)
    rawColNames = attack.getColNames(dbType="rawDb")
    print("### Raw Cols ###")
    pp.pprint(rawColNames)
    anonColNames = attack.getColNames(dbType="anonDb")
    print("### Anon Cols ###")
    pp.pprint(anonColNames)

    # Set attack specific parameters.
    # The original loop re-drew a column only while it was the uid column
    # AND had fewer than two values, so either condition alone slipped
    # through. Walking a shuffled copy also guarantees termination when no
    # column qualifies.
    uidCol = attack.getUidColName()
    searchedCol = None
    publicValues = None
    for candidate in random.sample(list(rawColNames), len(rawColNames)):
        if candidate == uidCol:
            continue
        candidateValues = attack.getPublicColValues(candidate, table)
        if candidateValues and len(candidateValues) >= 2:
            searchedCol = candidate
            publicValues = candidateValues
            break
    if searchedCol is None:
        print("\nError >>> No column with at least two public values")
        attack.cleanUp()
        return None

    print(f"\nInfo >>> Randomly selected column : {searchedCol}\n")
    print("### Column Values ###")
    pp.pprint(publicValues)
    searchedItem = random.choice(publicValues)
    searchedValue = searchedItem[0]
    # Double embedded single quotes so a value such as O'Brien does not
    # terminate the SQL string literal early.
    quotedValue = str(searchedValue).replace("'", "''")
    condition = f"{searchedCol} = '{quotedValue}'"

    # Define SQL queries
    sql = "SELECT "
    sql += comma_ize(rawColNames, lastComma=False)
    sql += " FROM " + table
    sql += " WHERE " + condition
    query = {'sql': sql, 'db': "rawDb"}

    # Ask for knowledge
    print("\nInfo >>> Asking for knowledge ...\n")
    attack.askKnowledge(query)
    knowledge = attack.getKnowledge()
    pp.pprint(knowledge)

    # Compute and display score
    result = attack.getResults()
    gda_score = gdaScores(result)
    print("\nInfo >>> Score Class created\n")
    print("########## Knowledge learned ##########")
    score = gda_score.getScores()
    pp.pprint(score)

    # Wrap up
    attack.cleanUp()
    finishGdaAttack(params, score)
def launch_differential_attack(params, verbose, tab1, tab2, s, bins):
    """Run ``full_differential_attack`` and submit its outcome as one claim.

    The guess submitted to ``askClaim`` pairs every column in ``tab1`` with
    the value at the same index in ``tab2`` and adds column ``s`` with the
    value returned by ``full_differential_attack``.

    Args:
        params: Attack parameters, passed to ``gdaAttack`` and
            ``finishGdaAttack``.
        verbose: When truthy, the final score is pretty-printed.
        tab1: Column names used in the guess (one entry per known column).
        tab2: Values matching ``tab1`` index by index.
        s: Name of the column whose value is guessed.
        bins: Passed through to ``full_differential_attack``.

    Returns:
        None, both on success and when the attack yields no result.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)

    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created\n")

    # Information gathering
    table = attack.getAttackTableName()
    # The column lists are not read below; the calls are preserved as in
    # the original in case they have side effects on the attack object.
    attack.getColNames(dbType="rawDb")
    attack.getColNames(dbType="anonDb")
    print(f"\nInfo >>> Working on table : {table} ...")

    # Preparing attack parameters
    a_cols_star = tab1
    x_a_star = tab2

    # Executing attack
    attack_result = full_differential_attack(attack, a_cols_star, x_a_star,
                                             s, bins)
    if attack_result is None:
        print("Attribute Non Attackable (launch level)")
        # Release the attack object on this path too; the original
        # returned without cleaning up.
        attack.cleanUp()
        return None

    # Make the guess according to attack result
    print("\nInfo >>> Making claims ...\n")
    guess = [{'col': col, 'val': x_a_star[i]}
             for i, col in enumerate(a_cols_star)]
    guess.append({'col': s, 'val': attack_result})
    attack.askClaim({'guess': guess}, claim=True)
    while True:
        claim = attack.getClaim()
        if claim["stillToCome"] == 0:
            break

    # Compute and display score
    result = attack.getResults()
    gda_score = gdaScores(result)
    print("\nInfo >>> Score Class created\n")
    print("########## Attack score ##########")
    score = gda_score.getScores()
    if verbose:
        pp.pprint(score)

    # Wrap up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
def launch_greedy_cloning_attack(params, verbose):
    """Run ``full_cloning_attack`` and submit the resulting guess as a claim.

    Args:
        params: Attack parameters, passed to ``gdaAttack`` and
            ``finishGdaAttack``.
        verbose: When truthy, the final score is pretty-printed.

    Returns:
        None, both on success and when the attack yields no result.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)

    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created\n")

    # Information gathering
    table = attack.getAttackTableName()
    # The column lists are not read below; the calls are preserved as in
    # the original in case they have side effects on the attack object.
    attack.getColNames(dbType="rawDb")
    attack.getColNames(dbType="anonDb")
    print(f"\nInfo >>> Working on table : {table} ...")

    # Preparing attack parameters.
    # NOTE(review): these lists start empty and are read back after the
    # call, so full_cloning_attack presumably fills them in place — confirm.
    a_cols_star = []
    x_a_star = []
    delta = []
    s = "attribute"
    v = "value"

    # Executing attack
    attack_result = full_cloning_attack(attack, a_cols_star, x_a_star, delta,
                                        s, v)
    if attack_result is None:
        print("Attribute Non Attackable")
        # Release the attack object on this path too; the original
        # returned without cleaning up.
        attack.cleanUp()
        return None

    # Make the guess according to attack result
    print("\nInfo >>> Making claims ...\n")
    guess = [{'col': col, 'val': x_a_star[i]}
             for i, col in enumerate(a_cols_star)]
    guess.append({'col': s, 'val': v})
    attack.askClaim({'guess': guess}, claim=attack_result)
    while True:
        claim = attack.getClaim()
        if claim["stillToCome"] == 0:
            break

    # Compute and display score
    result = attack.getResults()
    gda_score = gdaScores(result)
    print("\nInfo >>> Score Class created\n")
    print("########## Attack score ##########")
    score = gda_score.getScores()
    if verbose:
        pp.pprint(score)

    # Wrap up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
def ramyAttack01(params):
    """Send one fixed attack query (``gender = 'Male'``) on the raw database.

    A column is drawn at random and reported, but the public values shown
    and the query condition are both hard-wired to the ``gender`` column.
    The reply and the resulting scores are printed and the scores are
    handed to ``finishGdaAttack``.

    Args:
        params: Attack parameters, passed to ``gdaAttack`` and
            ``finishGdaAttack``.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)

    # Attack setup: create the attack and dump table / column information
    attack = gdaAttack(params)
    print("\nInfo >>> Attack Class created\n")
    table = attack.getAttackTableName()
    print("### Table ###")
    pp.pprint(table)
    raw_columns = attack.getColNames(dbType="rawDb")
    print("### Raw Cols ###")
    pp.pprint(raw_columns)
    anon_columns = attack.getColNames(dbType="anonDb")
    print("### Anon Cols ###")
    pp.pprint(anon_columns)

    # Attack specific parameters
    picked_column = random.choice(raw_columns)
    gender_values = attack.getPublicColValues("gender", table)
    print(f"\nInfo >>> Randomly selected column : {picked_column}\n")
    print("### Column Values ###")
    pp.pprint(gender_values)
    where_clause = "gender = 'Male'"

    # Assemble the SQL query in one expression
    statement = ("SELECT " + comma_ize(raw_columns, lastComma=False)
                 + " FROM " + table
                 + " WHERE " + where_clause)
    request = {'sql': statement, 'db': "rawDb"}

    # Executing attack
    print("\nInfo >>> Launching attack ...\n")
    attack.askAttack(request)
    pp.pprint(attack.getAttack())

    # Compute and display score
    scorer = gdaScores(attack.getResults())
    print("\nInfo >>> Score Class created\n")
    print("########## Attack reply ##########")
    score = scorer.getScores()
    pp.pprint(score)

    # Wrap up
    attack.cleanUp()
    finishGdaAttack(params, score)
def diffix_noise(params, verbose):
    """Run one fixed explore query on the raw database and print the answer.

    The query computes ``avg(duration)`` and ``count(*)`` over the rows of
    the attack table where ``gender='Male'``. Scores are computed and passed
    to ``finishGdaAttack`` but are not printed.

    Args:
        params: Attack parameters, passed to ``gdaAttack`` and
            ``finishGdaAttack``.
        verbose: Accepted for signature compatibility; not read here.

    Returns:
        None.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)

    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created\n")

    # Target table information (column lists are fetched but not read)
    table = attack.getAttackTableName()
    attack.getColNames(dbType="rawDb")
    attack.getColNames(dbType="anonDb")

    # Query
    statement = f"SELECT avg(duration), count(*) FROM {table} WHERE gender='Male'"
    request = {'db': "rawDb", 'sql': statement}
    print("########## Query Info ##########")
    print(f"Table : {table} > {request['db']}")
    print(f"Query : {statement}")

    # Attack: keep reading replies until none are outstanding
    print("\nInfo >>> Launching attack ...")
    attack.askExplore(request)
    outstanding = None
    while outstanding != 0:
        reply = attack.getExplore()
        outstanding = reply["stillToCome"]
        print(f"... acquiring knowledge > {outstanding} yet to come")

    print("\n########## Query Result ##########")
    if "answer" not in reply:
        print("Error >>> Reply ...")
        pp.pprint(reply)
    else:
        print(f"Result : {reply['answer']}")

    # Compute score (not displayed)
    scorer = gdaScores(attack.getResults())
    print("\nInfo >>> Score Class created\n")
    score = scorer.getScores()

    # Wrap up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
def distance_attack(params, verbose):
    """Link anonymized users to raw users by distance and claim their values.

    Steps performed:
      1. Select the anonymized columns (uid excluded) that have at least
         two public values.
      2. Fetch uid plus those columns for every user from ``rawDb`` and
         from ``anonDb``.
      3. For each anonymized user, keep the raw user for which
         ``compute_global_distance`` is smallest (a "match") and the one
         for which it is largest (a "repulse").
      4. Submit, for every match, the raw user's non-null column values as
         a ``claim=True`` guess.
      5. Compute the scores and pass them to ``finishGdaAttack``.

    Args:
        params: Attack parameters, passed to ``gdaAttack`` and
            ``finishGdaAttack``.
        verbose: When truthy, the final score is pretty-printed.

    Returns:
        None in every case (including the early exits when there is no
        anonymized data or no user could be matched).
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)

    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created\n")

    # Exploring table schema
    table = attack.getAttackTableName()
    print(f"\nInfo >>> Working on table : {table} ...")
    rawColNames = attack.getColNames(dbType="rawDb")
    anonColNames = attack.getColNames(dbType="anonDb")
    uidCol = attack.getUidColName()

    # NOTE(review): raw_schema is never read below. It is still built
    # because getPublicColValues may have side effects on the attack
    # object — confirm, then remove.
    raw_schema = {}
    for col in rawColNames:
        publicValues = attack.getPublicColValues(col, table)
        if publicValues:
            raw_schema[col] = [t[0] for t in publicValues]

    # Preparing the attack: keep columns with at least two public values.
    # Columns without public values are skipped explicitly so a value list
    # left over from a previous column can never be reused.
    print("\nInfo >>> Preparing the attack columns ...\n")
    cols = []
    for col in anonColNames:
        if col == uidCol:
            continue
        publicValues = attack.getPublicColValues(col, table)
        if not publicValues or len(publicValues) < 2:
            continue
        cols.append(col)
    print("\nInfo >>> Attack columns selected ...")
    print(f"... {cols}\n")

    # Get all users' data, keyed by uid: {'uid': [val1, ..., valn], ...}
    print("\nInfo >>> Retrieving users' useful data ...")
    sql = sqlQueryGen(table, [uidCol] + cols, None, None)
    print(f"... SQL query : {sql}")

    def fetch_rows_by_uid(db_name):
        """Run ``sql`` on ``db_name`` and map each row's uid to its values."""
        # A fresh dict per request: nothing submitted earlier is mutated.
        attack.askExplore({'sql': sql, 'db': db_name})
        while True:
            reply = attack.getExplore()
            if reply["stillToCome"] == 0:
                break
        rows = {}
        if "answer" in reply:
            for row in reply['answer']:
                rows[row[0]] = row[1:]
        return rows

    print("... users' raw data")
    raw_data = fetch_rows_by_uid("rawDb")
    print("... users' anonymized data\n")
    anon_data = fetch_rows_by_uid("anonDb")

    print("Info >>> Users data retrieved ...")
    print(
        f"... {len(raw_data)} plain users and {len(anon_data)} anonymized users\n"
    )
    if not anon_data:
        print("\nError >>> No anonymized data to work on :(")
        attack.cleanUp()  # the original leaked the attack on this path
        return None

    # Determine practically attackable columns from the first anonymized
    # row: a None uid / None value marks that position as not to consider.
    sample_uid, sample_values = next(iter(anon_data.items()))
    col_flags = [True] * len(cols)
    for i, value in enumerate(sample_values):
        if value is None:
            col_flags[i] = False
    att = (sample_uid is not None, col_flags)

    # De-identification phase
    print("\nInfo >>> De-identifying users ...\n")
    matches = {}   # {"fake_id": "real_id"} with the smallest distance
    repulses = {}  # {"fake_id": "real_id"} with the largest distance
    for anon_id, anon_row in anon_data.items():
        closest = None
        farthest = None
        for raw_id, raw_row in raw_data.items():
            d = compute_global_distance(raw_row, anon_row, att)
            if d is None:
                continue
            if closest is None:
                # First usable distance seeds both records.
                matches[anon_id] = raw_id
                repulses[anon_id] = raw_id
                closest = d
                farthest = d
            elif d < closest:
                matches[anon_id] = raw_id
                closest = d
            elif d > farthest:
                repulses[anon_id] = raw_id
                farthest = d
    print("... de-identification finished")
    if not (matches or repulses):
        print("\nError >>> No user was de-identified\n")
        attack.cleanUp()  # the original leaked the attack on this path
        return None
    print(f"... {len(matches)} matches found :)")

    # Launching the attack
    print("\nInfo >>> Launching the attack ...\n")

    def build_guess(real_id, prefix):
        """Return ``prefix`` plus one entry per non-null value of ``real_id``."""
        guess = list(prefix)
        for i, col in enumerate(cols):
            guessed_val = raw_data[real_id][i]
            if guessed_val is None:
                continue
            guess.append({'col': col, 'val': guessed_val})
        return guess

    # Guesses to make as claim=True (the uid is deliberately left out).
    guess_all = [build_guess(rid, []) for rid in matches.values()]
    # Guesses intended for claim=False.
    # NOTE(review): built as in the original but never submitted.
    guess_all_false = [
        build_guess(rid, [{'col': uidCol, 'val': fid}])
        for fid, rid in repulses.items()
    ]

    # Make some guesses
    print("\nInfo >>> Making claims ...\n")
    for g in guess_all:
        try:
            # One spec dict per claim, so a spec already handed to the
            # attack object is never modified afterwards.
            attack.askClaim({'guess': g}, claim=True)
        except Exception as err:
            # Best effort, as before, but no longer silent and no longer
            # swallowing KeyboardInterrupt / SystemExit.
            print(f"Warning >>> Claim skipped : {err}")
            continue
    while True:
        claim = attack.getClaim()
        if claim["stillToCome"] == 0:
            break

    # Compute and display score
    result = attack.getResults()
    gda_score = gdaScores(result)
    print("\nInfo >>> Score Class created\n")
    print("########## Attack score ##########")
    score = gda_score.getScores()
    if verbose:
        pp.pprint(score)

    # Wrap up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
def ramyAttack03(params, verbose):
    """Attack rare public values, on the target table and on other tables.

    Public values of the anonymized columns whose count is at most 50 are
    collected (the uid column and columns containing "id" are skipped). One
    query per such value is sent to ``anonDb`` for the attack table, plus
    one query per other table that has a column of the same name. Rows
    returned for the attack table are submitted as ``claim=True`` guesses.

    Args:
        params: Attack parameters, passed to ``gdaAttack`` and
            ``finishGdaAttack``.
        verbose: When truthy, the attack replies are pretty-printed.

    Returns:
        None in every case.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)

    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created\n")

    # Exploring DB schema
    tables = attack.getTableNames()
    print("debug1", tables)
    db_model = {}  # dict of columns for each table
    for table in tables:
        print(f"\nInfo >>> Working on table : {table} ...")
        try:
            db_model[table] = attack.getColNames(dbType="anonDb",
                                                 tableName=table)
        except Exception:
            # Best effort, as before, without hiding KeyboardInterrupt.
            print(f"{table} doesn't exist .. Skip to next !")

    # Looking for similar inter-table columns (in progress ...)
    print("\n########## Database model ##########\n")
    pp.pprint(db_model)

    # Back to target table ...
    table = attack.getAttackTableName()
    # The raw column list is not read; call preserved as in the original.
    attack.getColNames(dbType="rawDb")
    anonColNames = attack.getColNames(dbType="anonDb")

    # Looking for interesting columns and values to fetch
    interestValues = []  # list of (column, value) tuples to investigate
    uidCol = attack.getUidColName()
    for col in anonColNames:
        if col == uidCol or "id" in col:
            continue
        for val in attack.getPublicColValues(col, table) or []:
            # val[1] is compared against 50; presumably the value's
            # occurrence count — TODO confirm.
            if val[1] <= 50:
                interestValues.append((col, val[0]))
    if not interestValues:
        print("\nInfo >>> No interesting values found")
        attack.cleanUp()  # the original leaked the attack on this path
        return None
    print("\nInfo >>> Interesting values found\n")
    print("########## Interesting values ##########")
    pp.pprint(interestValues)

    # Numbering conditions
    print("\nInfo >>> Generating conditions ...")
    conditions = [f"{col}={value}" for col, value in interestValues]

    # Generate SQL queries regarding target table
    print("\nInfo >>> Generating SQL queries ...")
    queries = [sqlQueryGen(table, anonColNames, cond) for cond in conditions]

    # Looking at possible interesting inter-cross columns
    lucky_cols = list(dict.fromkeys(col for col, _ in interestValues))
    interestCols = [  # (table, col) tuples for interesting shared columns
        (t, c)
        for t, t_cols in db_model.items()
        if t != table and t_cols
        for c in t_cols
        if c in lucky_cols
    ]

    # Generate SQL queries regarding other tables.
    # The original indexed the whole list (interestCols[0] / [1]) instead
    # of the current (table, column) pair, which either raised IndexError
    # or compared a string with a tuple and never produced a query.
    for other_table, shared_col in interestCols:
        for col, value in interestValues:
            if col == shared_col:
                cond = f"{shared_col}={value}"
                # Columns are passed as a list, like every other call to
                # sqlQueryGen in this file.
                queries.append(sqlQueryGen(other_table, [shared_col], cond))

    # Executing attack
    print("\nInfo >>> Launching attack ...\n")
    print(f"... {len(queries)} SQL queries to execute")
    for q in queries:
        # One dict per query: a request already submitted is never mutated.
        attack.askAttack({'db': "anonDb", 'sql': q})
    print("... getting replies")
    replies = []
    while True:
        reply = attack.getAttack()
        replies.append(reply)
        print(f"... acquiring knowledge > {reply['stillToCome']} yet to come")
        if reply["stillToCome"] == 0:
            break
    if verbose:
        print("########## Attack reply ##########")
        for reply in replies:
            pp.pprint(reply)

    # Make some guesses from every reply (the original only looked at the
    # last reply and discarded all the others).
    print("\nInfo >>> Making claims ...\n")
    claimed_rows = set()
    for reply in replies:
        for row in reply.get('answer') or []:
            # Rows from other tables do not have the attack table's column
            # layout and cannot be mapped onto anonColNames.
            if len(row) != len(anonColNames):
                continue
            row_key = tuple(row)
            if row_key in claimed_rows:
                continue  # same row returned by several queries
            claimed_rows.add(row_key)
            spec = {
                'guess': [{'col': col, 'val': val}
                          for col, val in zip(anonColNames, row)]
            }
            print("# DEBUG: 01")
            try:
                attack.askClaim(spec, claim=True)
            except Exception as err:
                print(f"Warning >>> Claim skipped : {err}")
                continue
    print("# DEBUG: 02")
    while True:
        claim = attack.getClaim()
        print("# DEBUG: 03")
        if claim["stillToCome"] == 0:
            break

    # Compute and display score
    print("# DEBUG: 04")
    result = attack.getResults()
    print("# DEBUG: 05")
    gda_score = gdaScores(result)
    print("# DEBUG: 06")
    print("\nInfo >>> Score Class created\n")
    print("########## Attack score ##########")
    score = gda_score.getScores()
    pp.pprint(score)

    # Wrap up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
def dumb_list_linkability_attack(params):
    """Dumb List attack for the Linkability criteria.

    Requests rows over all columns shared by the raw and anonymized
    databases, grouped so that each returned row occurs exactly once, and
    claims every returned row. The attack succeeds if the anonymized
    database returns rows that single out users, and fails otherwise. It is
    designed to work against raw and pseudonymized data.

    NOTE: This is effectively the same attack as the singling-out dumb
    list. Verbose output is controlled by the name ``v``, which is defined
    outside this function.
    """
    attack = gdaAttack(params)

    # ------------------- Exploration Phase ------------------------
    # Only columns present in both the raw and the anonymized database can
    # be attacked (pseudonymization schemes typically delete some columns).
    table = attack.getAttackTableName()
    rawColNames = attack.getColNames(dbType='rawDb')
    anonColNames = attack.getColNames(dbType='anonDb')
    if rawColNames is None or anonColNames is None:
        print(f"No table to attack (raw {rawColNames}, anon {anonColNames}")
        attack.cleanUp()
        return
    colNames = list(set(rawColNames) & set(anonColNames))

    # ------------------- Prior Knowledge Phase --------------------
    # This attack doesn't require any prior knowledge

    # ------------------- Attack Phase -----------------------------
    select_list = comma_ize(colNames)
    group_by = makeGroupBy(colNames)
    sql = ("SELECT " + select_list
           + str(f"count(*) FROM {table} ")
           + group_by
           + " HAVING count(*) = 1 ORDER BY count(*) LIMIT 100")
    query = {'sql': sql}
    print("-------------------- Attack query:")
    print(sql)
    attack.askAttack(query)
    reply = attack.getAttack()
    if v:
        print("-------------------- Attack reply:")
    if v:
        pp.pprint(reply)

    # ------------------- Claims Phase ----------------------------
    if 'answer' not in reply:
        print("ERROR: reply to claim query contains no answer")
        pp.pprint(reply)
        attack.cleanUp()
        sys.exit()
    for row in reply['answer']:
        # One guess entry per attacked column, in column order.
        entries = [{'col': name, 'val': row[pos]}
                   for pos, name in enumerate(colNames)]
        attack.askClaim({'guess': entries})
    if v:
        print("------------------- Attack claims:")
    claims_pending = True
    while claims_pending:
        reply = attack.getClaim()
        if v:
            pp.pprint(reply)
        claims_pending = reply['stillToCome'] != 0

    # ------------------- Scores Phase ----------------------------
    sc = gdaScores(attack.getResults())
    score = sc.getScores()
    if v:
        pp.pprint(score)
    attack.cleanUp()
    final = finishGdaAttack(params, score)
    pp.pprint(final)
def diffix_infer_1_attack(params):
    """Inference attack against Diffix.

    Looks for attribute groups where the inference condition holds: only
    one guessed-column value exists for some set of one or more known
    column values. Designed to work against Diffix and Full K-anonymity at
    least.

    Columns with at most 10 distinct values are treated as guessable. For
    each of them, combinations of the remaining columns are tried as known
    columns and handed to ``runOneAttack``, skipping combinations whose
    expected number of distinct value combinations exceeds 80% of the row
    count. A susceptibility value is then assigned to every column.

    Verbose output is controlled by the name ``v``, which is defined
    outside this function.

    Args:
        params: Attack parameters, passed to ``gdaAttack`` and
            ``finishGdaAttack``.
    """
    attack = gdaAttack(params)

    # ------------------- Exploration Phase ------------------------
    # We need to know the columns that are in the anonymized database
    # and in the raw database. It is these columns that we can attack.
    table = attack.getAttackTableName()
    rawColNames = attack.getColNames(dbType='rawDb')
    anonColNames = attack.getColNames(dbType='anonDb')
    # Same guard as the dumb-list attacks in this file; without it
    # set(None) raises TypeError.
    if rawColNames is None or anonColNames is None:
        print(f"No table to attack (raw {rawColNames}, anon {anonColNames}")
        attack.cleanUp()
        return
    colNames = list(set(rawColNames) & set(anonColNames))
    if v:
        print(f"Common columns are: {colNames}")

    # Get the total number of rows so that we can later determine fraction
    # of cells per column that are susceptible
    sql = str(f"SELECT count(*) FROM {table}")
    query = dict(db="rawDb", sql=sql)
    attack.askExplore(query)
    reply = attack.getExplore()
    if 'error' in reply:
        doQueryErrorAndExit(reply, attack)
    totalRows = reply['answer'][0][0]
    if v:
        print(f"Total Rows: {totalRows}")
    if not totalRows:
        # Every ratio below divides by totalRows.
        print("No rows to attack")
        attack.cleanUp()
        return

    # There is really no point in trying to find instances of inference
    # where the guessed column has a large number of values: the chances
    # of finding an inference instance are very low. The threshold is
    # (arbitrarily for now) set at 10.
    # By the same token, an attack where the known column has a majority
    # of values that are distinct to a single user won't work, because in
    # the case of Diffix they will be low-count filtered, and in the case
    # of Full K-anonymity they may be aggregated.
    # So we record the number of distinct values per column. (In practice
    # this would not be known exactly, but the attacker can be assumed to
    # have a reasonable guess just based on knowledge of the column.)
    distincts = {}
    guessableCols = []
    for col in colNames:
        sql = str(f"SELECT count(DISTINCT {col}) FROM {table}")
        query = dict(db="rawDb", sql=sql)
        attack.askAttack(query)
        reply = attack.getAttack()
        if 'error' in reply:
            doQueryErrorAndExit(reply, attack)
        totalDistinct = reply['answer'][0][0]
        distincts[col] = totalDistinct
        if totalDistinct <= 10:
            guessableCols.append(col)
    if v:
        print(f"Distincts: {distincts}")
    if v:
        print(f"guessableCols: {guessableCols}")

    # ------------------- Prior Knowledge Phase --------------------
    # This attack doesn't require any prior knowledge

    for guessedCol in guessableCols:
        numClaims = 0
        remainingCols = [x for x in colNames if x != guessedCol]
        # Try combinations of the remaining columns, smallest first, and
        # run the attack when the expected number of distinct value
        # combinations is not too high. Give up on this guessed column
        # after more than 1000 consecutive unusable combinations or more
        # than 25 claims.
        unusedCombinations = 0
        for size in range(1, len(remainingCols) + 1):
            if unusedCombinations > 1000 or numClaims > 25:
                break
            for knownCols in itertools.combinations(remainingCols, size):
                if unusedCombinations > 1000 or numClaims > 25:
                    break
                totalDistinct = 1
                for c in knownCols:
                    totalDistinct *= distincts[c]
                if v:
                    # Both halves must be f-strings; the original printed
                    # the text "{knownCols}" literally.
                    print(f"totalDistinct: {totalDistinct} "
                          f"from known columns {knownCols}")
                if (totalDistinct / totalRows) > 0.8:
                    unusedCombinations += 1
                    continue
                unusedCombinations = 0
                numClaims = runOneAttack(guessedCol, knownCols, attack,
                                         table, numClaims)

    # ------------------- Scores Phase ----------------------------
    attackResult = attack.getResults()
    sc = gdaScores(attackResult)
    # Now we need to assign susceptibility scores, which means making
    # some explore queries
    for guessedCol in colNames:
        remainingCols = [x for x in colNames if x != guessedCol]
        # -------------- More exploration phase ------------------
        # First find out how many of the cells are attackable
        sql = "SELECT sum(rows) FROM (SELECT "
        sql += comma_ize(remainingCols)
        sql += str(f"count(*) AS rows FROM {table} ")
        sql += makeGroupBy(remainingCols)
        sql += str(f" HAVING count(DISTINCT {guessedCol}) = 1) t")
        if v:
            print("-------------------- Explore query:")
        if v:
            print(sql)
        query = dict(db="raw", sql=sql)
        attack.askExplore(query)
        reply = attack.getExplore()
        if 'error' in reply:
            doQueryErrorAndExit(reply, attack)
        # SUM over zero groups is NULL, which arrives here as None.
        numRows = reply['answer'][0][0] or 0
        if v:
            print("-------------------- Explore reply:")
        if v:
            pp.pprint(reply)
        susValue = numRows / totalRows
        sc.assignColumnSusceptibility(guessedCol, susValue)

    # Get average score (default behavior)
    score = sc.getScores()
    if v:
        pp.pprint(score)
    score = sc.getScores(numColumns=1)
    if v:
        pp.pprint(score)
    attack.cleanUp(cleanUpCache=False)
    final = finishGdaAttack(params, score)
    pp.pprint(final)
def ramyAttack02(params, verbose):
    """Attack rare public values of the target table.

    Public values of the anonymized columns whose count is at most 50 are
    collected (the uid column and columns containing "id" are skipped). One
    query per such value is sent to ``anonDb`` and every returned row is
    submitted as a ``claim=True`` guess over all anonymized columns.

    Args:
        params: Attack parameters, passed to ``gdaAttack`` and
            ``finishGdaAttack``.
        verbose: When truthy, the attack replies are pretty-printed.

    Returns:
        None in every case.
    """
    # Check on parameters
    print("########## Parameters passed ##########")
    pp.pprint(params)

    # Attack setup
    attack = gdaAttack(params)
    attack.unsetVerbose()
    print("\nInfo >>> Attack Class created")

    # Information gathering
    table = attack.getAttackTableName()
    # The raw column list is not read; call preserved as in the original.
    attack.getColNames(dbType="rawDb")
    anonColNames = attack.getColNames(dbType="anonDb")
    print(f"\nInfo >>> Working on table : {table} ...")

    # Looking for interesting columns and values to fetch
    interestValues = []  # list of (column, value) to investigate deeper
    uidCol = attack.getUidColName()
    for col in anonColNames:
        if col == uidCol or "id" in col:
            continue
        for val in attack.getPublicColValues(col, table) or []:
            # val[1] is compared against 50; presumably the value's
            # occurrence count — TODO confirm.
            if val[1] <= 50:
                interestValues.append((col, val[0]))
    if not interestValues:
        print("\nInfo >>> No interesting values found")
        attack.cleanUp()  # the original leaked the attack on this path
        return None
    print("\nInfo >>> Interesting values found\n")
    print("########## Interesting values ##########")
    pp.pprint(interestValues)

    # Numbering conditions
    print("\nInfo >>> Generating conditions ...")
    conditions = [f"{col}={value}" for col, value in interestValues]

    # Define SQL queries
    print("\nInfo >>> Generating SQL queries ...")
    queries = [sqlQueryGen(table, anonColNames, cond) for cond in conditions]

    # Executing attack
    print("\nInfo >>> Launching attack ...\n")
    for q in queries:
        # One dict per query: a request already submitted is never mutated.
        attack.askAttack({'db': "anonDb", 'sql': q})
    replies = []
    while True:
        reply = attack.getAttack()
        replies.append(reply)
        if reply["stillToCome"] == 0:
            break
    if verbose:
        print("########## Attack reply ##########")
        for reply in replies:
            pp.pprint(reply)

    # Make some guesses from every reply (the original only looked at the
    # last reply and discarded all the others).
    print("\nInfo >>> Making claims ...\n")
    claimed_rows = set()
    for reply in replies:
        for row in reply.get('answer') or []:
            row_key = tuple(row)
            if row_key in claimed_rows:
                continue  # same row returned by several queries
            claimed_rows.add(row_key)
            guess = [{'col': col, 'val': row[i]}
                     for i, col in enumerate(anonColNames)]
            attack.askClaim({'guess': guess}, claim=True)
    while True:
        claim = attack.getClaim()
        if claim["stillToCome"] == 0:
            break

    # Compute and display score
    result = attack.getResults()
    gda_score = gdaScores(result)
    print("\nInfo >>> Score Class created\n")
    print("########## Attack score ##########")
    score = gda_score.getScores()
    pp.pprint(score)

    # Wrap up
    attack.cleanUp()
    finishGdaAttack(params, score)
    return None
def dumb_list_singling_out_attack(params):
    """Dumb List attack for the Singling Out criteria.

    Requests rows from the anonymized database over groups of columns that
    each include the uid column, keeps only rows that occur exactly once,
    and claims every returned row. The attack succeeds if the anonymized
    database returns rows that single out users, and fails otherwise. It is
    designed to work against raw and pseudonymized data.

    Verbose output is controlled by the name ``v``, which is defined
    outside this function.

    Args:
        params: Attack parameters, passed to ``gdaAttack`` and
            ``finishGdaAttack``.
    """
    attack = gdaAttack(params)

    # ------------------- Exploration Phase ------------------------
    # We need to know the columns that are in the anonymized database
    # and in the raw database. It is these columns that we can attack.
    # (Note that pseudonymization schemes can delete some columns.)
    table = attack.getAttackTableName()
    rawColNames = attack.getColNames(dbType='rawDb')
    anonColNames = attack.getColNames(dbType='anonDb')
    if rawColNames is None or anonColNames is None:
        print(f"No table to attack (raw {rawColNames}, anon {anonColNames}")
        attack.cleanUp()
        return
    uid = attack.getUidColName()
    colNamesAll = list(set(rawColNames) & set(anonColNames))
    if v:
        print(f"Use columns: {colNamesAll}")

    # The cloak can't handle queries with a large number of columns,
    # so we split up the attack into groups of 5 columns each. Each group
    # contains the uid column, so that we are sure that the resulting
    # answer pertains to a single user.
    groupSize = 5
    minAttacksPerGroup = 5
    colsWithoutUid = [col for col in colNamesAll if col != uid]
    if v:
        print(colNamesAll)
    if v:
        print(colsWithoutUid)
    step = groupSize - 1  # one slot of every group is taken by the uid
    groups = [
        colsWithoutUid[start:start + step] + [uid]
        for start in range(0, len(colsWithoutUid), step)
    ]
    if not groups:
        # Without this guard the division below raises ZeroDivisionError.
        print("No columns to attack besides the uid column")
        attack.cleanUp()
        return
    # This will give us around 100 attack queries total, and never fewer
    # than minAttacksPerGroup per group. The original used min() here,
    # which turned the floor into a cap of 5 regardless of group count.
    numAttacksPerGroup = max(int(100 / len(groups)) + 1, minAttacksPerGroup)
    if v:
        pp.pprint(groups)

    # ------------------- Prior Knowledge Phase --------------------
    # This attack doesn't require any prior knowledge

    # ------------------- Attack Phase -----------------------------
    for colNames in groups:
        query = {}
        sql = "SELECT "
        sql += comma_ize(colNames)
        sql += str(f"count(*) FROM {table} WHERE ")
        sql += makeInNotNullConditions(colNames)
        sql += makeGroupBy(colNames)
        # Order by the actual uid column; the original hard-coded "uid",
        # which breaks on tables whose uid column has another name.
        sql += str(f" HAVING count(*) = 1 ORDER BY {uid} ")
        sql += str(f" LIMIT {numAttacksPerGroup} ")
        query['sql'] = sql
        print("-------------------- Attack query:")
        print(sql)
        attack.askAttack(query)
        reply = attack.getAttack()
        if v:
            print("-------------------- Attack reply:")
        if v:
            pp.pprint(reply)

        # ------------------- Claims Phase ----------------------------
        if 'answer' not in reply:
            print("ERROR: reply to claim query contains no answer")
            pp.pprint(reply)
            attack.cleanUp()
            sys.exit()
        for row in reply['answer']:
            guess = [{'col': col, 'val': row[i]}
                     for i, col in enumerate(colNames)]
            attack.askClaim({'guess': guess})
        if v:
            print("------------------- Attack claims:")
        while True:
            reply = attack.getClaim()
            if v:
                pp.pprint(reply)
            if reply['stillToCome'] == 0:
                break

    # ------------------- Scores Phase ----------------------------
    attackResult = attack.getResults()
    sc = gdaScores(attackResult)
    score = sc.getScores()
    if v:
        pp.pprint(score)
    attack.cleanUp()
    final = finishGdaAttack(params, score)
    pp.pprint(final)
def dumb_list_inference_attack(params):
    """Dumb List attack for the Inference criteria.

    In an inference attack, there are 'known' column values and 'guessed'
    column values. An inference claim succeeds when all users with the
    known column values have the same guessed column values. There only
    needs to be one such user, so we can try making inferences on all
    columns by using all the other columns as known values.

    Verbose output is controlled by the name ``v``, which is defined
    outside this function.

    Args:
        params: Attack parameters, passed to ``gdaAttack`` and
            ``finishGdaAttack``.
    """
    attack = gdaAttack(params)

    # ------------------- Exploration Phase ------------------------
    # We need to know the columns that are in the anonymized database
    # and in the raw database. It is these columns that we can attack.
    # (Note that pseudonymization schemes typically delete some columns.)
    table = attack.getAttackTableName()
    rawColNames = attack.getColNames(dbType='rawDb')
    anonColNames = attack.getColNames(dbType='anonDb')
    if rawColNames is None or anonColNames is None:
        print(f"No table to attack (raw {rawColNames}, anon {anonColNames}")
        attack.cleanUp()
        return
    colNames = list(set(rawColNames) & set(anonColNames))

    # Get the total number of rows so that we can later determine fraction
    # of cells per column that are susceptible
    sql = str(f"SELECT count(*) FROM {table}")
    if v:
        print(sql)
    query = dict(db="raw", sql=sql)
    attack.askExplore(query)
    reply = attack.getExplore()
    if 'error' in reply:
        doQueryErrorAndExit(reply, attack)
    totalRows = reply['answer'][0][0]

    # ------------------- Prior Knowledge Phase --------------------
    # This attack doesn't require any prior knowledge

    # ------------------- Attack Phase -----------------------------
    # Attack each (guessed) column by using the remaining columns as the
    # known columns. The loop below runs attack and claims for each
    # guessed column.
    for guessedCol in colNames:
        remainingCols = [x for x in colNames if x != guessedCol]
        # -------------- Attack phase ------------------
        # And now run the attack for some fraction of the attackable cells
        sql = "SELECT "
        sql += comma_ize(remainingCols)
        sql += str(f"max({guessedCol}) FROM {table} WHERE ")
        sql += makeInNotNullConditions(remainingCols)
        sql += makeGroupBy(remainingCols)
        sql += str(f" HAVING count(DISTINCT {guessedCol}) = 1 ")
        sql += str(f"ORDER BY 1 LIMIT 20")
        if v:
            print(sql)
        query = dict(sql=sql)
        attack.askAttack(query)
        reply = attack.getAttack()
        if 'error' in reply:
            # For this attack, cloak can't deal with max(text_col),
            # so just continue without claims
            continue

        # -------------- Claims phase ------------------
        guessedPos = len(remainingCols)  # guessed value follows the known ones
        for row in reply['answer']:
            spec = {
                'known': [{'col': col, 'val': row[i]}
                          for i, col in enumerate(remainingCols)],
                'guess': [{'col': guessedCol, 'val': row[guessedPos]}],
            }
            if not attack.isClaimed(spec):
                attack.askClaim(spec)
        while True:
            reply = attack.getClaim()
            if v:
                pp.pprint(reply)
            if reply['stillToCome'] == 0:
                break

    # ------------------- Scores Phase ----------------------------
    attackResult = attack.getResults()
    sc = gdaScores(attackResult)
    # Now we need to assign susceptibility scores, which means making
    # some explore queries
    for guessedCol in colNames:
        remainingCols = [x for x in colNames if x != guessedCol]
        if len(remainingCols) > 20:
            remainingCols = remainingCols[:20]
        # -------------- More exploration phase ------------------
        # First find out how many of the cells are attackable
        sql = "SELECT sum(rows) FROM (SELECT "
        sql += comma_ize(remainingCols)
        sql += str(f"count(*) AS rows FROM {table} ")
        sql += makeGroupBy(remainingCols)
        sql += str(f" HAVING count(DISTINCT {guessedCol}) = 1) t")
        if v:
            print("-------------------- Explore query:")
        if v:
            print(sql)
        query = dict(db="raw", sql=sql)
        attack.askExplore(query)
        reply = attack.getExplore()
        if 'error' in reply:
            doQueryErrorAndExit(reply, attack)
        # SUM over zero groups is NULL, which arrives here as None.
        numRows = reply['answer'][0][0] or 0
        if v:
            print("-------------------- Explore reply:")
        if v:
            pp.pprint(reply)
        # An empty table has no susceptible cells; avoid dividing by zero.
        susValue = numRows / totalRows if totalRows else 0
        sc.assignColumnSusceptibility(guessedCol, susValue)

    score = sc.getScores()
    if v:
        pp.pprint(score)
    final = finishGdaAttack(params, score)
    attack.cleanUp()
    pp.pprint(final)