def driver():
    random.seed(553)
    reservoir = [0] * 100
    sequence_no = 0
    num_of_asks = int(argv[3])
    stream_size = int(argv[2])
    bx = BlackBox()
    results = []
    for _ in range(num_of_asks):
        stream_users = bx.ask(str(argv[1]), stream_size)
        for user in stream_users:
            sequence_no += 1
            if sequence_no <= 100:
                reservoir[sequence_no - 1] = user
            else:
                # Reservoir full: choose to keep this user with probability 100 / sequence_no.
                p_keep_user = random.randint(0, 100000) % sequence_no
                if p_keep_user < 100:
                    # Keep the user: replace one reservoir element with uniform probability.
                    position_to_replace = random.randint(0, 100000) % 100
                    reservoir[position_to_replace] = user
            if sequence_no % 100 == 0:
                results.append(str(sequence_no) + "," + reservoir[0] + "," + reservoir[20] + ","
                               + reservoir[40] + "," + reservoir[60] + "," + reservoir[80])
    return results
def main(input_file_path, stream_size, num_of_asks, output_file_path):
    global bloom_filter
    bx = BlackBox()
    gt_set = set()
    fp = 0
    output_file = open(output_file_path, "wt")
    output_file.write("Time,FPR\n")
    for it in range(num_of_asks):
        stream_users = bx.ask(input_file_path, stream_size)
        for s in stream_users:
            indices = myhashs(s)
            is_not_present = False
            for i in indices:
                if not bloom_filter[i]:
                    is_not_present = True
                    break
            if is_not_present:
                for i in indices:
                    bloom_filter[i] = True
            if s not in gt_set and not is_not_present:
                # All bits were already set for a user never seen before: a false positive.
                fp += 1
            gt_set.add(s)
        output_file.write("{},{}\n".format(it, fp / ((it + 1) * stream_size)))
    output_file.close()
    return
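# myhashs and the global bloom_filter are defined elsewhere in the original file. Below is
# only a minimal sketch of what such a hash family could look like, assuming the common
# ((a*x + b) % p) % m scheme over the integer value of the user id; the constants here
# (16 functions, filter size 69997) are illustrative assumptions, not taken from this snippet.
import binascii
import random

FILTER_SIZE = 69997
random.seed(553)
HASH_PARAMS = [(random.randint(1, 10 ** 9), random.randint(0, 10 ** 9)) for _ in range(16)]

def myhashs(user):
    # convert the user id string to an integer, then derive one index per (a, b) pair
    x = int(binascii.hexlify(user.encode("utf8")), 16)
    return [((a * x + b) % 1_000_000_007) % FILTER_SIZE for a, b in HASH_PARAMS]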
def flajolet_martin(input_file_path, stream_size, num_of_asks, output_file_path):
    global number_of_hashes
    bx = BlackBox()
    output_file = open(output_file_path, "wt")
    output_file.write("Time,Ground Truth,Estimation\n")
    predicted_sum = 0
    actual_sum = 0
    for it in range(num_of_asks):
        stream_users = bx.ask(input_file_path, stream_size)
        max_number_of_trailing_zeros = [-sys.maxsize] * number_of_hashes
        for s in stream_users:
            hashes = myhashs(s)
            for i, h in enumerate(hashes):
                h = format(h, '016b')
                number_of_trailing_zeros = len(h) - len(h.rstrip('0'))
                if number_of_trailing_zeros > max_number_of_trailing_zeros[i]:
                    max_number_of_trailing_zeros[i] = number_of_trailing_zeros
        count = calculate_count(max_number_of_trailing_zeros)
        output_file.write("{},{},{}\n".format(it, stream_size, int(count)))
        predicted_sum += count
        actual_sum += stream_size
    print(predicted_sum / actual_sum)
    output_file.close()
    return
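# calculate_count is not defined in this snippet. Below is a minimal sketch of one common
# Flajolet-Martin combiner, assuming the usual average-within-groups / median-of-averages
# scheme; the number of groups is an illustrative assumption, not taken from the original code.
def calculate_count(max_trailing_zeros, groups=5):
    estimates = [2 ** r for r in max_trailing_zeros]      # per-hash estimates 2^R
    per_group = max(1, len(estimates) // groups)
    averages = []
    for g in range(0, len(estimates), per_group):
        chunk = estimates[g:g + per_group]
        averages.append(sum(chunk) / len(chunk))          # average within each group
    averages.sort()
    return averages[len(averages) // 2]                   # median across group averages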
def main():
    blackBox = BlackBox()
    op = "Time,FPR"
    for i in range(numOfAsks):
        stream_users = blackBox.ask(inputFile, streamSize)
        op += bloom_filtering(i, stream_users)
    writeToFile(op)
def main():
    input_file, output_file = sys.argv[1], sys.argv[4]
    stream_size, num_of_asks = int(sys.argv[2]), int(sys.argv[3])  # stream size = 100
    bx = BlackBox()
    seq_num = 0
    window_size = 100
    random.seed(553)
    user_list = []
    with open(output_file, "w") as f:
        f.write("seqnum,0_id,20_id,40_id,60_id,80_id")
        for i in range(num_of_asks):
            stream_users = bx.ask(input_file, stream_size)
            if seq_num == 0:
                user_list += stream_users
                seq_num += stream_size
            else:
                for user in stream_users:
                    seq_num += 1
                    prob = random.randint(0, 100000) % seq_num
                    if prob < window_size:
                        pos = random.randint(0, 100000) % window_size
                        user_list[pos] = user
            f.write("\n{},{},{},{},{},{}".format(seq_num, user_list[0], user_list[20],
                                                 user_list[40], user_list[60], user_list[80]))
            print("{},{},{},{},{},{}".format(seq_num, user_list[0], user_list[20],
                                             user_list[40], user_list[60], user_list[80]))
def driver():
    bx = BlackBox()
    num_of_asks = int(argv[3])
    stream_size = int(argv[2])
    results = []
    for i in range(num_of_asks):
        stream_users = bx.ask(str(argv[1]), stream_size)
        ground_truth = set()
        for user in stream_users:
            ground_truth.add(user)
        estimation = flajolet_martin(stream_users)
        results.append((i, len(ground_truth), int(estimation)))
    sum_estimations = 0
    sum_ground_truth = 0
    for i in results:
        # print(i)
        sum_ground_truth += i[1]
        sum_estimations += i[2]
        print("Final Result = ", i[2] / i[1])
    with open(str(argv[4]), "w") as file:
        file.write("Time,Ground Truth,Estimation")
        for r in results:
            file.write("\n" + str(r[0]) + "," + str(r[1]) + "," + str(r[2]))
def main(): blackBox = BlackBox() op = "Time,Ground Truth,Estimation" for i in range(numOfAsks): stream_users = blackBox.ask(inputFile, streamSize) op += flajolet_martin(i, stream_users) writeToFile(op)
def main():
    input_file, output_file = sys.argv[1], sys.argv[4]
    stream_size, num_of_asks = int(sys.argv[2]), int(sys.argv[3])
    hash_function_num = 16
    m = 69997
    bx = BlackBox()
    bloom_filter = [0 for _ in range(m)]
    with open(output_file, "w") as f:
        f.write("Time,FPR")
        for time in range(num_of_asks):
            stream_users = bx.ask(input_file, stream_size)
            visited_users = set()
            FP, TN = 0, 0
            for user in stream_users:
                hash_values = myhashs(user)
                count = 0
                for hash_value in hash_values:
                    if bloom_filter[hash_value] == 1:
                        count += 1
                if user not in visited_users:
                    if count == hash_function_num:
                        FP += 1
                    else:
                        TN += 1
                visited_users.add(user)
                for hash_value in hash_values:
                    bloom_filter[hash_value] = 1
            FPR = float(FP / (FP + TN))
            f.write("\n{},{}".format(time, FPR))
def getServices(body, userId):
    user_service_url = userServiceApi + "?format=json&agencyId=" + agencyId + "&userId=" + str(userId)
    userService = UserService(user_service_url)
    print(" [x] Loading user service " + user_service_url)
    box = BlackBox(serviceApi, userService)
    services = box.getWidgets(body)
    for service in services:
        service["AgencyID"] = agencyId
    result = json.dumps(services)
    return result
def __init__(self, files, debugMode):
    self.__cards = None
    self.__tableCards = None
    self.__table = None
    self.__players = None
    self.doCallback = False
    self.endGameCallback = None
    self.win = True
    self.debugMode = debugMode
    self.slackMessage = ""
    self.blackbox = BlackBox(files, 6, 7, 7, 6)
    self.playerName = -1
    self.response = [0, 0, 0, 0, 0, 0]
    self.reload = 0
    self.betAmount = 100
def main():
    input_file, output_file = sys.argv[1], sys.argv[4]
    stream_size, num_of_asks = int(sys.argv[2]), int(sys.argv[3])
    hash_function_num = 1000
    groups_len = 10
    hash_functions_per_group = int(hash_function_num / groups_len)
    bx = BlackBox()
    with open(output_file, "w") as f:
        f.write("Time,Ground Truth,Estimation")
        est_all = 0
        gt_all = 0
        for time in range(num_of_asks):
            gt = set()
            stream_users = bx.ask(input_file, stream_size)
            all_hash_values = []
            for user in stream_users:
                gt.add(user)
                hash_values = myhashs(user)
                all_hash_values.append(hash_values)
            estimates = []
            for i in range(hash_function_num):
                longest_trailing_zeros = 0
                for hash_values in all_hash_values:
                    hash_value = hash_values[i]
                    trailing_zeros = 0
                    while hash_value & 1 == 0 and hash_value > 0:
                        trailing_zeros += 1
                        hash_value = hash_value >> 1
                    longest_trailing_zeros = max(trailing_zeros, longest_trailing_zeros)
                estimates.append(2 ** longest_trailing_zeros)
            estimates_avg = []
            for i in range(groups_len):
                sum_est = 0
                for j in range(hash_functions_per_group):
                    sum_est += estimates[i * hash_functions_per_group + j]
                estimates_avg.append(float(sum_est / hash_functions_per_group))
            estimates_avg.sort()
            estimate = round(estimates_avg[int(groups_len / 2)])
            est_all += estimate
            gt_all += len(gt)
            f.write("\n{},{},{}".format(time, len(gt), estimate))
def main():
    blackbox = BlackBox(config="repack/config.ini")
    # blackbox.clearCache()
    blackbox.loadData()
    blackbox.train()
    blackbox.predict()
    # blackbox.updateConfig("POLYFIT", "poly_degree", 15, wipe=False)
    print(f"[+] {blackbox.accuracy}, {blackbox.test_hash}, {blackbox.train_hash}")
    print(blackbox.accuracy_topf)
def driver():
    bx = BlackBox()
    num_of_asks = int(argv[3])
    stream_size = int(argv[2])
    fpr = []
    bit_array = [0 for _ in range(69997)]
    previous_users = set()
    for i in range(num_of_asks):
        stream_users = bx.ask(str(argv[1]), stream_size)
        batch_fpr = bloom_filter(bit_array, stream_users, previous_users)
        fpr.append((i, batch_fpr))
    for f in fpr:
        print(f)
    with open(str(argv[4]), "w") as file:
        file.write("Time,FPR")
        for f in fpr:
            file.write("\n" + str(f[0]) + "," + str(f[1]))
    result.append(hash_val)
    # print(result)
    return result

if __name__ == '__main__':
    start = time.time()
    input_path = sys.argv[1]
    stream_size = int(sys.argv[2])
    num_of_asks = int(sys.argv[3])
    output_path = sys.argv[4]
    bx = BlackBox()
    ground_truth = []
    estimate_length = []
    for _ in range(num_of_asks):
        stream_users = bx.ask(input_path, stream_size)
        ground_truth.append(len(set(stream_users)))
        longest_trail = [0 for _ in range(hash_num)]
        for user in stream_users:
            user_hash = myhashs(user)
            for i, h in enumerate(user_hash):
                longest_trail[i] = max(longest_trail[i], len(h) - len(h.rstrip('0')))
        for i in range(hash_num):
            longest_trail[i] = 2 ** longest_trail[i]
import random
import sys

# class BlackBox:
#     def ask(self, file, num):
#         lines = open(file, 'r').readlines()
#         users = [0 for i in range(num)]
#         for i in range(num):
#             users[i] = lines[random.randint(0, len(lines) - 1)].rstrip("\n")
#         return users

# This is how we read the data!
from blackbox import BlackBox

bx = BlackBox()
users_fn = sys.argv[1]
stream_size = int(sys.argv[2])
num_asks = int(sys.argv[3])
print("This is user FN:", users_fn)
print("This is stream size:", stream_size)
print("This is number of asks:", num_asks)

# Task 2.1 -- Bloom Filter
filter_bit_array = [0] * 69997
import binascii
num_rows = 69997
all_primes = [
    for s in stream:
        h = myhashs(s)
        hash_str = list(map(lambda x: bin(x), h))
        tzeros = list(map(lambda x: len(x) - len(x.rstrip("0")), hash_str))
        longest_tzeros = [max(a, b) for a, b in zip(longest_tzeros, tzeros)]
    data = sorted([2 ** r for r in longest_tzeros])
    data = [data[GROUP_SIZE * i:GROUP_SIZE * (i + 1)] for i in range(NUM_GROUP)]
    avg = list(map(lambda x: sum(x) / len(x), data))
    med = int(len(avg) / 2)
    return round(avg[med])

if __name__ == "__main__":
    infile = sys.argv[1]
    stream_size = int(sys.argv[2])
    num_asks = int(sys.argv[3])
    outfile = sys.argv[4]
    previous_user = BlackBox().ask(infile, stream_size)
    t0 = time.time()
    with open(outfile, 'w+') as f:
        f.write("Time,Ground Truth,Estimation\n")
        for i in range(num_asks):
            stream = BlackBox().ask(infile, stream_size)
            estimate = Task2.one_run(stream)
            f.write(str(i) + ',' + str(stream_size) + ',' + str(estimate) + '\n')
    print("Duration:", time.time() - t0)
def testBlackBoxSentimentAnalysis(data_type, model_type, num_samples):
    start = time.time()
    ## Import glove-vectors once
    if data_type == 'RT' or data_type == 'IMDB':
        glove_vectors = json.load(open("glove_final.json".format(data_type, data_type), "rb"))
    elif data_type == 'Kaggle':
        glove_vectors = json.load(open("datasets/{}/glove_kaggle.json".format(data_type, data_type), "rb"))
    embed_map = pickle.load(open("datasets/{}/{}_embed_map.p".format(data_type, data_type), "rb"))
    ## Get Dataset (3 types: IMDB, RT, Kaggle)
    if data_type == 'IMDB':
        data = pickle.load(open("datasets/IMDB/IMDB_tokens.p", "rb"))
    elif data_type == 'RT':
        data = pickle.load(open("datasets/RT/RT_tokens.p", "rb"))
    elif data_type == 'Kaggle':
        data = pickle.load(open("datasets/Kaggle/Kaggle_tokens.p", "rb"))
    ## Get Model (3 types: LR, LSTM, CNN)
    if model_type == 'LR':
        if data_type == 'IMDB':
            model = pickle.load(open("models/LR/LR_SA_IMDB.p", "rb"))
        elif data_type == 'RT':
            model = pickle.load(open("models/LR/LR_SA_RT.p", "rb"))
        elif data_type == 'Kaggle':
            model = pickle.load(open("models/LR/LR_TCD_Kaggle.p", "rb"))
    elif model_type == 'LSTM':
        if data_type == 'IMDB':
            model = pickle.load(open("models/LSTM/LSTM_SA_IMDB.p", "rb"))
        elif data_type == 'RT':
            model = pickle.load(open("models/LSTM/LSTM_SA_RT.p", "rb"))
        elif data_type == 'Kaggle':
            model = pickle.load(open("models/LSTM/LSTM_TCD_Kaggle.p", "rb"))
    elif model_type == 'CNN':
        if data_type == 'IMDB':
            model = pickle.load(open("models/CNN/CNN_SA_IMDB.p", "rb"))
        elif data_type == 'RT':
            model = pickle.load(open("models/CNN/CNN_SA_RT.p", "rb"))
        elif data_type == 'Kaggle':
            model = pickle.load(open("models/CNN/CNN_TCD_Kaggle.p", "rb"))
    # ---- DONE LOADING ----------
    end = time.time()
    print("DONE LOADING: {} minutes".format(np.round((end - start) / 60, 4)))
    sample_id = 1
    percent_perturbed = []
    pos_samples = data['test']['pos'][0:num_samples]
    neg_samples = data['test']['neg'][0:num_samples]
    num_successes = 0
    total_docs = 0
    for token_list in pos_samples:
        sentence = TreebankWordDetokenizer().detokenize(token_list)
        y = get_blackbox_classifier_score(model_type, sentence)
        y_class = np.round(y, 0)
        # print(sentence)
        # print('Original Score: {} | Label: {}'.format(y, y_class))
        blackbox = BlackBox(token_list, y_class, 0.8, model_type, glove_vectors, data_type)
        res = blackbox.blackBoxAttack()
        if res is not None:
            num_successes += 1
            percent_perturbed.append(res[1])
        total_docs += 1
    for token_list in neg_samples:
        sentence = TreebankWordDetokenizer().detokenize(token_list)
        y = get_blackbox_classifier_score(model_type, sentence)
        y_class = np.round(y, 0)
        # print(sentence)
        # print('Original Score: {} | Label: {}'.format(y, y_class))
        blackbox = BlackBox(token_list, y_class, 0.8, model_type, glove_vectors, data_type)
        res = blackbox.blackBoxAttack()
        if res is not None:
            num_successes += 1
            percent_perturbed.append(res[1])
            # print("Successful adversary. Fraction of original input perturbed: {}".format(np.round(percent_perturbed, 2)))
        total_docs += 1
    total_docs = 2 * num_samples
    success_rate = np.round((num_successes / total_docs) * 100, 3)
    perturb_rate = np.round(np.mean(percent_perturbed) * 100, 3)
    print("Avg % Perturbed: {}".format(perturb_rate))
    print("{} | {} | {}".format(data_type, model_type, success_rate))


# testBlackBoxSentimentAnalysis('RT', 'Google_NLP')
# testBlackBoxSentimentAnalysis('RT', 'IBM_Watson')
# testBlackBoxSentimentAnalysis('RT', 'Microsoft_Azure')
# testBlackBoxSentimentAnalysis('RT', 'AWS_Comprehend')
# testBlackBoxSentimentAnalysis('RT', 'FB_fastText')
# testBlackBoxSentimentAnalysis('IMDB', 'Google_NLP')
# testBlackBoxSentimentAnalysis('IMDB', 'IBM_Watson')
# testBlackBoxSentimentAnalysis('IMDB', 'Microsoft_Azure')
# testBlackBoxSentimentAnalysis('IMDB', 'AWS_Comprehend')
# testBlackBoxSentimentAnalysis('IMDB', 'FB_fastText')
# testBlackBoxSentimentAnalysis('Kaggle', 'Google_NLP', 2)
# testBlackBoxSentimentAnalysis('Kaggle', 'IBM_Watson', 2)
# testBlackBoxSentimentAnalysis('Kaggle', 'Microsoft_Azure', 10)
# testBlackBoxSentimentAnalysis('Kaggle', 'AWS_Comprehend', 10)
# testBlackBoxSentimentAnalysis('Kaggle', 'FB_fastText', 2)
# 16.2%P | 75%A
class TakeAction:
    def __init__(self, files, debugMode):
        self.__cards = None
        self.__tableCards = None
        self.__table = None
        self.__players = None
        self.doCallback = False
        self.endGameCallback = None
        self.win = True
        self.debugMode = debugMode
        self.slackMessage = ""
        self.blackbox = BlackBox(files, 6, 7, 7, 6)
        self.playerName = -1
        self.response = [0, 0, 0, 0, 0, 0]
        self.reload = 0
        self.betAmount = 100

    def setCallback(self, callback):
        self.doCallback = True
        self.endGameCallback = callback

    def getVectorResponse(self):
        # format of response vector: [call/check, fold, allin, raise/bet, reload (T/F), bet amount]
        # we can also perform a check (i.e. calling with an amount of 0 chips)
        cards = self.__cards.copy()
        cards.extend(self.__tableCards)
        self.response = self.blackbox.run(cards, self.__players, self.__table)
        if not self.debugMode:
            print(str(self.response))

    # Parses the JSON and chooses an appropriate action
    def processRequest(self, jsonObject):
        # if the JSON comes from a file use json.load(file)
        try:
            action = json.loads(jsonObject)
        except Exception as e:
            return None
        if "eventName" not in action:
            print("json object has no eventName")
            print(str(action))
            return None
        self.slackMessage = ""  # reset message
        if not self.debugMode:
            print(str(action["eventName"]))
        # The JSON for players and table is different for __action, __bet and __show_action.
        if action["eventName"] == "__action":
            if self.playerName == -1:
                self.playerName = action["data"]["self"]["playerName"]
                if not self.debugMode:
                    print("Hello. My name is " + str(self.playerName))
            self.getVectorResponse()
            response = self.response[:4]
            # It's our turn, we should respond with an __action.
            actionObj = {"eventName": "__action", "data": {"action": ""}}
            maxValue = max(response)
            maxIndex = response.index(maxValue)
            if maxIndex == 0:
                actionObj["data"]["action"] = "call"
            elif maxIndex == 1:
                actionObj["data"]["action"] = "fold"
            elif maxIndex == 2:
                actionObj["data"]["action"] = "allin"
                if action["data"]["self"]["chips"] >= 5000:
                    self.slackMessage = ("Oh no. We are betting "
                                         + str(action["data"]["self"]["chips"])
                                         + " chips!!! Wish me luck.")
                    self.__sendSlackStatus()
            elif maxIndex == 3:
                actionObj["data"]["action"] = "raise"
            self.playerName = action["data"]["self"]["playerName"]
            return json.dumps(actionObj)
        elif action["eventName"] == "__show_action":
            # Broadcast to everyone when someone makes an __action (on their turn)
            self.__setTable(action["data"]["table"])
            self.__setPlayers(action["data"]["players"])
        elif action["eventName"] == "__bet":
            # possibilities: [check, bet, fold]
            if not self.debugMode:
                print("We are betting!\n")
            self.getVectorResponse()
            response = self.response[:2]  # index 2 is "allin", which is not applicable here
            response.append(self.response[3])
            actionObj = {"eventName": "__action", "data": {"action": "", "amount": 0}}
            maxValue = max(response)
            maxIndex = response.index(maxValue)
            self.betAmount = self.response[5]
            if maxIndex == 0:
                actionObj["data"]["action"] = "check"
            elif maxIndex == 1:
                actionObj["data"]["action"] = "fold"
            elif maxIndex == 2:
                actionObj["data"]["action"] = "bet"
                actionObj["data"]["amount"] = int(self.betAmount * action["data"]["self"]["chips"])
            return json.dumps(actionObj)
        elif action["eventName"] == "__deal":
            # The small and big blinds are set!
            self.__setTable(action["data"]["table"])
            self.__setPlayers(action["data"]["players"])
        elif action["eventName"] == "__start_reload":
            # we should either reload or not, so T/F
            if not self.debugMode:
                print("Reload probability " + str(self.response[5]) + "\n")
            if self.response[5] > 0.5:
                if not self.debugMode:
                    print("Reloaded!\n")
                return json.dumps({"eventName": "__reload"})
        elif action["eventName"] == "__new_round":
            # The round begins, we have some useful info here.
            self.__setTable(action["data"]["table"])
            self.__setPlayers(action["data"]["players"])
        elif action["eventName"] == "__round_end":
            # End of a round, shows everything. Useful for dynamic learning.
            self.__setTable(action["data"]["table"])
            self.__setPlayers(action["data"]["players"])
        elif action["eventName"] == "__game_over":
            # Shows the winner
            self.win = self.__Survive(action)
            if self.win:
                self.slackMessage = "We survived!"
            else:
                self.slackMessage = "We didn't survive."
            self.__sendSlackStatus()
            if self.doCallback:
                self.endGameCallback(self)
        elif action["eventName"] == "__new_peer":
            # response to our __join request
            if not self.debugMode:
                print("I'm in!")
        return None

    # Checks if we survived or not.
    def __Survive(self, action):
        for element in action["data"]["players"]:
            if not self.debugMode:
                print(str(element))
            if element["playerName"] == self.playerName and element["isSurvive"]:
                return True
        return False
        # self.__sendSlackStatus()

    # sends AI status to slack webhook
    def __sendSlackStatus(self):
        if not self.debugMode:
            slackClient.sendMessage(self.slackMessage)

    ##
    # Table object
    #
    # tableNumber   int             Id of the table.
    # roundName     String          Name of the round. (preflop, flop, turn, river)
    # board         String Array    Probably the cards in the middle.
    # roundCount    int             Max amount of reloads I think.
    # raiseCount    int             Not sure.
    # betCount      int             Number of raises this round.
    # totalBet      int             I think this is the pot.
    # smallBlind    object          Name of the player and amount.
    # bigBlind      object          Name of the player and amount.
    ##
    def __setTable(self, table):
        tbl = []
        tbl.append(self.normalize(table['tableNumber']))
        tbl.append(self.normalize(table['roundCount']))
        tbl.append(self.normalize(table['raiseCount']))
        tbl.append(self.normalize(table['betCount']))
        tbl.append(self.normalize(table['totalBet']))
        tbl.append(self.normalize(table['smallBlind']['amount']))
        tbl.append(self.normalize(table['bigBlind']['amount']))
        self.__tableCards = [self.__parseCards(c) for c in table['board']]
        self.__table = tbl

    ##
    # Players should be an array of player objects.
    #
    # Player object:
    # playerName    String          Name of the player.
    # chips         int             Amount of money available.
    # folded        boolean         Has the player folded yet.
    # allIn         boolean         Is the player all in.
    # cards         String Array    The player's cards.
    # isSurvive     boolean         Not sure.
    # reloadCount   int             How many times the player has reloaded.
    # roundBet      int             Not sure, probably the big blind or something.
    # bet           int             What the player has bet already, I think.
    ##
    def __setPlayers(self, players):
        plrs = []
        for plr in players:
            arr = []
            arr.append(self.normalize(plr["chips"]))
            arr.append(self.normalize(plr["reloadCount"]))
            arr.append(self.normalize(plr["roundBet"]))
            arr.append(self.normalize(plr["bet"]))
            arr.append(int(plr["folded"]))
            arr.append(int(plr["allIn"]))
            arr.append(int(plr["isSurvive"]))
            plrs.append(arr)
            if 'cards' in plr:
                self.__cards = [self.__parseCards(c) for c in plr['cards']]
        self.__players = plrs

    def normalize(self, x):
        return 0 if x == 0 else 1 / float(x)

    def __parseCards(self, card):
        c = []
        num = "A23456789TJQK".index(card[0]) * 4 + "HDCS".index(card[1])
        for i in [32, 16, 8, 4, 2, 1]:
            c.append(1 if num >= i else 0)
            num = num - i if num >= i else num
        return c
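# A minimal, hypothetical driver for the TakeAction class above, only meant to illustrate
# the event-driven flow of processRequest; the constructor arguments and event payloads are
# made-up placeholders, not values from the original project. (In a real game, __deal /
# __show_action events populate cards, table and players before __action or __bet arrive.)
import json

bot = TakeAction(files=["weights.dat"], debugMode=True)   # hypothetical weights file

# __new_peer carries no payload the bot needs; it simply acknowledges the __join request.
print(bot.processRequest(json.dumps({"eventName": "__new_peer", "data": {}})))    # -> None

# __start_reload is answered with {"eventName": "__reload"} only when response[5] > 0.5;
# with the freshly initialised response vector this falls through and returns None.
print(bot.processRequest(json.dumps({"eventName": "__start_reload", "data": {}})))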
import random
import sys
from pyspark import SparkContext
import binascii
from blackbox import BlackBox

sc = SparkContext.getOrCreate()
bx = BlackBox()

"""
class BlackBox:
    def ask(self, file, num):
        lines = open(file, 'r').readlines()
        users = [0 for i in range(num)]
        for i in range(num):
            users[i] = lines[random.randint(0, len(lines) - 1)].rstrip("\n")
        return users

if __name__ == '__main__':
    bx = BlackBox()
"""

input_data = sys.argv[1]
stream_size = int(sys.argv[2])
num_of_asks = int(sys.argv[3])
output_file = sys.argv[4]
    return gen_func


def one_run(stream):
    fp, tn = 0., 0.
    filters = set()
    prev = set()
    for s in stream:
        h = set(myhashs(s))
        # All hash positions already set, but the user has never actually been seen: false positive.
        if h.issubset(filters) and s not in prev:
            fp += 1
        # Not all positions set and the user has not been seen before: true negative.
        if not h.issubset(filters) and s not in prev:
            tn += 1
        filters |= h
        prev.add(s)
    return float(fp) / float(fp + tn)


if __name__ == "__main__":
    infile = sys.argv[1]
    stream_size = int(sys.argv[2])
    num_asks = int(sys.argv[3])
    outfile = sys.argv[4]
    t0 = time.time()
    with open(outfile, 'w+') as f:
        f.write("Time,FPR\n")
        for i in range(num_asks):
            stream = BlackBox().ask(infile, stream_size)
            fpr = Task1.one_run(stream)
            f.write(str(i) + ',' + str(fpr) + '\n')
    print("Duration:", time.time() - t0)
import random
import sys
from statistics import median, mean

# class BlackBox:
#     def ask(self, file, num):
#         lines = open(file, 'r').readlines()
#         users = [0 for i in range(num)]
#         for i in range(num):
#             users[i] = lines[random.randint(0, len(lines) - 1)].rstrip("\n")
#         return users

# This is how we read the data!
from blackbox import BlackBox

bx = BlackBox()
users_fn = sys.argv[1]
stream_size = int(sys.argv[2])
num_asks = int(sys.argv[3])
print("This is user FN:", users_fn)
print("This is stream size:", stream_size)
print("This is number of asks:", num_asks)

# Task 2.1 -- Bloom Filter
filter_bit_array = [0] * 69997
import binascii
import math


def isPrime(n):
def reservoir(seqNum, data):
    global reservoirList
    count = seqNum
    if len(reservoirList) == 0:
        reservoirList = data
        count += len(data)
    else:
        for d in data:
            count += 1
            prob = random.randint(0, 100000) % count
            if prob < streamSize:
                pos = random.randint(0, 100000) % streamSize
                reservoirList[pos] = d
    return (str(count) + "," + str(reservoirList[0]) + "," + str(reservoirList[20]) + ","
            + str(reservoirList[40]) + "," + str(reservoirList[60]) + ","
            + str(reservoirList[80]) + "\n")


if __name__ == "__main__":
    blackBox = BlackBox()
    random.seed(553)
    op = "seqnum,0_id,20_id,40_id,60_id,80_id\n"
    for i in range(numOfAsks):
        data = blackBox.ask(inputFile, streamSize)
        op += reservoir(i * streamSize, data)
    writeToFile(op)
    end = time.time()
    print("Duration:" + str(round(end - start, 2)))
    stream_size = 100
    num_of_asks = 30
    output_filename = 'task3_out.csv'
elif len(sys.argv) != 5:
    print('Usage : python task1.py <input_filename> stream_size num_of_asks <output_filename>')
    exit(1)
else:
    input_filename = sys.argv[1]
    stream_size = int(sys.argv[2])
    num_of_asks = int(sys.argv[3])
    output_filename = sys.argv[4]

random.seed(553)
saved_users = []
bx = BlackBox()
n = 0
print('seqnum, 0_id, 20_id, 40_id, 60_id, 80_id\n')
with open(output_filename, 'w+') as file:
    file.write('seqnum, 0_id, 20_id, 40_id, 60_id, 80_id\n')
    for i in range(num_of_asks):
        data = bx.ask(input_filename, stream_size)
        for s in data:
            n += 1
            if n <= 100:
                saved_users.append(s)
            elif random.randint(0, 100000) % n < 100:
                index = random.randint(0, 100000) % 100
                saved_users[index] = s
        print((i + 1) * stream_size, saved_users[0], saved_users[20],
              saved_users[40], saved_users[60], saved_users[80])
    # calculate FPR = FP / (FP + TN)
    false_positive_rate = 0.0 if (false_pos + true_neg == 0) else false_pos / (false_pos + true_neg)
    with open(output, 'a') as f:
        f.write(str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
        f.write("," + str(float(false_positive_rate)))
        f.write("\n")
    print(str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) + " "
          + str(float(false_positive_rate)))


if __name__ == '__main__':
    file = "users.txt"
    num = 100
    times = 30
    output = "Bloomfilter_output.csv"
    filter_array = [0 for i in range(69997)]
    seen_user = set()
    with open(output, 'w+') as f:
        f.write("Time,FPR")
        f.write("\n")
    from blackbox import BlackBox
    bx = BlackBox()
    for i in range(times):
        bloom(bx.ask(file, num))
    estimate = int(statistics.median(avg_group))
    with open(output, "a") as f:
        f.write(str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
        f.write("," + str(ground_truth))
        f.write("," + str(estimate))
        f.write("\n")
    print(str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) + " "
          + str(ground_truth) + " " + str(estimate))


if __name__ == '__main__':
    file = "users.txt"
    num = 300
    times = 30
    output = "FlajoletMartin_output.csv"
    # number of hash functions
    k = 15
    # number of groups
    g = 5
    # hash functions per group
    l = 3
    with open(output, 'w+') as f:
        f.write("Time,Ground Truth,Estimation")
        f.write("\n")
    bx = BlackBox()
    for i in range(times):
        flajolet(bx.ask(file, num))
if __name__ == '__main__':
    from blackbox import BlackBox
    import sys
    import time

    start = time.time()
    file_name = sys.argv[1]
    stream_size = int(sys.argv[2])
    num_ask = int(sys.argv[3])
    output_path = sys.argv[4]
    random.seed(553)
    bx = BlackBox()
    data_holder = []
    n = 0
    res = []
    for _ in range(num_ask):
        stream_users = bx.ask(file_name, stream_size)
        for user in stream_users:
            n += 1
            if len(data_holder) < 100:
                data_holder.append(user)
            else:
                prob = random.random()
                if prob < 100 / n:
                    idx = random.randint(0, 99)
                    data_holder[idx] = user
        res.append((n, data_holder[0], data_holder[20], data_holder[40],
                    data_holder[60], data_holder[80]))
from pyspark import SparkContext
import random
import sys
import binascii
from blackbox import BlackBox

sc = SparkContext.getOrCreate()
bx = BlackBox()

"""
class BlackBox:
    def ask(self, file, num):
        lines = open(file, 'r').readlines()
        users = [0 for i in range(num)]
        for i in range(num):
            users[i] = lines[random.randint(0, len(lines) - 1)].rstrip("\n")
        return users

if __name__ == '__main__':
    bx = BlackBox()
    # users = bx.ask()
    # print(users)
"""

input_data = sys.argv[1]
stream_size = int(sys.argv[2])
    fpr = float(false_positives / float(false_positives + true_negatives))
    f.write(str(ask) + "," + str(fpr) + "\n")


if __name__ == "__main__":
    # time python3 task1.py $ASNLIB/publicdata/users.txt 500 30 task1.csv
    start_time = time.time()
    input_file = 'dataset/users.txt'
    stream_size = 100
    num_of_asks = 300
    output_file = 'output/task1.csv'
    # input_file = sys.argv[1]
    # stream_size = int(sys.argv[2])
    # num_of_asks = int(sys.argv[3])
    # output_file = sys.argv[4]
    filter_bit_array = [0] * 69997
    global_user_set = set()
    f = open(output_file, "w")
    f.write("Time,FPR\n")
    bx = BlackBox()
    for ask in range(num_of_asks):
        stream_users = bx.ask(input_file, stream_size)
        bloom_filter(stream_users, ask)
    f.close()
    print("Duration : ", time.time() - start_time)
def main(_):
    start_t = time.time()
    try:
        welcome_message()
        # import data
        mdl = BlackBox(FLAGS)
        # NOTE this will work with format: mdl.oracle = (image, pred_val, true_val)
    except:
        logger.error('black box training failed...........shutting down!')
        return
    logger.info('obtained black box training data')
    logger.info('oracle data capture time: %f' % (time.time() - start_t))
    mnist = mdl.oracle
    prep_t = time.time()
    with tf.device('/cpu:0'):
        # translate into tensorflow-style nparrays
        x_vals = image_list_to_np(mnist, 0)
        true_vals = image_list_to_np(mnist, 2)
        # yvals converted to one-hot vectors
        y_vals = [x[1] for x in mnist]
        y_vals = [one_hot(i) for i in y_vals]
        y_vals = np.array(y_vals)
        y_vals = y_vals.reshape((len(y_vals), 10))
        # split training and test data into nparrays
        train_images, train_labels, test_images, test_labels = split_train_data(x_vals, y_vals, FLAGS.split)
        # Tensorflow variable setup
        # input vector
        x = tf.placeholder(tf.float32, [None, 784])
        # y output vector
        y_ = tf.placeholder(tf.float32, [None, 10])
        # build the graph for the deep net
        y_conv, keep_prob = deepnn(x)
        # define loss function -> cross entropy for now with softmax
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
        # train step, 1e-4 is default, best to use -2/-3 depending on time
        train_step = tf.train.AdamOptimizer(FLAGS.optimize).minimize(cross_entropy)
        # define correct prediction vector and accuracy comparison
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    logger.info('training sets: %d test sets: %d' % (len(train_images), len(test_images)))
    cnn_saver = tf.train.Saver()
    logger.info('cpu preprocessing time: %f' % (time.time() - prep_t))
    train_t = time.time()
    logger.info('starting adversarial model training')
    # begin training with session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # TODO create batching loop
        for i in range(FLAGS.iters):
            # sanity check: accuracy should be going up -> necessary but not sufficient
            if i % int(FLAGS.iters / 5) == 0:
                train_accuracy = accuracy.eval(feed_dict={x: train_images, y_: train_labels, keep_prob: 1.0})
                logger.info('step %d, training accuracy %g' % (i, train_accuracy))
            # update rule - softmax vector obtained for checking
            trainer, softmax = sess.run([train_step, cross_entropy],
                                        feed_dict={x: train_images, y_: train_labels, keep_prob: 0.5})
        logger.info('adversarial model has been trained')
        # snag the gradient vector wrt X inputs
        grads = tf.gradients(cross_entropy, [x])
        # use test data as input for perturbations
        jacobian = sess.run(grads, feed_dict={x: test_images, y_: test_labels, keep_prob: 1.0})
        # test_ = tf.argmax(y_, 1)
        # test_vals = test_.eval(feed_dict={y_: test_labels})  # use this...
        verify = sess.run(tf.argmax(test_labels, 1))
        # for ver in zip(verify, test_vals):
        #     print(ver, test_vals)
        pred_ = tf.argmax(y_conv, 1)
        # vify = tf.argmax(y_, 1)
        pred_vals = pred_.eval(feed_dict={x: test_images, y_: test_labels, keep_prob: 1.0})
        # vify_vals = vify.eval(feed_dict={x: test_images, y_: test_labels, keep_prob: 1.0})
        true_pred = [(pxl, p) for pxl, p, r in zip(test_images, pred_vals, verify) if p == r]
        logger.info('true positive test exemplars: %f' % (len(true_pred)))
        logger.results('adversary accuracy: %g'
                       % (accuracy.eval(feed_dict={x: test_images, y_: test_labels, keep_prob: 1.0})))
        # set up the goodfellow attack iterations
        logger.info('attack model train time: %f' % (time.time() - train_t))
        pert_t = time.time()
        adv_list = []
        for idx, pos in enumerate(true_pred):
            for epsilon in np.linspace(0.025, .25, num=FLAGS.augments):
                xp = goodfellow_mod(np.array(pos[0]), jacobian[0][idx], epsilon)
                prime_label = one_hot(int(pos[1]))
                xprime = np.array(xp).reshape((1, 784))
                # xprime = xprime.reshape((1, 784))
                yprime = np.array(prime_label).reshape((1, 10))
                # yprime = yprime.reshape((1, 10))
                pred_vals = pred_.eval(feed_dict={x: xprime, y_: yprime, keep_prob: 1.0})
                acc = accuracy.eval(feed_dict={x: xprime, y_: yprime, keep_prob: 1.0})
                # corr = sess.run(mdl.y, feed_dict={x: xprime})
                if acc < 1.0:
                    # plt.figure(1)
                    # plt.subplot(121)
                    # img = pos[0].reshape((28, 28))
                    # plt.imshow(img)
                    # plt.subplot(122)
                    # img1 = xp.reshape((28, 28))
                    # plt.imshow(img1)
                    # print(pos[1], np.argmax(yprime), pred_vals, epsilon, np.sum(xp), np.sum(pos[0]), np.sum(xprime))
                    # plt.show()
                    adv_list.append((xprime, np.argmax(yprime), pred_vals, epsilon, pos[0]))
                    # logger.results('YES adversary accuracy: %g %f' % (acc, epsilon))
                    break
        # can do this each iteration - or as a whole... at this point timing doesn't matter, but it will
        logger.results('true positive adversary count: %f' % (float(len(adv_list)) / float(len(true_pred))))
        logger.info('distortion vector time: %f' % (time.time() - pert_t))
        att_t = time.time()
        # save model to file
        cnn_saver_path = cnn_saver.save(sess, 'cnn_saver.ckpt')
        # at this point adv_list is a tuple (x modified image, y label, true label, epsilon found)
        adv_images = [a[0] for a in adv_list]
        l = len(adv_list)
        adv_images = np.array(adv_images).reshape((l, 784))
        # adv_images = adv_images.reshape((l, 784))
        adv_labels = [a[1] for a in adv_list]
        adv_labels = [one_hot(int(v)) for v in adv_labels]
        adv_labels = np.array(adv_labels).reshape((l, 10))
        adv_real = [a[2] for a in adv_list]
        adv_real = np.array(adv_real)
        # adv_labels = adv_labels.reshape((l, 10))
        adv_epsilon = [a[3] for a in adv_list]
        adv_epsilon = np.array(adv_epsilon)
        adv_real_image = [a[4] for a in adv_list]
        # test for transferability
        adv_real = mdl.sess.run(tf.argmax(adv_labels, 1))
        adv_ = tf.argmax(mdl.y, 1)
        adv_pred = mdl.sess.run(adv_, feed_dict={mdl.x: adv_images})
        winners = []
        epsilon_tracker = collections.defaultdict(int)
        for idx, (a, l, r) in enumerate(zip(adv_pred, adv_labels, adv_real)):
            # print(a, l, r, a == r)
            if a != r:
                # logger.info('found adversarial example: %g %g' % (a, r))
                winners.append(idx)
                epsilon_tracker[adv_epsilon[idx]] += 1
        logger.info('attack results time: %f' % (time.time() - att_t))
        logger.info('****************** results **************')
        logger.results('black box adversarial attack transferability: %g'
                       % (1 - sess.run(mdl.accuracy, feed_dict={mdl.x: adv_images, mdl.y_: adv_labels})))
        for d, v in sorted(epsilon_tracker.items()):
            logger.results('epsilon %s %s' % (d, v))
        # grab the first two success stories and show them -> let's assume two, or error handle later
        adv_pic0 = adv_images[winners[0]].reshape((28, 28))
        adv_pic0_real = adv_real_image[winners[0]].reshape((28, 28))
        rando = random.randint(1, (len(winners) - 1))
        adv_pic1 = adv_images[winners[rando]].reshape((28, 28))
        adv_pic1_real = adv_real_image[winners[rando]].reshape((28, 28))
        true_pic = mdl.pictrue
        false_pic = mdl.picfalse
        labels = ['ORIGINAL NEURAL NET CORRECT ON THIS %s' % (mdl.pictruelabel[0]),
                  'ORIGINAL NEURAL NET THOUGHT UNTAMPERED %s WAS %s' % (mdl.picfalselabel[1], mdl.picfalselabel[0]),
                  'ORIGINAL IMAGE %s' % (adv_real[winners[0]]),
                  'ORIGINAL NET THOUGHT %s' % (adv_pred[winners[0]]),
                  'ORIGINAL IMAGE %s' % (adv_real[winners[rando]]),
                  'ORIGINAL NET THOUGHT %s' % (adv_pred[winners[rando]])]
        logger.info('total program run time: %f' % (time.time() - start_t))
        if not FLAGS.nograph:
            graphics([true_pic, false_pic, adv_pic0_real, adv_pic0, adv_pic1_real, adv_pic1], labels)
random.seed(553)

## Defining the path to the data.
dataSetPath = sys.argv[1]
## Defining the stream size.
streamSize = int(sys.argv[2])
## Defining the number of asks.
numAsks = int(sys.argv[3])
## Defining the path to the output file.
outfilePath = sys.argv[4]

## Initiate an instance of blackbox.
bxInstance = BlackBox()
## Initiating a list to hold the user ID's.
userIDList = []
## Defining the maximum size of the list.
maxListSize = 100
## Initialising a variable to hold the number
## of elements seen till now.
numElems = 0

## Function to run the reservoir sampling.
def reservoirSampling(userStream):
    ## Global parameters.