def getModelData(webpageIds, runID):
    """Collect countermeasure-processed training traces for the given pages
    and feed them to generateModel().

    Parameters
    ----------
    webpageIds : iterable of int
        Ids of the webpages to sample model-building traces from.
    runID : object
        Opaque run identifier, passed through to generateModel().

    Side effects: for DATA_SOURCE == 3 this mutates config.DATA_SET and
    config.PCAP_ROOT (Android-Tor dataset selection).
    """
    countermeasure = intToCountermeasure(config.COUNTERMEASURE)
    traintracesofWebsite = []
    targetWebpage = None

    # Pick the window [startIndex, endIndex] that a random seed may fall in,
    # so that seed-ModelTraceNum .. seed stays inside the dataset.
    model_trace_num = config.GLOVE_OPTIONS['ModelTraceNum']
    if config.DATA_SOURCE == 0:
        startIndex = model_trace_num
        endIndex = len(config.DATA_SET) - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        maxTracesPerWebsiteH = 160  # Herrmann dataset: 160 traces/site
        startIndex = model_trace_num
        endIndex = maxTracesPerWebsiteH - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        maxTracesPerWebsiteH = 18  # smaller Herrmann variant: 18 traces/site
        startIndex = model_trace_num
        endIndex = maxTracesPerWebsiteH - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 3:
        # Android-Tor grouping: switch the global dataset and pcap root.
        config.DATA_SET = config.DATA_SET_ANDROID_TOR
        startIndex = model_trace_num
        endIndex = len(config.DATA_SET) - config.NUM_TESTING_TRACES
        config.PCAP_ROOT = os.path.join(config.BASE_DIR,
                                        'pcap-logs-Android-Tor-Grouping')
    else:
        # Original code fell through with startIndex/endIndex unbound and
        # crashed with a NameError below; fail with a clear message instead.
        raise ValueError('Unsupported DATA_SOURCE: %r' % (config.DATA_SOURCE,))

    seed = random.randint(startIndex, endIndex)

    for webpageId in webpageIds:
        if config.DATA_SOURCE in (0, 3):
            webpageTrain = Datastore.getWebpagesLL(
                [webpageId], seed - model_trace_num, seed)
        else:  # DATA_SOURCE 1 or 2 (only remaining cases after the check above)
            webpageTrain = Datastore.getWebpagesHerrmann(
                [webpageId], seed - model_trace_num, seed)
        webpageTrain = webpageTrain[0]

        # First page sampled becomes the morphing target for DTS/WSM.
        if targetWebpage is None:
            targetWebpage = webpageTrain

        metadata = None
        if config.COUNTERMEASURE in [config.DIRECT_TARGET_SAMPLING,
                                     config.WRIGHT_STYLE_MORPHING]:
            metadata = countermeasure.buildMetadata(webpageTrain, targetWebpage)

        # Original code wrapped this in `for w in [webpageTrain]` with a
        # counter `i` that was always 0; the wrapper was dead code and is
        # removed — every processed trace is appended.
        for trace in webpageTrain.getTraces():
            if countermeasure:
                if config.COUNTERMEASURE in [config.DIRECT_TARGET_SAMPLING,
                                             config.WRIGHT_STYLE_MORPHING]:
                    # The target page itself is left unmodified.
                    if webpageTrain.getId() != targetWebpage.getId():
                        traceWithCountermeasure = \
                            countermeasure.applyCountermeasure(trace, metadata)
                    else:
                        traceWithCountermeasure = trace
                else:
                    traceWithCountermeasure = \
                        countermeasure.applyCountermeasure(trace)
            else:
                traceWithCountermeasure = trace
            traintracesofWebsite.append(traceWithCountermeasure)

    generateModel(traintracesofWebsite, runID)
seed = random.randint( startIndex, endIndex ) preCountermeasureOverhead = 0 postCountermeasureOverhead = 0 classifier = intToClassifier(config.CLASSIFIER) countermeasure = intToCountermeasure(config.COUNTERMEASURE) trainingSet = [] testingSet = [] targetWebpage = None for webpageId in webpageIds: if config.DATA_SOURCE == 0: webpageTrain = Datastore.getWebpagesLL( [webpageId], seed-config.NUM_TRAINING_TRACES, seed ) webpageTest = Datastore.getWebpagesLL( [webpageId], seed, seed+config.NUM_TESTING_TRACES ) elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2: webpageTrain = Datastore.getWebpagesHerrmann( [webpageId], seed-config.NUM_TRAINING_TRACES, seed ) webpageTest = Datastore.getWebpagesHerrmann( [webpageId], seed, seed+config.NUM_TESTING_TRACES ) webpageTrain = webpageTrain[0] webpageTest = webpageTest[0] if targetWebpage == None: targetWebpage = webpageTrain preCountermeasureOverhead += webpageTrain.getBandwidth() preCountermeasureOverhead += webpageTest.getBandwidth() metadata = None
trainingSet = [] testingSet = [] targetWebpage = None traintracesofWebsite = [] testtracesofWebsite = [] if config.CLASSIFIER == config.GLOVE_CLASSIFIER or config.CLASSIFIER == config.GLOVE_CLASSIFIER2: getModelData(webpageIds,runID) tempRunID = runID for webpageId in webpageIds: if config.DATA_SOURCE == 0 or config.DATA_SOURCE == 3: webpageTrain = Datastore.getWebpagesLL([webpageId], seed - config.NUM_TRAINING_TRACES, seed) webpageTest = Datastore.getWebpagesLL([webpageId], seed, seed + config.NUM_TESTING_TRACES) elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2: webpageTrain = Datastore.getWebpagesHerrmann([webpageId], seed - config.NUM_TRAINING_TRACES, seed) webpageTest = Datastore.getWebpagesHerrmann([webpageId], seed, seed + config.NUM_TESTING_TRACES) webpageTrain = webpageTrain[0] webpageTest = webpageTest[0] # print webpageTrain # print webpageTrain.getHistogram() if targetWebpage == None: targetWebpage = webpageTrain preCountermeasureOverhead += webpageTrain.getBandwidth() preCountermeasureOverhead += webpageTest.getBandwidth() #print preCountermeasureOverhead
if not os.path.exists(WangOpenWorldKnnfolder): os.mkdir(WangOpenWorldKnnfolder) else: shutil.rmtree( WangOpenWorldKnnfolder) # delete and remake folder os.mkdir(WangOpenWorldKnnfolder) # batch folder os.mkdir(WangOpenWorldKnnfolder + '/' + 'batch') for webpageId in webpageIds: if config.DATA_SOURCE == 0 or config.DATA_SOURCE == 3 or config.DATA_SOURCE == 4: if config.COVARIATE_SHIFT == 0: # Normal case webpageTrain = Datastore.getWebpagesLL( [webpageId], seed - config.NUM_TRAINING_TRACES, seed) webpageTest = Datastore.getWebpagesLL( [webpageId], seed, seed + config.NUM_TESTING_TRACES) else: # span time training/testing webpageTrain = Datastore.getWebpagesLL( [webpageId], 0, config.NUM_TRAINING_TRACES) #webpageTest = Datastore.getWebpagesLL( [webpageId], len(config.DATA_SET)-config.NUM_TESTING_TRACES, len(config.DATA_SET) ) # a span of config.COVARIATE_SHIFT days webpageTest = Datastore.getWebpagesLL( [webpageId], config.NUM_TRAINING_TRACES + config.COVARIATE_SHIFT, config.NUM_TRAINING_TRACES + config.COVARIATE_SHIFT +
if not os.path.exists(WangOpenWorldKnnfolder): os.mkdir(WangOpenWorldKnnfolder) else: shutil.rmtree(WangOpenWorldKnnfolder ) # delete and remake folder os.mkdir(WangOpenWorldKnnfolder) # batch folder os.mkdir(WangOpenWorldKnnfolder + '/' + 'batch') for webpageId in webpageIds: if config.DATA_SOURCE == 0 or config.DATA_SOURCE == 3 or config.DATA_SOURCE == 4: if config.COVARIATE_SHIFT == 0: # Normal case webpageTrain = Datastore.getWebpagesLL( [webpageId], seed - config.NUM_TRAINING_TRACES, seed) webpageTest = Datastore.getWebpagesLL( [webpageId], seed, seed + config.NUM_TESTING_TRACES) else: # span time training/testing endSlideTrain = config.START_SLIDE_TRAIN + config.NUM_TRAINING_TRACES webpageTrain = Datastore.getWebpagesLL( [webpageId], config.START_SLIDE_TRAIN, endSlideTrain) #webpageTest = Datastore.getWebpagesLL( [webpageId], len(config.DATA_SET)-config.NUM_TESTING_TRACES, len(config.DATA_SET) ) # a span of config.COVARIATE_SHIFT days #webpageTest = Datastore.getWebpagesLL( [webpageId], config.NUM_TRAINING_TRACES+config.COVARIATE_SHIFT, config.NUM_TRAINING_TRACES+config.COVARIATE_SHIFT+config.NUM_TESTING_TRACES) webpageTest = Datastore.getWebpagesLL(
def run():
    """Top-level experiment driver: parse arguments, validate the data-set
    split, instantiate classifier/countermeasure, then run NUM_TRIALS
    train/test rounds and report a summary per round.

    Returns 0 on success, 3 on invalid configuration; exits with status 4
    when t+T exceeds the data-set size.
    """
    run_id, countermeasure_params, classifier_params = read_arguments()

    # Result files are keyed by the full experiment configuration.
    output_filename_list = [
        'results',
        'k' + str(config.BUCKET_SIZE),
        'c' + str(config.COUNTERMEASURE),
        'd' + str(config.DATA_SOURCE),
        'C' + str(config.CLASSIFIER),
        'N' + str(config.TOP_N),
        't' + str(config.NUM_TRAINING_TRACES),
        'T' + str(config.NUM_TESTING_TRACES),
    ]
    output_filename = os.path.join(config.OUTPUT_DIR,
                                   '.'.join(output_filename_list))
    if not os.path.exists(config.CACHE_DIR):
        os.mkdir(config.CACHE_DIR)
    if not os.path.exists(output_filename + '.output'):
        banner = ['accuracy', 'overhead', 'timeElapsedTotal',
                  'timeElapsedClassifier']
        # `with` guarantees the handle is closed even if write() raises.
        with open(output_filename + '.output', 'w') as f:
            f.write(','.join(banner))
    if not os.path.exists(output_filename + '.debug'):
        with open(output_filename + '.debug', 'w'):
            pass  # touch the debug file

    # Data-set Selection: window [start_index, end_index] from which the
    # random split seed may be drawn.
    training_set_size = config.NUM_TRAINING_TRACES
    testing_set_size = config.NUM_TESTING_TRACES
    if config.DATA_SOURCE == 0:
        dataset_size = len(config.DATA_SET)
        start_index = config.NUM_TRAINING_TRACES
        end_index = len(config.DATA_SET) - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        dataset_size = 160  # Herrmann dataset: 160 traces per website
        max_traces_per_website_h = 160
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        dataset_size = 18  # smaller Herrmann variant: 18 traces per website
        max_traces_per_website_h = 18
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    else:
        error('Invalid data-source id:', config.DATA_SOURCE)
        return 3

    # Checking Training-set and Test-set Sizes
    info('|dataset|={}\t|training-set|={}, |testing-set|={}'.format(
        dataset_size, training_set_size, testing_set_size))
    if training_set_size + testing_set_size > dataset_size:
        print('[ERROR] t+T is larger than data-set size!')
        print(
            '\tThe data-set is divided into two parts: Training set (t) and Testing set (T), so t+T must be '
        )
        print('\tless than or equal to the total number of data in data-set.')
        sys.exit(4)

    # Selecting Algorithms
    classifier = int_to_classifier(config.CLASSIFIER)
    countermeasure = int_to_countermeasure(config.COUNTERMEASURE)
    if issubclass(countermeasure, CounterMeasure):
        # New-style countermeasures are classes with initialize/set_param.
        countermeasure.initialize()
        countermeasure = countermeasure()  # also instantiating
        new_style_cm = True
    else:
        new_style_cm = False

    # Apply "attr=val" overrides from the command line.
    countermeasure_params = countermeasure_params.split(',')
    for p in countermeasure_params:
        if not p or not p.strip():
            continue
        try:
            attr, val = p.strip().split('=', 1)
        except ValueError:
            error('Invalid parameter:', p)
            return 3
        try:
            val = int(val)
        except ValueError:
            pass  # leave non-numeric values as strings
        if new_style_cm:
            countermeasure.set_param(attr, val)
        else:
            setattr(countermeasure, attr, val)

    # Run
    for run_index in range(config.NUM_TRIALS):
        run_start_time = time.time()
        print('Run #{}'.format(run_index))

        # Select a sample of size k from websites 1..N.
        # BUGFIX: range objects are immutable in Python 3, so
        # random.shuffle() on one raised TypeError; materialize a list first.
        # NOTE(review): range(0, TOP_N - 1) yields only TOP_N-1 ids (0..N-2);
        # looks like an off-by-one against the "1..N" comment — confirm intent
        # before widening it.
        webpage_ids = list(range(0, config.TOP_N - 1))
        random.shuffle(webpage_ids)
        webpage_ids = webpage_ids[0:config.BUCKET_SIZE]
        seed = random.randint(start_index, end_index)
        info('selected webpages:', webpage_ids)

        training_set = []
        testing_set = []
        target_webpage = None
        actual_bandwidth = 0
        modified_bandwidth = 0
        actual_timing = 0
        modified_timing = 0

        for page_id in webpage_ids:
            print('.', end='')
            sys.stdout.flush()

            # Sampling From Data-source
            if config.DATA_SOURCE == 0:
                webpage_train = Datastore.getWebpagesLL(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesLL(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            elif config.DATA_SOURCE in [1, 2]:
                webpage_train = Datastore.getWebpagesHerrmann(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesHerrmann(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            else:
                error('Invalid data-source id:', config.DATA_SOURCE)
                return 3

            # Selecting Targets: first sampled page is the morphing target.
            webpage_train = webpage_train[0]
            webpage_test = webpage_test[0]
            if target_webpage is None:
                target_webpage = webpage_train
            print(webpage_test, webpage_train)

            # Accounting (pre-countermeasure bandwidth)
            actual_bandwidth += webpage_train.getBandwidth()
            actual_bandwidth += webpage_test.getBandwidth()

            # Train Countermeasure
            metadata = None
            if new_style_cm:
                countermeasure.train(src_page=webpage_train,
                                     target_page=target_webpage)
            else:
                if countermeasure in [DirectTargetSampling,
                                      WrightStyleMorphing]:
                    metadata = countermeasure.buildMetadata(
                        webpage_train, target_webpage)

            # Applying Countermeasure (and feeding data to classifier);
            # i == 0 -> training page, i == 1 -> testing page.
            for i, w in enumerate([webpage_train, webpage_test]):
                for trace in w.getTraces():
                    actual_timing += trace.get_total_time()
                    if countermeasure:
                        if new_style_cm:
                            modified_trace = countermeasure.apply_to_trace(
                                trace)
                        else:
                            if countermeasure in [DirectTargetSampling,
                                                  WrightStyleMorphing]:
                                # The target page itself is left unmodified.
                                if w.getId() != target_webpage.getId():
                                    modified_trace = countermeasure.applyCountermeasure(
                                        trace, metadata)
                                else:
                                    modified_trace = trace
                            else:
                                modified_trace = countermeasure.applyCountermeasure(
                                    trace)
                    else:
                        modified_trace = trace

                    # Overhead Accounting
                    modified_bandwidth += modified_trace.getBandwidth()
                    modified_timing += modified_trace.get_total_time()

                    instance = classifier.traceToInstance(modified_trace)
                    if instance:
                        if i == 0:  # train-page
                            training_set.append(instance)
                        elif i == 1:  # test-page
                            testing_set.append(instance)

        # Classification
        print('')
        classification_start_time = time.time()
        cl = classifier.classify(run_id, training_set, testing_set)
        run_end_time = time.time()
        # NOTE(review): these timings are computed but not passed anywhere
        # visible; kept for parity with the original — confirm whether
        # report_summary should receive them.
        run_total_time = run_end_time - run_start_time
        classification_total_time = run_end_time - classification_start_time
        report_summary(cl,
                       output_filename=output_filename,
                       classifier=classifier,
                       countermeasure=countermeasure)
    return 0
webpageIds = webpageIds[0:config.BUCKET_SIZE] seed = random.randint( startIndex, endIndex ) preCountermeasureOverhead = 0 postCountermeasureOverhead = 0 classifier = intToClassifier(config.CLASSIFIER) countermeasure = intToCountermeasure(config.COUNTERMEASURE) trainingSet = [] testingSet = [] targetWebpage = None webpageAll = Datastore.getWebpagesLL( webpageIds, 0, len(config.DATA_SET) ) #print_triplets(webpageAll) for webpageId in webpageIds: if config.DATA_SOURCE == 0: webpageTrain = Datastore.getWebpagesLL( [webpageId], seed-config.NUM_TRAINING_TRACES, seed ) webpageTest = Datastore.getWebpagesLL( [webpageId], seed, seed+config.NUM_TESTING_TRACES ) elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2: webpageTrain = Datastore.getWebpagesHerrmann( [webpageId], seed-config.NUM_TRAINING_TRACES, seed ) webpageTest = Datastore.getWebpagesHerrmann( [webpageId], seed, seed+config.NUM_TESTING_TRACES ) webpageTrain = webpageTrain[0] webpageTest = webpageTest[0] if targetWebpage == None: targetWebpage = webpageTrain
def run():
    """Top-level experiment driver (duplicate copy in this file): parse
    arguments, validate the data-set split, instantiate the classifier and
    countermeasure, then run NUM_TRIALS train/test rounds and report a
    summary per round.

    Returns 0 on success, 3 on invalid configuration; exits with status 4
    when t+T exceeds the data-set size.
    """
    run_id, countermeasure_params, classifier_params = read_arguments()

    # Result files are keyed by the full experiment configuration.
    output_filename_list = [
        'results',
        'k' + str(config.BUCKET_SIZE),
        'c' + str(config.COUNTERMEASURE),
        'd' + str(config.DATA_SOURCE),
        'C' + str(config.CLASSIFIER),
        'N' + str(config.TOP_N),
        't' + str(config.NUM_TRAINING_TRACES),
        'T' + str(config.NUM_TESTING_TRACES),
    ]
    output_filename = os.path.join(config.OUTPUT_DIR,
                                   '.'.join(output_filename_list))
    if not os.path.exists(config.CACHE_DIR):
        os.mkdir(config.CACHE_DIR)
    if not os.path.exists(output_filename + '.output'):
        banner = ['accuracy', 'overhead', 'timeElapsedTotal',
                  'timeElapsedClassifier']
        # `with` guarantees the handle is closed even if write() raises.
        with open(output_filename + '.output', 'w') as f:
            f.write(','.join(banner))
    if not os.path.exists(output_filename + '.debug'):
        with open(output_filename + '.debug', 'w'):
            pass  # touch the debug file

    # Data-set Selection: window [start_index, end_index] from which the
    # random split seed may be drawn.
    training_set_size = config.NUM_TRAINING_TRACES
    testing_set_size = config.NUM_TESTING_TRACES
    if config.DATA_SOURCE == 0:
        dataset_size = len(config.DATA_SET)
        start_index = config.NUM_TRAINING_TRACES
        end_index = len(config.DATA_SET) - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        dataset_size = 160  # Herrmann dataset: 160 traces per website
        max_traces_per_website_h = 160
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        dataset_size = 18  # smaller Herrmann variant: 18 traces per website
        max_traces_per_website_h = 18
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    else:
        error('Invalid data-source id:', config.DATA_SOURCE)
        return 3

    # Checking Training-set and Test-set Sizes
    info('|dataset|={}\t|training-set|={}, |testing-set|={}'.format(
        dataset_size, training_set_size, testing_set_size))
    if training_set_size + testing_set_size > dataset_size:
        print('[ERROR] t+T is larger than data-set size!')
        print('\tThe data-set is divided into two parts: Training set (t) and Testing set (T), so t+T must be ')
        print('\tless than or equal to the total number of data in data-set.')
        sys.exit(4)

    # Selecting Algorithms
    classifier = int_to_classifier(config.CLASSIFIER)
    countermeasure = int_to_countermeasure(config.COUNTERMEASURE)
    if issubclass(countermeasure, CounterMeasure):
        # New-style countermeasures are classes with initialize/set_param.
        countermeasure.initialize()
        countermeasure = countermeasure()  # also instantiating
        new_style_cm = True
    else:
        new_style_cm = False

    # Apply "attr=val" overrides from the command line.
    countermeasure_params = countermeasure_params.split(',')
    for p in countermeasure_params:
        if not p or not p.strip():
            continue
        try:
            attr, val = p.strip().split('=', 1)
        except ValueError:
            error('Invalid parameter:', p)
            return 3
        try:
            val = int(val)
        except ValueError:
            pass  # leave non-numeric values as strings
        if new_style_cm:
            countermeasure.set_param(attr, val)
        else:
            setattr(countermeasure, attr, val)

    # Run
    for run_index in range(config.NUM_TRIALS):
        run_start_time = time.time()
        print('Run #{}'.format(run_index))

        # Select a sample of size k from websites 1..N.
        # BUGFIX: range objects are immutable in Python 3, so
        # random.shuffle() on one raised TypeError; materialize a list first.
        # NOTE(review): range(0, TOP_N - 1) yields only TOP_N-1 ids (0..N-2);
        # looks like an off-by-one against the "1..N" comment — confirm intent
        # before widening it.
        webpage_ids = list(range(0, config.TOP_N - 1))
        random.shuffle(webpage_ids)
        webpage_ids = webpage_ids[0:config.BUCKET_SIZE]
        seed = random.randint(start_index, end_index)
        info('selected webpages:', webpage_ids)

        training_set = []
        testing_set = []
        target_webpage = None
        actual_bandwidth = 0
        modified_bandwidth = 0
        actual_timing = 0
        modified_timing = 0

        for page_id in webpage_ids:
            print('.', end='')
            sys.stdout.flush()

            # Sampling From Data-source
            if config.DATA_SOURCE == 0:
                webpage_train = Datastore.getWebpagesLL(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesLL(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            elif config.DATA_SOURCE in [1, 2]:
                webpage_train = Datastore.getWebpagesHerrmann(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesHerrmann(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            else:
                error('Invalid data-source id:', config.DATA_SOURCE)
                return 3

            # Selecting Targets: first sampled page is the morphing target.
            webpage_train = webpage_train[0]
            webpage_test = webpage_test[0]
            if target_webpage is None:
                target_webpage = webpage_train
            print(webpage_test, webpage_train)

            # Accounting (pre-countermeasure bandwidth)
            actual_bandwidth += webpage_train.getBandwidth()
            actual_bandwidth += webpage_test.getBandwidth()

            # Train Countermeasure
            metadata = None
            if new_style_cm:
                countermeasure.train(src_page=webpage_train,
                                     target_page=target_webpage)
            else:
                if countermeasure in [DirectTargetSampling,
                                      WrightStyleMorphing]:
                    metadata = countermeasure.buildMetadata(
                        webpage_train, target_webpage)

            # Applying Countermeasure (and feeding data to classifier);
            # i == 0 -> training page, i == 1 -> testing page.
            for i, w in enumerate([webpage_train, webpage_test]):
                for trace in w.getTraces():
                    actual_timing += trace.get_total_time()
                    if countermeasure:
                        if new_style_cm:
                            modified_trace = countermeasure.apply_to_trace(
                                trace)
                        else:
                            if countermeasure in [DirectTargetSampling,
                                                  WrightStyleMorphing]:
                                # The target page itself is left unmodified.
                                if w.getId() != target_webpage.getId():
                                    modified_trace = countermeasure.applyCountermeasure(
                                        trace, metadata)
                                else:
                                    modified_trace = trace
                            else:
                                modified_trace = countermeasure.applyCountermeasure(
                                    trace)
                    else:
                        modified_trace = trace

                    # Overhead Accounting
                    modified_bandwidth += modified_trace.getBandwidth()
                    modified_timing += modified_trace.get_total_time()

                    instance = classifier.traceToInstance(modified_trace)
                    if instance:
                        if i == 0:  # train-page
                            training_set.append(instance)
                        elif i == 1:  # test-page
                            testing_set.append(instance)

        # Classification
        print('')
        classification_start_time = time.time()
        cl = classifier.classify(run_id, training_set, testing_set)
        run_end_time = time.time()
        # NOTE(review): these timings are computed but not passed anywhere
        # visible; kept for parity with the original — confirm whether
        # report_summary should receive them.
        run_total_time = run_end_time - run_start_time
        classification_total_time = run_end_time - classification_start_time
        report_summary(cl,
                       output_filename=output_filename,
                       classifier=classifier,
                       countermeasure=countermeasure)
    return 0