def main():
    """Gather immediate-package, seen and decision info, then persist it."""
    store = Datastore()
    print('immediate packages')
    # Run the collection steps in their required order, then save.
    for step in (store.get_imm_packages, store.get_seen_info,
                 store.get_decision_info, store.save):
        step()
def test_readfile(self): config.PCAP_ROOT = os.path.join(config.BASE_DIR, 'honeypatckBenattackTest/sysdig') config.DATA_SOURCE = 65 config.NUM_BENIGN_CLASSES = 12 config.SYSDIG = './sysdigtest' #eventTrace = sysdigparser.readfileHoneyPatch( 5, 156 ) #print str(eventTrace.getId()) for traceId in range(12, 30): #traceId = 5 traceStart = 0 traceEnd = 3 webpage = Datastore.getWebpagesHoneyPatchSysdig( [traceId], traceStart, traceEnd) # bug, files missing 157, 158, ... webpageTest = webpage[0] webpageList = [webpageTest] postCountermeasureOverhead = 0 for w in webpageList: for trace in w.getTraces(): print 'ben/attck id: ' + str( trace.getId()) + '. trace id: ' + str( trace.getTraceIndex()) traceWithCountermeasure = trace postCountermeasureOverhead += traceWithCountermeasure.getBandwidth( ) print 'num of syscalls: ' + str(postCountermeasureOverhead) print '--------------'
def apply(self): # print 'applying countermeasure' # print('WEBPAGE', int(self.trace.webpage)) if self.dst_trace is None: self.open_db_connection() src_clust = self.get_site_cluster(int(self.trace.webpage)) dst_clust = cluster_distances[src_clust][self.D - 1] # print('DST-CLUST', dst_clust) alg = self.params['CLUSTERING_ALGORITHM'] self.cur.execute( 'SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT 1' .format(alg), (dst_clust, )) selected_site_id = int(round(self.cur.fetchone()[0], 0)) # print('SELECTED-SITE', selected_site_id) sample_trace = Datastore.get_trace(site_id=selected_site_id) # print sample_trace self.dst_trace = sample_trace # print self.trace.get_sizes_str() # print self.dst_trace.get_sizes_str() print 'Morphing: {} -> {}'.format(self.trace, self.dst_trace), self.morph_trace(self.trace, self.dst_trace) print '\t{:.2f}'.format(self.new_trace.size * 1.0 / max(1, self.trace.size) - 1)
def getNonMonitoredTarget(webpageTrainSource):
    """Return the cached non-monitored target page, building it on first use."""
    if config.BUILD_ALL_WEBPAGES:
        # One-off initialisation: fetch webpage 101 (traces 1..2), cache it
        # globally and mark it as the largest/target page for later calls.
        target = Datastore.getWebpagesWangTor([101], 1, 2)[0]
        config.ALL_WEBPAGES.append(target)
        config.BUILD_ALL_WEBPAGES = False
        config.LARGEST_WEBPAGE = target
    return config.LARGEST_WEBPAGE
def __init__(self, handle):
    # Standard Sugar activity bootstrap.
    activity.Activity.__init__(self, handle)
    logger.debug("test")
    # Configure the toolbox: hide the 'keep' and 'share' buttons, which
    # this activity does not support.
    toolbox = activity.ActivityToolbox(self)
    activity_toolbar = toolbox.get_activity_toolbar()
    activity_toolbar.keep.props.visible = False
    activity_toolbar.share.props.visible = False
    self.set_toolbox(toolbox)
    toolbox.show()
    # Create a data store and the app.
    # NOTE(review): datastore endpoint is hard-coded -- confirm it should
    # not come from configuration.
    datastore = Datastore("127.0.0.1:8080")
    backend = BackEnd(datastore, self.get_activity_root())
    main = SortingPanel(datastore, backend)
    widget = main.get_widget()
    # pack the panel as the activity canvas and give it focus
    self.set_canvas(widget)
    widget.grab_focus()
    widget.modify_font(pango.FontDescription("sans %d" % style.zoom(10)))
'Forbidden' (403) error message. The message did not show up on Windows. Also
on Solaris 5.10 the separate components worked fine, only the combination
seems to fail. PROBLEM SOLVED - The error occurs when the Proxy server is
set. The check on Public Cognos needs to be done on the internal network, so
execute this script before setting the proxy server.
"""
from Datastore import Datastore
from PublicCognos import PublicCognos
from lib import my_env

# Initialize Environment
projectname = "mowdr"
modulename = my_env.get_modulename(__file__)
config = my_env.get_inifile(projectname, __file__)
my_log = my_env.init_loghandler(config, modulename)
my_log.info('Start Application')
ds = Datastore(config)
# For every indicator without a registered Cognos resource, look up its
# public Cognos report and, when one exists, store the redirect URL.
for indic_id in ds.get_indicator_ids():
    if not ds.check_resource(indic_id, "cognos"):
        indicatorname = ds.get_indicator_value(indic_id, "title")[0][0]
        # Verify if Cognos URL exists on PublicCognos. Load if it does.
        pc_url = PublicCognos(indicatorname)  # Get my PublicCognos URL Object
        # Check if Cognos Public URL exists
        if pc_url.check_if_cognos_report_exists():
            # get redirect_file and redirect_page.
            redirect_file, redirect_url = pc_url.redirect2cognos_page(
                indic_id, config)
            # Add Cognos URL to indicators table. Cognos Resource ID
            # (id_cognos) is not available as long as the package has not
            # been created.
            ds.insert_indicator(indic_id, 'url_cognos', redirect_url)
my_log.info("End Application")
This script will create a dcat_ap catalog file for the MOW Dataroom Open Data.
"""
from Datastore import Datastore
from datetime import datetime
from Ftp_Handler import Ftp_Handler
from lib import my_env
from xml.etree.ElementTree import ElementTree, Element, SubElement

# Initialize Environment
projectname = "vea_od"
modulename = my_env.get_modulename(__file__)
config = my_env.get_inifile(projectname, __file__)
my_log = my_env.init_loghandler(config, modulename)
my_log.info('Start Application')
ds = Datastore(config)
store = config['xmlns']['store']
lang = {'xml:lang': 'nl'}  # default xml:lang attribute for literal elements

# Define URI's for resources
catalog_uri = store + 'dr_catalog'
publ_uri = store + 'organisatie'  # Publisher
contact_uri = store + 'contact'  # ContactPoint


def get_license(el):
    """
    This method will add the license resource to the specified element.

    :param el: element to which the license resource needs to be added

    :return lic_res: License Resource object
    """
""" # Create new array with attribute and resource fields attrib_res_fields = {} for key in attrib_od_fields.keys(): new_key = key + "_" + resource attrib_res_fields[new_key] = attrib_od_fields[key] handle_attributes(source, target, action, attrib_res_fields) return # Initialize Environment projectname = "mowdr" modulename = my_env.get_modulename(__file__) config = my_env.get_inifile(projectname) my_env.init_logfile(config, modulename) ds = Datastore(config) logging.info('\n\n\nStart Application') # all_attribs = ds.get_all_attribs() logging.info("Handle Main Attributes on Dataset") populate_attribs_main() logging.info("Handle Extra Attributes on Dataset") populate_attribs_extra() logging.info("Handle Main Attributes that are populated from ckan") populate_attribs_main_ckan() resources = my_env.get_resource_types() resource_files = my_env.get_resource_type_file() for resource_name in resources: logging.info("Handle Resource Attributes for resource %s", resource_name) populate_attribs_resource(resource_name) logging.info("Handle Resource Attributes for resource %s from Open Data", resource_name)
if not os.path.exists(WangOpenWorldKnnfolder): os.mkdir(WangOpenWorldKnnfolder) else: shutil.rmtree(WangOpenWorldKnnfolder ) # delete and remake folder os.mkdir(WangOpenWorldKnnfolder) # batch folder os.mkdir(WangOpenWorldKnnfolder + '/' + 'batch') for webpageId in webpageIds: if config.DATA_SOURCE == 0 or config.DATA_SOURCE == 3 or config.DATA_SOURCE == 4: if config.COVARIATE_SHIFT == 0: # Normal case webpageTrain = Datastore.getWebpagesLL( [webpageId], seed - config.NUM_TRAINING_TRACES, seed) webpageTest = Datastore.getWebpagesLL( [webpageId], seed, seed + config.NUM_TESTING_TRACES) else: # span time training/testing endSlideTrain = config.START_SLIDE_TRAIN + config.NUM_TRAINING_TRACES webpageTrain = Datastore.getWebpagesLL( [webpageId], config.START_SLIDE_TRAIN, endSlideTrain) #webpageTest = Datastore.getWebpagesLL( [webpageId], len(config.DATA_SET)-config.NUM_TESTING_TRACES, len(config.DATA_SET) ) # a span of config.COVARIATE_SHIFT days #webpageTest = Datastore.getWebpagesLL( [webpageId], config.NUM_TRAINING_TRACES+config.COVARIATE_SHIFT, config.NUM_TRAINING_TRACES+config.COVARIATE_SHIFT+config.NUM_TESTING_TRACES) webpageTest = Datastore.getWebpagesLL(
class FileHandler:
    # Moves indicator files between directories and keeps the indicators
    # table, the FTP site and the CKAN Open Data platform in sync with them.

    def __init__(self, config):
        self.config = config
        self.ds = Datastore(config)
        self.ckan = CKANConnector(self.config, self.ds)
        self.ftp = Ftp_Handler(self.config)

    def url_in_db(self, file):
        """
        Remove the url attribute for this resource. If file does not contain
        'empty', then calculate the URL of the file and set the result in the
        indicators table.

        :param file: resource file name.

        :return:
        """
        logging.debug('Add/Remove file %s to indicators table.', file)
        indic_id = my_env.indic_from_file(file)
        attribute = my_env.attr_from_file('url', file)
        # Always remove attribute for this indicator. Then no insert / update
        # logic is required.
        self.ds.remove_indicator_attribute(indic_id, attribute)
        if 'empty' not in file:
            # Calculate URL
            ftp_home = self.config['FTPServer']['ftp_home']
            # Add FTP Subdirectory (if any)
            ftpdir = self.config['FTPServer']['dir']
            if len(ftpdir) > 0:
                dirname = ftpdir + '/'
            else:
                dirname = ''
            url = ftp_home + '/' + dirname + file
            # Add URL to indicator table.
            self.ds.insert_indicator(indic_id, attribute, url)
        return

    def size_of_file(self, handledir, file):
        """
        Remove the size attribute for this resource. If file does not contain
        'empty', then calculate the size of the file and set the result in
        the indicators table.

        :param handledir: Current directory of the file.

        :param file: resource file name.

        :return:
        """
        logging.debug('Add/Remove filesize %s to indicators table.', file)
        indic_id = my_env.indic_from_file(file)
        attribute = my_env.attr_from_file('size', file)
        # Always remove attribute for this indicator. Then no insert / update
        # logic is required.
        self.ds.remove_indicator_attribute(indic_id, attribute)
        if 'empty' not in file:
            # Calculate size of file
            filename = os.path.join(handledir, file)
            size = os.path.getsize(filename)
            # Add size of file to indicator table.
            self.ds.insert_indicator(indic_id, attribute, size)
        return

    def load_metadata(self, metafile, indic_id):
        """
        Read the file with metadata and add or replace the information to
        table indicators. This procedure will populate all fields that come
        from the 'Dataroom'. Call function to populate the dataset if this is
        a new dataset or an update of the dataset. Pre-requisite for this
        call is that the dataset exists already. Cognos Add / Remove needs to
        be added here.

        :param metafile: pointer to the file with metadata.

        :param indic_id: Indicator ID

        :return: True when the dataset package was updated, None otherwise.
        """
        # TODO: Add URL for 'bijsluiter' to database
        log_msg = "In load_metadata for file " + metafile
        logging.debug(log_msg)
        try:
            tree = Et.parse(metafile)
        except:
            # catch all errors for now, try to be more specific in the future.
            e = sys.exc_info()[1]
            ec = sys.exc_info()[0]
            log_msg = "Error during parsing metafile xml: %s %s"
            logging.critical(log_msg, e, ec)
            return
        root = tree.getroot()
        # metadata is available, get list of attributes from Dataroom
        # Application and required for Dataset Page.
        # First collect all attribute names in list attrib_names.
        attrib_names = []
        attribs = self.ds.get_attribs_source('Dataroom')
        for row in attribs:
            attrib_names.append(row[0])
        # Then remove information from Dataroom for Dataset for this
        # indicator ID.
        for attrib_name in attrib_names:
            self.ds.remove_indicator_attribute(indic_id, attrib_name)
        # indicatorname = ""
        # Add variable data from indicator metadata xml to indicator table.
        for child in root:
            # First get child text
            if child.text:
                child_text = child.text.strip()
            else:
                # Metadata entry does not have a value (key only).
                child_text = '(niet ingevuld)'
            # Then see how to handle this text depending on the attribute
            if child.tag in attrib_names:
                # Metadata entry exists as an attribute
                self.ds.insert_indicator(indic_id, child.tag, child_text)
                # Some metadata fields will be used more than once in the
                # Open Data set. The 'notes' field is a copy of 'definitie'.
                if child.tag.lower() == 'definitie':
                    self.ds.insert_indicator(indic_id, 'notes', child_text)
            # The 'title' field will be used for the Dataset and all
            # resources and gets special treatment.
            elif child.tag.lower() == 'title':
                # Set Title for cijfers, commentaar and Cognos report (to do).
                indicatorname = child_text
                name_cijfersxml = child_text + " - cijfers (XML)"
                name_cijferstable = child_text + " - cijfers (Tabel)"
                name_commentaar = child_text + " - commentaar"
                name_cognos = indicatorname + " - cognos"
                self.ds.insert_indicator(indic_id, 'title', indicatorname)
                self.ds.insert_indicator(indic_id, 'name_cijfersxml',
                                         name_cijfersxml)
                self.ds.insert_indicator(indic_id, 'name_commentaar',
                                         name_commentaar)
                self.ds.insert_indicator(indic_id, 'name_cijferstable',
                                         name_cijferstable)
                self.ds.insert_indicator(indic_id, 'name_cognos', name_cognos)
            elif child.tag != 'id':
                log_msg = "Found Dataroom Attribute **" + child.tag + "** not required for Open Data Dataset"
                logging.warning(log_msg)
        # Add fixed information from 'OpenData' section in Config file to
        # indicator table.
        additional_attribs = [
            'description_cijfersxml', 'format_cijfersxml', 'tdt_cijfersxml',
            'description_cijferstable', 'format_cijferstable',
            'tdt_cijferstable', 'description_commentaar', 'format_commentaar',
            'tdt_commentaar', 'description_cognos', 'format_cognos',
            'tdt_cognos', 'bijsluiter', 'dcat_ap_profile', 'license_id',
            'author_name', 'author_email', 'maintainer_name',
            'maintainer_email', 'language'
        ]
        for add_attrib in additional_attribs:
            self.ds.insert_indicator(indic_id, add_attrib,
                                     self.config['OpenData'][add_attrib])
        # Now check if dataset exists already: is there an ID available in
        # the indicators table for this indicator.
        values_lst = self.ds.get_indicator_value(indic_id, 'id')
        upd_pkg = "NOK"
        # I want to have 0 or 1 rows in the list
        if len(values_lst) == 0:
            log_msg = "Open Data dataset is not registered for Indicator ID %s, this should have been done"
            logging.error(log_msg, indic_id)
        elif len(values_lst) == 1:
            log_msg = "Open Data dataset exists for Indicator ID %s, no need to create nor to complain about too many"
            logging.info(log_msg, indic_id)
            upd_pkg = "OK"
        else:
            log_msg = "Multiple Open Data dataset links found for Indicator ID %s, please review"
            logging.warning(log_msg, indic_id)
        if upd_pkg == "OK":
            self.ckan.update_package(indic_id)
        return True

    def process_input_directory(self):
        """
        Function to scan the input directory for new files in groups. The
        first group contains the resource files commentaar, cijfersXML and
        cijfersTable. The second group of files is the metadata files.

        In the first group of files, the file is moved first. Then if the
        file contains string 'empty' then the file is removed from the FTP
        site since it cannot be available for external parties anymore, and
        the resource information is removed from CKAN. If the file is valid
        information (does not contain string 'empty') then the file is loaded
        on the FTP site. In both cases the size of the file and the url are
        calculated and handled: added to the database or removed from the
        database if the filename contains 'empty'.

        Then the second group of files is handled: the metadata. The file is
        moved first. Then if the dataset exists on the Open Data platform and
        the string contains 'empty' or cijfersxml does not exist, then the
        package is set to private on Open Data. Else the load_metadata method
        is called.

        :return:
        """
        # Get ckan connection first
        scandir = self.config['Main']['scandir']
        handledir = self.config['Main']['handledir']
        log_msg = "Scan %s for files"
        logging.debug(log_msg, scandir)
        # Don't use os.listdir in for loop since I'll move files. For loop
        # will get confused.
        # Extract filelist first for cijfersXML, cijfersTable or commentaar
        # types. Cognos is also known as resource type, but no files are
        # expected so no problem in leaving this.
        type_list = my_env.get_resource_types()
        filelist = [
            file for file in os.listdir(scandir)
            if my_env.type_from_file(file) in type_list
        ]
        for file in filelist:
            log_msg = "Filename: %s"
            logging.debug(log_msg, file)
            my_env.move_file(
                file, scandir,
                handledir)  # Move file done in own function, such a hassle...
            if 'empty' in file:
                # remove_file handles paths, empty in filename, ...
                self.ftp.remove_file(file=file)
                # Strip empty from filename
                filename = re.sub('empty\.', '', file)
                indic_id = my_env.indic_from_file(filename)
                res_type = my_env.type_from_file(filename)
                self.ckan.remove_resource(indic_id, res_type)
            else:
                self.ftp.load_file(file=os.path.join(handledir, file))
            # Size and URL are (re)computed for both the 'empty' and the
            # valid case -- the helpers remove the attribute first.
            self.size_of_file(handledir, file)
            self.url_in_db(file)
        # Now handle meta-data
        filelist = [file for file in os.listdir(scandir) if 'metadata' in file]
        for file in filelist:
            # At least one update, so set flag for dcat_ap create. If any
            # change then a new metafile is required, so no need to have
            # create in the block above.
            open(os.path.join(scandir, "dcat_ap_create"), 'w').close()
            log_msg = "Filename: %s"
            logging.debug(log_msg, file)
            my_env.move_file(
                file, scandir,
                handledir)  # Move file done in own function, such a hassle...
            # Get indic_id before adding pathname to filename.
            indic_id = my_env.indic_from_file(file)
            filename = os.path.join(handledir, file)
            # Rework logic.
            # If dataset does not exist, then it needs to be created here
            # (not in load_metadata)
            if not self.ckan.check_dataset(indic_id):
                self.ckan.create_package(indic_id)
            # If cijfersxml does not exist or metadata file has empty string,
            # then set package to private.
            if 'empty' in file or not self.ckan.check_resource(
                    indic_id, 'cijfersxml'):
                # Required and sufficient reason to set package to private.
                # I'm sure that package ID exists.
                values_lst = self.ds.get_indicator_value(indic_id, 'id')
                self.ckan.set_pkg_private(values_lst[0][0])
            else:
                # Dataset package does not yet exist or new valid resource
                # file available and cijfersxml exists.
                self.load_metadata(filename, indic_id)
        return

    def add_cognos_resources(self):
        """
        This procedure will find all indicators for which a Cognos report is
        available but the resource is not published on the Open Dataset. The
        Resource will be published on the Open Dataset.

        :return:
        """
        logging.debug("In add_cognos_resources")
        for indic_id in self.ds.get_indicator_cognos_urls():
            if not self.ds.check_resource_published(indic_id, "cognos"):
                logging.info(
                    "Cognos URL available, but not yet on Open Dataset for ID {0}"
                    .format(str(indic_id)))
                self.ckan.update_package(indic_id)
trainingSet = [] testingSet = [] targetWebpage = None traintracesofWebsite = [] testtracesofWebsite = [] if config.CLASSIFIER == config.GLOVE_CLASSIFIER or config.CLASSIFIER == config.GLOVE_CLASSIFIER2: getModelData(webpageIds,runID) tempRunID = runID for webpageId in webpageIds: if config.DATA_SOURCE == 0 or config.DATA_SOURCE == 3: webpageTrain = Datastore.getWebpagesLL([webpageId], seed - config.NUM_TRAINING_TRACES, seed) webpageTest = Datastore.getWebpagesLL([webpageId], seed, seed + config.NUM_TESTING_TRACES) elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2: webpageTrain = Datastore.getWebpagesHerrmann([webpageId], seed - config.NUM_TRAINING_TRACES, seed) webpageTest = Datastore.getWebpagesHerrmann([webpageId], seed, seed + config.NUM_TESTING_TRACES) webpageTrain = webpageTrain[0] webpageTest = webpageTest[0] # print webpageTrain # print webpageTrain.getHistogram() if targetWebpage == None: targetWebpage = webpageTrain preCountermeasureOverhead += webpageTrain.getBandwidth() preCountermeasureOverhead += webpageTest.getBandwidth() #print preCountermeasureOverhead
def run_morphing():
    """Morph traces of sampled webpages towards a destination cluster, then
    train and test the classifier on the morphed traces."""
    run_id, countermeasure_params, classifier_params = read_arguments()
    # Selecting Algorithms
    classifier = int_to_classifier(config.CLASSIFIER)
    countermeasure = int_to_countermeasure(config.COUNTERMEASURE)
    countermeasure.initialize()
    countermeasure = countermeasure()
    set_params(countermeasure, countermeasure_params)
    conn = mdb.connect('localhost', config.MYSQL_USER, config.MYSQL_PASSWD,
                       'Harrmann')

    def select_random_site(cluster, algorithm='PAM10'):
        # Pick one random site id from the given cluster.
        # NOTE(review): this helper is unused in the visible body -- confirm
        # whether it can be removed.
        c = conn.cursor()
        c.execute(
            'SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT 1'
            .format(algorithm), (cluster, ))
        return c.fetchone()[0]

    # Run
    for run_index in range(config.NUM_TRIALS):
        run_start_time = time.time()
        print('Run #{}'.format(run_index))
        # Selecting Sample Webpages
        src_clust = 4
        d = 7
        k = config.BUCKET_SIZE
        pt = config.NUM_TRAINING_TRACES
        pT = config.NUM_TESTING_TRACES
        alg = 'PAM10'
        dst_clust = config.cluster_distances[src_clust][d - 1]
        print('cluster: {} -> {}'.format(src_clust, dst_clust))
        # NOTE(review): reconnecting every trial rebinds the connection also
        # used by select_random_site -- confirm this is intentional.
        conn = mdb.connect('localhost', config.MYSQL_USER,
                           config.MYSQL_PASSWD, 'Harrmann')
        cur = conn.cursor()
        # cur.execute('SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT 1'.format(alg), (dst_clust,))
        # D_site = cur.fetchone()[0]
        # web_pages = [D_site]
        web_pages = []
        # Sample k random sites from the source cluster.
        cur.execute(
            'SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT {}'
            .format(alg, k), (src_clust, ))
        for s in cur.fetchall():
            web_pages.append(s[0])
        print('Webpages:', web_pages)
        training = []
        testing = []
        rl = {'size': 0, 'time': 0}  # totals before the countermeasure
        ov = {'size': 0, 'time': 0}  # totals after the countermeasure
        site_n = len(web_pages)
        for site_i, wp in enumerate(web_pages):
            if site_i % 10 == 0:
                print('--> progress: {}/{}'.format(site_i, site_n))
            t = Datastore.get_trace(site_id=wp, limit=pt + pT, multi=True)
            for i, trace in enumerate(t):
                # Force the countermeasure to pick a fresh destination trace.
                countermeasure.dst_trace = None
                morphed_trace = countermeasure.apply_to_trace(trace)
                rl['size'] += trace.getBandwidth()
                ov['size'] += morphed_trace.getBandwidth()
                # First pT traces feed the testing set, the next pt the
                # training set; any remainder is skipped.
                if i < pT:
                    testing.append(morphed_trace)
                elif i < pT + pt:
                    training.append(morphed_trace)
                else:
                    break
        print('Overhead:\n\tsize: {}, {:.0f}%\n\ttime: N/A'.format(
            *calc_overhead(ov['size'], rl['size'])))
        print('Classifying...')
        training_set = [classifier.traceToInstance(t) for t in training]
        testing_set = [classifier.traceToInstance(t) for t in testing]
        cl = classifier.classify(str(run_index), training_set, testing_set)
        report_summary(cl, classifier=classifier,
                       countermeasure=countermeasure)
webpageIds = webpageIds[0:config.BUCKET_SIZE] seed = random.randint( startIndex, endIndex ) preCountermeasureOverhead = 0 postCountermeasureOverhead = 0 classifier = intToClassifier(config.CLASSIFIER) countermeasure = intToCountermeasure(config.COUNTERMEASURE) trainingSet = [] testingSet = [] targetWebpage = None webpageAll = Datastore.getWebpagesLL( webpageIds, 0, len(config.DATA_SET) ) #print_triplets(webpageAll) for webpageId in webpageIds: if config.DATA_SOURCE == 0: webpageTrain = Datastore.getWebpagesLL( [webpageId], seed-config.NUM_TRAINING_TRACES, seed ) webpageTest = Datastore.getWebpagesLL( [webpageId], seed, seed+config.NUM_TESTING_TRACES ) elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2: webpageTrain = Datastore.getWebpagesHerrmann( [webpageId], seed-config.NUM_TRAINING_TRACES, seed ) webpageTest = Datastore.getWebpagesHerrmann( [webpageId], seed, seed+config.NUM_TESTING_TRACES ) webpageTrain = webpageTrain[0] webpageTest = webpageTest[0] if targetWebpage == None: targetWebpage = webpageTrain
def run():
    """Run the full evaluation pipeline.

    Samples webpages, applies the selected countermeasure to their traces,
    trains/tests the selected classifier, and reports a summary per trial.

    :return: 0 on success, 3 on invalid configuration or parameters.
             Calls sys.exit(4) when t+T exceeds the data-set size.
    """
    run_id, countermeasure_params, classifier_params = read_arguments()
    output_filename_list = [
        'results',
        'k' + str(config.BUCKET_SIZE),
        'c' + str(config.COUNTERMEASURE),
        'd' + str(config.DATA_SOURCE),
        'C' + str(config.CLASSIFIER),
        'N' + str(config.TOP_N),
        't' + str(config.NUM_TRAINING_TRACES),
        'T' + str(config.NUM_TESTING_TRACES),
    ]
    output_filename = os.path.join(config.OUTPUT_DIR,
                                   '.'.join(output_filename_list))
    if not os.path.exists(config.CACHE_DIR):
        os.mkdir(config.CACHE_DIR)
    # Create the output file with a header banner on first use.
    if not os.path.exists(output_filename + '.output'):
        banner = ['accuracy', 'overhead', 'timeElapsedTotal',
                  'timeElapsedClassifier']
        f = open(output_filename + '.output', 'w')
        f.write(','.join(banner))
        f.close()
    if not os.path.exists(output_filename + '.debug'):
        f = open(output_filename + '.debug', 'w')
        f.close()

    # Data-set Selection
    training_set_size = config.NUM_TRAINING_TRACES
    testing_set_size = config.NUM_TESTING_TRACES
    if config.DATA_SOURCE == 0:
        dataset_size = len(config.DATA_SET)
        start_index = config.NUM_TRAINING_TRACES
        end_index = len(config.DATA_SET) - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        dataset_size = 160
        max_traces_per_website_h = 160
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        dataset_size = 18
        max_traces_per_website_h = 18
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    else:
        error('Invalid data-source id:', config.DATA_SOURCE)
        return 3

    # Checking Training-set and Test-set Sizes
    info('|dataset|={}\t|training-set|={}, |testing-set|={}'.format(
        dataset_size, training_set_size, testing_set_size))
    if training_set_size + testing_set_size > dataset_size:
        print('[ERROR] t+T is larger than data-set size!')
        print(
            '\tThe data-set is divided into two parts: Training set (t) and Testing set (T), so t+T must be '
        )
        print('\tless than or equal to the total number of data in data-set.')
        sys.exit(4)

    # Selecting Algorithms
    classifier = int_to_classifier(config.CLASSIFIER)
    countermeasure = int_to_countermeasure(config.COUNTERMEASURE)
    if issubclass(countermeasure, CounterMeasure):
        countermeasure.initialize()
        countermeasure = countermeasure()  # also instantiating
        new_style_cm = True
    else:
        new_style_cm = False
    # Apply 'attr=value' countermeasure parameters from the command line.
    countermeasure_params = countermeasure_params.split(',')
    for p in countermeasure_params:
        if not p or not p.strip():
            continue
        try:
            attr, val = p.strip().split('=', 1)
        except ValueError:
            error('Invalid parameter:', p)
            return 3
        try:
            val = int(val)
        except ValueError:
            pass  # non-numeric values are kept as strings
        if new_style_cm:
            countermeasure.set_param(attr, val)
        else:
            setattr(countermeasure, attr, val)

    # Run
    for run_index in range(config.NUM_TRIALS):
        run_start_time = time.time()
        print('Run #{}'.format(run_index))
        # Select a sample of size k from websites 1..N.
        # BUG FIX: range() cannot be shuffled in place on Python 3 --
        # materialize it to a list first.
        # NOTE(review): range is half-open, so id TOP_N-1 is excluded --
        # confirm the intended population is 0..TOP_N-2.
        webpage_ids = list(range(0, config.TOP_N - 1))
        random.shuffle(webpage_ids)
        webpage_ids = webpage_ids[0:config.BUCKET_SIZE]
        seed = random.randint(start_index, end_index)
        info('selected webpages:', webpage_ids)
        training_set = []
        testing_set = []
        target_webpage = None
        actual_bandwidth = 0
        modified_bandwidth = 0
        actual_timing = 0
        modified_timing = 0
        for page_id in webpage_ids:
            print('.', end='')
            sys.stdout.flush()
            # Sampling From Data-source
            if config.DATA_SOURCE == 0:
                webpage_train = Datastore.getWebpagesLL(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesLL(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            elif config.DATA_SOURCE in [1, 2]:
                webpage_train = Datastore.getWebpagesHerrmann(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesHerrmann(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            else:
                error('Invalid data-source id:', config.DATA_SOURCE)
                return 3
            # Selecting Targets: first sampled page becomes the morph target.
            webpage_train = webpage_train[0]
            webpage_test = webpage_test[0]
            if target_webpage is None:
                target_webpage = webpage_train
            print(webpage_test, webpage_train)
            # Accounting
            actual_bandwidth += webpage_train.getBandwidth()
            actual_bandwidth += webpage_test.getBandwidth()
            # Train Countermeasure
            metadata = None
            if new_style_cm:
                countermeasure.train(src_page=webpage_train,
                                     target_page=target_webpage)
            else:
                if countermeasure in [DirectTargetSampling,
                                      WrightStyleMorphing]:
                    metadata = countermeasure.buildMetadata(
                        webpage_train, target_webpage)
            # Applying Countermeasure (and feeding data to classifier)
            for i, w in enumerate([webpage_train, webpage_test]):
                for trace in w.getTraces():
                    actual_timing += trace.get_total_time()
                    if countermeasure:
                        if new_style_cm:
                            modified_trace = countermeasure.apply_to_trace(
                                trace)
                        else:
                            if countermeasure in [DirectTargetSampling,
                                                  WrightStyleMorphing]:
                                # The target page itself is never morphed.
                                if w.getId() != target_webpage.getId():
                                    modified_trace = countermeasure.applyCountermeasure(
                                        trace, metadata)
                                else:
                                    modified_trace = trace
                            else:
                                modified_trace = countermeasure.applyCountermeasure(
                                    trace)
                    else:
                        modified_trace = trace
                    # Overhead Accounting
                    modified_bandwidth += modified_trace.getBandwidth()
                    modified_timing += modified_trace.get_total_time()
                    instance = classifier.traceToInstance(modified_trace)
                    if instance:
                        if i == 0:  # train-page
                            training_set.append(instance)
                        elif i == 1:  # test-page
                            testing_set.append(instance)
        # Classification
        print('')
        classification_start_time = time.time()
        cl = classifier.classify(run_id, training_set, testing_set)
        run_end_time = time.time()
        # Elapsed times (currently only computed locally).
        run_total_time = run_end_time - run_start_time
        classification_total_time = run_end_time - classification_start_time
        report_summary(cl, output_filename=output_filename,
                       classifier=classifier, countermeasure=countermeasure)
    return 0
def run_morphing():
    """Morph traces of sampled webpages towards a destination cluster, then
    train and test the classifier on the morphed traces."""
    run_id, countermeasure_params, classifier_params = read_arguments()
    # Selecting Algorithms
    classifier = int_to_classifier(config.CLASSIFIER)
    countermeasure = int_to_countermeasure(config.COUNTERMEASURE)
    countermeasure.initialize()
    countermeasure = countermeasure()
    set_params(countermeasure, countermeasure_params)
    conn = mdb.connect('localhost', config.MYSQL_USER, config.MYSQL_PASSWD, 'Harrmann')

    def select_random_site(cluster, algorithm='PAM10'):
        # Pick one random site id from the given cluster.
        # NOTE(review): this helper is unused in the visible body -- confirm
        # whether it can be removed.
        c = conn.cursor()
        c.execute('SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT 1'.format(algorithm), (cluster,))
        return c.fetchone()[0]

    # Run
    for run_index in range(config.NUM_TRIALS):
        run_start_time = time.time()
        print('Run #{}'.format(run_index))
        # Selecting Sample Webpages
        src_clust = 4
        d = 7
        k = config.BUCKET_SIZE
        pt = config.NUM_TRAINING_TRACES
        pT = config.NUM_TESTING_TRACES
        alg = 'PAM10'
        dst_clust = config.cluster_distances[src_clust][d - 1]
        print('cluster: {} -> {}'.format(src_clust, dst_clust))
        # NOTE(review): reconnecting every trial rebinds the connection also
        # used by select_random_site -- confirm this is intentional.
        conn = mdb.connect('localhost', config.MYSQL_USER, config.MYSQL_PASSWD, 'Harrmann')
        cur = conn.cursor()
        # cur.execute('SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT 1'.format(alg), (dst_clust,))
        # D_site = cur.fetchone()[0]
        # web_pages = [D_site]
        web_pages = []
        # Sample k random sites from the source cluster.
        cur.execute('SELECT site_id FROM ClustTable WHERE {}=%s ORDER BY RAND() LIMIT {}'.format(alg, k), (src_clust,))
        for s in cur.fetchall():
            web_pages.append(s[0])
        print('Webpages:', web_pages)
        training = []
        testing = []
        rl = {'size': 0, 'time': 0}  # totals before the countermeasure
        ov = {'size': 0, 'time': 0}  # totals after the countermeasure
        site_n = len(web_pages)
        for site_i, wp in enumerate(web_pages):
            if site_i % 10 == 0:
                print('--> progress: {}/{}'.format(site_i, site_n))
            t = Datastore.get_trace(site_id=wp, limit=pt+pT, multi=True)
            for i, trace in enumerate(t):
                # Force the countermeasure to pick a fresh destination trace.
                countermeasure.dst_trace = None
                morphed_trace = countermeasure.apply_to_trace(trace)
                rl['size'] += trace.getBandwidth()
                ov['size'] += morphed_trace.getBandwidth()
                # First pT traces feed the testing set, the next pt the
                # training set; any remainder is skipped.
                if i < pT:
                    testing.append(morphed_trace)
                elif i < pT + pt:
                    training.append(morphed_trace)
                else:
                    break
        print('Overhead:\n\tsize: {}, {:.0f}%\n\ttime: N/A'.format(*calc_overhead(ov['size'], rl['size'])))
        print('Classifying...')
        training_set = [classifier.traceToInstance(t) for t in training]
        testing_set = [classifier.traceToInstance(t) for t in testing]
        cl = classifier.classify(str(run_index), training_set, testing_set)
        report_summary(cl, classifier=classifier, countermeasure=countermeasure)
def __init__(self, config):
    # Keep the shared configuration and build the collaborating handlers:
    # datastore (DB access), CKAN connector (Open Data platform) and the
    # FTP handler.
    self.config = config
    self.ds = Datastore(config)
    self.ckan = CKANConnector(self.config, self.ds)
    self.ftp = Ftp_Handler(self.config)
if not os.path.exists(WangOpenWorldKnnfolder): os.mkdir(WangOpenWorldKnnfolder) else: shutil.rmtree( WangOpenWorldKnnfolder) # delete and remake folder os.mkdir(WangOpenWorldKnnfolder) # batch folder os.mkdir(WangOpenWorldKnnfolder + '/' + 'batch') for webpageId in webpageIds: if config.DATA_SOURCE == 0 or config.DATA_SOURCE == 3 or config.DATA_SOURCE == 4: if config.COVARIATE_SHIFT == 0: # Normal case webpageTrain = Datastore.getWebpagesLL( [webpageId], seed - config.NUM_TRAINING_TRACES, seed) webpageTest = Datastore.getWebpagesLL( [webpageId], seed, seed + config.NUM_TESTING_TRACES) else: # span time training/testing webpageTrain = Datastore.getWebpagesLL( [webpageId], 0, config.NUM_TRAINING_TRACES) #webpageTest = Datastore.getWebpagesLL( [webpageId], len(config.DATA_SET)-config.NUM_TESTING_TRACES, len(config.DATA_SET) ) # a span of config.COVARIATE_SHIFT days webpageTest = Datastore.getWebpagesLL( [webpageId], config.NUM_TRAINING_TRACES + config.COVARIATE_SHIFT, config.NUM_TRAINING_TRACES + config.COVARIATE_SHIFT +
seed = random.randint( startIndex, endIndex ) preCountermeasureOverhead = 0 postCountermeasureOverhead = 0 classifier = intToClassifier(config.CLASSIFIER) countermeasure = intToCountermeasure(config.COUNTERMEASURE) trainingSet = [] testingSet = [] targetWebpage = None for webpageId in webpageIds: if config.DATA_SOURCE == 0: webpageTrain = Datastore.getWebpagesLL( [webpageId], seed-config.NUM_TRAINING_TRACES, seed ) webpageTest = Datastore.getWebpagesLL( [webpageId], seed, seed+config.NUM_TESTING_TRACES ) elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2: webpageTrain = Datastore.getWebpagesHerrmann( [webpageId], seed-config.NUM_TRAINING_TRACES, seed ) webpageTest = Datastore.getWebpagesHerrmann( [webpageId], seed, seed+config.NUM_TESTING_TRACES ) webpageTrain = webpageTrain[0] webpageTest = webpageTest[0] if targetWebpage == None: targetWebpage = webpageTrain preCountermeasureOverhead += webpageTrain.getBandwidth() preCountermeasureOverhead += webpageTest.getBandwidth() metadata = None
def run():
    """Run the full evaluation pipeline.

    Reads command-line arguments, samples webpages from the configured
    data source, applies the configured countermeasure to every trace,
    trains/tests the classifier, and appends a summary to the result
    files.

    Returns:
        0 on success, 3 on invalid configuration.
    Raises:
        SystemExit(4) when t+T exceeds the data-set size.
    """
    run_id, countermeasure_params, classifier_params = read_arguments()

    # The output file name encodes every experiment parameter so results
    # from different configurations never collide.
    output_filename_list = [
        'results',
        'k' + str(config.BUCKET_SIZE),
        'c' + str(config.COUNTERMEASURE),
        'd' + str(config.DATA_SOURCE),
        'C' + str(config.CLASSIFIER),
        'N' + str(config.TOP_N),
        't' + str(config.NUM_TRAINING_TRACES),
        'T' + str(config.NUM_TESTING_TRACES),
    ]
    output_filename = os.path.join(config.OUTPUT_DIR,
                                   '.'.join(output_filename_list))

    if not os.path.exists(config.CACHE_DIR):
        os.mkdir(config.CACHE_DIR)

    # Create the result files (with a CSV banner) if they do not exist
    # yet.  'with' replaces the original open()/close() pairs so the
    # handles are released even if write() fails.
    if not os.path.exists(output_filename + '.output'):
        banner = [
            'accuracy', 'overhead', 'timeElapsedTotal',
            'timeElapsedClassifier'
        ]
        with open(output_filename + '.output', 'w') as f:
            f.write(','.join(banner))
    if not os.path.exists(output_filename + '.debug'):
        with open(output_filename + '.debug', 'w'):
            pass  # just touch the file

    # Data-set selection: choose seed bounds per data source so that a
    # window of t training traces can precede the seed and T testing
    # traces can follow it.
    training_set_size = config.NUM_TRAINING_TRACES
    testing_set_size = config.NUM_TESTING_TRACES
    if config.DATA_SOURCE == 0:
        dataset_size = len(config.DATA_SET)
        start_index = config.NUM_TRAINING_TRACES
        end_index = len(config.DATA_SET) - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        dataset_size = 160
        max_traces_per_website_h = 160
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        dataset_size = 18
        max_traces_per_website_h = 18
        start_index = config.NUM_TRAINING_TRACES
        end_index = max_traces_per_website_h - config.NUM_TESTING_TRACES
    else:
        error('Invalid data-source id:', config.DATA_SOURCE)
        return 3

    # Checking training-set and test-set sizes.
    info('|dataset|={}\t|training-set|={}, |testing-set|={}'.format(
        dataset_size, training_set_size, testing_set_size))
    if training_set_size + testing_set_size > dataset_size:
        print('[ERROR] t+T is larger than data-set size!')
        print(
            '\tThe data-set is divided into two parts: Training set (t) and Testing set (T), so t+T must be '
        )
        print('\tless than or equal to the total number of data in data-set.')
        sys.exit(4)

    # Selecting algorithms.  New-style countermeasures are classes
    # derived from CounterMeasure and must be instantiated; old-style
    # ones are used as-is.
    classifier = int_to_classifier(config.CLASSIFIER)
    countermeasure = int_to_countermeasure(config.COUNTERMEASURE)
    if issubclass(countermeasure, CounterMeasure):
        countermeasure.initialize()
        countermeasure = countermeasure()  # also instantiating
        new_style_cm = True
    else:
        new_style_cm = False

    # Parse "attr=val,attr=val" countermeasure parameters.
    countermeasure_params = countermeasure_params.split(',')
    for p in countermeasure_params:
        if not p or not p.strip():
            continue
        try:
            attr, val = p.strip().split('=', 1)
        except ValueError:
            error('Invalid parameter:', p)
            return 3
        try:
            val = int(val)
        except ValueError:
            pass  # keep the string value when it is not an integer
        if new_style_cm:
            countermeasure.set_param(attr, val)
        else:
            setattr(countermeasure, attr, val)

    # Run
    for run_index in range(config.NUM_TRIALS):
        run_start_time = time.time()
        print('Run #{}'.format(run_index))

        # Select a sample of size k from websites 1..N.
        # BUG FIX: random.shuffle() requires a mutable sequence; on
        # Python 3 shuffling a bare range object raises TypeError.
        webpage_ids = list(range(0, config.TOP_N - 1))
        random.shuffle(webpage_ids)
        webpage_ids = webpage_ids[0:config.BUCKET_SIZE]
        seed = random.randint(start_index, end_index)
        info('selected webpages:', webpage_ids)

        training_set = []
        testing_set = []
        target_webpage = None
        actual_bandwidth = 0
        modified_bandwidth = 0
        actual_timing = 0
        modified_timing = 0
        for page_id in webpage_ids:
            print('.', end='')
            sys.stdout.flush()

            # Sampling from data-source: t traces before the seed for
            # training, T traces after it for testing.
            if config.DATA_SOURCE == 0:
                webpage_train = Datastore.getWebpagesLL(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesLL(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            elif config.DATA_SOURCE in [1, 2]:
                webpage_train = Datastore.getWebpagesHerrmann(
                    [page_id], seed - config.NUM_TRAINING_TRACES, seed)
                webpage_test = Datastore.getWebpagesHerrmann(
                    [page_id], seed, seed + config.NUM_TESTING_TRACES)
            else:
                error('Invalid data-source id:', config.DATA_SOURCE)
                return 3

            # Selecting targets: the first sampled page serves as the
            # target for morphing-style countermeasures.
            webpage_train = webpage_train[0]
            webpage_test = webpage_test[0]
            if target_webpage is None:
                target_webpage = webpage_train
            print(webpage_test, webpage_train)

            # Accounting (bandwidth before the countermeasure).
            actual_bandwidth += webpage_train.getBandwidth()
            actual_bandwidth += webpage_test.getBandwidth()

            # Train countermeasure.
            metadata = None
            if new_style_cm:
                countermeasure.train(src_page=webpage_train,
                                     target_page=target_webpage)
            else:
                if countermeasure in [
                        DirectTargetSampling, WrightStyleMorphing
                ]:
                    metadata = countermeasure.buildMetadata(
                        webpage_train, target_webpage)

            # Applying countermeasure (and feeding data to classifier).
            # i == 0 is the training page, i == 1 the testing page.
            for i, w in enumerate([webpage_train, webpage_test]):
                for trace in w.getTraces():
                    actual_timing += trace.get_total_time()
                    if countermeasure:
                        if new_style_cm:
                            modified_trace = countermeasure.apply_to_trace(
                                trace)
                        else:
                            if countermeasure in [
                                    DirectTargetSampling, WrightStyleMorphing
                            ]:
                                # The target page itself is never morphed.
                                if w.getId() != target_webpage.getId():
                                    modified_trace = countermeasure.applyCountermeasure(
                                        trace, metadata)
                                else:
                                    modified_trace = trace
                            else:
                                modified_trace = countermeasure.applyCountermeasure(
                                    trace)
                    else:
                        modified_trace = trace

                    # Overhead accounting (after the countermeasure).
                    modified_bandwidth += modified_trace.getBandwidth()
                    modified_timing += modified_trace.get_total_time()

                    instance = classifier.traceToInstance(modified_trace)
                    if instance:
                        if i == 0:  # train-page
                            training_set.append(instance)
                        elif i == 1:  # test-page
                            testing_set.append(instance)

        # Classification
        print('')
        classification_start_time = time.time()
        cl = classifier.classify(run_id, training_set, testing_set)
        run_end_time = time.time()
        # NOTE(review): these two timings are computed but not passed to
        # report_summary — kept for parity with the original code.
        run_total_time = run_end_time - run_start_time
        classification_total_time = run_end_time - classification_start_time
        report_summary(cl,
                       output_filename=output_filename,
                       classifier=classifier,
                       countermeasure=countermeasure)
    return 0
def getModelData(webpageIds, runID):
    """Sample model-building traces for each webpage id, run the configured
    countermeasure over them, and feed the resulting traces to
    generateModel().

    webpageIds -- iterable of webpage ids to sample traces for
    runID      -- run identifier forwarded to generateModel()
    """
    countermeasure = intToCountermeasure(config.COUNTERMEASURE)
    traintracesofWebsite = []
    targetWebpage = None
    # Hoisted: the same dict lookup was repeated for every data source
    # and every webpage.
    modelTraceNum = config.GLOVE_OPTIONS['ModelTraceNum']

    # Pick the valid seed range for the configured data source so that
    # ModelTraceNum traces always exist before the chosen seed.
    if config.DATA_SOURCE == 0:
        startIndex = modelTraceNum
        endIndex = len(config.DATA_SET) - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 1:
        maxTracesPerWebsiteH = 160
        startIndex = modelTraceNum
        endIndex = maxTracesPerWebsiteH - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 2:
        maxTracesPerWebsiteH = 18
        startIndex = modelTraceNum
        endIndex = maxTracesPerWebsiteH - config.NUM_TESTING_TRACES
    elif config.DATA_SOURCE == 3:
        # Android/Tor data set lives in a different pcap root.
        config.DATA_SET = config.DATA_SET_ANDROID_TOR
        startIndex = modelTraceNum
        endIndex = len(config.DATA_SET) - config.NUM_TESTING_TRACES
        config.PCAP_ROOT = os.path.join(config.BASE_DIR,
                                        'pcap-logs-Android-Tor-Grouping')

    seed = random.randint(startIndex, endIndex)
    for webpageId in webpageIds:
        if config.DATA_SOURCE == 0 or config.DATA_SOURCE == 3:
            webpageTrain = Datastore.getWebpagesLL(
                [webpageId], seed - modelTraceNum, seed)
        elif config.DATA_SOURCE == 1 or config.DATA_SOURCE == 2:
            webpageTrain = Datastore.getWebpagesHerrmann(
                [webpageId], seed - modelTraceNum, seed)
        webpageTrain = webpageTrain[0]

        # The first webpage seen acts as the morphing target for
        # target-sampling style countermeasures.
        if targetWebpage is None:  # BUG FIX: was "== None"
            targetWebpage = webpageTrain

        metadata = None
        if config.COUNTERMEASURE in [
                config.DIRECT_TARGET_SAMPLING, config.WRIGHT_STYLE_MORPHING
        ]:
            metadata = countermeasure.buildMetadata(webpageTrain,
                                                    targetWebpage)

        # Apply the countermeasure to every trace of the page and collect
        # the results.  (The original "for w in [webpageTrain]" loop and
        # the never-incremented "i == 0" guard were dead scaffolding left
        # over from a train/test split; every trace was always collected.)
        for trace in webpageTrain.getTraces():
            if countermeasure:
                if config.COUNTERMEASURE in [
                        config.DIRECT_TARGET_SAMPLING,
                        config.WRIGHT_STYLE_MORPHING
                ]:
                    # The target page itself is never morphed.
                    if webpageTrain.getId() != targetWebpage.getId():
                        traceWithCountermeasure = countermeasure.applyCountermeasure(
                            trace, metadata)
                    else:
                        traceWithCountermeasure = trace
                else:
                    traceWithCountermeasure = countermeasure.applyCountermeasure(
                        trace)
            else:
                traceWithCountermeasure = trace
            traintracesofWebsite.append(traceWithCountermeasure)

    generateModel(traintracesofWebsite, runID)
print('starting') debug = os.environ['IS_DEBUG'] == 'true' dryRun = False #os.environ['DRY_RUN'] == 'true' messageHost = 'localhost' if debug else os.environ['RABBIT_ADDR'] print('messageHost: %s' % messageHost) connection = pika.BlockingConnection( pika.ConnectionParameters(host=messageHost, heartbeat=20)) channel = connection.channel() channel.queue_declare(queue='frame_jobs', durable=True) datastore = Datastore() imageParser = ImageParser() print("openface ready") if dryRun: print("Dry run enabled") # print(" [x] Sent 'Hello World!'") def onFrameJob(ch, method, properties, body): msg = json.loads(body) videoId = msg['videoId']
'''
Created on Mar 7, 2011

@author: cgueret
'''
from Backend import BackEnd
from Datastore import Datastore
import os

if __name__ == '__main__':
    # Create a data store backed by the local server and index every
    # file found in the 'items' directory.
    datastore = Datastore("127.0.0.1:8080")
    backend = BackEnd(datastore, ".")
    # BUG FIX: the loop variable was named 'file', shadowing the builtin.
    # print is written in call form, which behaves identically on
    # Python 2 and Python 3 for a single pre-formatted argument.
    for item_name in os.listdir('items'):
        print('Add %s' % item_name)
        backend.add_item('items/%s' % item_name)
    #backend.add_item("chair.jpg")
def test_readfile(self): ''' # By Dyer actualTrace = pcapparser.readfile( month=3, day=14, hour=22, webpageId=8 ) expectedTrace = Trace(8) expectedTrace.addPacket( Packet( Packet.UP , 0 , 148 ) ) expectedTrace.addPacket( Packet( Packet.DOWN, 0 , 100 ) ) expectedTrace.addPacket( Packet( Packet.UP , 0 , 52 ) ) expectedTrace.addPacket( Packet( Packet.UP , 3 , 500 ) ) expectedTrace.addPacket( Packet( Packet.DOWN, 18 , 244 ) ) expectedTrace.addPacket( Packet( Packet.UP , 35 , 436 ) ) expectedTrace.addPacket( Packet( Packet.DOWN, 75 , 52 ) ) expectedTrace.addPacket( Packet( Packet.DOWN, 118, 292 ) ) expectedTrace.addPacket( Packet( Packet.UP , 158, 52 ) ) ''' # By Khaled config.PCAP_ROOT = os.path.join(config.BASE_DIR ,'honeypatckBenattackTest/net') config.DATA_SOURCE = 64 # HoneyPatch pcap dataset config.NUM_BENIGN_CLASSES = 12 for traceId in range(12,33): #traceId = 5 traceStart = 0 traceEnd = 3 # training print 'training' webpage = Datastore.getWebpagesHoneyPatch([traceId], traceStart, traceEnd) webpageTest = webpage[0] webpageList = [webpageTest] postCountermeasureOverhead = 0 for w in webpageList: for trace in w.getTraces(): traceWithCountermeasure = trace postCountermeasureOverhead += traceWithCountermeasure.getBandwidth() print 'Bandwidth = ' + str(postCountermeasureOverhead) # testing print 'testing' webpage = Datastore.getWebpagesHoneyPatchSomePackets([traceId], traceStart, traceEnd) webpageTest = webpage[0] webpageList = [webpageTest] postCountermeasureOverhead = 0 for w in webpageList: for trace in w.getTraces(): traceWithCountermeasure = trace postCountermeasureOverhead += traceWithCountermeasure.getBandwidth() print 'Bandwidth = ' + str(postCountermeasureOverhead) print '------------'