def plot_FAR(dicSensitivityResults):
    """Plot the FAR curve for every hash algorithm."""
    sPlotPath = sSensitivityTestResultsBasePath + "plots/"
    util.create_path(sPlotPath)
    for sHashType, dicMetrics in dicSensitivityResults.items():
        aFAR = dicMetrics["errorrate"]
        pdERRData = pd.DataFrame({
            "Threshold": aThresholdSteps,
            "Errorrate": aFAR,
            "Type": ["FAR"] * len(aThresholdSteps)
        })
        pdERRData['subject'] = 0

        # last threshold at which the FAR is still zero
        lFARNotNullValueX = aThresholdSteps[np.argmax(np.array(aFAR) > 0) - 1]

        plt.clf()
        sTitle = sHashType
        sb.set_style("whitegrid")
        oSeabornPlot = sb.tsplot(time="Threshold",
                                 value="Errorrate",
                                 condition="Type",
                                 unit="subject",
                                 interpolate=True,
                                 data=pdERRData)
        oSeabornPlot.set(title=sTitle)
        oSeabornPlot.set(xlabel="Threshold")
        oSeabornPlot.set(ylabel="Errorrate")
        oSeabornPlot.set(xticks=np.arange(0, 1.01, 0.1))
        oSeabornPlot.set(yticks=np.arange(0, 1.01, 0.1))
        oSeabornPlot.set(ylim=(0, 1))

        # mark the point where the FAR stops being zero
        plt.axvline(x=lFARNotNullValueX, color='r', linestyle="--")

        sFilenameSafeTitle = util.format_filename(sTitle)
        oSeabornPlot.get_figure().savefig(sPlotPath + sFilenameSafeTitle +
                                          ".png")
        plt.clf()
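
# Hedged usage sketch for plot_FAR (not part of the original module). The
# input shape is inferred from the lookups above: "errorrate" holds one FAR
# value per entry in the module-level aThresholdSteps; the hash names below
# are hypothetical.
def _example_plot_FAR():
    dicSensitivityResults = {
        "dhash": {"errorrate": [0.0] * len(aThresholdSteps)},
        "phash": {"errorrate": [0.0] * len(aThresholdSteps)},
    }
    plot_FAR(dicSensitivityResults)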
def detect_data_set(args):
    """
    Function called in each detector process; runs the detector it is given
    over one data set.
    """
    (i, detector_instance, detector_name, labels, output_dir,
     relative_path) = args

    relative_dir, file_name = os.path.split(relative_path)
    file_name = detector_name + "_" + file_name
    output_path = os.path.join(output_dir, detector_name, relative_dir,
                               file_name)
    create_path(output_path)

    print("%s: Beginning detection with %s for %s" %
          (i, detector_name, relative_path))

    detector_instance.initialize()
    results = detector_instance.run()

    # label=1 for relaxed windows, 0 otherwise
    results["label"] = labels

    results.to_csv(output_path, index=False)

    print("%s: Completed processing %s records at %s" %
          (i, len(results.index), datetime.now()))
    print("%s: Results have been written to %s" % (i, output_path))
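
# Hedged usage sketch (not from the original code base): detect_data_set is
# designed to be mapped over a pool of worker processes. The detector class,
# detector name, and paths below are hypothetical.
def _example_run_detectors(corpus_items, labels_by_path, detector_cls):
    from multiprocessing import Pool
    tasks = [(i, detector_cls(), "my_detector", labels_by_path[rel_path],
              "results/", rel_path)
             for i, rel_path in enumerate(corpus_items)]
    Pool(4).map(detect_data_set, tasks)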
def save_stats(dicSensitivityResults):
    """Write metrics and raw error rates to file."""
    sStatsPath = sSensitivityTestResultsBasePath + "stats/"
    util.create_path(sStatsPath)

    oPandasMetrics = pd.DataFrame(columns=[
        'hashalgorithm', 'min', 'max', 'mean', 'percentile_25',
        'percentile_75'
    ])
    oPandasErrorrates = pd.DataFrame(
        columns=['hashalgorithm', 'threshold', 'errorrate'])

    for sHashType, dicMetrics in dicSensitivityResults.items():
        # add one metrics line per hash algorithm to the overview dataframe
        oPandasMetrics = oPandasMetrics.append({
            'hashalgorithm': sHashType,
            'min': dicMetrics["metrics"]["min"],
            'max': dicMetrics["metrics"]["max"],
            'mean': dicMetrics["metrics"]["mean"],
            'percentile_25': dicMetrics["metrics"]["p25"],
            'percentile_75': dicMetrics["metrics"]["p75"]
        }, ignore_index=True)

        # append the raw error rates for this hash algorithm
        oPandasErrorrates = pd.concat([oPandasErrorrates, pd.DataFrame({
            'hashalgorithm': [sHashType] * len(aThresholdSteps),
            'threshold': aThresholdSteps,
            'errorrate': dicMetrics["errorrate"]
        })])

    # write metrics to file
    sMetricsFileName = "hash_algorithm_metrics"
    save_pandas_to_file(oPandasMetrics, sStatsPath, sMetricsFileName)

    # write error rates to file
    sErrorratesFileName = "hash_algorithm_errorrates_raw"
    save_pandas_to_file(oPandasErrorrates, sStatsPath, sErrorratesFileName)
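
# Hedged sketch of the input expected by save_stats (values hypothetical;
# the "p25"/"p75" keys follow the lookups above):
#
#   dicSensitivityResults = {
#       "dhash": {
#           "metrics": {"min": 0.0, "max": 0.6, "mean": 0.2,
#                       "p25": 0.1, "p75": 0.3},
#           "errorrate": [...],  # one value per aThresholdSteps entry
#       },
#   }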
def extract_user_defined_stats(oPandasDeviationsToOriginal):
    sApplicationTestResultStatsBasePath = (
        sApplicationTestResultBasePath + "stats/")
    util.create_path(sApplicationTestResultStatsBasePath)

    # base - cumulated over all printers, resolutions, etc.
    save_group_to_files(
        oPandasDeviationsToOriginal.groupby(["hashalgorithm"])["deviation"],
        sApplicationTestResultStatsBasePath + "original/", "base")

    # printer
    save_group_to_files(
        oPandasDeviationsToOriginal[
            oPandasDeviationsToOriginal["special"].isnull()].groupby(
                ["hashalgorithm", "printer"])["deviation"],
        sApplicationTestResultStatsBasePath + "original/", "printer")

    # printer - resolution
    save_group_to_files(
        oPandasDeviationsToOriginal[
            oPandasDeviationsToOriginal["special"].isnull()].groupby([
                "hashalgorithm", "printer", "printer_resolution"
            ])["deviation"],
        sApplicationTestResultStatsBasePath + "original/",
        "printer_resolution")

    # printer - resolution (clustered over all hash algorithms)
    save_group_to_files(
        oPandasDeviationsToOriginal[
            oPandasDeviationsToOriginal["special"].isnull()].groupby([
                "printer", "printer_resolution"
            ])["deviation"],
        sApplicationTestResultStatsBasePath + "original/",
        "printer_resolution_clustered")

    # printer - special
    save_group_to_files(
        oPandasDeviationsToOriginal[
            oPandasDeviationsToOriginal["printer"] != "D1"].fillna(
                "none").groupby([
                    "hashalgorithm", "printer", "special"
                ])["deviation"],
        sApplicationTestResultStatsBasePath + "original/", "printer_special")

    # paper
    save_group_to_files(
        oPandasDeviationsToOriginal[
            oPandasDeviationsToOriginal["special"].isnull()].groupby(
                ["hashalgorithm", "paper"])["deviation"],
        sApplicationTestResultStatsBasePath + "original/", "paper")

    # scanner - resolution
    save_group_to_files(
        oPandasDeviationsToOriginal.groupby([
            "hashalgorithm", "scanner_resolution"
        ])["deviation"],
        sApplicationTestResultStatsBasePath + "original/",
        "scanner_resolution")
def save_group_to_files(oPandasGroup, sTargetPath, sFileBaseName):
    """Calculate min, mean and max for a pandas grouping and save the
    result as .txt, .csv and .tex files."""
    util.create_path(sTargetPath)
    oPandasGroup = oPandasGroup.agg({
        "min": np.min,
        "mean": np.mean,
        "max": np.max
    })
    with open(sTargetPath + sFileBaseName + ".txt", "w") as f:
        oPandasGroup.to_string(f)
    with open(sTargetPath + sFileBaseName + ".tex", "w") as f:
        oPandasGroup.to_latex(f)
    with open(sTargetPath + sFileBaseName + ".csv", "w") as f:
        oPandasGroup.to_csv(f)
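
# Hedged usage sketch for save_group_to_files (hypothetical data): the first
# argument is a grouped pandas Series, as produced by the
# groupby(...)["deviation"] calls elsewhere in this module.
def _example_save_group_to_files():
    df = pd.DataFrame({"hashalgorithm": ["dhash", "dhash", "phash"],
                       "deviation": [0.1, 0.3, 0.2]})
    save_group_to_files(df.groupby(["hashalgorithm"])["deviation"],
                        "stats/original/", "base")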
def addDataSet(self, relativePath, dataSet):
    """Add a data file to the corpus given its relativePath within the
    corpus directory.

    @param relativePath (string)   Path of the new data file relative to
                                   the corpus directory.

    @param dataSet      (DataFile) Data set to be added to the corpus.
    """
    self.dataFiles[relativePath] = copy.deepcopy(dataSet)
    newPath = self.srcRoot + relativePath
    create_path(newPath)
    self.dataFiles[relativePath].srcPath = newPath
    self.dataFiles[relativePath].write()
    self.numDataFiles = len(self.dataFiles)
def download_and_save_image_file(url, path):
    '''Download an image from an image page (not necessarily a .jpg).'''
    create_path(path)
    if os.path.isfile(path):
        syncPrint('%-64s has been downloaded to %s, skip.' % (url, path))
        return
    photoID = BasePhotoCrawler.getPhotoId(url)
    doc = BasePhotoCrawler.fetch(url + 'sizes/')
    pattern = r'<img[^>]+src=\"(http://\w+\.staticflickr\.com/\w+/{id}\w+\.(jpg|png))[^>]*>'.format(id=photoID)
    try:
        m = re.search(pattern, doc).group(1)
        syncPrint('downloading %-64s to %s' % (url, path))
        img = BasePhotoCrawler.fetch(m)
        # write in binary mode so image bytes are not mangled
        with open(path, "wb") as f:
            f.write(img)
    except Exception:
        # (TODO) break down exception handling; re.search returning None
        # raises AttributeError, fetch failures raise their own errors
        print('Error: no regex match in %s' % url)
def copy(self, newRoot=None):
    """Copy the corpus to newRoot, which must not already exist.

    @param newRoot (string) Location of the new directory to copy the
                            corpus to.
    """
    if newRoot[-1] != os.path.sep:
        newRoot += os.path.sep
    if os.path.isdir(newRoot):
        print("directory already exists")
        return None
    else:
        create_path(newRoot)

    newCorpus = Corpus(newRoot)
    for relativePath in list(self.dataFiles.keys()):
        newCorpus.addDataSet(relativePath, self.dataFiles[relativePath])
    return newCorpus
def __init__(self, filename, method, host, port, root='/website'):
    # The routing scheme requires that everything be in absolute path format.
    if filename == '/' or filename.endswith(root):
        self.__filename = util.create_path(root, '/index.html')
    elif not filename.startswith('/website'):
        self.__filename = util.create_path(root, filename)
    else:
        self.__filename = util.create_path(filename)
    self.__host = host
    self.__port = port
    self.__method = method
    self.__methods = {
        'GET': self.do_get,
        'HEAD': self.do_head
    }
    self.__status = self.get_status()
    self.__content = None
    logging.basicConfig(filename='server.log', level=logging.DEBUG,
                        format='%(asctime)s %(message)s')
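
# Hedged construction sketch (values and the class name "Request" are
# hypothetical; the enclosing class is not shown in this excerpt):
#
#   request = Request('/about.html', 'GET', 'localhost', 8080)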
def save_sorted(self, sorted_dataset_dir, num_traces_per_file=None,
                num_files=None, begin_file_index=None, end_file_index=None):
    if num_traces_per_file is not None:
        if num_files is not None:
            raise ValueError('Expecting either num_traces_per_file or num_files, not both')
    else:
        if num_files is None:
            raise ValueError('Expecting either num_traces_per_file or num_files')
        else:
            num_traces_per_file = math.ceil(len(self) / num_files)

    if os.path.exists(sorted_dataset_dir):
        if len(glob(os.path.join(sorted_dataset_dir, '*'))) > 0:
            print('Warning: target directory is not empty: {}'.format(sorted_dataset_dir))
    util.create_path(sorted_dataset_dir, directory=True)

    file_indices = list(util.chunks(list(self._sorted_indices), num_traces_per_file))
    num_traces = len(self)
    num_files = len(file_indices)
    num_files_digits = len(str(num_files))
    file_name_template = 'pyprob_traces_sorted_{{:d}}_{{:0{}d}}'.format(num_files_digits)
    file_names = list(map(lambda x: os.path.join(sorted_dataset_dir, file_name_template.format(num_traces_per_file, x)), range(num_files)))

    if begin_file_index is None:
        begin_file_index = 0
    if end_file_index is None:
        end_file_index = num_files
    if begin_file_index < 0 or begin_file_index > end_file_index or end_file_index > num_files:
        raise ValueError('Invalid indices begin_file_index: {} and end_file_index: {}'.format(begin_file_index, end_file_index))

    print('Sorted offline dataset, traces: {}, traces per file: {}, files: {} (overall)'.format(num_traces, num_traces_per_file, num_files))
    util.progress_bar_init('Saving sorted files with indices in range [{}, {}) ({} of {} files overall)'.format(begin_file_index, end_file_index, end_file_index - begin_file_index, num_files), end_file_index - begin_file_index + 1, 'Files')
    j = 0
    for i in range(begin_file_index, end_file_index):
        j += 1
        file_name = file_names[i]
        print(file_name)
        shelf = ConcurrentShelf(file_name)
        shelf.lock(write=True)
        for new_i, old_i in enumerate(file_indices[i]):
            shelf[str(new_i)] = self[old_i]
        shelf['__length'] = len(file_indices[i])
        shelf.unlock()
        util.progress_bar_update(j)
    util.progress_bar_end()
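
# Hedged usage sketch (hypothetical directory): exactly one of
# num_traces_per_file and num_files may be given.
#
#   dataset.save_sorted('traces_sorted/', num_files=10)
#   # or: dataset.save_sorted('traces_sorted/', num_traces_per_file=1000)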
def __init__(self, sPathToDB="sensitivity_results.db",
             sBaseFolder="../data/sensitivity_results/", aHashes=[],
             lNumberOfThreads=4):
    # set hash algorithms
    self.aHashes = aHashes
    # set number of threads
    self.lNumberOfThreads = lNumberOfThreads
    # create folders if not existent
    self.sBaseFolder = util.create_path(sBaseFolder)
    # create db file if not existent
    self.sPathToDB = self.sBaseFolder + sPathToDB
    open(self.sPathToDB, 'a').close()
    dbData = dbcon.Dbcon(self.sPathToDB)
    sDbSchema = """
        BEGIN TRANSACTION;
        CREATE TABLE IF NOT EXISTS `images_hashes` (
            `image_id` INTEGER NOT NULL,
            `hash_id`  INTEGER NOT NULL,
            FOREIGN KEY(`hash_id`) REFERENCES `hashes`(`id`)
                ON UPDATE CASCADE ON DELETE RESTRICT,
            FOREIGN KEY(`image_id`) REFERENCES `images`(`id`)
                ON UPDATE CASCADE ON DELETE RESTRICT,
            PRIMARY KEY(`image_id`,`hash_id`)
        );
        CREATE TABLE IF NOT EXISTS `images` (
            `id`   INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
            `name` TEXT NOT NULL,
            `collection_id` INTEGER NOT NULL,
            FOREIGN KEY(`collection_id`) REFERENCES `collections`(`id`)
                ON UPDATE CASCADE ON DELETE RESTRICT
        );
        CREATE TABLE IF NOT EXISTS `hashes` (
            `id`   INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
            `hash` NPARRAY NOT NULL,
            `hash_type_id` TEXT NOT NULL,
            FOREIGN KEY(`hash_type_id`) REFERENCES `hash_types`(`id`)
                ON UPDATE CASCADE ON DELETE RESTRICT
        );
        CREATE TABLE IF NOT EXISTS `hash_types` (
            `id`     INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
            `name`   TEXT NOT NULL,
            `params` TEXT NOT NULL
        );
        CREATE TABLE IF NOT EXISTS `collections` (
            `id`   INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
            `name` TEXT NOT NULL UNIQUE
        );
        COMMIT;
    """
    dbData.execute_sql_query_manipulation_script(sDbSchema)
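
# Hedged instantiation sketch (the enclosing class name is not shown in this
# excerpt; dhash/phash stand for whatever hash callables the project defines):
#
#   oTest = SensitivityTest(aHashes=[dhash, phash], lNumberOfThreads=8)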
def get_content(self):
    '''Return the content of self.__filename.'''
    # Serve a templated error page whenever the HTTP response is not 200 OK.
    if self.__status != 200:
        self.__filename = util.create_path(
            '/resp_files', '/' + str(self.__status) + '.html')

    with open(self.__filename, 'rb') as f:
        src = f.read()

    # Compress the source using gzip:
    # -1 = default compression level, wbits=31 = gzip container
    z = zlib.compressobj(-1, zlib.DEFLATED, 31)
    compressed = z.compress(src) + z.flush()
    return compressed
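
# The wbits value of 31 selects the gzip container, so the matching wbits
# value undoes the compression. A minimal round-trip sketch (standard zlib
# API, not from the original file):
#
#   import zlib
#   assert zlib.decompress(compressed, 31) == src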
def plot_metrics(oPandasFRData, oPandasFAData, bERR=False):
    """Calculate and plot FAR and FRR.

    oPandasFRData is the deviation by threshold to the original images,
    oPandasFAData is the deviation by threshold to all other images.
    """
    sPlotPath = sApplicationTestResultBasePath + "plots/"
    util.create_path(sPlotPath)
    for sHashType in oPandasFAData["hashalgorithm"].unique():
        aFAR = [
            oPandasFAData[(oPandasFAData["hashalgorithm"] == sHashType)
                          & (oPandasFAData["deviation"] <= i)]["image"].count()
            for i in aThresholdSteps
        ] / oPandasFAData[oPandasFAData["hashalgorithm"] ==
                          sHashType]["deviation"].count()
        aFRR = (oPandasFRData[oPandasFRData["hashalgorithm"] == sHashType]
                ["deviation"].count() - [
                    oPandasFRData[
                        (oPandasFRData["hashalgorithm"] == sHashType)
                        & (oPandasFRData["deviation"] <= i)]["image"].count()
                    for i in aThresholdSteps
                ]) / oPandasFRData[oPandasFRData["hashalgorithm"] ==
                                   sHashType]["deviation"].count()

        oPandasFAR = pd.DataFrame({
            "Threshold": aThresholdSteps,
            "Errorrate": aFAR,
            "Type": ["FAR"] * len(aThresholdSteps)
        })
        oPandasFRR = pd.DataFrame({
            "Threshold": aThresholdSteps,
            "Errorrate": aFRR,
            "Type": ["FRR"] * len(aThresholdSteps)
        })
        oPandasEERData = pd.concat([oPandasFAR, oPandasFRR])
        oPandasEERData['subject'] = 0

        # last threshold at which the FAR is still zero, and first threshold
        # at which the FRR reaches zero
        lFARNotNullValueX = aThresholdSteps[np.argmax(np.array(aFAR) > 0) - 1]
        lFRRNotNullValueX = aThresholdSteps[np.argmax(np.array(aFRR) == 0)]

        if bERR:
            # calculate the EER (equal error rate): the threshold at which
            # FAR and FRR are closest
            lMinDistancePosition = np.argmin(np.abs(aFRR - aFAR))
            lERRValueX = aThresholdSteps[lMinDistancePosition]
            dErrorrateAtEER = np.mean(
                [aFAR[lMinDistancePosition], aFRR[lMinDistancePosition]])
            print("EER: %f" % lERRValueX)
            print("Value: %f" % dErrorrateAtEER)

        plt.clf()
        sTitle = sHashType
        sb.set_style("whitegrid")
        oSeabornPlot = sb.tsplot(time="Threshold",
                                 value="Errorrate",
                                 condition="Type",
                                 unit="subject",
                                 interpolate=True,
                                 data=oPandasEERData)
        oSeabornPlot.set(title=sTitle)
        oSeabornPlot.set(xlabel="Threshold")
        oSeabornPlot.set(ylabel="Errorrate")
        oSeabornPlot.set(xticks=np.arange(0, 1.01, 0.1))
        oSeabornPlot.set(yticks=np.arange(0, 1.01, 0.1))
        oSeabornPlot.set(ylim=(0, 1))

        # mark the non-zero points of FAR and FRR
        plt.axvline(x=lFARNotNullValueX, color='#1470b0', linestyle="--")
        plt.axvline(x=lFRRNotNullValueX, color='#ff8c27', linestyle="--")

        if bERR:
            # mark the EER
            plt.axvline(x=lERRValueX, color='r', linestyle="-")
            # plt.axhline(y=dErrorrateAtEER, color='r', linestyle="-")

        sFileNameSafeTitle = util.format_filename(sTitle)
        oSeabornPlot.get_figure().savefig(sPlotPath + sFileNameSafeTitle +
                                          ".png")
        plt.clf()
def decide(self, crit, player, _map):
    result = NO_ACTION
    if not crit.is_awake:
        return result
    if self._nexttoplayer(crit, player, _map):
        crit.path = None  # reset the path here
        crit.path_initialized = False
        crit.seen_player = (player.x, player.y)
        if crit.can_melee(player):
            result = crit.do_action(crit.action_cost.attack,
                                    lambda: crit.attack(player))
        else:
            result = crit.find_shooting_point(player, _map)
        return result
    # check if the player is in LOS; this actually checks whether this
    # creature is in the player's LOS, not vice versa
    if util.has_los(crit.x, crit.y, player.x, player.y,
                    _map.current.fov_map0):
        crit.path = None  # reset the path here
        crit.path_initialized = False
        crit.seen_player = (player.x, player.y)
        if crit.can_range(player):
            result = crit.do_action(crit.action_cost.range,
                                    lambda: crit.attack_range(player))
        else:
            result = crit.do_action(
                crit.action_cost.move,
                lambda: crit.move_towards(player.x, player.y))
    # if the critter saw the player, it will check that position
    elif crit.seen_player:
        if not crit.path:
            # we should make a path towards the player
            crit.path = deque()
            crit.path_initialized = False
            crit.path.extend(util.create_path(_map.current.fov_map0, crit.x,
                                              crit.y, player.x, player.y))
        newxy = crit.path.popleft()
        # make a last check before we give up on chasing the player: the
        # player can move away next turn, making us lose the trail, so check
        # whether the player is still in LOS
        if not crit.path:
            if util.has_los(crit.x, crit.y, player.x, player.y,
                            _map.current.fov_map0):
                crit.seen_player = (player.x, player.y)
            else:
                # we lost track of the player - give up
                # todo - if not patroling - then what?
                crit.seen_player = False
        result = crit.do_action(crit.action_cost.move,
                                lambda: crit.move_towards(*newxy))
    elif self.patroling:
        if not crit.path and not crit.path_initialized:
            points = []
            crit.path_initialized = True
            # find something of interest to patrol between
            altars = _map.current.find_feature(oftype='Altar', multiple=True)
            if altars:
                for x in altars:
                    util.do_if_one_chance_in(
                        3, lambda: points.append((x[1], x[2])))
            stairs = _map.current.find_feature(oftype='StairsDown',
                                               multiple=True)
            if stairs:
                for x in stairs:
                    util.do_if_one_chance_in(
                        3, lambda: points.append((x[1], x[2])))
            path = deque()
            path.append((crit.x, crit.y))
            prev_point = path[0]
            if len(points) > 0:
                for point in points:
                    _path = util.create_path(_map.current.fov_map0,
                                             prev_point[0], prev_point[1],
                                             *point)
                    if not isinstance(_path, Iterable):
                        raise AssertionError(
                            'create_path returned %s type %s' %
                            (_path, type(_path)))
                    path.extend(_path)
                    prev_point = point
            crit.path = path
            newxy = crit.path[0]
            result = crit.do_action(crit.action_cost.move,
                                    lambda: crit.move_towards(*newxy))
            crit.path.rotate(-1)
        elif crit.path:
            newxy = crit.path[0]
            result = crit.do_action(crit.action_cost.move,
                                    lambda: crit.move_towards(*newxy))
            crit.path.rotate(-1)
    return result
def __init__(self, sBaseFolder="../data/stability_results/",
             sPathToDB="stability_results.db", aAttacks=[], aHashes=[],
             fnDeviation=None, lNumberOfThreads=4):
    # create folders if not existent
    self.sBaseFolder = util.create_path(sBaseFolder)
    # set attack list
    self.aAttacks = aAttacks
    # set hash function list
    self.aHashes = aHashes
    # set deviation function
    self.fnDeviation = fnDeviation
    # set number of threads
    self.lNumberOfThreads = lNumberOfThreads
    # create db file if not existent
    self.sPathToDB = self.sBaseFolder + sPathToDB
    open(self.sPathToDB, 'a').close()
    dbData = dbcon.Dbcon(self.sPathToDB)
    sDbSchema = """
        BEGIN TRANSACTION;
        CREATE TABLE IF NOT EXISTS `tests` (
            `id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
            `attack` INTEGER NOT NULL,
            `hash_type` INTEGER NOT NULL,
            `image` INTEGER NOT NULL,
            `original_hash` INTEGER NOT NULL,
            `attacked_hash` INTEGER NOT NULL,
            `deviation_hash` REAL,
            FOREIGN KEY(`image`) REFERENCES `images`(`id`)
                ON UPDATE CASCADE ON DELETE RESTRICT,
            FOREIGN KEY(`attack`) REFERENCES `attacks`(`id`)
                ON UPDATE CASCADE ON DELETE RESTRICT,
            FOREIGN KEY(`attacked_hash`) REFERENCES `hashes`(`id`)
                ON UPDATE CASCADE ON DELETE RESTRICT,
            FOREIGN KEY(`hash_type`) REFERENCES `hash_types`(`id`)
                ON UPDATE CASCADE ON DELETE RESTRICT,
            FOREIGN KEY(`original_hash`) REFERENCES `hashes`(`id`)
                ON UPDATE CASCADE ON DELETE RESTRICT,
            UNIQUE(`attack`,`hash_type`,`image`) --ON CONFLICT REPLACE
        );
        CREATE TABLE IF NOT EXISTS `images` (
            `id`   INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
            `name` TEXT NOT NULL UNIQUE
        );
        CREATE TABLE IF NOT EXISTS `hashes` (
            `id`         INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
            `hash_value` NPARRAY NOT NULL
        );
        CREATE TABLE IF NOT EXISTS `hash_types` (
            `id`          INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
            `hash_fn`     TEXT NOT NULL,
            `hash_params` TEXT NOT NULL,
            UNIQUE(`hash_fn`, `hash_params`)
        );
        CREATE TABLE IF NOT EXISTS `attacks` (
            `id`            INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
            `attack_fn`     TEXT NOT NULL,
            `attack_params` TEXT NOT NULL,
            UNIQUE(`attack_fn`, `attack_params`)
        );
        COMMIT;
    """
    dbData.execute_sql_query_manipulation_script(sDbSchema)
def __init__(self, temp_dir):
    self.aws_access_key = os.getenv('AWS_ACCESS_KEY_ID')
    self.aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
    self.conn = self.get_connection()
    util.create_path(temp_dir)
    self.temp_dir = temp_dir