def functionMap():
    functions = {
        'Shannon': lambda x: '{0:10.8f}'.format(stats.graphEntropy(x)),
        'enum': linkoToEnum
    }
    functionHelp = ('Shannon (Shannon entropy applied to the'
                    ' linkograph), '
                    'enum (The enumeration for the linkograph).')
    return functions, functionHelp
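# Usage sketch (hypothetical, not part of the module): dispatch a metric
# by name using the map above. Assumes `linko` is an existing Linkograph
# and that `stats` and `linkoToEnum` are in scope as in this module.
def demoFunctionMap(linko):
    functions, functionHelp = functionMap()
    # Look up the Shannon-entropy formatter and apply it.
    shannon_str = functions['Shannon'](linko)
    return shannon_str, functionHelp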
def regressionTest():
    # Figure 1 from linkography.pdf
    test_linkograph = llinkoCreate.Linkograph()
    test_linkograph.append((set(), set(), {1, 2, 3}))
    test_linkograph.append((set(), {0}, {2}))
    test_linkograph.append((set(), {0, 1}, set()))
    test_linkograph.append((set(), {0}, set()))
    if (4 != lstats.links(test_linkograph)
            or (1.125, 4.5, 1.0, 1.75, 7, 2)
            != lstats.calculateCartesianStatistics(test_linkograph)
            or 0.0000000000000001 < abs(
                lstats.percentageOfLinks(test_linkograph)
                - 0.6666666666666666)
            or 0.0000000000000001 < abs(
                lstats.graphEntropy(test_linkograph)
                - 0.9182958340544896)):
        print("error calculating statistics for training data")
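# Worked check of the expected constants above (a sketch, assuming
# graphEntropy treats link presence/absence as a binary source, which
# the expected values are consistent with): the test linkograph has 4
# nodes, hence 4*3/2 = 6 possible links, of which 4 are present, so
# percentageOfLinks is 4/6 = 0.6666... and the entropy is that of a
# Bernoulli(4/6) source.
import math

def expectedGraphEntropy(links_present=4, links_possible=6):
    p = links_present / links_possible
    q = 1 - p
    # H = -p*log2(p) - q*log2(q) = 0.9182958340544896 for p = 2/3.
    return -p * math.log2(p) - q * math.log2(q)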
def processSession(session_filename, writer):
    #####################
    # generate linkograph
    #####################

    # label commands
    label_rules = open("abstraction.json", "r")
    labeler = llabels.Labeler(json.load(label_rules))
    label_rules.close()
    commands = open(session_filename, "r")
    json_commands = json.load(commands)
    commands.close()
    if 2 > len(json_commands):
        print("can't process", session_filename,
              "because session files must have at least two commands")
        return
    last_command = json_commands.pop()

    # access_next, look_next, transfer_next, move_next, execute_next,
    # cleanup_next
    last_command_labels = labeler.labelCommands([last_command], "NoLabel")
    access_next = 0
    look_next = 0
    transfer_next = 0
    move_next = 0
    execute_next = 0
    cleanup_next = 0
    if "Access" in last_command_labels:
        access_next = 1
    if "Look" in last_command_labels:
        look_next = 1
    if "Transfer" in last_command_labels:
        transfer_next = 1
    if "Move" in last_command_labels:
        move_next = 1
    if "Execute" in last_command_labels:
        execute_next = 1
    if "Cleanup" in last_command_labels:
        cleanup_next = 1

    labeled = labeler.labelCommands(json_commands, "NoLabel")
    # @todo clean up labeled.json when it's safe
    llabels.writeLabelsToJsonFile(labeled, "labeled.json")

    # link commands
    ontology = open("ontology.json", "r")
    inv_labeling = open("labeled.json", "r")
    lg = llinkoCreate.createLinko(json.load(inv_labeling),
                                  json.load(ontology))
    inv_labeling.close()
    os.remove("labeled.json")
    ontology.close()

    ##################
    # extract features
    ##################

    # node_count
    node_count = len(lg)

    # critical_node_count
    # @todo: pick something real for critical_threshold
    critical_threshold = node_count / 2
    critical_node_count = lstats.countCriticalNodes(lg, critical_threshold)

    # x_bar, Sigma_x, range_x, y_bar, Sigma_y, range_y
    x_bar, Sigma_x, range_x, y_bar, Sigma_y, range_y = \
        lstats.calculateCartesianStatistics(lg)

    # percentage_of_links
    percentage_of_links = lstats.percentageOfLinks(lg)

    # entropy
    entropy = lstats.graphEntropy(lg)

    # T-Complexity
    encoded_lg = lstats.linkographToString(lg)
    t_complexity = lstats.tComplexity(encoded_lg)

    # link_index
    link_index = lstats.links(lg) / len(lg)

    # graph differences
    graph_differences = lstats.summaryDifference(lg)

    # entropy deviation
    entropy_deviation = lstats.entropyDeviation(lg)

    # mean link coverage
    mean_link_coverage = lstats.meanLinkCoverage(lg)

    # top cover
    top_cover = lstats.topCover(lg)

    # session_start_time
    first_command = json_commands[0]
    first_datetime = utils.stringToDatetime(first_command['ts'])
    session_start_time = (first_datetime.hour * 3600
                          + first_datetime.minute * 60
                          + first_datetime.second)

    # session_length_seconds, mean_delay_seconds
    last_command = json_commands[-1]
    last_datetime = utils.stringToDatetime(last_command['ts'])
    session_length_timedelta = last_datetime - first_datetime
    session_length_seconds = session_length_timedelta.total_seconds()
    if 1 < len(lg):
        mean_delay_seconds = session_length_seconds / (len(lg) - 1)
    else:
        mean_delay_seconds = None

    # access_ratio, look_ratio, transfer_ratio, move_ratio,
    # execute_ratio, cleanup_ratio
    access_ratio = look_ratio = transfer_ratio = move_ratio = \
        execute_ratio = cleanup_ratio = 0
    if "Access" in labeled.keys():
        access_ratio = len(labeled['Access']) / len(lg)
    if "Look" in labeled.keys():
        look_ratio = len(labeled['Look']) / len(lg)
    if "Transfer" in labeled.keys():
        transfer_ratio = len(labeled['Transfer']) / len(lg)
    if "Move" in labeled.keys():
        move_ratio = len(labeled['Move']) / len(lg)
    if "Execute" in labeled.keys():
        execute_ratio = len(labeled['Execute']) / len(lg)
    if "Cleanup" in labeled.keys():
        cleanup_ratio = len(labeled['Cleanup']) / len(lg)

    #################
    # persist in .csv
    #################
    writer.writerow([
        node_count, critical_node_count, x_bar, Sigma_x, range_x, y_bar,
        Sigma_y, range_y, percentage_of_links, entropy, t_complexity,
        link_index, graph_differences, entropy_deviation,
        mean_link_coverage, top_cover, session_start_time,
        session_length_seconds, mean_delay_seconds, access_ratio,
        look_ratio, transfer_ratio, move_ratio, execute_ratio,
        cleanup_ratio, access_next, look_next, transfer_next, move_next,
        execute_next, cleanup_next
    ])
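# Hypothetical driver for processSession (a sketch, not from the source):
# the header row mirrors the writerow order above; the glob pattern and
# output path are assumptions.
import csv
import glob

def processAllSessions(pattern='sessions/*.json', out_path='features.csv'):
    with open(out_path, 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        writer.writerow([
            'node_count', 'critical_node_count', 'x_bar', 'Sigma_x',
            'range_x', 'y_bar', 'Sigma_y', 'range_y',
            'percentage_of_links', 'entropy', 't_complexity',
            'link_index', 'graph_differences', 'entropy_deviation',
            'mean_link_coverage', 'top_cover', 'session_start_time',
            'session_length_seconds', 'mean_delay_seconds',
            'access_ratio', 'look_ratio', 'transfer_ratio', 'move_ratio',
            'execute_ratio', 'cleanup_ratio', 'access_next', 'look_next',
            'transfer_next', 'move_next', 'execute_next', 'cleanup_next'
        ])
        for session_filename in sorted(glob.glob(pattern)):
            processSession(session_filename, writer)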
sys.path.append(args.path)

# Import the linkograph packages. They are loaded here to provide the
# ability to add their path on the command line.
import linkograph.linkoCreate as llc  # For manipulating linkographs
import linkograph.stats as ls  # For linkograph statistics

# Set the max and min subgraph size to the specified size.
minSize = maxSize = args.graphSize

# Get the linkograph.
linko = llc.readLinkoJson(args.linko)

# Calculate the graph Shannon entropy for the whole linkograph.
totalEntropy = ls.graphEntropy(linko)

# Calculate the graph Shannon entropy for subgraphs.
subEntropies = ls.subgraphMetric(linkograph=linko,
                                 metric=ls.graphEntropy,
                                 lowerThreshold=None,
                                 upperThreshold=None,
                                 minSize=minSize,
                                 maxSize=maxSize,
                                 step=args.step,
                                 lowerBound=args.lowerBound,
                                 upperBound=args.upperBound)

# ls.subgraphMetric returns tuples that give the lower and upper index
# for each of the subgraphs. We just want to graph the entropy portion,
# which is the third entry of each tuple.
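# Continuation sketch (hypothetical plotting code; assumes matplotlib is
# available and that each tuple is (lowerIndex, upperIndex, entropy), as
# the comment above describes):
import matplotlib.pyplot as plt

entropies = [t[2] for t in subEntropies]
plt.plot(entropies, label='subgraph entropy')
plt.axhline(totalEntropy, linestyle='--', label='whole-graph entropy')
plt.xlabel('subgraph index')
plt.ylabel('Shannon entropy')
plt.legend()
plt.show()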
def genSingleOntologyStats(model, ontLink, minLinkoSize, maxLinkoSize,
                           stepLinkoSize, runNum, precision=2,
                           seeds=None):
    """Generate the stats on link models for a given ontology.

    inputs:

    model: the Markov model used to generate the linkographs.

    ontLink: ontology used for constructing linkographs.

    minLinkoSize: the minimum number of nodes in the linkographs to
    consider.

    maxLinkoSize: the maximum number of nodes in the linkographs to
    consider. Note that the max is not included, to match Python's
    conventions on lists and ranges.

    stepLinkoSize: the step size between minLinkoSize and maxLinkoSize
    for the sizes of the linkographs to consider.

    runNum: the number of linkographs to consider for each linkograph
    size.

    precision: the number of decimal places to use for the Markov
    models.

    output:

    a number_of_linkographs x 2 array. The (i, 0) entry provides the
    average Shannon entropy for the i-th size linkograph considered
    and the (i, 1) entry provides the standard deviation of the
    Shannon entropy for the i-th size linkograph considered.

    """

    linkoSizes = range(minLinkoSize, maxLinkoSize, stepLinkoSize)

    results = np.zeros((len(linkoSizes), 2))

    # For each linkograph size, generate runNum linkographs and
    # calculate the needed statistics.
    for size in linkoSizes:
        print('size: {0}'.format(size))

        # Collect entropy.
        metrics = np.zeros(runNum)
        for i in range(runNum):
            # Randomize the initial state.
            model.state = model.random.randint(1, len(model.absClasses)) - 1
            linko = model.genLinkograph(size, ontology=ontLink)
            entropy = lstats.graphEntropy(linko)
            metrics[i] = entropy

        # Find the mean and standard deviation of the entropy.
        index = (size - minLinkoSize) // stepLinkoSize
        results[index, 0] = np.mean(metrics)
        results[index, 1] = np.std(metrics)

    return results
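# Usage sketch (hypothetical; assumes an ontology JSON file like the ones
# used elsewhere in this repo, and the markel module used in the
# multi-model variant below; the seed and size parameters here are
# illustrative, not values from the source):
import json

def demoSingleOntologyStats(ontology_path='ontology.json'):
    with open(ontology_path, 'r') as f:
        ontLink = json.load(f)
    # Build one generating model from the ontology.
    model = markel.genModelFromOntology(ontology=ontLink, precision=2,
                                        seed=42)
    results = genSingleOntologyStats(model, ontLink, minLinkoSize=10,
                                     maxLinkoSize=110, stepLinkoSize=10,
                                     runNum=50)
    # Row i holds (mean, std) of the Shannon entropy for the i-th size.
    return results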
def genSingleOntologyStats(ontNext, ontLink, minLinkoSize, maxLinkoSize,
                           stepLinkoSize, modelNum, runNum, precision=2,
                           seeds=None):
    """Generate the stats on link models for a given ontology.

    inputs:

    ontNext: ontology used to generate the Markov models that create
    the next state.

    ontLink: ontology used for constructing linkographs.

    minLinkoSize: the minimum number of nodes in the linkographs to
    consider.

    maxLinkoSize: the maximum number of nodes in the linkographs to
    consider. Note that the max is not included, to match Python's
    conventions on lists and ranges.

    stepLinkoSize: the step size between minLinkoSize and maxLinkoSize
    for the sizes of the linkographs to consider.

    modelNum: the number of models.

    runNum: the number of linkographs to consider for each linkograph
    size.

    precision: the number of decimal places to use for the Markov
    models.

    seeds: a list of seeds to use for the generated next Markov
    models. The size of the list should be the same as the number of
    models.

    output:

    a modelNum x number_of_linkographs array. The (i, j) entry
    provides the average Shannon entropy for the i-th model and j-th
    size linkograph considered.

    """

    linkoSizes = range(minLinkoSize, maxLinkoSize, stepLinkoSize)

    ontSize = len(ontNext)

    absClasses = list(ontNext.keys())
    absClasses.sort()

    results = np.zeros((modelNum, len(linkoSizes)))

    if seeds is None:
        seeds = [time.time() * i for i in range(modelNum)]

    models = []

    # Create the generating models.
    for i in range(modelNum):
        m = markel.genModelFromOntology(ontology=ontNext,
                                        precision=precision,
                                        seed=seeds[i])

        # Store the model.
        models.append(m)

    # For each linkograph size, generate runNum linkographs and
    # calculate the needed statistics.
    for size in linkoSizes:
        print('size: {0}'.format(size))

        for modelIndex, m in enumerate(models):

            # Collect entropy.
            metrics = np.zeros(runNum)
            for i in range(runNum):
                # Randomize the initial state.
                m.state = m.random.randint(1, len(m.absClasses)) - 1
                linko = m.genLinkograph(size, ontology=ontLink)
                entropy = lstats.graphEntropy(linko)
                metrics[i] = entropy

            # Find the mean across the different runs.
            index = (size - minLinkoSize) // stepLinkoSize
            results[modelIndex][index] = np.mean(metrics)

    return results
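# Usage sketch for the multi-model variant (hypothetical; ontNext and
# ontLink are taken to be the same ontology here for simplicity, and
# all sizes are illustrative):
import json

def demoMultiModelStats(ontology_path='ontology.json'):
    with open(ontology_path, 'r') as f:
        ont = json.load(f)
    results = genSingleOntologyStats(ont, ont, minLinkoSize=10,
                                     maxLinkoSize=110, stepLinkoSize=10,
                                     modelNum=5, runNum=50)
    # Averaging across models (axis 0) gives one mean-entropy curve per
    # linkograph size, comparable to the single-model variant above.
    return results.mean(axis=0)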