def update(input_option):
    old_fsm_path = input_option.args.old_fsm
    local_fsm_path = input_option.args.work_dir + '/FINAL_mindfa.txt'
    updated_fsm_path = input_option.args.work_dir + '/UPDATED_mindfa.txt'
    updated_dot_path = input_option.args.work_dir + '/UPDATED_mindfa.dot'

    # merge the previous FSM with the locally inferred one
    updated_fsm = merge_fsms(old_fsm_path, local_fsm_path)

    # determinize and minimize the merged FSM before writing it out
    merged_min_fsm = graph_lib.minimize_dfa(updated_fsm.nfa2dfa())

    if not os.path.isdir(os.path.dirname(updated_fsm_path)):
        os.makedirs(os.path.dirname(updated_fsm_path))
    with open(updated_fsm_path, 'w') as writer:
        writer.write(merged_min_fsm.to_string() + '\n')
    merged_min_fsm.to_dot(updated_dot_path)
    # note: returns the merged (pre-minimization) FSM
    return updated_fsm
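# Hedged usage sketch for update(): the attribute names below mirror the ones
# read above (args.old_fsm, args.work_dir); the namespace object itself is
# illustrative, not the project's real option type.
#
#   from types import SimpleNamespace
#   opts = SimpleNamespace(args=SimpleNamespace(
#       old_fsm='runs/prev/FINAL_mindfa.txt', work_dir='runs/current'))
#   merged = update(opts)  # writes UPDATED_mindfa.{txt,dot} under work_dir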
def predict_accuracy(fsm_file, stat_file, input_file, prediction_file):
    print(fsm_file)
    num_cluster = int(
        os.path.basename(os.path.dirname(fsm_file)).replace('S_', ''))

    # collect all adjacent event pairs (bigrams) seen in the training traces
    training_pairs = set()
    with open(input_file, 'r') as reader:
        lines = [l.strip().split() for l in reader]
    for tr in lines:
        training_pairs |= set(
            [(tr[i], tr[i + 1]) for i in range(len(tr) - 1)])

    ######################################
    # collect all event pairs the FSM can emit along two consecutive edges
    fsm_pairs = set()
    the_fsm = graph_lib.parse_fsm_file(fsm_file)
    adjlst = the_fsm.create_adjacent_list()
    for a in adjlst:
        for (b, label_one) in adjlst[a]:
            if b not in adjlst:
                continue
            for (c, label_two) in adjlst[b]:
                fsm_pairs.add((label_one, label_two))

    ######################################
    predicted_precision = float(len(training_pairs)) / float(
        len(training_pairs | fsm_pairs))
    print("Predicted Precision:", predicted_precision,
          "unseen pairs:", len(fsm_pairs - training_pairs),
          "training pairs:", len(training_pairs))

    ######################################
    # read the measured recall back out of the statistics file
    with open(stat_file, 'r') as reader:
        lines = [l.strip().split(':') for l in reader]
    recall = list(filter(lambda x: x[0].strip() == 'recall',
                         lines))[0][-1].strip()
    recall = float(recall)
    print("Predicted Recall:", recall)

    ######################################
    if predicted_precision + recall > 0.0:
        predicted_f1 = float(2.0 * predicted_precision * recall) / float(
            predicted_precision + recall)
    else:
        predicted_f1 = 0.0
    print("Predicted F-measure:", predicted_f1)

    with open(prediction_file, 'w') as writer:
        writer.write(str(predicted_precision) + '\n')
        writer.write(str(recall) + '\n')
        writer.write(str(predicted_f1) + '\n')
    return (fsm_file, num_cluster, predicted_precision, recall, predicted_f1)
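# Worked toy example of the precision estimate above (illustrative values
# only): predicted precision is |training_pairs| / |training_pairs ∪ fsm_pairs|,
# i.e. the fraction of bigrams the FSM can produce that were already seen
# during training.
#
#   training_pairs = {('open', 'read'), ('read', 'close')}
#   fsm_pairs      = {('open', 'read'), ('read', 'close'), ('close', 'open')}
#   predicted_precision = 2 / 3   # one unseen pair: ('close', 'open')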
def when_ending_method_available(ending_methods, fsm, output_folder,
                                 make_dfa=False):
    extended_fsm_dir = output_folder + '/extended_endings_fsm'
    lib.init_dir(extended_fsm_dir)

    # extend the FSM with the known ending methods, then write it out
    extended_fsm = extending_ending_states(fsm, ending_methods)
    with open(extended_fsm_dir + '/fsm.txt', 'w') as writer:
        writer.write(extended_fsm.to_string())
    drawing_dot(extended_fsm, extended_fsm_dir + '/fsm')

    extended_dfa = extended_fsm.nfa2dfa()
    with open(extended_fsm_dir + '/dfa.txt', 'w') as writer:
        writer.write(extended_dfa.to_string())
    drawing_dot(extended_dfa, extended_fsm_dir + '/dfa')

    if make_dfa:
        extended_mindfa = graph_lib.minimize_dfa(extended_dfa)
        with open(extended_fsm_dir + '/mindfa.txt', 'w') as writer:
            writer.write(extended_mindfa.to_string())
        drawing_dot(extended_mindfa, extended_fsm_dir + '/mindfa')
def create_fsm_for_unit_traces(elementID2cluster, training_traces,
                               output_folder):
    lib.init_dir(output_folder)
    unit_id = 0
    # build one FSM (plus DFA and minimized DFA) per individual training trace
    for one_trace in training_traces:
        unit_id += 1
        unit_dir = output_folder + '/fsm_d' + str(unit_id)
        lib.init_dir(unit_dir)

        fsm, log_fsm = create_fsm(elementID2cluster, [one_trace])
        dfa = fsm.nfa2dfa()
        mindfa = graph_lib.minimize_dfa(dfa)

        with open(unit_dir + '/fsm.txt', 'w') as writer:
            writer.write(fsm.to_string())
        with open(unit_dir + '/dfa.txt', 'w') as writer:
            writer.write(dfa.to_string())
        with open(unit_dir + '/mindfa.txt', 'w') as writer:
            writer.write(mindfa.to_string())

        drawing_dot(fsm, unit_dir + '/fsm')
        drawing_dot(dfa, unit_dir + '/dfa')
        drawing_dot(mindfa, unit_dir + '/mindfa')
def compute_statistics(X, method_list, args, estimator, generated_traces,
                       validation_traces, output_folder=None,
                       X_id_mapping=None, create_fsm_per_unit_trace=False,
                       ending_methods=None, minimize_dfa=True, ktails=False,
                       check_accepted_traces=True):
    if output_folder is None:
        output_folder = args.output_folder
    lib.init_dir(output_folder)

    if estimator is not None:
        elementID2cluster, centroids, X_labels = read_clusters(
            estimator, X, X_id_mapping=X_id_mapping)
    elif ktails:
        # k-tails clustering
        elementID2cluster, centroids, X_labels = read_ktails_clusters(
            X, X_id_mapping=X_id_mapping)
    else:
        print("ERROR: no estimators!")
        sys.exit(1)

    # write cluster info
    write_cluster(elementID2cluster, X,
                  output_folder + '/resultant_cluster.gz',
                  X_id_mapping=X_id_mapping)
    # write centroids
    write_centroids_to_file(centroids, output_folder + '/centroids.txt')
    # write distance to centroid of each element in each cluster
    write_cluster_contents_distance(
        elementID2cluster, X, centroids,
        output_folder + '/cluster_element_distances.txt')

    if create_fsm_per_unit_trace:
        create_fsm_for_unit_traces(elementID2cluster, generated_traces,
                                   output_folder + '/unit_fsms')

    # create FSM
    fsm, log_fsm = create_fsm(elementID2cluster, generated_traces)
    # write info of data contained inside each cluster
    write_trace_cluster_info(elementID2cluster, generated_traces,
                             output_folder + '/trace_cluster_info.txt')
    # write fsm log to file
    write_log_to_file(log_fsm, output_folder + '/debug_fsm.txt')

    # DFA
    dfa = fsm.nfa2dfa()
    if check_accepted_traces:
        dfa_num_accepted_traces = count_accepted_traces(
            dfa, validation_traces,
            output_file=output_folder + '/dfa_uncovered_traces.txt')
    else:
        dfa_num_accepted_traces = -1
    print("Finished validating DFA:", dfa_num_accepted_traces,
          "validation traces accepted by DFA")

    if minimize_dfa:
        # MinDFA
        mindfa = graph_lib.minimize_dfa(dfa)
    else:
        mindfa = None

    with open(output_folder + '/fsm.txt', 'w') as writer:
        writer.write(fsm.to_string())
    drawing_dot(fsm, output_folder + '/fsm')
    with open(output_folder + '/dfa.txt', 'w') as writer:
        writer.write(dfa.to_string())
    if minimize_dfa:
        with open(output_folder + '/mindfa.txt', 'w') as writer:
            writer.write(mindfa.to_string())
    drawing_dot(dfa, output_folder + '/dfa')
    if minimize_dfa:
        drawing_dot(mindfa, output_folder + '/mindfa')
    print("after drawing dot")
    print(output_folder)

    try:
        fsm.serialize(output_folder + "/serialized_fsa.json")
    except Exception as e:
        print("Serialization problem:")
        print(e)

    # Number of accepted data; size of DFA, MinDFA, FSM;
    # fsm_num_accepted_traces = count_accepted_traces(fsm, validation_traces, debug=True)
    # print "Finished validating FSM:", fsm_num_accepted_traces, "data"
    ###
    # mindfa_num_accepted_traces = count_accepted_traces(mindfa, validation_traces)
    # print "Finished validating MinDFA:", mindfa_num_accepted_traces, "data"

    ##### compute silhouette ####
    # try:
    #     import signal
    #     signal.signal(signal.SIGALRM, lib.handler)
    #     signal.alarm(waiting_time())
    #     silhouette_avg = silhouette_score(
    #         np.array(X), estimator.labels_,
    #         sample_size=min(
    #             args.silhouette_sample_size
    #             if args.silhouette_sample_size is not None else len(X),
    #             len(X)),
    #         random_state=args.seed)
    #     print "silhouette_avg:", silhouette_avg
    #     signal.alarm(0)
    # except TimeoutError:
    #     print "silhouette computation runs too long!"
    #     silhouette_avg = -1
    # except ValueError as e:
    #     print e
    #     silhouette_avg = -1
    # finally:
    #     signal.alarm(0)

    # write statistics
    with open(output_folder + '/statistic.txt', 'w') as writer:
        writer.write('FSM_size:' + '\t' + str(len(fsm.states)) + '\n')
        if dfa is not None:
            writer.write('DFA_size:' + '\t' + str(len(dfa.states)) + '\n')
        if mindfa is not None:
            writer.write('MinDFA_size:' + '\t' +
                         str(len(mindfa.states)) + '\n')
        # writer.write('FSM_validation:' + '\t' + str(fsm_num_accepted_traces) + '\n')
        if dfa_num_accepted_traces is not None:
            writer.write('DFA_validation:' + '\t' +
                         str(dfa_num_accepted_traces) + '\n')
        # writer.write('MinDFA_validation:' + '\t' + str(mindfa_num_accepted_traces) + '\n')
        # writer.write('silhouette_avg:' + '\t' + str(silhouette_avg) + '\n')
        if hasattr(estimator, 'n_clusters'):
            writer.write('num_cluster:\t' + str(estimator.n_clusters) + '\n')
        else:
            # label -1 conventionally marks noise points, so exclude it
            n_clusters_ = len(set(X_labels)) - (1 if -1 in X_labels else 0)
            writer.write('num_cluster:\t' + str(n_clusters_) + '\n')
        writer.write('total_validation_traces:\t' +
                     str(len(validation_traces)) + '\n')
        if dfa_num_accepted_traces is not None:
            possible_recall = float(dfa_num_accepted_traces) / float(
                len(validation_traces))
            writer.write('recall:\t' + str(possible_recall) + '\n')
    print("after writing stats")

    ########################
    if ending_methods is not None:
        when_ending_method_available(ending_methods, fsm, output_folder,
                                     make_dfa=minimize_dfa)
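# Worked example for the recall figure written above: if the DFA accepts
# 80 of 100 validation traces, statistic.txt gets the line 'recall:\t0.8'.
# This matches the 'recall' key that predict_accuracy() filters for when it
# splits the stats file on ':'.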
    if predicted_precision + predicted_recall != 0:
        fmeasure = 2.0 * predicted_precision * predicted_recall / (
            predicted_precision + predicted_recall)
    else:
        fmeasure = 0.0
    if verbose:
        print("Unseen pairs:", unseen_pairs)
    return predicted_precision, predicted_recall, fmeasure


if __name__ == '__main__':
    args = read_args()
    the_fsm = graph_lib.parse_fsm_file(args.fsm)
    #######
    with open(args.traces, 'r') as reader:
        traces = [l.strip().split() for l in reader]
    # drop the artificial <START> token from each trace before prediction
    traces = list(map(lambda x: x[1:] if x[0] == '<START>' else x, traces))
    precision, recall, fmeasure = predict(the_fsm, traces,
                                          verbose=args.verbose)
    print('Precision:', precision)
    print('Recall:', recall)
    print('F-measure:', fmeasure)
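# Example invocation (assuming read_args() wires --fsm, --traces and --verbose
# flags to the attributes used above; the actual flag names and script name
# live in read_args, so treat these as placeholders):
#
#   python predict.py --fsm S_10/FINAL_mindfa.txt \
#       --traces training_traces.txt --verbose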