# Generate statistics for every auto-generated tag group, once per machine.
#
# NOTE: max_values_vector is not assigned anywhere in this snippet; it has to
# be defined by code that runs before this point.
tag_groups = mfv.autogen_tag_groups()
machines = ['win7x32', 'win7x64']

# Separate the results by machine.
for machine in machines:
    for tags in tag_groups:
        # The group name is built from index 1 of the first four tags,
        # followed by the machine name.
        group_name = "{0}_{1}_{2}_{3}_{4}".format(
            tags[0][1], tags[1][1], tags[2][1], tags[3][1], machine)
        print("Creating automated stats for {0}".format(group_name))

        # No additional filtering by task id or hash for the subset.
        task_ids = None
        hashes = None

        # Subset: vectors matching all tags of the group on this machine,
        # normalized against max_values_vector.
        subset_vectors = mfv.normalize_vectors(
            mfv.select_vectors(tags, [machine], task_ids, hashes),
            max_values_vector)

        # Superset: keep only the filetype tag (tags[1]) and the machine,
        # normalized the same way.
        superset_vectors = mfv.normalize_vectors(
            mfv.select_vectors([tags[1]], [machine], None, None),
            max_values_vector)

        # Find the archetype vector of the subset relative to the superset
        # and collect the keys of its features.
        archetype, archetype_stddev = mfv.get_archetype(
            subset_vectors, superset_vectors)
        feature_keys = [key for key, _ in archetype.features.iteritems()]

        # Summarize the group statistics; the title is prepared for the
        # group's archetype feature plot.
        mfv.stats_summary(subset_vectors, superset_vectors)
        title = "Scatter Plot of Family {0}".format(group_name)
# Compare a single task against every archetype stored in the database.
# The task id is read from the first command-line argument.
test_task_id = int(sys.argv[1])

# Retrieve the archetype features from the database.
cmd = ("SELECT label,mean_features,stddev_features,max_value_features "
       "FROM archetypes")
cursor.execute(cmd)
results = cursor.fetchall()

for result in results:
    # Each row holds the label followed by three JSON-encoded feature sets,
    # in the column order of the SELECT above.
    archetype_label, mean_json, stddev_json, max_value_json = result
    mean_features = json.loads(mean_json)
    stddev_features = json.loads(stddev_json)
    max_value_features = json.loads(max_value_json)

    # Wrap each feature set in a FeatureVector.
    mean_vector = mfv.FeatureVector(None, None, None, mean_features)
    stddev_vector = mfv.FeatureVector(None, None, None, stddev_features)
    max_values_vector = mfv.FeatureVector(None, None, None, max_value_features)

    # The test vector is selected again for every archetype and normalized
    # against that archetype's max values.
    # NOTE(review): the select looks loop-invariant, but hoisting it is only
    # safe if normalize_vectors does not modify its input -- confirm first.
    test_vector = mfv.select_vectors(None, None, [test_task_id], None)[0]
    test_vectors = mfv.normalize_vectors([test_vector], max_values_vector)

    # Label the archetype vectors.
    mean_vector.label = archetype_label + "_mean"
    stddev_vector.label = archetype_label + "_stddev"

    title = "Comparison of Task {0} to Archetype : {1}".format(
        test_task_id, archetype_label)
    filename = "comparisions/{0}/{1}".format(test_task_id, archetype_label)

    # Print whatever plotly_scatter returns for this comparison.
    print(mfv.plotly_scatter(
        test_vectors, mean_vector, stddev_vector, None, filename, title))
# Compare the task identified by test_task_id against every archetype stored
# in the database.
# NOTE: test_task_id and cursor are not assigned in this snippet; they have
# to be defined by code that runs before this point.
cmd = ("SELECT label,mean_features,stddev_features,max_value_features "
       "FROM archetypes")
cursor.execute(cmd)
results = cursor.fetchall()

for result in results:
    # Unpack the row in the column order of the SELECT above; the three
    # feature columns are JSON-encoded.
    archetype_label, mean_json, stddev_json, max_value_json = result
    mean_features = json.loads(mean_json)
    stddev_features = json.loads(stddev_json)
    max_value_features = json.loads(max_value_json)

    # Wrap each feature set in a FeatureVector.
    mean_vector = mfv.FeatureVector(None, None, None, mean_features)
    stddev_vector = mfv.FeatureVector(None, None, None, stddev_features)
    max_values_vector = mfv.FeatureVector(None, None, None, max_value_features)

    # Retrieve the test vector and normalize it against this archetype's
    # max values.
    # NOTE(review): the select is repeated on every iteration; only hoist it
    # if normalize_vectors is known not to modify its input.
    test_vector = mfv.select_vectors(None, None, [test_task_id], None)[0]
    test_vectors = mfv.normalize_vectors([test_vector], max_values_vector)

    # Label the archetype vectors.
    mean_vector.label = archetype_label + "_mean"
    stddev_vector.label = archetype_label + "_stddev"

    title = "Comparison of Task {0} to Archetype : {1}".format(
        test_task_id, archetype_label)
    filename = "comparisions/{0}/{1}".format(test_task_id, archetype_label)

    # Print whatever plotly_scatter returns for this comparison.
    print(mfv.plotly_scatter(
        test_vectors, mean_vector, stddev_vector, None, filename, title))