def main(base_directory, db, stats_folder): tga_directory = os.path.join(base_directory, "tgas") stats_directory = os.path.join(base_directory, stats_folder) num_objects = len(list(utilities.iterator_over_object_groups(db))) for adj in utilities.adjectives: now = time.time() filename = "%s.pkl" % adj if not test_file(filename, stats_directory): print "File %s already exist, skipping it" % filename continue try: clf = tga_kernel_chain.TGAEnsemble(adj, tga_directory) except ValueError: print "Adjective %s does not exist, skipping it" % adj print "\nCreating stats for adjective ", adj positives = 0. total = 0. stat = {} for obj in utilities.iterator_over_object_groups(db): total += 1. res = clf.classification_labels(obj) output = np.mean(res) > 0.5 true_label = (clf.adjective in obj.adjectives[:]) if true_label == output: positives += 1. stat[obj._v_name] = dict(result = output, true_label = true_label, classifiers = res) totaltime = (time.time()-now)/60.0 sys.stdout.write("\r%d/%d ->\t precision: %.2f, time: %f min" %(total, num_objects, positives / total, totaltime ) ) sys.stdout.flush() stat['result'] = positives / total full_pathname = os.path.join(stats_directory, filename) f = open(full_pathname, "w") cPickle.dump(stat, f, cPickle.HIGHEST_PROTOCOL) f.close()
def create_feature_object_set(database, phase): feature_objs = defaultdict(dict) feature_vect = [] norm_phase = "MOVE_ARM_START_POSITION" # For each object in the database, extract the phase and sensor # data for #temp = [g for g in utilities.iterator_over_object_groups(database)] for group in utilities.iterator_over_object_groups(database): #for group in temp[0:2]: # Pull data from h5 database data_dict = utilities.dict_from_h5_group(group, [phase]) norm_dict = utilities.dict_from_h5_group(group, [norm_phase]) data = data_dict["data"][phase] norm_data = norm_dict["data"][norm_phase] object_name = data_dict["name"] name = object_name.split('_') print "Loading object ", object_name # Extract features static_feature_phase, feats = extract_static_features(data, norm_data) # Store information about object static_feature_phase.labels = data_dict["adjectives"] static_feature_phase.name = object_name static_feature_phase.detailed_state = phase static_feature_phase.object_id = int(name[-2]) static_feature_phase.run_num = int(name[-1]) feature_objs[object_name] = static_feature_phase feature_vect.append(feats) return feature_objs, np.array(feature_vect)
def create_features_set(self, database, store = False, verbose = False): """ For each object in the database, run classifier.extract_features. All the features are then collected in a matrix. If the classifier's adjective is among the objects' then the feature is labeled with 1, otherwise 0. Parameters: database: either a string or an open pytables file. Returns the features and the labels as two 2-dimensional matrices. """ labels = [] features = [] for group in utilities.iterator_over_object_groups(database): data_dict = utilities.dict_from_h5_group(group) if verbose: print "Loading object ", data_dict["name"] data = data_dict["data"] features.append(self.extract_features(data)) if self.adjective in data_dict["adjectives"]: labels.append(1) else: labels.append(0) features = np.array(features).squeeze() labels = np.array(labels).flatten() if store: self.features = features self.labels = labels return features, labels
def create_features_set(self, database, store=False, verbose=False): """ For each object in the database, run classifier.extract_features. All the features are then collected in a matrix. If the classifier's adjective is among the objects' then the feature is labeled with 1, otherwise 0. Parameters: database: either a string or an open pytables file. Returns the features and the labels as two 2-dimensional matrices. """ labels = [] features = [] for group in utilities.iterator_over_object_groups(database): data_dict = utilities.dict_from_h5_group(group) if verbose: print "Loading object ", data_dict["name"] data = data_dict["data"] features.append(self.extract_features(data)) if self.adjective in data_dict["adjectives"]: labels.append(1) else: labels.append(0) features = np.array(features).squeeze() labels = np.array(labels).flatten() if store: self.features = features self.labels = labels return features, labels
def main(base_directory, db, stats_folder): tga_directory = os.path.join(base_directory, "tgas") stats_directory = os.path.join(base_directory, stats_folder) num_objects = len(list(utilities.iterator_over_object_groups(db))) for adj in utilities.adjectives: now = time.time() filename = "%s.pkl" % adj if not test_file(filename, stats_directory): print "File %s already exist, skipping it" % filename continue try: clf = tga_kernel_chain.TGAEnsemble(adj, tga_directory) except ValueError: print "Adjective %s does not exist, skipping it" % adj print "\nCreating stats for adjective ", adj positives = 0. total = 0. stat = {} for obj in utilities.iterator_over_object_groups(db): total += 1. res = clf.classification_labels(obj) output = np.mean(res) > 0.5 true_label = (clf.adjective in obj.adjectives[:]) if true_label == output: positives += 1. stat[obj._v_name] = dict(result=output, true_label=true_label, classifiers=res) totaltime = (time.time() - now) / 60.0 sys.stdout.write( "\r%d/%d ->\t precision: %.2f, time: %f min" % (total, num_objects, positives / total, totaltime)) sys.stdout.flush() stat['result'] = positives / total full_pathname = os.path.join(stats_directory, filename) f = open(full_pathname, "w") cPickle.dump(stat, f, cPickle.HIGHEST_PROTOCOL) f.close()
def create_feature_set(database, feature_dict, object_set, adjective): """ For each object in the database, run classifier.extract_features. All the features are then collected in a matrix. If the classifier's adjective is among the objects' then the feature is labeled with 1, otherwise 0. Parameters: database: either a string or an open pytables file. Returns the features and the labels as two 2-dimensional matrices. """ labels = [] features = [] object_names = [] object_ids = [] print "Building adjective %s" % adjective # For each object in the database, extract the phase and sensor # data for for group in utilities.iterator_over_object_groups(database): # Pull data from h5 database data_dict = utilities.dict_from_h5_group(group) object_name = data_dict["name"] name = object_name.split('_') # Skip over object if it is in the set # Training set will skip over test objects # and vice versa if object_name not in object_set: continue # print "Loading object ", object_name # Store object name object_names.append(object_name) object_ids.append(int(name[-2])) # Extract features feature_obj = feature_dict[object_name] feature_vector = createFeatureVector(feature_obj, static_features) features.append(feature_vector) # Store off the labels here if adjective in data_dict["adjectives"]: labels.append(1) else: labels.append(0) set_dict = defaultdict(dict) set_dict['features'] = np.array(features).squeeze() set_dict['labels'] = np.array(labels).flatten() set_dict['object_names'] = np.array(object_names).flatten() set_dict['object_ids'] = np.array(object_ids).flatten() return set_dict
def test_on_database(self, database):
    """Return the fraction of object groups this classifier labels correctly.

    Parameters:
        database: open pytables file of object groups.

    Returns accuracy in [0, 1] over all objects in the database.
    """
    correct = 0.0
    seen = 0.0
    for group in utilities.iterator_over_object_groups(database):
        prediction = self.predict(group)
        predicted_positive = (prediction[0] == 1)
        actually_positive = (self.adjective in group.adjectives[:])
        if actually_positive == predicted_positive:
            correct += 1
        seen += 1
    return correct / seen
def create_feature_object_set(database, phase):
    """Collect raw electrode traces per motion phase from the database.

    NOTE(review): this function looks unfinished -- the concatenated
    arrays below are never used and the returned containers are always
    empty; the original feature-extraction body was commented out.
    Confirm the intended output with the author.

    Parameters:
        database: either a string or an open pytables file.
        phase: unused; it is shadowed by the loop over the module-level
               `phases` list -- TODO confirm `phases` exists at module scope.

    Returns an (empty defaultdict, empty numpy array) pair.
    """
    feature_objs = defaultdict(dict)
    feature_vect = []

    # Accumulate every object's electrode data, keyed by phase name.
    all_values = dict()
    for phase in phases:
        all_values[phase] = []

    for group in utilities.iterator_over_object_groups(database):
        data_dict = utilities.dict_from_h5_group(group)
        for phase in phases:
            all_values[phase].append(data_dict['data'][phase]['electrodes'])

    # BUG FIX: removed leftover `import pdb; pdb.set_trace()` breakpoints
    # that halted any non-interactive run of this function, and deleted
    # the large commented-out feature-extraction block.
    slide = np.concatenate(all_values['SLIDE_5CM'])
    squeeze = np.concatenate(all_values['SQUEEZE_SET_PRESSURE_SLOW'])
    hold = np.concatenate(all_values['HOLD_FOR_10_SECONDS'])
    fast_slide = np.concatenate(all_values['MOVE_DOWN_5CM'])

    return feature_objs, np.array(feature_vect)
def create_feature_object_set(database, phase):
    """Collect raw electrode traces per motion phase from the database.

    NOTE(review): this function looks unfinished -- the concatenated
    arrays below are never used and the returned containers are always
    empty; the original feature-extraction body was commented out.
    Confirm the intended output with the author.

    Parameters:
        database: either a string or an open pytables file.
        phase: unused; it is shadowed by the loop over the module-level
               `phases` list -- TODO confirm `phases` exists at module scope.

    Returns an (empty defaultdict, empty numpy array) pair.
    """
    feature_objs = defaultdict(dict)
    feature_vect = []

    # Accumulate every object's electrode data, keyed by phase name.
    all_values = dict()
    for phase in phases:
        all_values[phase] = []

    for group in utilities.iterator_over_object_groups(database):
        data_dict = utilities.dict_from_h5_group(group)
        for phase in phases:
            all_values[phase].append(data_dict['data'][phase]['electrodes'])

    # BUG FIX: removed the leftover `pdb.set_trace()` breakpoint that
    # halted any non-interactive run of this function, and deleted the
    # large commented-out feature-extraction block.
    slide = np.concatenate(all_values['SLIDE_5CM'])
    squeeze = np.concatenate(all_values['SQUEEZE_SET_PRESSURE_SLOW'])
    hold = np.concatenate(all_values['HOLD_FOR_10_SECONDS'])
    fast_slide = np.concatenate(all_values['MOVE_DOWN_5CM'])

    return feature_objs, np.array(feature_vect)
def test_adjective(classifier, database, test_object_names, adjective_report): true_positives = 0.0 true_negatives = 0.0 false_positives = 0.0 false_negatives = 0.0 false_positive_list = [] false_negative_list = [] true_positive_list = [] true_negative_list = [] print '\n \nTesting Adjective: %s' % classifier.adjective for group in utilities.iterator_over_object_groups(database): assert isinstance(group, tables.Group) data_dict = utilities.dict_from_h5_group(group) if data_dict['name'] not in test_object_names: continue features = classifier.extract_features(data_dict["data"]) output = classifier.predict(features) # For this object - find out if the adjective applies # True label is 0 if adjective is false for this adjective true_labels = data_dict['adjectives'] if classifier.adjective in true_labels: true_label = 1 else: true_label = 0 # Determine if the true label and classifier prediction match if true_label == 1: if output[0] == 1: true_positives += 1.0 true_positive_list.append(data_dict['name']) else: false_negatives += 1.0 false_negative_list.append(data_dict['name']) else: # label is 0 if output[0] == 1: false_positives += 1.0 false_positive_list.append(data_dict['name']) else: true_negatives += 1.0 true_negative_list.append(data_dict['name']) # Compute statistics for the adjective precision = true_positives / (true_positives + false_positives) recall = true_positives / (true_positives + false_negatives) try: f1 = 2.0 * precision*recall / (precision + recall) except ZeroDivisionError: f1 = 0 print "Precision: %f, Recall: %f, F1: %f \n" % (precision, recall, f1) adjective_report.write("%s, %f, %f, %f\n" % (classifier.adjective, precision, recall, f1)) print "%d False Positive Objects are: %s \n" % (false_positives, sorted(false_positive_list)) print "%d False Negative Objects are: %s \n" % (false_negatives, sorted(false_negative_list)) print "%d True Positive Objects are: %s\n" % (true_positives, sorted(true_positive_list)) print "%d True Negative Objects 
are: %s\n" % (true_negatives, sorted(true_negative_list)) return (precision, recall, f1)
def create_hmm_feature_set(database, object_set, adj_obj, phase_list): """ For each object in the database, run classifier.extract_features. All the features are then collected in a matrix. If the classifier's adjective is among the objects' then the feature is labeled with 1, otherwise 0. Parameters: database: either a string or an open pytables file. Returns the features and the labels as two 2-dimensional matrices. """ print "Building adjective %s" % adj_obj.adjective # For each object in the database, extract the phase and sensor # data for for group in utilities.iterator_over_object_groups(database): # Pull data from h5 database data_dict = utilities.dict_from_h5_group(group) object_name = data_dict["name"] name = object_name.split('_') labels = [] # Skip over object if it is in the set # Training set will skip over test objects # and vice versa if object_name in object_set: # Extract features feature_data = data_dict["data"] for i, phase_obj in enumerate(phase_list): scores = [] set_dict = defaultdict(dict) if phase_obj.build == False: continue for sensor, data in feature_data[phase_obj.phase].iteritems(): try: chain = adj_obj.chains[phase_obj.phase][sensor] scores.append(chain.score(data)) except KeyError: pass #import pdb; pdb.set_trace() phase_obj.features.append(scores) # Sort out the objec's label if adj_obj.adjective in data_dict["adjectives"]: phase_obj.labels.append(1) else: phase_obj.labels.append(0) phase_obj.object_names.append(object_name) phase_obj.object_ids.append(int(name[-2])) #Iterate over all phases, convert to dictionaries and sqeeze #place all phases in a list set_dict_list = [] for phase_obj in phase_list: set_dict = defaultdict(dict) if phase_obj.build == True: set_dict['features'] = np.array(phase_obj.features).squeeze() set_dict['labels'] = np.array(phase_obj.labels).flatten() set_dict['object_names'] = np.array(phase_obj.object_names).flatten() set_dict['object_ids'] = np.array(phase_obj.object_ids).flatten() phase_obj.wipe_data() 
#import pdb; pdb.set_trace() set_dict_list.append(set_dict) return set_dict_list
if __name__ == "__main__": if len(sys.argv) != 3: print "Usage %s h5_database classifiers" % sys.argv[0] sys.exit(1) database = tables.openFile(sys.argv[1]) classifiers = cPickle.load(open(sys.argv[2])) f1s = 0 precs = 0 recalls = 0 total = 0 for group in utilities.iterator_over_object_groups(database): try: p, r, f1 = test_object(group, classifiers) precs += p recalls += r f1s += f1 total += 1 except ValueError: print "Skipping values" continue print "Average f1s: ", f1s / total print "Average precision: ", precs / total print "Average recall: ", recalls / total
def create_hmm_feature_set(database, object_set, adj_obj, phase_list): """ For each object in the database, run classifier.extract_features. All the features are then collected in a matrix. If the classifier's adjective is among the objects' then the feature is labeled with 1, otherwise 0. Parameters: database: either a string or an open pytables file. Returns the features and the labels as two 2-dimensional matrices. """ print "Building adjective %s" % adj_obj.adjective # For each object in the database, extract the phase and sensor # data for for group in utilities.iterator_over_object_groups(database): # Pull data from h5 database data_dict = utilities.dict_from_h5_group(group) object_name = data_dict["name"] name = object_name.split('_') labels = [] # Skip over object if it is in the set # Training set will skip over test objects # and vice versa if object_name in object_set: # Extract features feature_data = data_dict["data"] for i, phase_obj in enumerate(phase_list): scores = [] set_dict = defaultdict(dict) if phase_obj.build == False: continue for sensor, data in feature_data[phase_obj.phase].iteritems(): try: chain = adj_obj.chains[phase_obj.phase][sensor] scores.append(chain.score(data)) except KeyError: pass #import pdb; pdb.set_trace() phase_obj.features.append(scores) # Sort out the objec's label if adj_obj.adjective in data_dict["adjectives"]: phase_obj.labels.append(1) else: phase_obj.labels.append(0) phase_obj.object_names.append(object_name) phase_obj.object_ids.append(int(name[-2])) #Iterate over all phases, convert to dictionaries and sqeeze #place all phases in a list set_dict_list = [] for phase_obj in phase_list: set_dict = defaultdict(dict) if phase_obj.build == True: set_dict['features'] = np.array(phase_obj.features).squeeze() set_dict['labels'] = np.array(phase_obj.labels).flatten() set_dict['object_names'] = np.array( phase_obj.object_names).flatten() set_dict['object_ids'] = np.array(phase_obj.object_ids).flatten() phase_obj.wipe_data() 
#import pdb; pdb.set_trace() set_dict_list.append(set_dict) return set_dict_list
def test_adjective(classifier, database, test_object_names, adjective_report): true_positives = 0.0 true_negatives = 0.0 false_positives = 0.0 false_negatives = 0.0 false_positive_list = [] false_negative_list = [] true_positive_list = [] true_negative_list = [] print '\n \nTesting Adjective: %s' % classifier.adjective for group in utilities.iterator_over_object_groups(database): assert isinstance(group, tables.Group) data_dict = utilities.dict_from_h5_group(group) if data_dict['name'] not in test_object_names: continue features = classifier.extract_features(data_dict["data"]) output = classifier.predict(features) # For this object - find out if the adjective applies # True label is 0 if adjective is false for this adjective true_labels = data_dict['adjectives'] if classifier.adjective in true_labels: true_label = 1 else: true_label = 0 # Determine if the true label and classifier prediction match if true_label == 1: if output[0] == 1: true_positives += 1.0 true_positive_list.append(data_dict['name']) else: false_negatives += 1.0 false_negative_list.append(data_dict['name']) else: # label is 0 if output[0] == 1: false_positives += 1.0 false_positive_list.append(data_dict['name']) else: true_negatives += 1.0 true_negative_list.append(data_dict['name']) # Compute statistics for the adjective precision = true_positives / (true_positives + false_positives) recall = true_positives / (true_positives + false_negatives) try: f1 = 2.0 * precision * recall / (precision + recall) except ZeroDivisionError: f1 = 0 print "Precision: %f, Recall: %f, F1: %f \n" % (precision, recall, f1) adjective_report.write("%s, %f, %f, %f\n" % (classifier.adjective, precision, recall, f1)) print "%d False Positive Objects are: %s \n" % ( false_positives, sorted(false_positive_list)) print "%d False Negative Objects are: %s \n" % ( false_negatives, sorted(false_negative_list)) print "%d True Positive Objects are: %s\n" % (true_positives, sorted(true_positive_list)) print "%d True Negative 
Objects are: %s\n" % (true_negatives, sorted(true_negative_list)) return (precision, recall, f1)
if __name__ == "__main__": if len(sys.argv) != 3: print "Usage %s h5_database classifiers" % sys.argv[0] sys.exit(1) database = tables.openFile(sys.argv[1]) classifiers = cPickle.load(open(sys.argv[2])) f1s= 0 precs = 0 recalls = 0 total = 0 for group in utilities.iterator_over_object_groups(database): try: p, r, f1 = test_object(group, classifiers) precs += p recalls += r f1s += f1 total += 1 except ValueError: print "Skipping values" continue print "Average f1s: ", f1s / total print "Average precision: ", precs / total print "Average recall: ", recalls / total