def create_feature_object_set(database, phase):
  
    feature_objs = defaultdict(dict) 
    feature_vect = [] 
    norm_phase = "MOVE_ARM_START_POSITION"

    # For each object in the database, extract the phase and sensor
    # data for 
    #temp = [g for g in utilities.iterator_over_object_groups(database)] 
    for group in utilities.iterator_over_object_groups(database):
    #for group in temp[0:2]:   
        # Pull data from h5 database
        data_dict = utilities.dict_from_h5_group(group, [phase])
        norm_dict = utilities.dict_from_h5_group(group, [norm_phase])
        data = data_dict["data"][phase]
        norm_data = norm_dict["data"][norm_phase]
        object_name = data_dict["name"]
        name = object_name.split('_') 
    
        print "Loading object ", object_name
       
        # Extract features
        static_feature_phase, feats = extract_static_features(data, norm_data)
        # Store information about object
        static_feature_phase.labels = data_dict["adjectives"]
        static_feature_phase.name = object_name
        static_feature_phase.detailed_state = phase
        static_feature_phase.object_id = int(name[-2])
        static_feature_phase.run_num = int(name[-1])

        feature_objs[object_name] = static_feature_phase
        feature_vect.append(feats)

    return feature_objs, np.array(feature_vect)
Пример #2
0
def create_feature_object_set(database, phase):

    feature_objs = defaultdict(dict)
    feature_vect = []
    norm_phase = "MOVE_ARM_START_POSITION"

    # For each object in the database, extract the phase and sensor
    # data for
    #temp = [g for g in utilities.iterator_over_object_groups(database)]
    for group in utilities.iterator_over_object_groups(database):
        #for group in temp[0:2]:
        # Pull data from h5 database
        data_dict = utilities.dict_from_h5_group(group, [phase])
        norm_dict = utilities.dict_from_h5_group(group, [norm_phase])
        data = data_dict["data"][phase]
        norm_data = norm_dict["data"][norm_phase]
        object_name = data_dict["name"]
        name = object_name.split('_')

        print "Loading object ", object_name

        # Extract features
        static_feature_phase, feats = extract_static_features(data, norm_data)
        # Store information about object
        static_feature_phase.labels = data_dict["adjectives"]
        static_feature_phase.name = object_name
        static_feature_phase.detailed_state = phase
        static_feature_phase.object_id = int(name[-2])
        static_feature_phase.run_num = int(name[-1])

        feature_objs[object_name] = static_feature_phase
        feature_vect.append(feats)

    return feature_objs, np.array(feature_vect)
    def create_features_set(self, database, store = False, verbose = False):
        """
        For each object in the database, run classifier.extract_features. All the
        features are then collected in a matrix.
        If the classifier's adjective is among the objects' then the feature
        is labeled with 1, otherwise 0. 

        Parameters:
        database: either a string or an open pytables file.
        
        Returns the features and the labels as two 2-dimensional matrices.
        """
        labels = []
        features = []
        for group in utilities.iterator_over_object_groups(database):
            data_dict = utilities.dict_from_h5_group(group)
            if verbose:
                print "Loading object ", data_dict["name"]
            data = data_dict["data"]            
            features.append(self.extract_features(data))
            if self.adjective in data_dict["adjectives"]:
                labels.append(1)
            else:
                labels.append(0)
        
        features = np.array(features).squeeze()
        labels = np.array(labels).flatten()
        if store:
            self.features = features
            self.labels = labels
        return features, labels
    def create_features_set(self, database, store=False, verbose=False):
        """
        For each object in the database, run classifier.extract_features. All the
        features are then collected in a matrix.
        If the classifier's adjective is among the objects' then the feature
        is labeled with 1, otherwise 0. 

        Parameters:
        database: either a string or an open pytables file.
        
        Returns the features and the labels as two 2-dimensional matrices.
        """
        labels = []
        features = []
        for group in utilities.iterator_over_object_groups(database):
            data_dict = utilities.dict_from_h5_group(group)
            if verbose:
                print "Loading object ", data_dict["name"]
            data = data_dict["data"]
            features.append(self.extract_features(data))
            if self.adjective in data_dict["adjectives"]:
                labels.append(1)
            else:
                labels.append(0)

        features = np.array(features).squeeze()
        labels = np.array(labels).flatten()
        if store:
            self.features = features
            self.labels = labels
        return features, labels
def test_object(obj, classifiers):
    assert isinstance(obj, tables.Group)
    data_dict = utilities.dict_from_h5_group(obj)    
    true_adjectives = [a for a in sorted(data_dict["adjectives"])
                       if a not in to_remove]

    if len(true_adjectives) == 0:
        print "Object in database has no adjectives!"
        test_classifier = False
    else:
        test_classifier = True
        print "Object %s has adjectives %s" %(data_dict["name"],
                                              " ".join(true_adjectives)
                                              )
    print "Positive classifiers:"    
    
    positives = []
    for clf in classifiers:
        if clf.adjective in to_remove:
            continue
        assert isinstance(clf, FeaturesAdjectiveClassifier)
        features = clf.extract_features(data_dict["data"])
        output = clf.predict(features)
        if output[0] == 1:
            positives.append(clf.adjective)
   
    if not test_classifier:
        print "Results can't be shown"
        raise ValueError()
    if len(positives) == 0:
        "No classifiers output!"
        return (0.0, 0.0, 0.0)

    positives = sorted(positives)
    print "\t" + " ".join(positives)
    
    cls_set = set(positives)
    true_set = set(true_adjectives)
    intersection  = cls_set & true_set
    difference = true_set - cls_set

    true_length = float(len(true_set))
    clf_length = float(len(cls_set))

    true_positives = len(intersection) / clf_length
    false_negatives = (true_length - len(intersection)) / true_length
    false_positives = len(cls_set - true_set) / clf_length
    
    print "True posititives %f, False positivies %f, False negatives %f" %( true_positives,
                                                                        false_positives,
                                                                       false_negatives)
    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    try:
        f1 = 2.0 * precision*recall / (precision + recall)
    except ZeroDivisionError:
        f1 = 0
    print "Precision: %f, Recall: %f, F1: %f" % (precision, recall, f1)

    return (precision, recall, f1)
def test_object(obj, classifiers):
    assert isinstance(obj, tables.Group)
    data_dict = utilities.dict_from_h5_group(obj)
    true_adjectives = [
        a for a in sorted(data_dict["adjectives"]) if a not in to_remove
    ]

    if len(true_adjectives) == 0:
        print "Object in database has no adjectives!"
        test_classifier = False
    else:
        test_classifier = True
        print "Object %s has adjectives %s" % (data_dict["name"],
                                               " ".join(true_adjectives))
    print "Positive classifiers:"

    positives = []
    for clf in classifiers:
        if clf.adjective in to_remove:
            continue
        assert isinstance(clf, FeaturesAdjectiveClassifier)
        features = clf.extract_features(data_dict["data"])
        output = clf.predict(features)
        if output[0] == 1:
            positives.append(clf.adjective)

    if not test_classifier:
        print "Results can't be shown"
        raise ValueError()
    if len(positives) == 0:
        "No classifiers output!"
        return (0.0, 0.0, 0.0)

    positives = sorted(positives)
    print "\t" + " ".join(positives)

    cls_set = set(positives)
    true_set = set(true_adjectives)
    intersection = cls_set & true_set
    difference = true_set - cls_set

    true_length = float(len(true_set))
    clf_length = float(len(cls_set))

    true_positives = len(intersection) / clf_length
    false_negatives = (true_length - len(intersection)) / true_length
    false_positives = len(cls_set - true_set) / clf_length

    print "True posititives %f, False positivies %f, False negatives %f" % (
        true_positives, false_positives, false_negatives)
    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    try:
        f1 = 2.0 * precision * recall / (precision + recall)
    except ZeroDivisionError:
        f1 = 0
    print "Precision: %f, Recall: %f, F1: %f" % (precision, recall, f1)

    return (precision, recall, f1)
    def predict(self, X):
        """
        Predict with the underlying SVC.

        X: either a tables.Group, whose data is first run through
           self.extract_features, or features that are passed to
           self.svc.predict as they are.

        Returns whatever self.svc.predict returns.
        """
        if not isinstance(X, tables.Group):
            return self.svc.predict(X)

        record = utilities.dict_from_h5_group(X)
        return self.svc.predict(self.extract_features(record["data"]))
 def predict(self, X):
     """
     Run self.svc.predict on X.

     When X is a tables.Group its data is loaded and converted with
     self.extract_features first; anything else is treated as ready-made
     features.
     """
     group_given = isinstance(X, tables.Group)
     if group_given:
         loaded = utilities.dict_from_h5_group(X)
         X = self.extract_features(loaded["data"])

     return self.svc.predict(X)
def create_feature_set(database, feature_dict, object_set, adjective):
    """
    For each object in the database, run classifier.extract_features. All the
    features are then collected in a matrix.
    If the classifier's adjective is among the objects' then the feature
    is labeled with 1, otherwise 0. 

    Parameters:
    database: either a string or an open pytables file.
        
    Returns the features and the labels as two 2-dimensional matrices.
    """
    labels = []
    features = []
    object_names = []
    object_ids = []

    print "Building adjective %s" % adjective

    # For each object in the database, extract the phase and sensor
    # data for
    for group in utilities.iterator_over_object_groups(database):
        # Pull data from h5 database
        data_dict = utilities.dict_from_h5_group(group)
        object_name = data_dict["name"]
        name = object_name.split('_')

        # Skip over object if it is in the set
        # Training set will skip over test objects
        # and vice versa
        if object_name not in object_set:
            continue


#        print "Loading object ", object_name

# Store object name
        object_names.append(object_name)
        object_ids.append(int(name[-2]))

        # Extract features
        feature_obj = feature_dict[object_name]
        feature_vector = createFeatureVector(feature_obj, static_features)
        features.append(feature_vector)

        # Store off the labels here
        if adjective in data_dict["adjectives"]:
            labels.append(1)
        else:
            labels.append(0)

    set_dict = defaultdict(dict)
    set_dict['features'] = np.array(features).squeeze()
    set_dict['labels'] = np.array(labels).flatten()
    set_dict['object_names'] = np.array(object_names).flatten()
    set_dict['object_ids'] = np.array(object_ids).flatten()

    return set_dict
def create_feature_set(database, feature_dict, object_set, adjective):
    """
    For each object in the database, run classifier.extract_features. All the
    features are then collected in a matrix.
    If the classifier's adjective is among the objects' then the feature
    is labeled with 1, otherwise 0. 

    Parameters:
    database: either a string or an open pytables file.
        
    Returns the features and the labels as two 2-dimensional matrices.
    """
    labels = []
    features = []
    object_names = []
    object_ids = []

    print "Building adjective %s" % adjective

    # For each object in the database, extract the phase and sensor
    # data for 
    for group in utilities.iterator_over_object_groups(database):
        # Pull data from h5 database
        data_dict = utilities.dict_from_h5_group(group)
        object_name = data_dict["name"]
        name = object_name.split('_') 

        # Skip over object if it is in the set
        # Training set will skip over test objects
        # and vice versa        
        if object_name not in object_set:
            continue
 
#        print "Loading object ", object_name
        
        # Store object name
        object_names.append(object_name)
        object_ids.append(int(name[-2]))

        # Extract features
        feature_obj = feature_dict[object_name] 
        feature_vector = createFeatureVector(feature_obj, static_features)
        features.append(feature_vector)

        # Store off the labels here  
        if adjective in data_dict["adjectives"]:
            labels.append(1)
        else:
            labels.append(0)

    set_dict = defaultdict(dict) 
    set_dict['features'] = np.array(features).squeeze()
    set_dict['labels'] = np.array(labels).flatten()
    set_dict['object_names'] = np.array(object_names).flatten()
    set_dict['object_ids'] = np.array(object_ids).flatten()

    return set_dict
Пример #11
0
def load_dataset(database, adjective, phase, sensor):
    """Load the train and test data of one adjective for a phase and sensor.

    Raises ValueError when the adjective, phase or sensor is not among the
    known ones.

    Returns (train_set, test_set): two lists holding the data[phase][sensor]
    entry of every child of the adjective's train and test groups under
    "/train_test_sets".
    """
    if adjective not in adjectives:
        raise ValueError("%s is not a known adjective" % adjective)
    if phase not in phases:
        raise ValueError("%s is not a known phase" % phase)
    if sensor not in sensors:
        raise ValueError("%s is not a known sensor" % sensor)

    def read_children(parent_group):
        # Load the requested phase/sensor data of every child group
        loaded = []
        for child in parent_group._v_children.values():
            record = utilities.dict_from_h5_group(child, [phase], [sensor])
            loaded.append(record["data"][phase][sensor])
        return loaded

    train_set = read_children(
        database.getNode("/train_test_sets", adjective).train)
    test_set = read_children(
        database.getNode("/train_test_sets", adjective).test)

    return train_set, test_set
def create_feature_object_set(database, phase):
    """
    Gather the electrode data of every phase for every object in the database.

    NOTE(review): the static feature extraction is disabled in this variant,
    so the returned dictionary and array are always empty and the gathered
    electrode values are not used yet.
    TODO: re-enable extract_static_features or retire this variant.

    Parameters:
    database: passed straight to utilities.iterator_over_object_groups.
    phase: currently unused; kept so existing callers keep working.

    Returns a tuple (feature_objs, feature_vect): an empty defaultdict and an
    empty numpy array.
    """
    feature_objs = defaultdict(dict)
    feature_vect = []

    # One list of electrode readings per phase of the module-level `phases`.
    # The loop variable is deliberately not called `phase`, so the parameter
    # is no longer overwritten.
    all_values = dict((phase_name, []) for phase_name in phases)

    for group in utilities.iterator_over_object_groups(database):
        # Pull data from h5 database
        data_dict = utilities.dict_from_h5_group(group)

        for phase_name in phases:
            all_values[phase_name].append(
                data_dict['data'][phase_name]['electrodes'])

    # NOTE(review): these per-phase aggregates are computed but not returned
    # or used; kept so missing phases or an empty database still fail here.
    slide = np.concatenate(all_values['SLIDE_5CM'])
    squeeze = np.concatenate(all_values['SQUEEZE_SET_PRESSURE_SLOW'])
    hold = np.concatenate(all_values['HOLD_FOR_10_SECONDS'])
    fast_slide = np.concatenate(all_values['MOVE_DOWN_5CM'])

    return feature_objs, np.array(feature_vect)
Пример #13
0
def create_feature_object_set(database, phase):
    """
    Collect the electrode readings of all phases for each database object.

    NOTE(review): feature extraction is switched off in this variant; the
    function always returns an empty dictionary and an empty array, and the
    collected electrode values are not used yet.
    TODO: restore the extract_static_features step or remove this variant.

    Parameters:
    database: handed to utilities.iterator_over_object_groups.
    phase: not used at the moment; kept for interface compatibility.

    Returns (feature_objs, feature_vect): an empty defaultdict and an empty
    numpy array.
    """
    feature_objs = defaultdict(dict)
    feature_vect = []

    # Electrode readings grouped by the phases of the module-level `phases`.
    # A separate loop name keeps the `phase` parameter from being clobbered.
    all_values = dict()
    for phase_name in phases:
        all_values[phase_name] = []

    for group in utilities.iterator_over_object_groups(database):
        # Pull data from h5 database
        data_dict = utilities.dict_from_h5_group(group)

        for phase_name in phases:
            electrodes = data_dict['data'][phase_name]['electrodes']
            all_values[phase_name].append(electrodes)

    # NOTE(review): the aggregates below are neither returned nor used; they
    # are kept so missing phases or an empty database still raise here.
    slide = np.concatenate(all_values['SLIDE_5CM'])
    squeeze = np.concatenate(all_values['SQUEEZE_SET_PRESSURE_SLOW'])
    hold = np.concatenate(all_values['HOLD_FOR_10_SECONDS'])
    fast_slide = np.concatenate(all_values['MOVE_DOWN_5CM'])

    return feature_objs, np.array(feature_vect)
Пример #14
0
 def classification_labels(self, X):
     """
     X: list of dictionaries d, each with the structure:
         d[phase][sensor] = data
     """
     if isinstance(X, tables.Group):
         X = utilities.dict_from_h5_group(X)['data']
     
     if type(X) is not list:
         X = [X]
     ret = []
     for x in X:        
         scores = []
         for phase, v in x.iteritems():                
             for sensor, data in v.iteritems():
                 try:
                     chain = self.chains[phase][sensor]                            
                     scores.append(chain.predict(data)[0])
                 except KeyError:
                     print "No key for %s %s" %(phase, sensor)
         ret.append(scores)
     return ret            
    def extract_features(self, X):
        """
        Score the data of one or more objects with this classifier's chains.

        X: a dictionary d with the structure d[phase][sensor] = data, a list
           of such dictionaries, or a tables.Group from which that
           dictionary is loaded.

        Returns a list with one entry per object; each entry is the list of
        chain.score(data) results for every (phase, sensor) pair. Pairs that
        raise KeyError (e.g. no chain in self.chains) are silently skipped.
        """
        if isinstance(X, tables.Group):
            # dict_from_h5_group returns the whole object record (name,
            # adjectives, data); the d[phase][sensor] mapping iterated below
            # lives under "data".
            X = utilities.dict_from_h5_group(X)["data"]

        # A single object is handled like a one-element list
        if type(X) is not list:
            X = [X]
        ret = []
        for x in X:
            scores = []
            for phase, v in x.iteritems():
                for sensor, data in v.iteritems():
                    try:
                        chain = self.chains[phase][sensor]
                        scores.append(chain.score(data))
                    except KeyError:
                        # Best effort: skip this phase/sensor pair
                        pass
            ret.append(scores)
        return ret
 def extract_features(self, X):
     """
     Compute the chain scores of one or more objects.

     X: a dictionary d with the structure d[phase][sensor] = data, a list
        of such dictionaries, or a tables.Group from which that dictionary
        is loaded.

     Returns a list with one entry per object; each entry is the list of
     chain.score(data) results for every (phase, sensor) pair. Pairs that
     raise KeyError (e.g. no chain in self.chains) are silently skipped.
     """
     if isinstance(X, tables.Group):
         # The loaded record also holds the name and adjectives; only its
         # "data" entry has the d[phase][sensor] structure iterated below.
         X = utilities.dict_from_h5_group(X)["data"]

     # A single object is handled like a one-element list
     if type(X) is not list:
         X = [X]
     ret = []
     for x in X:
         scores = []
         for phase, v in x.iteritems():
             for sensor, data in v.iteritems():
                 try:
                     chain = self.chains[phase][sensor]
                     scores.append(chain.score(data))
                 except KeyError:
                     # Best effort: skip this phase/sensor pair
                     pass
         ret.append(scores)
     return ret
Пример #17
0
def load_dataset(database, adjective, phase, sensor):
    """Build labeled train and test sets for one adjective, phase and sensor.

    The children of the adjective's own train and test groups are the
    positive examples (label 1). The children of every other adjective's
    train and test groups under "/train_test_sets" that were not included
    before are the negative examples (label 0). A child name is included at
    most once, in whichever set reaches it first.

    Raises ValueError when the adjective, phase or sensor is not a known one.

    Returns ((train_set, train_label), (test_set, test_label)).
    """
    if adjective not in adjectives:
        raise ValueError("%s is not a known adjective" % adjective)
    if phase not in phases:
        raise ValueError("%s is not a known phase" % phase)
    if sensor not in sensors:
        raise ValueError("%s is not a known sensor" % sensor)

    seen_names = set()
    train_set = []
    test_set = []

    def add_unseen(h5_group, target):
        # Append the phase/sensor data of each not-yet-seen child to target
        for child_name, child in h5_group._v_children.iteritems():
            if child_name in seen_names:
                continue
            record = utilities.dict_from_h5_group(child, [phase], [sensor])
            target.append(record["data"][phase][sensor])
            seen_names.add(child_name)

    # Positive class: the adjective's own train and test groups
    add_unseen(database.getNode("/train_test_sets", adjective).train,
               train_set)
    add_unseen(database.getNode("/train_test_sets", adjective).test,
               test_set)

    train_label = [1] * len(train_set)
    test_label = [1] * len(test_set)

    # Negative class: everything new found under the other adjectives
    for other_adj in database.getNode("/train_test_sets"):
        if other_adj._v_name == adjective:
            continue
        add_unseen(other_adj.train, train_set)
        add_unseen(other_adj.test, test_set)

    # Whatever was appended after the positives gets label 0
    train_label += [0] * (len(train_set) - len(train_label))
    test_label += [0] * (len(test_set) - len(test_label))

    return (train_set, train_label), (test_set, test_label)
Пример #18
0
def create_hmm_feature_set(database, object_set, adj_obj, phase_list):
    """ 
    For each object in the database, run classifier.extract_features. All the
    features are then collected in a matrix.
    If the classifier's adjective is among the objects' then the feature
    is labeled with 1, otherwise 0. 

    Parameters:
    database: either a string or an open pytables file.
        
    Returns the features and the labels as two 2-dimensional matrices.
    """
    print "Building adjective %s" % adj_obj.adjective

    # For each object in the database, extract the phase and sensor
    # data for
    for group in utilities.iterator_over_object_groups(database):
        # Pull data from h5 database
        data_dict = utilities.dict_from_h5_group(group)
        object_name = data_dict["name"]
        name = object_name.split('_')
        labels = []
        # Skip over object if it is in the set
        # Training set will skip over test objects
        # and vice versa
        if object_name in object_set:

            # Extract features
            feature_data = data_dict["data"]

            for i, phase_obj in enumerate(phase_list):
                scores = []
                set_dict = defaultdict(dict)
                if phase_obj.build == False:
                    continue

                for sensor, data in feature_data[phase_obj.phase].iteritems():
                    try:
                        chain = adj_obj.chains[phase_obj.phase][sensor]
                        scores.append(chain.score(data))
                    except KeyError:
                        pass
                #import pdb; pdb.set_trace()
                phase_obj.features.append(scores)
                # Sort out the objec's label
                if adj_obj.adjective in data_dict["adjectives"]:
                    phase_obj.labels.append(1)
                else:
                    phase_obj.labels.append(0)
                phase_obj.object_names.append(object_name)
                phase_obj.object_ids.append(int(name[-2]))

    #Iterate over all phases, convert to dictionaries and sqeeze
    #place all phases in a list
    set_dict_list = []
    for phase_obj in phase_list:
        set_dict = defaultdict(dict)
        if phase_obj.build == True:
            set_dict['features'] = np.array(phase_obj.features).squeeze()
            set_dict['labels'] = np.array(phase_obj.labels).flatten()
            set_dict['object_names'] = np.array(
                phase_obj.object_names).flatten()
            set_dict['object_ids'] = np.array(phase_obj.object_ids).flatten()
            phase_obj.wipe_data()
            #import pdb; pdb.set_trace()
        set_dict_list.append(set_dict)
    return set_dict_list
Пример #19
0
def test_adjective(classifier, database, test_object_names, adjective_report):

    true_positives = 0.0
    true_negatives = 0.0
    false_positives = 0.0
    false_negatives = 0.0

    false_positive_list = []
    false_negative_list = []
    true_positive_list = []
    true_negative_list = []

    print '\n \nTesting Adjective: %s' % classifier.adjective

    for group in utilities.iterator_over_object_groups(database):

        assert isinstance(group, tables.Group)
        data_dict = utilities.dict_from_h5_group(group)

        if data_dict['name'] not in test_object_names:
            continue

        features = classifier.extract_features(data_dict["data"])
        output = classifier.predict(features)

        # For this object - find out if the adjective applies
        # True label is 0 if adjective is false for this adjective
        true_labels = data_dict['adjectives']
        if classifier.adjective in true_labels:
            true_label = 1
        else:
            true_label = 0

        # Determine if the true label and classifier prediction match
        if true_label == 1:
            if output[0] == 1:
                true_positives += 1.0
                true_positive_list.append(data_dict['name'])
            else:
                false_negatives += 1.0
                false_negative_list.append(data_dict['name'])
        else:  # label is 0
            if output[0] == 1:
                false_positives += 1.0
                false_positive_list.append(data_dict['name'])
            else:
                true_negatives += 1.0
                true_negative_list.append(data_dict['name'])

    # Compute statistics for the adjective
    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    try:
        f1 = 2.0 * precision * recall / (precision + recall)
    except ZeroDivisionError:
        f1 = 0
    print "Precision: %f, Recall: %f, F1: %f \n" % (precision, recall, f1)
    adjective_report.write("%s, %f, %f, %f\n" %
                           (classifier.adjective, precision, recall, f1))

    print "%d False Positive Objects are: %s \n" % (
        false_positives, sorted(false_positive_list))
    print "%d False Negative Objects are: %s \n" % (
        false_negatives, sorted(false_negative_list))
    print "%d True Positive Objects are: %s\n" % (true_positives,
                                                  sorted(true_positive_list))
    print "%d True Negative Objects are: %s\n" % (true_negatives,
                                                  sorted(true_negative_list))

    return (precision, recall, f1)
def load_dataset(database, adjective, phase, sensor):
    """Load positive and negative examples of an adjective for one phase and
    sensor.

    Positive examples (label 1) are the children of the adjective's own
    train and test groups; negative examples (label 0) are the children of
    all other adjectives' train and test groups under "/train_test_sets"
    that have not been included already. No child name is included twice.

    Raises ValueError for an unknown adjective, phase or sensor.

    Returns ((train_set, train_label), (test_set, test_label))."""
    if adjective not in adjectives:
        raise ValueError("%s is not a known adjective" % adjective)
    if phase not in phases:
        raise ValueError("%s is not a known phase" % phase)
    if sensor not in sensors:
        raise ValueError("%s is not a known sensor" % sensor)

    included_names = set()
    train_set, test_set = [], []

    def collect(children_of, into):
        # Add the phase/sensor data of every child that is not included yet
        for name, child in children_of._v_children.iteritems():
            if name not in included_names:
                loaded = utilities.dict_from_h5_group(child, [phase], [sensor])
                into.append(loaded["data"][phase][sensor])
                included_names.add(name)

    # Positives first, so they occupy the front of both lists
    collect(database.getNode("/train_test_sets", adjective).train, train_set)
    collect(database.getNode("/train_test_sets", adjective).test, test_set)
    positive_train = len(train_set)
    positive_test = len(test_set)

    # now take all the other adjectives for negative class
    for other_adj in database.getNode("/train_test_sets"):
        if other_adj._v_name != adjective:
            collect(other_adj.train, train_set)
            collect(other_adj.test, test_set)

    # Labels follow the list layout: positives first, negatives after
    train_label = [1] * positive_train + [0] * (len(train_set) - positive_train)
    test_label = [1] * positive_test + [0] * (len(test_set) - positive_test)

    return (train_set, train_label), (test_set, test_label)
def create_hmm_feature_set(database, object_set, adj_obj, phase_list):
    """
    Score the selected objects with the adjective's per-sensor chains.

    For every object group in the database whose name is in object_set,
    and for every phase in phase_list whose ``build`` flag is set, each
    sensor stream of that phase is scored with the chain stored at
    adj_obj.chains[phase][sensor].  The scores of one object form one
    feature row of that phase.  The row is labeled 1 if adj_obj.adjective
    is among the object's adjectives, otherwise 0.

    Parameters:
    database: handed unchanged to utilities.iterator_over_object_groups.
    object_set: container of object names to process; objects whose name
        is not in it are ignored.
    adj_obj: object exposing ``adjective`` and the nested mapping ``chains``
        (phase -> sensor -> object with a ``score`` method).
    phase_list: phase objects exposing ``phase``, ``build``, the lists
        ``features``, ``labels``, ``object_names`` and ``object_ids``, and
        a ``wipe_data()`` method.  ``build`` is treated as a boolean flag.

    Returns a list with one defaultdict per entry of phase_list, in the
    same order.  For built phases it holds the numpy arrays 'features',
    'labels', 'object_names' and 'object_ids'; for the other phases it is
    left empty.  wipe_data() is called on every built phase once its
    arrays have been created.
    """
    print("Building adjective %s" % adj_obj.adjective)

    for group in utilities.iterator_over_object_groups(database):
        # Pull data from h5 database
        data_dict = utilities.dict_from_h5_group(group)
        object_name = data_dict["name"]

        # Only objects listed in object_set contribute (the caller passes
        # either the training or the test names)
        if object_name not in object_set:
            continue

        feature_data = data_dict["data"]
        # The object id is the second to last '_' separated token of the name
        object_id = int(object_name.split('_')[-2])
        # Label and id are the same for every phase of this object
        label = 1 if adj_obj.adjective in data_dict["adjectives"] else 0

        for phase_obj in phase_list:
            if not phase_obj.build:
                continue

            scores = []
            for sensor, data in feature_data[phase_obj.phase].iteritems():
                # Only the lookup is guarded: a phase/sensor pair without
                # a chain is skipped, while an error raised during scoring
                # is no longer silently swallowed
                try:
                    chain = adj_obj.chains[phase_obj.phase][sensor]
                except KeyError:
                    continue
                scores.append(chain.score(data))

            phase_obj.features.append(scores)
            phase_obj.labels.append(label)
            phase_obj.object_names.append(object_name)
            phase_obj.object_ids.append(object_id)

    # Iterate over all phases, convert the accumulated lists to arrays and
    # collect one dictionary per phase
    set_dict_list = []
    for phase_obj in phase_list:
        set_dict = defaultdict(dict)
        if phase_obj.build:
            set_dict['features'] = np.array(phase_obj.features).squeeze()
            set_dict['labels'] = np.array(phase_obj.labels).flatten()
            set_dict['object_names'] = np.array(phase_obj.object_names).flatten()
            set_dict['object_ids'] = np.array(phase_obj.object_ids).flatten()
            # Reset the accumulators so the phase object can be reused
            phase_obj.wipe_data()
        set_dict_list.append(set_dict)
    return set_dict_list
def test_adjective(classifier, database, test_object_names, adjective_report):
    """
    Evaluate one adjective classifier on the given test objects.

    Every object group in the database whose name is in test_object_names
    is run through classifier.extract_features and classifier.predict.
    The first element of the prediction is compared with the true label,
    which is 1 if classifier.adjective is among the object's adjectives
    and 0 otherwise.

    Parameters:
    classifier: object exposing ``adjective``, ``extract_features`` and
        ``predict``.
    database: handed unchanged to utilities.iterator_over_object_groups.
    test_object_names: container of object names to evaluate; all other
        objects are skipped.
    adjective_report: object with a ``write`` method; receives one line
        "adjective, precision, recall, f1".

    Returns the tuple (precision, recall, f1).  A ratio whose denominator
    is zero (no positive predictions, no positive objects, or precision
    and recall both zero) is reported as 0.0 instead of raising
    ZeroDivisionError.
    """
    # Object names, bucketed by true label and predicted label
    true_positive_list = []
    true_negative_list = []
    false_positive_list = []
    false_negative_list = []

    print('\n \nTesting Adjective: %s' % classifier.adjective)

    for group in utilities.iterator_over_object_groups(database):

        assert isinstance(group, tables.Group)
        data_dict = utilities.dict_from_h5_group(group)
        object_name = data_dict['name']

        if object_name not in test_object_names:
            continue

        features = classifier.extract_features(data_dict["data"])
        output = classifier.predict(features)

        # Does the adjective truly apply, and does the classifier say so?
        is_positive = classifier.adjective in data_dict['adjectives']
        predicted_positive = output[0] == 1

        if is_positive:
            if predicted_positive:
                bucket = true_positive_list
            else:
                bucket = false_negative_list
        else:
            if predicted_positive:
                bucket = false_positive_list
            else:
                bucket = true_negative_list
        bucket.append(object_name)

    # Floats keep the ratios below in true division under Python 2
    true_positives = float(len(true_positive_list))
    true_negatives = float(len(true_negative_list))
    false_positives = float(len(false_positive_list))
    false_negatives = float(len(false_negative_list))

    # Compute statistics for the adjective; an undefined ratio counts as 0.0
    predicted_total = true_positives + false_positives
    actual_total = true_positives + false_negatives
    precision = true_positives / predicted_total if predicted_total else 0.0
    recall = true_positives / actual_total if actual_total else 0.0
    score_sum = precision + recall
    f1 = 2.0 * precision * recall / score_sum if score_sum else 0.0

    print("Precision: %f, Recall: %f, F1: %f \n" % (precision, recall, f1))
    adjective_report.write("%s, %f, %f, %f\n" % (classifier.adjective, precision, recall, f1))

    print("%d False Positive Objects are: %s \n" % (false_positives, sorted(false_positive_list)))
    print("%d False Negative Objects are: %s \n" % (false_negatives, sorted(false_negative_list)))
    print("%d True Positive Objects are: %s\n" % (true_positives, sorted(true_positive_list)))
    print("%d True Negative Objects are: %s\n" % (true_negatives, sorted(true_negative_list)))

    return (precision, recall, f1)