Example #1
    def like_parent_like_child(classifier, node):
        """
        Use on a node that must rely on its ancestors for a classification.
        Walks up the node's parents until it finds one whose entropy is not
        one, then adopts that parent's dominant classification for the child.

        :param classifier: (Attribute) the attribute by which to classify the examples
        :param node: (Node) the child node whose classification we want to decide
        :return:
        Attribute: the attribute this child should model, based on its parent
        """
        parent = node
        while parent is not None:

            parent_entropy = parent.data_set.entropy(classifier=classifier)
            if parent_entropy[0] != 1:
                # there is an unequal number of positive and negative values;
                # choose the dominant value for the attribute
                return Attribute(parent_entropy[1], 'end')
            else:
                # the data set is completely random
                # meaning that there are equal amounts of positive classifications and negative
                # classifications
                # move to the next parent
                parent = parent.parent

        else:
            # the loop finished without finding a dominant value anywhere
            # above this node, i.e. every ancestor was a perfect 50/50 split,
            # which should not happen with well-formed data
            print('error: finished the loop and there is no parent with a dominant value')
            # note: list.sort() returns None, so use sorted() to take the
            # alphabetically first classifier value as a fallback
            return Attribute(sorted(classifier.values)[0], 'end')
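A minimal sketch of how this function behaves, treating the snippet as a free function and using hypothetical FakeDataSet/Node stubs that only mimic the interfaces used above (the real project classes are not shown here):

class Attribute:
    def __init__(self, name, kind):
        self.name = name
        self.kind = kind

class FakeDataSet:
    # entropy() returns (entropy, dominant value), matching the usage above
    def __init__(self, pair):
        self._pair = pair

    def entropy(self, classifier):
        return self._pair

class Node:
    def __init__(self, data_set, parent=None):
        self.data_set = data_set
        self.parent = parent

# the root has a dominant value; the child is a perfect 50/50 split,
# so the walk skips the child and adopts the root's value
root = Node(FakeDataSet((0.81, 'yes')))
child = Node(FakeDataSet((1.0, None)), parent=root)
print(like_parent_like_child(None, child).name)  # -> yes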
Example #2
    def like_parent_like_child(self, classifier, node):
        """

        :param classifier:
        :param node:
        :return:

        Attribute: return the attribute that this child should model based on their parent
        """
        while node is not None:

            parent_entropy = node.data_set.entropy(classifier=classifier)
            if parent_entropy[0] != 1:
                # there is an unequal number of positive and negative values;
                # choose the dominant value for the attribute
                return Attribute(parent_entropy[1], 'end')
            else:
                # the data set is completely random
                # meaning that there are equal amounts of positive classifications and negative
                # classifications
                # move to the next parent
                return self.like_parent_like_child(classifier=classifier,
                                                   node=node.parent)

        else:
            # reached only when node is None on entry, since both branches
            # inside the loop return
            print(
                'error: finished the loop and there is no parent with a dominant value'
            )
            # note: list.sort() returns None, so use sorted() to take the
            # alphabetically first classifier value as a fallback
            return Attribute(sorted(classifier.values)[0], 'end')
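Since both branches inside the loop return, the while above runs at most one iteration, and its else clause fires only when node is None on entry. A sketch of an equivalent formulation (same assumed Node/Attribute interfaces as Example #1) that makes the recursion explicit:

    def like_parent_like_child(self, classifier, node):
        # base case: walked past the root without finding a dominant value
        if node is None:
            print('error: no ancestor with a dominant value')
            return Attribute(sorted(classifier.values)[0], 'end')
        entropy, dominant = node.data_set.entropy(classifier=classifier)
        if entropy != 1:
            # unequal split: adopt this ancestor's dominant classification
            return Attribute(dominant, 'end')
        # perfect 50/50 split: defer to the next ancestor
        return self.like_parent_like_child(classifier=classifier, node=node.parent)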
Example #3
import os

import numpy as np
import tensorflow as tf

p = '/vagrant/imgs/training_data/training_data/aligned'
d = os.listdir(p)

def _parse_function(filename):
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_png(image_string, channels=3)
    image_resized = tf.image.resize_images(image_decoded, [95, 95])
    return image_resized

EPOCHS = 10
BATCH_SIZE = 16

filenames = [os.path.join(p, img_path) for img_path in d[:5]]
a = Attribute()
labels = np.array([a.get_attributes_list(img_path) for img_path in d[:5]])
# labels = labels.reshape(labels[0], labels[1], -1, -1)
print(labels.shape)
# labels = tf.constant(l)

# use the joined paths; the bare names in d are not readable from the cwd
features = [_parse_function(filename) for filename in filenames]
print([feature.shape for feature in features])
dataset = tf.data.Dataset.from_tensor_slices((features, labels)).repeat().batch(BATCH_SIZE)

# dataset = dataset.map(_parse_function)
iterator = dataset.make_one_shot_iterator()
x, y = iterator.get_next()

net = tf.layers.dense(x, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
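A minimal sketch (not part of the original snippet) of pulling batches from the one-shot iterator under TensorFlow 1.x graph mode:

with tf.Session() as sess:
    for _ in range(3):
        # each run advances the iterator by one batch
        batch_x, batch_y = sess.run([x, y])
        print(batch_x.shape, batch_y.shape)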
Example #4
    def id3(self, root, target_attribute, attrs, debug=False):
        """
        Recursively build a decision tree that learns how to classify a given type of data
        with a training set of data.

        :param root: (Node) the current node that the algorithm is classifying
        :param target_attribute: (Attribute) the trait of the data that we would like to classify by
        :param attrs: (Attributes) The Attributes that are related to this node's classification, excluding any
                                    Attributes that have been used higher up the hierarchy
        :param debug: (boolean) Enables or disables debugging output
        :return: void
        """
        # BASE CASE: the data set is pure (zero entropy), so every example
        # shares the same classification; make this node a leaf with that value
        if root.data_set.entropy(classifier=target_attribute)[0] == 0:
            value = root.data_set.all_examples[0].get_value(target_attribute)
            root.attribute = Attribute(value, 'end')
            return

        # there are attributes to split upon
        # decide the split based on gain
        if len(attrs) > 0:
            # START: BEST ATTRIBUTE
            best_attributes = list()

            # find the best attribute
            for attr in attrs:
                # iterate through each value in the attribute
                gain = root.data_set.gain(target_attribute, attr, debug)

                if len(best_attributes) == 0:
                    best_attributes.append((attr, gain))
                elif best_attributes[0][1] == gain:
                    best_attributes.append((attr, gain))
                elif best_attributes[0][1] < gain:
                    best_attributes = [(attr, gain)]

            # organize alphabetically
            # "Also, if there is a tie in entropy reduction between multiple
            # attributes, you should choose the attribute whose name is earlier
            # in the alphabet (using Python's native string comparison)"
            def name(elem):
                return elem[0].name

            # sort based on name
            best_attributes.sort(key=name)
            if debug is True:
                print()
                print('best attributes: ')
                for attr in best_attributes:
                    print(attr[0].name, " ", end=' ')
                print()

            # BUILD CHILDREN
            # create the attribute for this node
            root.attribute = best_attributes[0][0]
            root.attribute.values.sort()  # alphabetically sort values

            # END: BEST ATTRIBUTES
            if debug is True:
                print("best attribute: ", root.attribute.name)
                input('...')

            # ADD CHILDREN
            for value in root.attribute.values:
                example_set = [
                    x for x in root.data_set.all_examples
                    if x.get_value(root.attribute) == value
                ]

                # examples to work with
                # make new node to pass down
                next_node = Node(data=dataset.DataSet(),
                                 parent=root,
                                 children=list(),
                                 attribute=None)

                attributes = copy.copy(attrs)
                attributes.remove(root.attribute)

                # CASE: RUN OUT OF EXAMPLES
                if len(example_set) == 0:
                    if debug is True:
                        print('warning: out of examples')
                    # choose the most prevalent classification from the examples in the parent's domain
                    parent = root
                    next_node.attribute = self.like_parent_like_child(
                        classifier=target_attribute, node=parent)

                    # no need to delve any more into next node
                    root.children.append((value, next_node))
                    continue

                # make a dataset with all the value-specific information and store in next node
                next_node.data_set.all_examples = example_set
                # update the children of the node by recursing through
                self.id3(root=next_node,
                         target_attribute=target_attribute,
                         attrs=attributes,
                         debug=debug)
                root.children.append((value, next_node))
        else:
            # RUN OUT OF FEATURES
            # no attributes
            if debug is True:
                print('warning: out of features')

            num_pos = root.data_set.partial_count(target_attribute)
            num_neg = len(root.data_set) - num_pos
            tie = num_pos == num_neg

            if tie:
                # tie: defer to the nearest ancestor with a dominant value
                parent = root
                root.attribute = self.like_parent_like_child(
                    classifier=target_attribute, node=parent)
            else:
                # no tie: label this node with the dominant classification
                dominant_value = root.data_set.entropy(
                    classifier=target_attribute)[1]
                root.attribute = Attribute(dominant_value, 'end')
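DataSet.entropy is not shown in these examples, but the way its return value is used above (index 0 is the entropy, index 1 the dominant classification) suggests an implementation along these lines. This is a hypothetical sketch, not the project's code:

import math
from collections import Counter

def entropy_sketch(labels):
    # Shannon entropy H = -sum(p_i * log2(p_i)) over the label distribution,
    # plus the most common label; ties fall back to first-seen order here
    counts = Counter(labels)
    total = len(labels)
    h = -sum((c / total) * math.log2(c / total) for c in counts.values())
    return h, counts.most_common(1)[0][0]

print(entropy_sketch(['yes', 'yes', 'yes', 'yes']))  # (0.0, 'yes'): pure set -> id3 base case
print(entropy_sketch(['yes', 'no', 'yes', 'no']))    # (1.0, 'yes'): 50/50 -> defer to parent
print(entropy_sketch(['yes', 'yes', 'yes', 'no']))   # (~0.81, 'yes'): dominant value wins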