def like_parent_like_child(classifier, node):
    """
    Use on a node that must rely on its parent for a classification.
    Keeps looking at the node's parents until it finds one whose entropy is not 1,
    then uses that parent's dominant value to decide the child's classification.

    :param classifier: (Attribute) the attribute for which to classify one's examples
    :param node: (Node) the child node whose classification we want to decide
    :return: Attribute: the attribute that this child should model based on its parents
    """
    parent = node
    while parent is not None:
        parent_entropy = parent.data_set.entropy(classifier=classifier)
        if parent_entropy[0] != 1:
            # there is an unequal amount of positive and negative values,
            # so choose the most dominant value for the attribute
            return Attribute(parent_entropy[1], 'end')
        else:
            # the data set is completely random, meaning there are equal amounts of
            # positive and negative classifications; move to the next parent
            parent = parent.parent
    else:
        # finished the loop without finding a parent with a dominant value.
        # Should not be reached unless there is a perfect split in the examples,
        # which should not happen with good data.
        print('error: finished the loop and there is no parent with a dominant value')
        return Attribute(sorted(classifier.values)[0], 'end')
def like_parent_like_child(self, classifier, node):
    """
    Walk up the chain of parents until one has a dominant classification value,
    then model the child on that value.

    :param classifier: (Attribute) the attribute for which to classify one's examples
    :param node: (Node) the child node whose classification we want to decide
    :return: Attribute: the attribute that this child should model based on its parents
    """
    while node is not None:
        parent_entropy = node.data_set.entropy(classifier=classifier)
        if parent_entropy[0] != 1:
            # there is an unequal amount of positive and negative values,
            # so choose the most dominant value for the attribute
            return Attribute(parent_entropy[1], 'end')
        else:
            # the data set is completely random, meaning there are equal amounts of
            # positive and negative classifications; move to the next parent
            return self.like_parent_like_child(classifier=classifier, node=node.parent)
    else:
        # reached the top of the tree without finding a dominant value.
        # Should not happen with a well-formed training set.
        print('error: finished the loop and there is no parent with a dominant value')
        return Attribute(sorted(classifier.values)[0], 'end')
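# A minimal, self-contained sketch of the two-class entropy that
# like_parent_like_child appears to rely on: an entropy of exactly 1 means a
# perfect 50/50 split, which is the case the parent-walk above skips over.
# This is an illustrative stand-in, not the project's DataSet.entropy
# implementation, and the helper name binary_entropy is hypothetical.
import math


def binary_entropy(num_pos, num_neg):
    """Return the Shannon entropy (in bits) of a two-class split."""
    total = num_pos + num_neg
    if total == 0 or num_pos == 0 or num_neg == 0:
        return 0.0  # an empty or pure split carries no uncertainty
    p = num_pos / total
    q = num_neg / total
    return -(p * math.log2(p) + q * math.log2(q))


# entropy is 1.0 only for an even split, which is exactly the situation in
# which a node must defer to an ancestor for its classification
assert binary_entropy(5, 5) == 1.0
assert binary_entropy(8, 2) < 1.0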
import os

import numpy as np
import tensorflow as tf

p = '/vagrant/imgs/training_data/training_data/aligned'
d = os.listdir(p)


def _parse_function(filename):
    """Load a PNG from disk and resize it to 95x95 RGB."""
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_png(image_string, channels=3)
    image_resized = tf.image.resize_images(image_decoded, [95, 95])
    return image_resized


EPOCHS = 10
BATCH_SIZE = 16

filenames = [os.path.join(p, img_path) for img_path in d[:5]]

a = Attribute()  # Attribute comes from the project's own module (import omitted in the original)
labels = np.array([a.get_attributes_list(img_path) for img_path in d[:5]])
# labels = labels.reshape(labels[0], labels[1], -1, -1)
print(labels.shape)
# labels = tf.constant(l)

# parse each image from its full path (not just the bare filename)
features = [_parse_function(filename) for filename in filenames]
print([feature.shape for feature in features])

dataset = tf.data.Dataset.from_tensor_slices((features, labels)).repeat().batch(BATCH_SIZE)
# dataset = dataset.map(_parse_function)
iterator = dataset.make_one_shot_iterator()
x, y = iterator.get_next()

net = tf.layers.dense(x, 8, activation=tf.tanh)  # pass the first value from iterator.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
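# A rough sketch of how the pipeline above could be trained under the TF 1.x
# API, assuming each label from get_attributes_list is a flat numeric vector
# of length LABEL_DIM per image. LABEL_DIM, the loss, and the optimizer are
# placeholders for illustration only, not the project's actual training setup.
LABEL_DIM = 8  # hypothetical; set to the real label vector length

net = tf.layers.flatten(net)                  # collapse the spatial dims of the image features
prediction = tf.layers.dense(net, LABEL_DIM)  # one output per label component
loss = tf.losses.mean_squared_error(labels=tf.cast(y, tf.float32),
                                    predictions=prediction)
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(EPOCHS):
        # each run pulls the next batch from the one-shot iterator
        _, loss_value = sess.run([train_op, loss])
        print('epoch {}: loss {:.4f}'.format(epoch, loss_value))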
def id3(self, root, target_attribute, attrs, debug=False):
    """
    Recursively build a decision tree that learns how to classify a given type of data
    with a training set of data.

    :param root: (Node) the current node that the algorithm is classifying
    :param target_attribute: (Attribute) the trait of the data that we would like to classify by
    :param attrs: (Attributes) the Attributes that are related to this node's classification,
                  excluding any Attributes that have been used higher up the hierarchy
    :param debug: (boolean) enables or disables debugging output
    :return: void
    """
    # do a general check based on entropy: a pure node becomes a leaf
    if root.data_set.entropy(classifier=target_attribute)[0] == 0:
        value = root.data_set.all_examples[0].get_value(target_attribute)
        root.attribute = Attribute(value, 'end')
        return

    # there are attributes to split upon; decide the split based on gain
    if len(attrs) > 0:
        # START: BEST ATTRIBUTE
        best_attributes = list()

        # find the best attribute
        for attr in attrs:
            gain = root.data_set.gain(target_attribute, attr, debug)
            if len(best_attributes) == 0:
                best_attributes.append((attr, gain))
            elif best_attributes[0][1] == gain:
                best_attributes.append((attr, gain))
            elif best_attributes[0][1] < gain:
                best_attributes = [(attr, gain)]

        # organize alphabetically:
        # "Also, if there is a tie in entropy reduction between multiple attributes, you should
        # choose the attribute whose name is earlier in the alphabet (using Python's native
        # string comparison)."
        def name(elem):
            return elem[0].name

        # sort based on name
        best_attributes.sort(key=name)

        if debug is True:
            print()
            print('best attributes: ')
            for attr in best_attributes:
                print(attr[0].name, " ", end=' ')
            print()

        # BUILD CHILDREN
        # create the attribute for this node
        root.attribute = best_attributes[0][0]
        root.attribute.values.sort()  # alphabetically sort values
        # END: BEST ATTRIBUTES

        if debug is True:
            print("best attribute: ", root.attribute.name)
            input('...')

        # ADD CHILDREN
        for value in root.attribute.values:
            # examples to work with
            example_set = [x for x in root.data_set.all_examples
                           if x.get_value(root.attribute) == value]

            # make new node to pass down
            next_node = Node(data=dataset.DataSet(), parent=root, children=list(), attribute=None)
            attributes = copy.copy(attrs)
            attributes.remove(root.attribute)

            # CASE: RUN OUT OF EXAMPLES
            if len(example_set) == 0:
                if debug is True:
                    print('warning: out of examples')
                # choose the most prevalent value from the population that falls into
                # the parent's domain
                parent = root
                next_node.attribute = self.like_parent_like_child(classifier=target_attribute,
                                                                  node=parent)
                # no need to delve any more into next node
                root.children.append((value, next_node))
                continue

            # make a dataset with all the value-specific information and store in next node
            next_node.data_set.all_examples = example_set

            # update the children of the node by recursing
            self.id3(root=next_node, target_attribute=target_attribute,
                     attrs=attributes, debug=debug)
            root.children.append((value, next_node))
    else:
        # CASE: RUN OUT OF FEATURES (no attributes left to split on)
        if debug is True:
            print('warning: out of features')

        num_pos = root.data_set.partial_count(target_attribute)
        num_neg = len(root.data_set) - num_pos
        tie = num_pos == num_neg

        if tie:
            # in the event of a tie, defer to the parent's dominant value
            parent = root
            root.attribute = self.like_parent_like_child(classifier=target_attribute, node=parent)
        else:
            # not a tie: label the leaf with the dominant value
            dominant_value = root.data_set.entropy(classifier=target_attribute)[1]
            root.attribute = Attribute(dominant_value, 'end')
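# A minimal, self-contained sketch of the information-gain computation that
# data_set.gain presumably performs: the entropy of the full example set minus
# the size-weighted entropy of each subset produced by splitting on an
# attribute. The dictionary-based example format and the function names here
# are hypothetical, used only to keep the sketch independent of the project's
# DataSet class.
import math
from collections import Counter


def entropy(labels):
    """Shannon entropy (in bits) of an iterable of class labels."""
    counts = Counter(labels)
    total = sum(counts.values())
    return -sum((c / total) * math.log2(c / total) for c in counts.values())


def information_gain(examples, split_attr, target_attr):
    """Gain from splitting `examples` (a list of dicts) on `split_attr`."""
    base = entropy(ex[target_attr] for ex in examples)
    remainder = 0.0
    for value in set(ex[split_attr] for ex in examples):
        subset = [ex for ex in examples if ex[split_attr] == value]
        remainder += len(subset) / len(examples) * entropy(ex[target_attr] for ex in subset)
    return base - remainder


# toy usage: splitting on 'outlook' separates the labels perfectly, 'windy' not at all
toy = [
    {'outlook': 'sunny', 'windy': 'no',  'play': 'no'},
    {'outlook': 'sunny', 'windy': 'yes', 'play': 'no'},
    {'outlook': 'rain',  'windy': 'no',  'play': 'yes'},
    {'outlook': 'rain',  'windy': 'yes', 'play': 'yes'},
]
print(information_gain(toy, 'outlook', 'play'))  # 1.0
print(information_gain(toy, 'windy', 'play'))    # 0.0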