Exemplo n.º 1
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct):
    '''
    get_graph_accuracy_partial - Build an ID3 tree from a random portion of the
    training set and return its validation accuracy.

    int(len(train_set) * pct) examples are drawn without replacement using
    random.sample. The label of the node returned by ID3 is printed, then the
    result of validation_accuracy(tree, validate_set) is returned.

    NOTE(review): `depth` is not a parameter of this function; it is looked up
    from the surrounding scope at call time, so a module-level `depth` has to
    exist or the ID3 call raises NameError -- confirm.
    '''
    sample_size = int(len(train_set) * pct)
    subset = random.sample(train_set, sample_size)
    tree = ID3(subset, attribute_metadata, numerical_splits_count, depth)
    print(tree.label)
    return validation_accuracy(tree, validate_set)
Exemplo n.º 2
0
def get_graph(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, iterations, lower, upper, increment):
    '''
    get_graph - Scatter-plot validation accuracy against the amount of training
    data used.

    For every value i in range(lower, upper, increment) the result of
    get_graph_data is recorded; falsy results are skipped. A tree built by ID3
    on the complete train_set is then scored with validation_accuracy and
    stored under the key `upper`. The collected points are drawn with
    plt.scatter and the axis labels, title and grid are configured.

    Nothing is returned, and the figure is not shown or saved by this function.
    '''
    points = {}
    for pct in range(lower, upper, increment):
        score = get_graph_data(train_set, attribute_metadata, validate_set,
                               numerical_splits_count, iterations, pct, depth)
        if score:
            points[pct] = score

    # The last point comes from a tree trained on the whole training set.
    full_tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    points[upper] = validation_accuracy(full_tree, validate_set)

    xs = list(points.keys())
    ys = [points[key] for key in xs]

    plt.scatter(xs, ys)

    plt.xlabel('Percentage of Data Used (%)')
    plt.ylabel('Validation Accuracy')
    plt.title('Learning Curve')
    plt.grid(True)
Exemplo n.º 3
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count, depth and a fraction pct, return
    the validation accuracy of a tree trained on that fraction of the
    training set.

    When pct is not 1, int(pct * len(train_set)) distinct examples are drawn
    at random from the whole training set; when pct is 1 the training set is
    used as is. If the resulting data set is an empty list, 0 is returned
    without building a tree.

    Raises ValueError (from random.sample) if pct asks for more examples than
    train_set contains.
    '''
    if pct != 1:
        # Sample from the WHOLE training set. The previous hand-rolled loop
        # drew indices only from [0, k-1], so it always picked the first k
        # examples (merely reordered) instead of a random subset, and its
        # duplicate check made it quadratic in k.
        sample_size = max(0, int(pct * len(train_set)))
        data_set1 = random.sample(train_set, sample_size)
    else:
        data_set1 = train_set

    if data_set1 != []:
        pct_tree = ID3(data_set1, attribute_metadata, numerical_splits_count, depth)
        accuracy = validation_accuracy(pct_tree, validate_set, attribute_metadata)
    else:
        # Nothing to train on.
        accuracy = 0
    return accuracy
Exemplo n.º 4
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct,depth):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count, a fraction pct and a depth, return
    the validation accuracy of a tree trained on a random portion of the
    training set.

    The portion holds floor(pct * len(train_set)) + 1 examples, capped at
    len(train_set), drawn without replacement with random.sample. The result
    of validation_accuracy(tree, validate_set) is returned.
    '''
    population = len(train_set)
    # The "+ 1" keeps the sample non-empty for small pct, but on its own it
    # requests population + 1 examples when pct == 1, which makes
    # random.sample raise ValueError. Cap the request at the population size.
    num_training_samples = min(population, int(math.floor(pct * population)) + 1)
    data_subset = random.sample(train_set, num_training_samples)
    tree = ID3(data_subset, attribute_metadata, numerical_splits_count, depth)
    return validation_accuracy(tree, validate_set)
Exemplo n.º 5
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3 tree
    trained on a random portion of the training set.

    pct is divided by 100 when the sample size is computed, i.e. it is read as
    a number of percent. The sample is drawn without replacement with
    random.sample and its length is printed before the tree is built.
    '''
    wanted = int(float(len(train_set) * pct) / 100)
    chosen = random.sample(train_set, wanted)
    print(len(chosen))
    built_tree = ID3(chosen, attribute_metadata, numerical_splits_count, depth)
    return validation_accuracy(built_tree, validate_set)
Exemplo n.º 6
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set,
                               numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Return a pair (accuracy before pruning,
    accuracy after pruning) for a tree trained on a random portion of the
    training set.

    (0.0, 0.0) is returned when pct equals 0.0 or when
    round(len(train_set) * pct) is 0. Otherwise that many examples are sampled
    from a deep copy of train_set, ID3 is run with a deep copy of
    numerical_splits_count, and the tree is scored with validation_accuracy
    both before and after reduced_error_pruning. Pruning is given the full
    train_set, not the sampled portion.
    '''
    if pct == 0.0:
        return 0.0, 0.0

    working_copy = deepcopy(train_set)
    subset_size = int(round(len(working_copy) * pct))
    if subset_size == 0:
        return 0.0, 0.0

    subset = sample(working_copy, subset_size)
    splits_copy = deepcopy(numerical_splits_count)
    unpruned_tree = ID3(subset, attribute_metadata, splits_copy, depth)
    accuracy_before = validation_accuracy(unpruned_tree, validate_set)

    pruned_tree = reduced_error_pruning(unpruned_tree, train_set, validate_set)
    accuracy_after = validation_accuracy(pruned_tree, validate_set)
    return accuracy_before, accuracy_after
Exemplo n.º 7
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set,
                               numerical_splits_count, pct, depth):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3 tree
    built from train_set.

    No sampling happens here: train_set is passed to ID3 unchanged and pct is
    not used, so the caller has to hand in the already reduced (partial)
    training set. numerical_splits_count is printed after the tree is built.
    '''
    built_tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    print("splits counts after one iter: " + str(numerical_splits_count))
    accuracy = validation_accuracy(built_tree, validate_set)
    return accuracy
Exemplo n.º 8
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set,
                               numerical_splits_count, pct, depth):
    '''
    get_graph_accuracy_partial - Train on a random percentage of the training
    set and return the resulting validation accuracy.

    The number of examples is int(float(len(train_set) * pct) / 100), so pct
    is read as a number of percent. Examples are chosen without replacement
    with random.sample; the size of the chosen portion is printed.
    '''
    total = len(train_set)
    portion = random.sample(train_set, int(float(total * pct) / 100))
    print(len(portion))
    portion_tree = ID3(portion, attribute_metadata, numerical_splits_count,
                       depth)
    accuracy = validation_accuracy(portion_tree, validate_set)
    return accuracy
Exemplo n.º 9
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3 tree
    trained on a random fraction of the training set.

    int(pct * len(train_set)) distinct positions are sampled from
    range(len(train_set)) and the examples at those positions form the
    training data. ID3 is called with float('inf') as its depth argument.
    '''
    total = len(train_set)
    # Sample positions rather than examples, then look the examples up.
    positions = random.sample(range(total), int(pct * total))
    subset = [train_set[position] for position in positions]
    tree = ID3(subset, attribute_metadata, numerical_splits_count, float('inf'))
    return validation_accuracy(tree, validate_set)
Exemplo n.º 10
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct, prune):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3 tree
    trained on a random fraction of the training set, optionally after
    pruning.

    int(pct * len(train_set)) examples are drawn with sample(). When prune is
    truthy, reduced_error_pruning is called with the tree, the full train_set
    and validate_set before the tree is scored.

    NOTE(review): the return value of reduced_error_pruning is discarded, so
    pruning only affects the result if that helper modifies the tree in
    place -- confirm.
    '''
    subset = sample(train_set, int(pct * len(train_set)))
    built_tree = ID3(subset, attribute_metadata, numerical_splits_count, depth)
    if prune:
        reduced_error_pruning(built_tree, train_set, validate_set)
    accuracy = validation_accuracy(built_tree, validate_set)
    return accuracy
Exemplo n.º 11
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set,
                               numerical_splits_count, pct):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3 tree
    trained on a random fraction of the training set.

    int(len(train_set) * pct) examples are drawn without replacement with
    random.sample. ID3 is called with a fixed depth argument of 20, and the
    tree is scored without pruning.
    '''
    subset_size = int(len(train_set) * pct)
    subset = random.sample(train_set, subset_size)
    built_tree = ID3(subset, attribute_metadata, numerical_splits_count, 20)
    return validation_accuracy(built_tree, validate_set)
Exemplo n.º 12
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3 tree
    trained on the data that curve_data(train_set, pct) yields.

    When curve_data returns an empty list no tree is built and 0 is returned.
    Otherwise the tree is scored with
    validation_accuracy(tree, validate_set, attribute_metadata).
    '''
    subset = curve_data(train_set, pct)
    if subset != []:
        tree = ID3(subset, attribute_metadata, numerical_splits_count, depth)
        result = validation_accuracy(tree, validate_set, attribute_metadata)
    else:
        result = 0
    return result
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3 tree
    trained on the leading part of a shuffled training set.

    train_set is shuffled IN PLACE, so the caller's list is reordered. The
    first pct elements are then used for training, and ID3 receives a shallow
    copy of numerical_splits_count instead of the original object.

    NOTE(review): pct is used directly as the slice end, so here it behaves
    like an example count rather than a fraction -- confirm callers pass an
    integer.
    '''
    shuffle(train_set)
    leading_examples = train_set[0:pct]
    splits_copy = copy.copy(numerical_splits_count)
    tree = ID3(leading_examples, attribute_metadata, splits_copy, depth)
    return validation_accuracy(tree, validate_set)
Exemplo n.º 14
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3 tree
    trained on the leading part of a shuffled training set.

    train_set is shuffled IN PLACE, so the caller's list is reordered. pct is
    divided by 100 (it is read as a number of percent) and the first
    int(pct / 100 * len(train_set)) examples are used for training.
    '''
    shuffle(train_set)
    cutoff = int(float(pct) / 100 * len(train_set))
    leading_examples = train_set[:cutoff]
    tree = ID3(leading_examples, attribute_metadata, numerical_splits_count, depth)
    return validation_accuracy(tree, validate_set)
Exemplo n.º 15
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set,
                               numerical_splits_count, pct):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count and a fraction pct, return the
    validation accuracy of a tree trained on that fraction of the training
    set.

    Distinct indices into train_set are sampled with random.sample, the
    matching examples are collected, and ID3 is run on them with
    float('inf') as its depth argument.
    '''
    example_count = len(train_set)
    picked = random.sample(range(example_count), int(pct * example_count))

    training_portion = []
    for index in picked:
        training_portion.append(train_set[index])

    decision_tree = ID3(training_portion, attribute_metadata,
                        numerical_splits_count, float('inf'))
    accuracy = validation_accuracy(decision_tree, validate_set)
    return accuracy
Exemplo n.º 16
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth,iterations):
    '''
    get_graph_accuracy_partial - Return the validation accuracy averaged over
    several trees, each trained on a random fraction of the training set.

    If int(len(train_set) * pct) is 0, 0 is returned immediately. Otherwise
    `iterations` samples of that size are drawn first; afterwards one ID3 tree
    is built per sample and the validation accuracies are summed. The sum
    divided by `iterations` is returned. The same numerical_splits_count
    object is passed to every ID3 call.
    '''
    sample_size = int(len(train_set) * pct)
    if sample_size == 0:
        return 0

    # Draw every sample before any tree is built.
    samples = [random.sample(train_set, sample_size) for _ in range(iterations)]

    total = 0
    for examples in samples:
        tree = ID3(examples, attribute_metadata, numerical_splits_count, depth)
        total += validation_accuracy(tree, validate_set)

    return total/iterations
Exemplo n.º 17
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set,
                               numerical_splits_count, pct):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3 tree
    trained on a random fraction of the training set.

    int(len(train_set) * pct) examples are drawn without replacement with
    random.sample. ID3 is called with a fixed depth argument of 3.
    '''
    subset = random.sample(train_set, int(len(train_set) * pct))
    built_tree = ID3(subset, attribute_metadata, numerical_splits_count, 3)
    return validation_accuracy(built_tree, validate_set)
Exemplo n.º 18
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3 tree
    trained on a random fraction of the training set.

    floor(pct * len(train_set)) examples are drawn without replacement with
    random.sample and then shuffled once more. If that count is 0 no tree is
    built and 0 is returned.
    '''
    subset_size = int(math.floor(pct*len(train_set)))
    if subset_size != 0:
        subset = random.sample(train_set, subset_size)
        random.shuffle(subset)
        built_tree = ID3(subset, attribute_metadata, numerical_splits_count, depth)
        result = validation_accuracy(built_tree, validate_set)
    else:
        # An empty subset cannot be trained on; report zero accuracy.
        result = 0
    return result
Exemplo n.º 19
0
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set,
                               numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count, depth and a fraction pct, return
    the validation accuracy of a tree trained on that fraction of the
    training set.

    The number of examples is floor(pct * len(train_set)); when it is 0 the
    function returns 0 without calling ID3. The examples are picked with
    random.sample and passed through random.shuffle before training.
    '''
    wanted = int(math.floor(pct * len(train_set)))
    if not wanted:
        return 0

    portion = random.sample(train_set, wanted)
    random.shuffle(portion)

    portion_tree = ID3(portion, attribute_metadata, numerical_splits_count,
                       depth)
    return validation_accuracy(portion_tree, validate_set)