Exemplo n.º 1
0
def train_and_val():
    """Fit a KNN model on the training split and report validation results.

    Reads the splice dataset and the train/validation index files, builds
    feature/label pairs with ``Features.simple``, filters both splits with
    ``dp.remove_ambiguous_entry``, and classifies every remaining validation
    entry with ``KNN.predict_diff_bases`` (passing ``k_nn.no_weight`` as its
    second argument).

    Prints a 3x3 confusion matrix (row = predicted class, column = true
    class) followed by the validation accuracy. Returns nothing.
    """
    training_data = dp.read_data('dataset/splice-Xtrain.dat',
                                 'dataset/splice-Ytrain.dat')
    training_set_indices, validation_set_indices = dp.read_training_val_set(
        'dataset/train.txt', 'dataset/val.txt')
    features_labels_pair = Features().simple(training_data)

    training_set = [features_labels_pair[i] for i in training_set_indices]
    # Return value is ignored, so the helper is assumed to filter in place.
    dp.remove_ambiguous_entry(training_set)
    k_nn = KNN(training_set, 19)  # second argument is presumably k

    validation_set = [features_labels_pair[i] for i in validation_set_indices]
    dp.remove_ambiguous_entry(validation_set)

    class_ids = (0, 1, 2)
    confusion_matrix = np.zeros([3, 3])
    correct = 0.0
    total = 0.0
    for feature_vector, correct_class in validation_set:
        prediction = k_nn.predict_diff_bases(feature_vector, k_nn.no_weight)
        total += 1
        if prediction == correct_class:
            correct += 1
        # Only the three known classes are tallied; anything else is still
        # counted in ``total`` but left out of the matrix.
        if prediction in class_ids and correct_class in class_ids:
            confusion_matrix[int(prediction), int(correct_class)] += 1

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(confusion_matrix)
    # Accuracy is undefined for an empty validation set; report NaN instead
    # of raising ZeroDivisionError.
    print(correct / total if total else float('nan'))
def train_and_val():
    """Fit a Naive Bayes model on the training split and report validation results.

    Reads the splice dataset and the train/validation index files, builds
    feature/label pairs with ``Features.simple``, trains ``NaiveBayes`` on
    the training entries, and classifies every validation entry that remains
    after ``dp.remove_ambiguous_entry`` with ``NaiveBayes.predict``.

    Prints a 3x3 confusion matrix (row = predicted class, column = true
    class) followed by the validation accuracy. Returns nothing.
    """
    training_data = dp.read_data('dataset/splice-Xtrain.dat',
                                 'dataset/splice-Ytrain.dat')
    training_set_indices, validation_set_indices = dp.read_training_val_set(
        'dataset/train.txt', 'dataset/val.txt')
    features_labels_pair = Features().simple(training_data)

    # The training split is used unfiltered here: the remove_ambiguous_entry
    # call on it was disabled in the original code.
    training_set = [features_labels_pair[i] for i in training_set_indices]
    # NOTE(review): meaning of the ``4`` and ``False`` arguments is not
    # visible from this function -- confirm against NaiveBayes.__init__.
    naive_bayes = NaiveBayes(training_set, 4, False)

    validation_set = [features_labels_pair[i] for i in validation_set_indices]
    # Return value is ignored, so the helper is assumed to filter in place.
    dp.remove_ambiguous_entry(validation_set)

    class_ids = (0, 1, 2)
    confusion_matrix = np.zeros([3, 3])
    correct = 0.0
    total = 0.0
    for feature_vector, correct_class in validation_set:
        prediction = naive_bayes.predict(feature_vector)
        total += 1
        if prediction == correct_class:
            correct += 1
        # Only the three known classes are tallied; anything else is still
        # counted in ``total`` but left out of the matrix.
        if prediction in class_ids and correct_class in class_ids:
            confusion_matrix[int(prediction), int(correct_class)] += 1

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(confusion_matrix)
    # Accuracy is undefined for an empty validation set; report NaN instead
    # of raising ZeroDivisionError.
    print(correct / total if total else float('nan'))
def train_and_val():
    """Fit a KNN model on amino-acid-count features and report validation results.

    Reads the splice dataset and the train/validation index files, builds
    feature/label pairs with ``Features.amino_acid_count``, filters both
    splits with ``dp.remove_ambiguous_entry_plus``, and classifies every
    remaining validation entry with ``KNN.predict_codon_cosine`` (passing
    ``k_nn.no_weight`` as its second argument).

    Prints a 3x3 confusion matrix (row = predicted class, column = true
    class) followed by the validation accuracy. Returns nothing.
    """
    training_data = dp.read_data('dataset/splice-Xtrain.dat',
                                 'dataset/splice-Ytrain.dat')
    training_set_indices, validation_set_indices = dp.read_training_val_set(
        'dataset/train.txt', 'dataset/val.txt')
    features_labels_pair = Features().amino_acid_count(training_data)

    training_set = [features_labels_pair[i] for i in training_set_indices]
    # Return value is ignored, so the helper is assumed to filter in place.
    dp.remove_ambiguous_entry_plus(training_set)
    k_nn = KNN(training_set, 23)  # second argument is presumably k

    validation_set = [features_labels_pair[i] for i in validation_set_indices]
    dp.remove_ambiguous_entry_plus(validation_set)

    class_ids = (0, 1, 2)
    confusion_matrix = np.zeros([3, 3])
    correct = 0.0
    total = 0.0
    for feature_vector, correct_class in validation_set:
        prediction = k_nn.predict_codon_cosine(feature_vector, k_nn.no_weight)
        total += 1
        if prediction == correct_class:
            correct += 1
        # Only the three known classes are tallied; anything else is still
        # counted in ``total`` but left out of the matrix.
        if prediction in class_ids and correct_class in class_ids:
            confusion_matrix[int(prediction), int(correct_class)] += 1

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(confusion_matrix)
    # Accuracy is undefined for an empty validation set; report NaN instead
    # of raising ZeroDivisionError.
    print(correct / total if total else float('nan'))