Пример #1
0
def test_mdr_fit():
    """Check that MDR.fit builds the expected class-count matrix and feature map.

    The model is fitted on 15 two-feature samples, after which both the
    per-instance class counts and the feature-instance -> label map are
    compared against hand-computed values.
    """
    training_rows = [
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ]
    features = np.array(training_rows)
    # First ten samples carry label 1, the last five carry label 0.
    classes = np.array([1] * 10 + [0] * 5)

    model = MDR()
    model.fit(features, classes)

    # Four distinct feature instances occur in the training data.
    assert len(model.class_count_matrix) == 4
    assert len(model.feature_map) == 4

    # (feature instance, class label, expected number of samples)
    expected_counts = [
        ((2, 0), 1, 1),
        ((0, 0), 0, 3),
        ((0, 0), 1, 6),
        ((1, 1), 0, 2),
        ((0, 1), 1, 3),
    ]
    for instance, label, count in expected_counts:
        assert model.class_count_matrix[instance][label] == count

    # (feature instance, expected mapped label)
    expected_labels = [
        ((2, 0), 1),
        ((0, 0), 1),
        ((1, 1), 0),
        ((0, 1), 1),
    ]
    for instance, label in expected_labels:
        assert model.feature_map[instance] == label
Пример #2
0
def test_custom_score():
    """Check that MDR.score returns the value of a user-supplied scoring function.

    The same fitted model is scored on its own training data with
    ``accuracy_score``, with ``zero_one_loss``, and with ``zero_one_loss``
    plus an extra ``normalize=False`` keyword argument.
    """
    training_rows = [
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ]
    features = np.array(training_rows)
    classes = np.array([1] * 10 + [0] * 5)

    model = MDR()
    model.fit(features, classes)

    # Each score is computed and checked before the next call is made.
    accuracy = model.score(features=features, classes=classes,
                           scoring_function=accuracy_score)
    assert accuracy == 9./15

    loss_fraction = model.score(features=features, classes=classes,
                                scoring_function=zero_one_loss)
    assert loss_fraction == 1 - 9./15

    # Extra keyword arguments are passed alongside the scoring function.
    loss_count = model.score(features=features, classes=classes,
                             scoring_function=zero_one_loss, normalize=False)
    assert loss_count == 15 - 9
Пример #3
0
def test_mdr_fit_raise_ValueError():
    """Check that MDR.fit raises ValueError for non-binary class labels (temporary).

    Two label vectors are tried on the same model: one with three distinct
    labels and one with a single label.
    """
    features = np.array([
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ])

    three_label_classes = np.array([1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    single_label_classes = np.array([1] * 15)

    mdr = MDR()
    for classes in (three_label_classes, single_label_classes):
        raised = False
        try:
            mdr.fit(features, classes)
        except ValueError:
            raised = True
        # Any other exception type propagates; no exception fails the test.
        assert raised
Пример #4
0
def test_mdr_transform():
    """Check that MDR.transform maps new feature instances to the expected labels.

    The expected output is a single-column 2-D structure with one label per
    test row.
    """
    training_rows = [
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ]
    features = np.array(training_rows)
    classes = np.array([1] * 10 + [0] * 5)

    model = MDR()
    model.fit(features, classes)

    # Includes (2, 2) and (1, 0), which never occur in the training rows.
    test_features = np.array([
        [2, 2], [1, 1], [0, 0], [0, 0], [0, 0],
        [0, 0], [1, 1], [0, 0], [0, 0], [0, 0],
        [0, 1], [1, 0], [0, 0], [1, 0], [0, 0],
    ])
    expected_labels = (0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1)
    expected_column = [[label] for label in expected_labels]

    new_features = model.transform(test_features)
    assert np.array_equal(new_features, expected_column)
Пример #5
0
def test_fit():
    """Check that MDR.fit builds the expected class-count matrix and feature map.

    Also verifies the number of unique labels and the stored class fraction.
    """
    training_rows = [
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ]
    features = np.array(training_rows)
    classes = np.array([1] * 10 + [0] * 5)

    model = MDR()
    model.fit(features, classes)

    assert len(model.unique_labels) == 2
    assert model.class_fraction == 1. / 3.
    # The size checks run before any lookup of the unseen (2, 2) instance.
    assert len(model.class_count_matrix) == 4
    assert len(model.feature_map) == 4

    # (feature instance, class label, expected number of samples)
    expected_counts = [
        ((2, 0), 0, 0),
        ((2, 0), 1, 1),
        ((0, 0), 0, 3),
        ((0, 0), 1, 6),
        ((1, 1), 0, 2),
        ((1, 1), 1, 0),
        ((0, 1), 0, 0),
        ((0, 1), 1, 3),
        # (2, 2) never appears in the training rows; both counts must be zero.
        ((2, 2), 0, 0),
        ((2, 2), 1, 0),
    ]
    for instance, label, count in expected_counts:
        assert model.class_count_matrix[instance][label] == count

    # (feature instance, expected mapped label)
    expected_labels = [
        ((2, 0), 1),
        ((0, 0), 0),
        ((1, 1), 0),
        ((0, 1), 1),
    ]
    for instance, label in expected_labels:
        assert model.feature_map[instance] == label
Пример #6
0
def test_transform():
    """Check that MDR.transform maps new feature instances to the expected labels.

    The expected output is a flat sequence with one label per test row.
    """
    training_rows = [
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ]
    features = np.array(training_rows)
    classes = np.array([1] * 10 + [0] * 5)

    model = MDR()
    model.fit(features, classes)

    test_features = np.array([
        [2, 2], [1, 1], [0, 0], [0, 0], [0, 0],
        [0, 0], [1, 1], [0, 0], [0, 0], [0, 0],
        [0, 1], [1, 0], [0, 0], [1, 0], [0, 0],
    ])

    # Only the (0, 1) row, at index 10, is expected to map to label 1.
    expected_labels = [0] * 15
    expected_labels[10] = 1

    new_features = model.transform(test_features)
    assert np.array_equal(new_features, expected_labels)
Пример #7
0
def test_score():
    """Check that MDR.score returns the expected default score on the training data."""
    training_rows = [
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ]
    features = np.array(training_rows)
    classes = np.array([1] * 10 + [0] * 5)

    model = MDR()
    model.fit(features, classes)

    # No scoring function is passed, so the model's default metric is used.
    default_score = model.score(features, classes)
    assert default_score == 9./15
Пример #8
0
def test_mdr_fit_raise_ValueError():
    """Check that MDR.fit raises ValueError for non-binary class labels (temporary).

    The same model is fitted first with three distinct labels and then with
    a single label; both attempts must raise ValueError.
    """
    features = np.array([
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ])

    mdr = MDR()

    def fit_raises_value_error(labels):
        """Return True if fitting with *labels* raises ValueError, else False."""
        try:
            mdr.fit(features, labels)
        except ValueError:
            return True
        return False

    # Three distinct labels: 0, 1 and 2.
    classes = np.array([1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    assert fit_raises_value_error(classes)

    # A single label only.
    classes = np.array([1] * 15)
    assert fit_raises_value_error(classes)
Пример #9
0
    for i in range(0, len(m2)):
        n_way_results.append( (m2[i])[1] )
#        n_way_results = tuple(n_way_results)
        n_way_features.append( (m2[i])[2] )
#        n_way_features = tuple(n_way_features)

# Map each n-way result to its feature selection. Equal results collapse to
# a single key, so the last feature selection seen for a given result wins.
d1 = dict(zip(n_way_results, n_way_features))
max_val = max(d1)
# Direct key lookup replaces a full scan of the dict for the matching key.
max_feat = d1[max_val]

# Restrict the training and testing data to the best-scoring feature selection.
xtr = xtr[max_feat]
xte = xte[max_feat]

#clf.fit(mymdr.transform(xtr.values), training_classes)
#print('ekf + mdr: ', clf.score(mymdr.transform(xte.values), testing_classes))
mymdr.fit(xtr.values, training_classes)
print('ekf + mdr: ', mymdr.score(xte.values, testing_classes))


#randex = random.randint(0,3)

#selector = SelectKBest(f_classif, k=5)

# Feature selection with EKF
#xtr = _ekf(training_features, ekf_index=2)
#xte = _ekf(testing_features, ekf_index=2)

##full_data_0 = _ekf(individuals, ekf_index=0)
##full_data_2 = _ekf(individuals, ekf_index=2)
#
#xtr_2 = selector.fit_transform(training_features, training_classes)
Пример #10
0
    a5000_01h, a5000_02h, a5000_04h
]

# Labels written to the log, in the same order as the entries of gametes_all.
dataset_names = [
    'a10_005h', 'a10_01h', 'a10_02h', 'a10_04h', 'a100_005h', 'a100_01h',
    'a100_02h', 'a100_04h', 'a1000_005h', 'a1000_01h', 'a1000_02h',
    'a1000_04h', 'a5000_005h', 'a5000_01h', 'a5000_02h', 'a5000_04h'
]

# Number of random train/test splits scored per dataset.
N_SPLITS = 30

output_txt = '/home/ansohn/Python/venvs/mdr/gametes_logs/target_scores.txt'
with open(output_txt, 'w') as t1:
    # Pair every dataset with its label directly instead of indexing both
    # lists with a hard-coded count. NOTE(review): zip stops at the shorter
    # list, so a length mismatch is truncated rather than raising IndexError.
    for dataset, dataset_name in zip(gametes_all, dataset_names):
        load_dataset = pd.read_csv(dataset, sep='\t')
        phenotype = load_dataset['Class'].values
        # Only these two columns are used as features, so there is no need
        # to drop 'Class' first.
        individuals = load_dataset[['M0P0', 'M0P1']].values

        # The repeat counter is unused; '_' avoids reusing the outer loop's
        # variable name.
        for _ in range(N_SPLITS):
            X_train, X_test, y_train, y_test = train_test_split(
                individuals, phenotype, train_size=0.75, test_size=0.25)

            target_pipeline = MDR()
            target_pipeline.fit(X_train, y_train)

            # One tab-separated line per split: dataset label, score, tag.
            t1.write('{}\t{}\tmdr-perfect\n'.format(
                dataset_name, target_pipeline.score(X_test, y_test)))