def test_cross_validation_split_all_data__no_output_data_lost(self):
    """Check that fold 0's train + test outputs account for every output sample.

    The raw outputs are converted to nested lists and zero-padded, then
    paired off one-to-one against the concatenation of ``y_trains[0]`` and
    ``y_tests[0]``. Each matched pair is replaced by ``None`` on both sides,
    so the two lists are equal at the end only if nothing is left unmatched
    in a way that differs between them.
    """
    from main import convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

    KB, supports, outputs, _, _ = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

    # Processes data.
    _, _, _, _, y_trains, y_tests = cross_validation_split_all_data(7, KB, supports, outputs)

    expected = [sample.tolist() for sample in outputs]

    # Pad every timestep with 0.0 up to the widest *first* timestep found in
    # any sample (presumably mirroring the padding done by the split -- confirm).
    width = max((len(sample[0]) for sample in expected), default=0)
    for sample in expected:
        for timestep in sample:
            timestep.extend([0.0] * (width - len(timestep)))

    actual = [*y_trains[0].tolist(), *y_tests[0].tolist()]

    # Greedy one-to-one matching: the first still-unmatched equal entry in
    # `actual` is consumed for each expected sample.
    for exp_idx, wanted in enumerate(expected):
        hit = next((i for i, got in enumerate(actual) if wanted == got), None)
        if hit is not None:
            expected[exp_idx] = None
            actual[hit] = None

    self.assertEqual(actual, expected)
def test_pad_kb__everyone_same_size(self):
    """Check that every entry returned by ``pad_kb`` has the same length as the first."""
    from main import get_rdf_data, pad_kb

    padded = pad_kb(get_rdf_data('rdfData/gfo-1.0.json')['kB'])

    for row in padded:
        self.assertEqual(len(padded[0]), len(row))
def test_pad_kb__removing_padding_reveals_original(self):
    """Check that stripping the padding from ``pad_kb``'s result restores the input.

    Each padded sample is truncated at its first ``0.0`` and the result is
    compared with a snapshot of the knowledge base taken *before* padding.
    """
    import copy

    from main import get_rdf_data, pad_kb

    data = get_rdf_data('rdfData/gfo-1.0.json')

    # Snapshot the input first: if pad_kb pads in place (or returns the very
    # same list object) the comparison below would otherwise compare an
    # object with itself and could never fail.
    original = copy.deepcopy(data['kB'])
    padded = pad_kb(data['kB'])

    for sample in padded:
        for index, value in enumerate(sample):
            if value == 0.0:
                # Everything from the first 0.0 onwards is treated as padding.
                # NOTE(review): this assumes 0.0 never occurs in real data -- confirm.
                del sample[index:]
                break

    self.assertEqual(original, padded)
def test_get_labels_from_encoding__handles_true_and_pred_values_the_same(self):
    """Check that identical true/pred encodings decode to identical label arrays.

    The same test-fold array is passed as both arguments of
    ``get_labels_from_encoding``; the two returned arrays must then match
    element for element.
    """
    import numpy as np

    from main import get_labels_from_encoding, convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

    KB, supports, outputs, numConcepts, numRoles = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))
    allTheData = cross_validation_split_all_data(5, KB, supports, outputs)
    KBs_tests, KBs_trains, X_trains, X_tests, y_trains, y_tests = allTheData

    # NOTE(review): 28 and 14 are hard-coded; presumably they are the concept
    # and role counts for this ontology (cf. numConcepts / numRoles) -- confirm.
    trueArr, predArr = get_labels_from_encoding(y_tests[0], y_tests[0], 28, 14)

    # Compare element-wise. Comparing `trueArr.all()` with `predArr.all()`
    # only compares two reduced truth values, which passes for almost any
    # pair of arrays.
    np.testing.assert_array_equal(trueArr, predArr)
def test_cross_validation_split_all_data__correct_mapping_fromKBToSuppToOuts_of_test(self):
    """Check that fold 0's test split keeps each KB aligned with its supports and outputs.

    For every test sample whose first KB entry equals a KB from the unsplit
    data, the test sample's supports and outputs must equal the zero-padded
    supports and outputs stored at that same index in the unsplit data.
    """
    from main import convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

    def as_padded_lists(samples):
        # Convert each sample to nested lists and pad every timestep with 0.0
        # up to the widest first timestep found in any sample.
        rows = [sample.tolist() for sample in samples]
        width = max((len(row[0]) for row in rows), default=0)
        for row in rows:
            for timestep in row:
                timestep.extend([0.0] * (width - len(timestep)))
        return rows

    KB, supports, outputs, _, _ = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

    # Processes data.
    KBs_tests, _, _, X_tests, _, y_tests = cross_validation_split_all_data(3, KB, supports, outputs)

    trueKB = KB.tolist()
    crossKB = KBs_tests[0].tolist()

    # Outputs ----------------------
    trueOuts = as_padded_lists(outputs)
    actualOuts = y_tests[0].tolist()

    # Supports ---------------------
    trueSupp = as_padded_lists(supports)
    actualSupp = X_tests[0].tolist()

    for fold_idx, fold_kb in enumerate(crossKB):
        for src_idx, src_kb in enumerate(trueKB):
            # Only the first entry of the fold's KB sample is compared with
            # the unsplit KB.
            if fold_kb[0] == src_kb:
                self.assertTrue(actualOuts[fold_idx] == trueOuts[src_idx])
                self.assertTrue(actualSupp[fold_idx] == trueSupp[src_idx])
def test_cross_validation_split_all_data__correct_numOf_folds_returned(self):
    """Check that each of the six arrays returned by the split has one entry per fold."""
    from main import convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

    num_folds = 7
    KB, supports, outputs, num1, num2 = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

    # Processes data.
    KBs_tests, KBs_trains, X_trains, X_tests, y_trains, y_tests = cross_validation_split_all_data(
        num_folds, KB, supports, outputs)

    self.assertEqual(num_folds, KBs_tests.shape[0])
    self.assertEqual(num_folds, KBs_trains.shape[0])
    self.assertEqual(num_folds, X_tests.shape[0])
    self.assertEqual(num_folds, X_trains.shape[0])
    self.assertEqual(num_folds, y_tests.shape[0])
    # Check y_trains as well; y_tests used to be asserted twice, leaving
    # y_trains unchecked.
    self.assertEqual(num_folds, y_trains.shape[0])
def test_convert_data_to_arrays__shape_preserved(self):
    """Check that ``convert_data_to_arrays`` keeps the sample and per-sample counts.

    The converted KB, supports and outputs must have as many samples as the
    raw data, and for each sample the supports and outputs must keep their
    raw length and agree in length with each other.
    """
    from main import convert_data_to_arrays, get_rdf_data

    data = get_rdf_data('rdfData/gfo-1.0.json')
    raw_kb, raw_supports, raw_outputs = data['kB'], data['supports'], data['outputs']
    # These keys are looked up but their values are not used by the checks.
    _ = data['concepts'], data['roles']

    arr_kb, arr_supports, arr_outputs, _, _ = convert_data_to_arrays(data)

    self.assertEqual(len(raw_kb), len(arr_kb))
    self.assertEqual(len(raw_supports), len(arr_supports))
    self.assertEqual(len(raw_outputs), len(arr_outputs))

    for idx in range(len(raw_kb)):
        self.assertEqual(len(raw_supports[idx]), len(arr_supports[idx]))
        self.assertEqual(len(raw_outputs[idx]), len(arr_outputs[idx]))
        self.assertEqual(len(arr_outputs[idx]), len(arr_supports[idx]))
def test_cross_validation_split_all_data__correct_KB_repetition(self):
    """Check that, within every sample of every fold, all KB entries equal the first one.

    The check is applied to the test folds first and then to the train folds.
    """
    from main import convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

    KB, supports, outputs, _, _ = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

    # Processes data.
    KBs_tests, KBs_trains, _, _, _, _ = cross_validation_split_all_data(8, KB, supports, outputs)

    for fold_array in (KBs_tests, KBs_trains):
        for fold in fold_array.tolist():
            for sample in fold:
                reference = sample[0]
                for entry in sample:
                    self.assertTrue(reference == entry)
def test_get_labels_from_encoding__each_sample_has_correct_size(self):
    """Check the shape of the label arrays decoded from every test fold.

    For each fold, the same array is passed as both the true and the
    predicted encoding. The two results must match element for element, both
    must have the fold's leading two dimensions as their shape, and every
    timestep must hold one third as many entries as the fold's last dimension.
    """
    import numpy as np

    from main import get_labels_from_encoding, convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

    KB, supports, outputs, numConcepts, numRoles = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))
    allTheData = cross_validation_split_all_data(5, KB, supports, outputs)
    KBs_tests, KBs_trains, X_trains, X_tests, y_trains, y_tests = allTheData

    for fold in y_tests:
        # NOTE(review): 28 and 14 are hard-coded; presumably the concept and
        # role counts for this ontology (cf. numConcepts / numRoles) -- confirm.
        trueArr, predArr = get_labels_from_encoding(fold, fold, 28, 14)

        # Element-wise comparison; comparing `.all()` results only compares
        # two reduced truth values.
        np.testing.assert_array_equal(trueArr, predArr)

        s = fold.shape
        # Two separate assertions: the third positional argument of
        # assertEqual is the failure message, so passing predArr.shape there
        # left it unchecked.
        self.assertEqual((s[0], s[1]), trueArr.shape)
        self.assertEqual((s[0], s[1]), predArr.shape)

        for true_sample, pred_sample in zip(trueArr, predArr):
            for true_step, pred_step in zip(true_sample, pred_sample):
                # NOTE(review): true division -- this only holds when s[2] is
                # a multiple of 3; confirm that is guaranteed by the encoding.
                self.assertEqual(len(true_step), s[2] / 3)
                self.assertEqual(len(pred_step), s[2] / 3)
def test_cross_validation_split_all_data__no_kb_data_lost(self):
    """Check that fold 0's train + test KBs account for every KB in the unsplit data.

    Each fold sample is reduced to its first entry, then paired off
    one-to-one against the unsplit KB list. Matched pairs are replaced by
    ``None`` on both sides before the two lists are compared.
    """
    from main import convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

    KB, supports, outputs, _, _ = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

    # Processes data.
    KBs_tests, KBs_trains, _, _, _, _ = cross_validation_split_all_data(5, KB, supports, outputs)

    expected = KB.tolist()

    combined = [*KBs_trains[0].tolist(), *KBs_tests[0].tolist()]
    # Keep only the first entry of each fold sample for comparison with the
    # unsplit KB.
    actual = [entry[0] for entry in combined]

    # Greedy one-to-one matching: the first still-unmatched equal entry in
    # `actual` is consumed for each expected KB.
    for exp_idx, wanted in enumerate(expected):
        hit = next((i for i, got in enumerate(actual) if wanted == got), None)
        if hit is not None:
            expected[exp_idx] = None
            actual[hit] = None

    self.assertEqual(actual, expected)