def test_cross_validation_split_all_data__no_output_data_lost(self):
        """Check that fold 0's train and test outputs together hold every output sample.

        The expected samples are the entries of ``outputs`` converted to nested
        lists, with every timestep zero-padded to the widest *first* timestep
        found across the samples. Matching expected/actual pairs are cancelled
        out (replaced by ``None``); the test passes when the two lists that
        remain are identical.
        """
        from main import convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

        KB, supports, outputs, num1, num2 = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

        # Split into 7 folds; only the output folds are inspected below.
        KBs_tests, KBs_trains, X_trains, X_tests, y_trains, y_tests = cross_validation_split_all_data(7, KB, supports, outputs)

        expected = [sample.tolist() for sample in outputs]

        # Target width is the longest first timestep of any sample (0 when there are no samples).
        padding_count = max([0] + [len(sample[0]) for sample in expected])

        # Right-pad every timestep with zeros up to the target width.
        for sample in expected:
            for timestep in sample:
                timestep.extend([0.0] * (padding_count - len(timestep)))

        # Fold 0's training outputs followed by its test outputs.
        actual = y_trains[0].tolist() + y_tests[0].tolist()

        # Cancel each expected sample against the first actual sample equal to it.
        for expected_idx, expected_sample in enumerate(expected):
            for actual_idx, actual_sample in enumerate(actual):
                if expected_sample == actual_sample:
                    expected[expected_idx] = None
                    actual[actual_idx] = None
                    break

        self.assertEqual(actual, expected)
    def test_pad_kb__everyone_same_size(self):
        """Check that every sample returned by ``pad_kb`` is as long as the first one."""
        from main import get_rdf_data, pad_kb

        padded = pad_kb(get_rdf_data('rdfData/gfo-1.0.json')['kB'])

        for row in padded:
            self.assertEqual(len(padded[0]), len(row))
    def test_pad_kb__removing_padding_reveals_original(self):
        """Check that stripping the padding from ``pad_kb``'s result gives back the input.

        Each padded sample is truncated at its first ``0.0`` entry and the
        result is compared with a copy of the knowledge base taken *before*
        padding.
        """
        import copy

        from main import get_rdf_data, pad_kb
        data = get_rdf_data('rdfData/gfo-1.0.json')
        kb = data['kB']
        # Snapshot the input first: if pad_kb pads in place (or returns lists
        # shared with its argument), comparing against ``kb`` itself would
        # compare an object with itself and could never fail.
        original = copy.deepcopy(kb)
        KB = pad_kb(kb)

        for sample in KB:
            for index, value in enumerate(sample):
                if value == 0.0:
                    # Everything from the first zero onwards is treated as padding.
                    del sample[index:]
                    break
        self.assertEqual(original, KB)
    def test_get_labels_from_encoding__handles_true_and_pred_values_the_same(self):
        """Check that identical true/pred encodings decode to identical labels.

        Fold 0's test outputs are passed as both the "true" and the "pred"
        argument of ``get_labels_from_encoding``; the two returned arrays must
        then match element for element.
        """
        import numpy.testing as npt

        from main import get_labels_from_encoding, convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

        KB, supports, outputs, numConcepts, numRoles = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

        KBs_tests, KBs_trains, X_trains, X_tests, y_trains, y_tests = cross_validation_split_all_data(5, KB, supports, outputs)

        # NOTE(review): 28 and 14 are hard-coded; presumably they are the
        # concept/role counts for this data file -- confirm against
        # numConcepts/numRoles.
        trueArr, predArr = get_labels_from_encoding(y_tests[0], y_tests[0], 28, 14)

        # Compare the full contents. Comparing ``trueArr.all()`` with
        # ``predArr.all()`` only compares two reductions and lets differing
        # arrays pass. Going through nested lists keeps the comparison
        # well-defined whatever the element type of the arrays is.
        npt.assert_equal(trueArr.tolist(), predArr.tolist())
    def test_cross_validation_split_all_data__correct_mapping_fromKBToSuppToOuts_of_test(self):
        """Check that fold 0's test KBs still line up with their supports and outputs.

        For every test sample whose first KB entry equals a KB in the unsplit
        data, the fold's output and support for that sample must equal the
        zero-padded output and support found at the same index of the unsplit
        data.
        """
        from main import convert_data_to_arrays, get_rdf_data, \
            cross_validation_split_all_data

        def to_padded_lists(arrays):
            # Convert each sample to nested lists, then right-pad every
            # timestep with zeros up to the widest first timestep of any sample.
            samples = [sample.tolist() for sample in arrays]
            width = max([0] + [len(sample[0]) for sample in samples])
            for sample in samples:
                for timestep in sample:
                    timestep.extend([0.0] * (width - len(timestep)))
            return samples

        KB, supports, outputs, num1, num2 = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

        # Split into 3 folds; only fold 0's test partition is inspected.
        KBs_tests, KBs_trains, X_trains, X_tests, y_trains, y_tests = cross_validation_split_all_data(3, KB, supports, outputs)

        trueKB = KB.tolist()
        crossKB = KBs_tests[0].tolist()

        # Outputs ----------------------
        trueOuts = to_padded_lists(outputs)
        actualOuts = y_tests[0].tolist()

        # Supports ---------------------
        trueSupp = to_padded_lists(supports)
        actualSupp = X_tests[0].tolist()

        # NOTE(review): a test sample that matches no source KB is silently
        # skipped, so this test passes if nothing matches at all -- confirm
        # that is intended.
        for fold_idx, fold_sample in enumerate(crossKB):
            for source_idx, source_kb in enumerate(trueKB):
                if fold_sample[0] == source_kb:
                    self.assertTrue(actualOuts[fold_idx] == trueOuts[source_idx])
                    self.assertTrue(actualSupp[fold_idx] == trueSupp[source_idx])
    def test_cross_validation_split_all_data__correct_numOf_folds_returned(self):
        """Check that each of the six returned collections has one entry per requested fold."""
        from main import convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

        KB, supports, outputs, num1, num2 = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

        # Processes data.
        KBs_tests, KBs_trains, X_trains, X_tests, y_trains, y_tests = cross_validation_split_all_data(7, KB, supports, outputs)

        # All six results are checked; y_trains was previously skipped because
        # y_tests was asserted twice.
        folds_by_name = (
            ('KBs_tests', KBs_tests),
            ('KBs_trains', KBs_trains),
            ('X_tests', X_tests),
            ('X_trains', X_trains),
            ('y_tests', y_tests),
            ('y_trains', y_trains),
        )
        for name, folds in folds_by_name:
            # The name is passed as the message so a failure says which result is wrong.
            self.assertEqual(7, folds.shape[0], name)
    def test_convert_data_to_arrays__shape_preserved(self):
        """Check that ``convert_data_to_arrays`` keeps the sample and per-sample lengths.

        The converted KB, supports and outputs must have as many samples as the
        raw data, and for each sample the converted supports and outputs must
        have the same length as their raw counterparts and as each other.
        """
        from main import convert_data_to_arrays, get_rdf_data

        data = get_rdf_data('rdfData/gfo-1.0.json')
        raw_kb = data['kB']
        raw_supports = data['supports']
        raw_outputs = data['outputs']
        # Read from the raw data first, then overwritten by the converted values below.
        numConcepts, numRoles = data['concepts'], data['roles']
        KB, supports, outputs, numConcepts, numRoles = convert_data_to_arrays(data)

        self.assertEqual(len(raw_kb), len(KB))
        self.assertEqual(len(raw_supports), len(supports))
        self.assertEqual(len(raw_outputs), len(outputs))

        for idx, _ in enumerate(raw_kb):
            self.assertEqual(len(raw_supports[idx]), len(supports[idx]))
            self.assertEqual(len(raw_outputs[idx]), len(outputs[idx]))
            self.assertEqual(len(outputs[idx]), len(supports[idx]))
    def test_cross_validation_split_all_data__correct_KB_repetition(self):
        """Check that, in every fold, each KB sample consists of repeats of its first entry.

        Applies to both the test and the train KB folds of an 8-fold split.
        """
        from main import convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

        KB, supports, outputs, num1, num2 = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

        # Split into 8 folds; only the KB folds are inspected below.
        KBs_tests, KBs_trains, X_trains, X_tests, y_trains, y_tests = cross_validation_split_all_data(8, KB, supports, outputs)

        # Test folds first, then train folds.
        for kb_folds in (KBs_tests, KBs_trains):
            for fold in kb_folds.tolist():
                for sample in fold:
                    first_entry = sample[0]
                    for entry in sample:
                        self.assertTrue(first_entry == entry)
    def test_get_labels_from_encoding__each_sample_has_correct_size(self):
        """Check the sizes of the label arrays decoded from every test fold.

        For each fold of a 5-fold split, the fold's outputs are passed as both
        the "true" and the "pred" encoding. The two decoded arrays must then
        be equal, both must have the fold's first two dimensions as their
        shape, and every entry must hold one third as many items as the size
        of the encoding's last axis.
        """
        import numpy.testing as npt

        from main import get_labels_from_encoding, convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

        KB, supports, outputs, numConcepts, numRoles = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

        KBs_tests, KBs_trains, X_trains, X_tests, y_trains, y_tests = cross_validation_split_all_data(5, KB, supports, outputs)

        for fold in y_tests:
            # NOTE(review): 28 and 14 are hard-coded; presumably they are the
            # concept/role counts for this data file -- confirm against
            # numConcepts/numRoles.
            trueArr, predArr = get_labels_from_encoding(fold, fold, 28, 14)

            # Compare the full contents; comparing ``.all()`` of each array
            # only compares two reductions and lets differing arrays pass.
            npt.assert_equal(trueArr.tolist(), predArr.tolist())

            s = fold.shape
            expected_shape = (s[0], s[1])
            # Two separate assertions: assertEqual's third positional argument
            # is the failure message, so passing predArr.shape there never
            # checked it.
            self.assertEqual(expected_shape, trueArr.shape)
            self.assertEqual(expected_shape, predArr.shape)

            labels_per_timestep = s[2] / 3
            # The shapes were just asserted equal, so the arrays can be walked in lockstep.
            for true_sample, pred_sample in zip(trueArr, predArr):
                for true_labels, pred_labels in zip(true_sample, pred_sample):
                    self.assertEqual(len(true_labels), labels_per_timestep)
                    self.assertEqual(len(pred_labels), labels_per_timestep)
    def test_cross_validation_split_all_data__no_kb_data_lost(self):
        """Check that fold 0's train and test KBs together hold every original KB.

        Each fold sample is reduced to its first entry before comparison.
        Matching expected/actual pairs are cancelled out (replaced by
        ``None``); the test passes when the two lists that remain are
        identical.
        """
        from main import convert_data_to_arrays, get_rdf_data, cross_validation_split_all_data

        KB, supports, outputs, numConcepts, numRoles = convert_data_to_arrays(get_rdf_data('rdfData/gfo-1.0.json'))

        # Split into 5 folds; only the KB folds are inspected below.
        KBs_tests, KBs_trains, X_trains, X_tests, y_trains, y_tests = cross_validation_split_all_data(5, KB, supports, outputs)

        expected = KB.tolist()

        # Fold 0's training KBs followed by its test KBs, keeping only the
        # first entry of each sample.
        fold_samples = KBs_trains[0].tolist() + KBs_tests[0].tolist()
        actual = [sample[0] for sample in fold_samples]

        # Cancel each expected KB against the first actual KB equal to it.
        for expected_idx, expected_kb in enumerate(expected):
            for actual_idx, actual_kb in enumerate(actual):
                if expected_kb == actual_kb:
                    expected[expected_idx] = None
                    actual[actual_idx] = None
                    break

        self.assertEqual(actual, expected)