def transform_problem(multilabel_dataset, feature_selector, classifier) -> list:
    """Run the partition -> feature selection -> classification pipeline.

    The dataset is split with ``partition_dataset``; ``select_features`` is
    applied to every partition together with ``feature_selector``;
    ``classify`` is applied to every feature-selection result together with
    ``classifier``; the per-partition outputs are then handed to
    ``combine_partitioned_results``.

    Args:
        multilabel_dataset: Dataset passed to ``partition_dataset`` and,
            unchanged, to ``combine_partitioned_results``.
        feature_selector: Second argument of every ``select_features`` call.
        classifier: Second argument of every ``classify`` call.

    Returns:
        Whatever ``combine_partitioned_results`` returns.
        NOTE(review): the annotation used to be the literal ``[]``, so a list
        is assumed here -- confirm against ``combine_partitioned_results``.
    """
    partitioned_datasets = partition_dataset(multilabel_dataset)

    # as_list() is called on each stage before the next stage is built.
    fitted_feature_selectors = (
        Stream(partitioned_datasets)
        .map(lambda elem: select_features(elem, feature_selector))
        .as_list()
    )
    partitioned_results = (
        Stream(fitted_feature_selectors)
        .map(lambda elem: classify(elem, classifier))
        .as_list()
    )

    return combine_partitioned_results(multilabel_dataset, partitioned_results)
def partition_dataset(multilabel_dataset):
    """Return one ``augment_label`` result per label partition of the dataset.

    ``partition_labels`` supplies the label partitions; each of them is passed,
    together with the full ``multilabel_dataset``, to ``augment_label``.

    Args:
        multilabel_dataset: Dataset forwarded to ``partition_labels`` and to
            every ``augment_label`` call.

    Returns:
        The result of ``Stream.as_list()`` over the mapped partitions.
    """
    def augment(label_partition):
        # Every partition is augmented against the same, complete dataset.
        return augment_label(multilabel_dataset, label_partition)

    label_groups = partition_labels(multilabel_dataset)
    augmented = Stream(label_groups).map(augment)
    return augmented.as_list()
def test_chaining(self):
    """Check that filter() followed by map() on one Stream yields [3, 5].

    The even numbers of [1, 2, 3, 4] are kept and each is incremented by one.
    """
    expected = [3, 5]
    # Parenthesised chain instead of backslash continuations: a trailing
    # backslash after as_list() would glue the assert onto this statement.
    actual = (
        Stream([1, 2, 3, 4])
        .filter(lambda elem: elem % 2 == 0)
        .map(lambda elem: elem + 1)
        .as_list()
    )
    assert expected == actual, "should be [3, 5]"
def test_append_elem(self):
    """Check that two chained append_elem() calls yield [1, 2, 3, 4].

    Starts from Stream([1, 2]) and appends 3, then 4.
    """
    stream = Stream([1, 2]).append_elem(3).append_elem(4)
    actual = stream.as_list()
    expected = [1, 2, 3, 4]
    print("actual: ", actual)
    assert expected == actual, "should be [1, 2, 3, 4]"
def test_append_list(self):
    """Check that two chained append_list() calls yield [1, 2, 3, 4, 5, 6].

    Starts from Stream([1, 2]) and appends [3, 4], then [5, 6].
    """
    stream = Stream([1, 2]).append_list([3, 4]).append_list([5, 6])
    actual = stream.as_list()
    expected = [1, 2, 3, 4, 5, 6]
    print("actual: ", actual)
    assert expected == actual, "should be [1, 2, 3, 4, 5, 6]"
def test_map(self):
    """Check that map() with an increment-by-one function yields [2, 3, 4, 5]."""
    def increment(elem):
        return elem + 1

    actual = Stream([1, 2, 3, 4]).map(increment).as_list()
    expected = [2, 3, 4, 5]
    assert expected == actual, "should be [2, 3, 4, 5]"
def test_filter(self):
    """Check that filter() with an is-even predicate yields [2, 4]."""
    def is_even(elem):
        return elem % 2 == 0

    actual = Stream([1, 2, 3, 4]).filter(is_even).as_list()
    expected = [2, 4]
    assert expected == actual, "should be all even numbers"
def create_labels_dataframe(multilabel_dataset, partitioned_results):
    """Build a transposed DataFrame of the partitioned results.

    The frame is constructed from ``partitioned_results`` with an index made of
    the first item of every entry returned by ``get_total_labels_test``, and is
    then transposed.

    Args:
        multilabel_dataset: Dataset passed to ``get_total_labels_test``.
        partitioned_results: Data handed to the ``DataFrame`` constructor.
            NOTE(review): presumably one row per label -- confirm with caller.

    Returns:
        The transposed ``DataFrame``.
    """
    def first_item(entry):
        # Only position 0 of each label entry is used as its name.
        return entry[0]

    label_entries = get_total_labels_test(multilabel_dataset)
    label_names = Stream(label_entries).map(first_item).as_list()
    frame = DataFrame(partitioned_results, index=label_names)
    return frame.transpose()
def create_features_dataframe(multilabel_dataset):
    """Build a DataFrame of instance features with named columns.

    The data comes from ``get_instance_features_test``; the column names are
    the first item of every entry returned by ``get_total_features_test``.

    Args:
        multilabel_dataset: Dataset passed to both getter functions.

    Returns:
        A ``DataFrame`` built from the instance features, with
        ``columns=feature_names``.
    """
    def first_item(entry):
        # Only position 0 of each feature entry is used as its name.
        return entry[0]

    # Getter call order is kept as in the original.
    instance_features = get_instance_features_test(multilabel_dataset)
    feature_entries = get_total_features_test(multilabel_dataset)
    feature_names = Stream(feature_entries).map(first_item).as_list()
    return DataFrame(instance_features, columns=feature_names)