def import_dataset_arff(f, explain_indices: List[int],
                        random_explain_dataset: bool) -> Tuple[Dataset, Dataset, List[str]]:
    """Load one ARFF dataset and split it into a training and an explain part.

    Args:
        f: ARFF source, handed unchanged to ``load_arff``.
        explain_indices: Row indices to move from the training data into the
            explain dataset. Ignored (overwritten) when
            ``random_explain_dataset`` is true.
        random_explain_dataset: When true, the explain rows are picked at
            random with a fixed seed instead of using ``explain_indices``.

    Returns:
        A tuple ``(train_dataset, explain_dataset, explain_ids)`` where
        ``explain_ids`` are the explain row indices rendered as strings.

    Raises:
        ValueError: If ``explain_indices`` contains a duplicate or an index
            that is not a row of the loaded dataset.
    """
    dataset = load_arff(f)
    dataset_len = len(dataset)
    all_indices = range(dataset_len)

    if random_explain_dataset:
        # Fixed seed keeps the selection reproducible. Note that this reseeds
        # the module-level ``random`` generator.
        random.seed(1)

        # Small datasets give up 20% of their rows; larger ones a fixed count.
        MAX_SAMPLE_COUNT = 100
        if dataset_len < (2 * MAX_SAMPLE_COUNT):
            samples = int(0.2 * dataset_len)
        else:
            samples = MAX_SAMPLE_COUNT

        # Randomly pick the instances that leave the training dataset and
        # form the explain dataset.
        explain_indices = random.sample(all_indices, samples)

    # Validate up front so a bad index fails with a clear message rather than
    # the opaque "x not in list" raised by list.remove().
    excluded = set(explain_indices)
    if len(excluded) != len(explain_indices):
        raise ValueError("explain_indices contains duplicate entries")
    unknown = [i for i in explain_indices if i not in all_indices]
    if unknown:
        raise ValueError(
            f"explain_indices not present in the dataset: {unknown}")

    # Single pass with O(1) membership tests instead of repeated list.remove().
    train_indices = [i for i in all_indices if i not in excluded]

    train_dataset = Dataset.from_indices(train_indices, dataset)
    explain_dataset = Dataset.from_indices(explain_indices, dataset)

    return train_dataset, explain_dataset, [str(i) for i in explain_indices]
def import_datasets_arff(f, f_explain, explain_indices: List[int],
                         random_explain_dataset: bool) -> Tuple[Dataset, Dataset, List[str]]:
    """Load separate training and explain datasets from two ARFF sources.

    Args:
        f: ARFF source of the training data, handed to ``load_arff``.
        f_explain: ARFF source of the explain data, handed to ``load_arff``.
        explain_indices: Row indices of the explain dataset. Overwritten when
            ``random_explain_dataset`` is true; otherwise only echoed back in
            the returned id list.
        random_explain_dataset: When true, up to 300 rows of the explain
            dataset are picked at random with a fixed seed.

    Returns:
        A tuple ``(train_dataset, explain_dataset, explain_ids)`` where
        ``explain_ids`` are the explain row indices rendered as strings.
    """
    train_dataset = load_arff(f)
    explain_dataset = load_arff(f_explain)

    len_explain_dataset = len(explain_dataset)

    if random_explain_dataset:
        # Fixed seed keeps the selection reproducible. Note that this reseeds
        # the module-level ``random`` generator.
        random.seed(7)

        # Cap the sample at the population size: random.sample() raises
        # ValueError when asked for more items than are available.
        sample_count = min(300, len_explain_dataset)
        explain_indices = random.sample(range(len_explain_dataset), sample_count)
        explain_dataset = Dataset.from_indices(explain_indices, explain_dataset)

    # NOTE(review): without random selection the explain dataset is returned
    # whole and is not subset by the caller-supplied explain_indices — confirm
    # this is intended.
    return train_dataset, explain_dataset, [str(i) for i in explain_indices]