def test_aleph_mysql(self):
    """Induce an Aleph feature theory from the DB context and compare it
    against the stored reference output."""
    converter = AlephConverter(self.context, target_att_val='east')
    learner = Aleph()
    theory, _features = learner.induce(
        'induce_features',
        converter.positive_examples(),
        converter.negative_examples(),
        converter.background_knowledge(),
    )
    expected_path = os.path.join(RESULTS_FOLDER, 'wrappers', 'aleph', 'trains.arff')
    with open(expected_path) as expected:
        self.assertMultiLineEqual(theory, expected.read())
def test_aleph_mysql(self):
    """Check that Aleph feature induction reproduces the reference theory
    stored under the results folder."""
    conv = AlephConverter(self.context, target_att_val='east')
    induced = Aleph().induce(
        'induce_features',
        conv.positive_examples(),
        conv.negative_examples(),
        conv.background_knowledge(),
    )
    theory, _ = induced
    reference = os.path.join(RESULTS_FOLDER, 'wrappers', 'aleph', 'trains.arff')
    with open(reference) as f:
        self.assertMultiLineEqual(theory, f.read())
from rdm.db import DBVendor, DBConnection, DBContext, AlephConverter
from rdm.wrappers import Aleph

# Credentials for the public example ILP database.
connection = DBConnection(
    'ilp',              # user
    'ilp123',           # password
    'workflow.ijs.si',  # host
    'ilp',              # database
)

# Learning task: predict the 'direction' attribute of the 'trains' table.
context = DBContext(connection, target_table='trains', target_att='direction')

# Propositionalize the relational data with Aleph and show the theory.
conv = AlephConverter(context, target_att_val='east')
aleph = Aleph()
theory, features = aleph.induce(
    'induce_features',
    conv.positive_examples(),
    conv.negative_examples(),
    conv.background_knowledge(),
)
print(theory)
def _append_log(result_file, message):
    """Append one message to the shared result log, closing the handle."""
    with open(result_file, 'a') as f:
        f.write(message)


def _split_plus_row(row):
    """Split one propositional row into (binary features, target label).

    Feature cells equal to "+" map to 1, everything else to 0; the last
    cell is the (untouched) target label.
    """
    row = list(row)
    return [1 if x == "+" else 0 for x in row[:-1]], row[-1]


def _parse_csv_rows(csv_text):
    """Parse mapper CSV output into binary feature rows and target labels.

    Rows whose last cell is empty (blank/trailing lines) are skipped.
    """
    entries, targets = [], []
    for line in csv_text.split("\n"):
        cells = line.strip().split(",")
        if cells[-1] != '':
            feats, target = _split_plus_row(cells)
            entries.append(feats)
            targets.append(target)
    return entries, targets


def _parse_tagged_arff(arff_text, data_tag, as_float=False):
    """Parse the data section of a textual ARFF dump.

    Rows before the line containing ``data_tag`` ("@data" for TreeLiker,
    "@DATA" for Wordification) are ignored.  Features are either binarized
    on a "+" substring (TreeLiker) or parsed as floats (Wordification).
    """
    entries, targets = [], []
    in_data = False
    for line in arff_text.split("\n"):
        if in_data:
            cells = line.split(",")
            if len(cells) > 1:
                cells = [c.replace(" ", "") for c in cells]
                targets.append(cells[-1])
                if as_float:
                    entries.append([float(c) for c in cells[:-1]])
                else:
                    entries.append([1 if "+" in c else 0 for c in cells[:-1]])
        if data_tag in line:
            in_data = True
    return entries, targets


def _build_datasets(entries, targets, entries_test, targets_test,
                    fit_on_union=False):
    """Convert parsed rows to numpy arrays and label-encode the targets.

    fit_on_union: fit the encoder on train+test labels (wordification path)
    instead of train labels only.  NOTE(review): with fit_on_union=False an
    unseen test label raises in ``transform`` — original behavior kept.
    Returns ((train_features, train_targets), (test_features, test_targets)).
    """
    train_features = pd.DataFrame(entries).to_numpy()
    train_targets = pd.DataFrame(targets).to_numpy()
    test_features = pd.DataFrame(entries_test).to_numpy()
    test_targets = pd.DataFrame(targets_test).to_numpy()
    le = preprocessing.LabelEncoder()
    if fit_on_union:
        le.fit(np.concatenate([train_targets, test_targets]))
    else:
        le.fit(train_targets)
    return ((train_features, le.transform(train_targets)),
            (test_features, le.transform(test_targets)))


def _dump_fold(transformations, fold_num, train_data, test_data, plan=None):
    """Pickle per-fold artifacts.

    Fix over the original: every dump now uses a ``with`` block so file
    handles are closed deterministically instead of leaking.
    """
    with open("{}_{}_train.p".format(transformations, fold_num), "wb") as f:
        pickle.dump(train_data, f)
    with open("{}_{}_test.p".format(transformations, fold_num), "wb") as f:
        pickle.dump(test_data, f)
    if plan is not None:
        with open("{}_{}_plan.p".format(transformations, fold_num), "wb") as f:
            pickle.dump(plan, f)


def _report_time(algorithm, run_time, result_file):
    """Echo the per-fold runtime to stdout and the result log."""
    print(algorithm, " TIME:", run_time)
    _append_log(result_file, "{} TIME: {}\n".format(algorithm, run_time))


def transform(algorithm, context, target_att_value, seed, result_file,
              transformations, fold_nums=10):
    """Propositionalize ``context`` with the chosen algorithm, fold by fold.

    For each of ``fold_nums`` cross-validation folds, runs one of the
    propositionalization algorithms ("aleph", "rsd", "treeliker",
    "wordification", or the relaggs/nrelaggs converter as fallback),
    converts the result to numpy train/test matrices with label-encoded
    targets, and pickles them as ``{transformations}_{fold}_train.p`` /
    ``..._test.p`` (plus ``..._plan.p`` for the fallback).  Runtimes are
    printed and appended to ``result_file``.

    :param algorithm: algorithm selector string (see above).
    :param context: DB learning context split by ``cv_split``.
    :param target_att_value: positive target value (used by the aleph path).
    :param seed: random seed for the CV split.
    :param result_file: path of the text log to append to.
    :param transformations: filename prefix for the pickled folds.
    :param fold_nums: number of CV folds (default 10).
    """
    fold_num = 0
    for train_context, test_context in cv_split(context, folds=fold_nums,
                                                random_seed=seed):
        fold_num += 1
        print("FOLD", fold_num)
        _append_log(result_file, "FOLD {}\n".format(fold_num))

        if algorithm == "aleph":
            start = time.time()
            conv = AlephConverter(train_context,
                                  target_att_val=target_att_value)
            train_arff, features = Aleph().induce(
                'induce_features',
                conv.positive_examples(),
                conv.negative_examples(),
                conv.background_knowledge(),
                printOutput=False)
            data = arff.loads(str(train_arff))
            entries, targets = [], []
            for row in data['data']:
                feats, target = _split_plus_row(row)
                entries.append(feats)
                targets.append(target)
            test_arff = mapper.domain_map(features, 'aleph', train_context,
                                          test_context, format="csv",
                                          positive_class=target_att_value)
            entries_test, targets_test = _parse_csv_rows(test_arff)
            # Collapse test labels into a binary positive/negative scheme.
            targets_test = ['positive' if x == target_att_value else 'negative'
                            for x in targets_test]
            train_data, test_data = _build_datasets(entries, targets,
                                                    entries_test, targets_test)
            run_time = time.time() - start
            _dump_fold(transformations, fold_num, train_data, test_data)
            _report_time(algorithm, run_time, result_file)

        elif algorithm == "rsd":
            start = time.time()
            conv = RSDConverter(train_context)
            features, train_arff, _ = RSD().induce(
                conv.background_knowledge(),
                examples=conv.all_examples(),
                cn2sd=False)
            data = arff.loads(str(train_arff))
            entries, targets = [], []
            for row in data['data']:
                feats, target = _split_plus_row(row)
                entries.append(feats)
                targets.append(target)
            test_arff = mapper.domain_map(features, 'rsd', train_context,
                                          test_context, format="csv")
            entries_test, targets_test = _parse_csv_rows(test_arff)
            train_data, test_data = _build_datasets(entries, targets,
                                                    entries_test, targets_test)
            run_time = time.time() - start
            _dump_fold(transformations, fold_num, train_data, test_data)
            _report_time(algorithm, run_time, result_file)

        elif algorithm == "treeliker":
            start = time.time()
            conv = TreeLikerConverter(train_context)
            conv_test = TreeLikerConverter(test_context)
            treeliker = TreeLiker(conv.dataset(), conv.default_template(),
                                  conv_test.dataset())
            train_arff, test_arff = treeliker.run()
            entries, targets = _parse_tagged_arff(train_arff, "@data")
            entries_test, targets_test = _parse_tagged_arff(test_arff, "@data")
            train_data, test_data = _build_datasets(entries, targets,
                                                    entries_test, targets_test)
            run_time = time.time() - start
            _dump_fold(transformations, fold_num, train_data, test_data)
            _report_time(algorithm, run_time, result_file)

        elif algorithm == "wordification":
            start = time.time()
            corange = OrangeConverter(train_context)
            torange = OrangeConverter(test_context)
            wordification = Wordification(corange.target_Orange_table(),
                                          corange.other_Orange_tables(),
                                          train_context)
            wordification.run(1)
            wordification.calculate_weights()
            train_arff = wordification.to_arff()
            wordification_test = Wordification(torange.target_Orange_table(),
                                               torange.other_Orange_tables(),
                                               test_context)
            wordification_test.run(1)
            wordification_test.calculate_weights()
            idfs = wordification.idf
            docs = wordification_test.resulting_documents
            classes = [str(a) for a in wordification_test.resulting_classes]
            feature_names = wordification.word_features
            # Build tf-idf-style test vectors against the train vocabulary.
            feature_vectors = []
            for doc in docs:
                doc_vec = []
                for feature in feature_names:
                    cnt = sum(1 for token in doc if token == feature)
                    doc_vec.append(cnt * idfs[feature])
                feature_vectors.append(doc_vec)
            # NOTE(review): looks like leftover debug output — kept for parity.
            print(feature_vectors, classes)
            # Value unused, but to_arff() may have side effects — kept.
            test_arff = wordification_test.to_arff()
            entries, targets = _parse_tagged_arff(train_arff, "@DATA",
                                                  as_float=True)
            # Wordification fits the encoder on train+test labels.
            train_data, test_data = _build_datasets(entries, targets,
                                                    feature_vectors, classes,
                                                    fit_on_union=True)
            run_time = time.time() - start
            _dump_fold(transformations, fold_num, train_data, test_data)
            _report_time(algorithm, run_time, result_file)

        else:
            # relaggs / nrelaggs: the converter measures its own runtime.
            converter = context_converter(train_context, test_context,
                                          verbose=0)
            train_data = converter.get_train()
            test_data = converter.get_test()
            plan = converter.get_plan()
            _dump_fold(transformations, fold_num, train_data, test_data,
                       plan=plan)
            _report_time(algorithm, converter.get_time(), result_file)
random_seed=0): # Find features on the train set start = timer() if learner == "RSD": conv = RSDConverter(train_context) rsd = RSD() features, train_arff, _ = rsd.induce( conv.background_knowledge(), # Background knowledge examples=conv.all_examples(), # Training examples cn2sd=False # Disable built-in subgroup discovery ) if learner == "aleph": conv = AlephConverter(context, target_att_val=target_attr_value) aleph = Aleph() train_arff, features = aleph.induce('induce_features', conv.positive_examples(), conv.negative_examples(), conv.background_knowledge(), printOutput=False) if learner == "treeliker": conv = TreeLikerConverter(train_context) conv2 = TreeLikerConverter(test_context) treeliker = TreeLiker(conv.dataset(), conv.default_template(), conv2.dataset()) # Runs RelF by default train_arff, test_arff = treeliker.run() wtag = False entries = []