if int(record[0]) == x.predict(record, rules)["predict"]: corrects += 1 else: pr_rules = x.predict(record, rules)["rule"] cr_rules = x.get_rule_truth(record)["rule"] edit += x.detect_cluster(pr_rules, cr_rules, record[1:len(record)]) print(' Training Time: {:.2f}s'.format(time.time() - now)) print(" Correct: {}, Total: {}, Accuracy: {:.2f}%".format( corrects, len(data), 100 * corrects / len(data))) return 100 * corrects / len(data) if __name__ == "__main__": # read data data = csv.read_file(cf.full_path, 'float') kf = KFold(n_splits=cf.k_fold, shuffle=cf.shuffle) result_data = [1 for i in range(cf.num_classes)] for i in range(cf.num_classes): data_class = [] for j in data: if j[0] == i + 1: data_class.append(j) result_data[i] = (data_class, list(kf.split(data_class))) x = [] y = [] for i in range(cf.k_fold): print("Fold {}/{} ".format(i + 1, cf.k_fold))
import time

import config as cf
from code import csv_processor as csv
from code.clustering import Clustering
from code.make_rule import Rule
from code.predict import Predict

# Script entry point: cluster the training data, build the rule set from the
# clusters, persist both through csv.write_file, and print how long each
# stage took. The trailing assignments set up the state for a prediction
# pass over the same data.
if __name__ == "__main__":
    _1st = time.time()

    # Load the training records.
    data = csv.read_file(cf.train_path, 'float')

    # Stage 1: clustering, saved to cf.clusters_path.
    clusters = Clustering(data).clusters
    csv.write_file(cf.clusters_path, clusters)
    _2nd = time.time()
    clustering_elapsed = _2nd - _1st
    print(' __Clustering time: {:.2f}s'.format(clustering_elapsed))

    # Stage 2: rule construction, saved to cf.rule_path.
    rules = Rule(data, clusters).colection_rules()
    csv.write_file(cf.rule_path, rules)
    _3rd = time.time()
    rule_elapsed = _3rd - _2nd
    print(' __Making rule time: {:.2f}s'.format(rule_elapsed))

    # Stage 3: predictor and bookkeeping for the prediction pass.
    x = Predict(data, clusters, rules)
    corrects, edit, editCollection = 0, [], []
def predict(data, slug):
    """Classify every record in *data* and print timing and accuracy.

    Relies on the module-level ``clusters`` and ``rules`` that the
    ``__main__`` section below loads before calling this function.

    Args:
        data: Sequence of records; ``record[0]`` is compared (as an int)
            with the predicted label, the remaining items are passed on
            to ``detect_cluster``.
        slug: Label inserted into the timing line (e.g. "Train Data").

    Returns:
        None. Results are printed only.
    """
    now = time.time()
    x = Predict(data, clusters, rules)
    corrects = 0
    # NOTE(review): `edit` is accumulated but never read in this function;
    # the calls are kept because their side effects are not visible here.
    edit = []
    for record in data:
        # Predict once and reuse the result for both the label check and
        # the rule lookup (previously predicted twice on every miss).
        outcome = x.predict(record, rules)
        if int(record[0]) == outcome["predict"]:
            corrects += 1
        else:
            pr_rules = outcome["rule"]
            cr_rules = x.get_rule_truth(record)["rule"]
            edit += x.detect_cluster(pr_rules, cr_rules, record[1:])

    total = len(data)
    # Report 0% instead of raising ZeroDivisionError on an empty data set.
    accuracy = 100 * corrects / total if total else 0.0
    print('\n __{} >> Predict time: {:.2f}s'.format(slug, time.time() - now))
    print(" __Correct: {}, Total: {}, Accuracy: {:.2f}%\n".format(
        corrects, total, accuracy))


if __name__ == "__main__":
    # read data
    data = csv.read_file(cf.train_path, 'float')
    test_data = csv.read_file(cf.test_path, 'float')
    clusters = csv.read_file(cf.clusters_path, 'float')
    rules = csv.read_file(cf.rule_path, 'float')

    predict(data, "Train Data")
    predict(test_data, "Test Data")
"rule_id": id, "rule": r[id][1: self.num_properties +1] } def num_corrects(self, isPrint=1): corrects = 0 now = time.time() for ix, record in enumerate(self.data): if int(record[0]) == self.predict(record, self.rules)["predict"]: corrects += 1 if isPrint: print(" Correct: {}, Total: {}, Accuracy: {:.2f}%, Time: {:.2f} s".format(corrects, len(self.data), 100*corrects / len(self.data), time.time() - now)) return corrects if __name__ == "__main__": # read data data = csv.read_file(cf.train_path, 'float') clusters = csv.read_file(cf.clusters_path, 'float') rules = csv.read_file(cf.rule_path, 'float') edit = csv.read_file(cf.editFmc_path, 'float') fmc = csv.read_file(cf.fmc_path, 'float') x = Predict(data, clusters, rules, fmc) print('\n Train Data >>') correct = x.num_corrects() testData = csv.read_file(cf.test_path, 'float') x = Predict(testData, clusters, rules, fmc) print('\n Test Data >>') correct = x.num_corrects() print('\n')
if not loai: self.rules.append(rule) def colection_rules(self): percent = [0 for i in range(cf.num_classes)] for i in self.data: percent[int(i[0])-1] += 1 a = [(i/sum(percent))**1 for i in percent] b = [int(i*cf.num_rules) for i in a] def GetBurn(rule): return rule[self.num_properties + 1] self.rules.sort(reverse=True, key=GetBurn) _rules = [] for rule in self.rules: if b[int(rule[0]) - 1] > 0: _rules.append(rule) b[int(rule[0]) - 1] -= 1 def SortByClass(rule): return rule[0] _rules.sort(key=SortByClass) return _rules if __name__ == "__main__": # read data data = csv.read_file(cf.train_path, 'float') clusters = csv.read_file(cf.clusters_path, 'float') # make_rule and save x = Rule(data, clusters) csv.write_file(cf.rule_path, x.colection_rules())
def train_func(data):
    """Train on *data*, then greedily tune the ``fmc`` table and report accuracy.

    Steps, all visible in the body below:
      1. Cluster *data* and write the clusters to ``cf.clusters_path``.
      2. Build rules from the clusters and write them to ``cf.rule_path``.
      3. Predict every record; for each miss, collect the entries returned
         by ``detect_cluster``.
      4. Write a default ``fmc`` table (all 0.5) to ``cf.fmc_path`` and
         measure the baseline with ``ha_predict.Predict(...).num_corrects()``.
      5. Try each collected entry in turn: apply it to the last saved table,
         re-score, and save the table only if the score did not drop.

    Args:
        data: Sequence of records; ``record[0]`` is the label and the rest
            are passed to ``detect_cluster``. Must be non-empty because
            ``data[0]`` is read to size the ``fmc`` table.

    Returns:
        Tuple ``(baseline_accuracy, tuned_accuracy)``, both percentages.
    """
    now = time.time()

    # clustering and save
    clusters = Clustering(data).clusters
    csv.write_file(cf.clusters_path, clusters)

    # make_rule and save
    rules = Rule(data, clusters).colection_rules()
    csv.write_file(cf.rule_path, rules)

    # Predict and gather edit candidates from the misclassified records.
    predictor = Predict(data, clusters, rules)
    edit = []
    for record in data:
        # Predict once and reuse the result (previously predicted twice on
        # every miss).
        outcome = predictor.predict(record, rules)
        if int(record[0]) == outcome["predict"]:
            continue
        pr_rules = outcome["rule"]
        cr_rules = predictor.get_rule_truth(record)["rule"]
        edit += predictor.detect_cluster(pr_rules, cr_rules, record[1:])

    # Keep entries whose third field lies in (0.5, 0.99) and whose third and
    # fifth fields sum to exactly 1, ordered by the third field.
    # NOTE(review): `== 1` is an exact float comparison; confirm that
    # detect_cluster produces values for which this is intended.
    candidates = [
        e for e in edit
        if 0.5 < e[2] < 0.99 and e[2] + e[4] == 1
    ]
    candidates.sort(key=lambda e: e[2])

    # default fcm_path: one row per non-label column, 2 * cf.k_mean entries.
    fmc = [
        [0.5 for _ in range(2 * cf.k_mean)]
        for _ in range(len(data[0]) - 1)
    ]
    csv.write_file(cf.fmc_path, fmc)

    correct = ha_predict.Predict(data, clusters, rules, fmc).num_corrects()
    train_old = correct * 100 / len(data)

    # Fields are presumably (attribute index, wrong cluster, its membership,
    # true cluster, its membership) -- TODO confirm against detect_cluster.
    for attr, false_idx, u_false, true_idx, u_true in candidates:
        # Reload the last accepted table so a rejected edit is discarded.
        fmc = csv.read_file(cf.fmc_path, 'float')
        ratio = u_true / u_false
        if true_idx > false_idx:
            fmc[attr][2 * true_idx] = ratio * fmc[attr][2 * true_idx - 1]
        else:
            fmc[attr][2 * true_idx + 1] = ratio * fmc[attr][2 * true_idx + 2]

        y = ha_predict.Predict(data, clusters, rules, fmc).num_corrects()
        candidate = (attr, false_idx, u_false, true_idx, u_true)
        if correct <= y:
            print(" -----> edited", candidate, "\n")
            correct = y
            csv.write_file(cf.fmc_path, fmc)
        else:
            print(" -----> rejected", candidate, "\n")

    print(" Time: {:.2f}s\n T1FS Train Accuracy: {:.3f}%\n HA_T2FS Accuracy: {:.3f}%".format(
        time.time() - now, train_old, correct * 100 / len(data)))
    return train_old, correct * 100 / len(data)