Exemplo n.º 1
0
        if int(record[0]) == x.predict(record, rules)["predict"]:
            corrects += 1
        else:
            pr_rules = x.predict(record, rules)["rule"]
            cr_rules = x.get_rule_truth(record)["rule"]
            edit += x.detect_cluster(pr_rules, cr_rules, record[1:len(record)])
    print(' Training Time: {:.2f}s'.format(time.time() - now))
    print(" Correct: {}, Total: {}, Accuracy: {:.2f}%".format(
        corrects, len(data), 100 * corrects / len(data)))
    return 100 * corrects / len(data)


if __name__ == "__main__":
    # Script entry point: load the full data set, split it per class into
    # K folds, then iterate over the folds.

    # Read every record from cf.full_path; the 'float' argument is passed
    # through to csv.read_file unchanged.
    data = csv.read_file(cf.full_path, 'float')

    # One splitter, reused for every class below.
    kf = KFold(n_splits=cf.k_fold, shuffle=cf.shuffle)

    # Placeholder list with one slot per class; each slot is overwritten
    # in the loop that follows.
    result_data = [1 for i in range(cf.num_classes)]

    for i in range(cf.num_classes):
        # Gather the records whose first field equals the 1-based class
        # number (i + 1).
        data_class = []
        for j in data:
            if j[0] == i + 1: data_class.append(j)
        # Keep the class records together with the materialised list of
        # splits produced by kf.split for exactly those records.
        result_data[i] = (data_class, list(kf.split(data_class)))

    # NOTE(review): presumably per-fold result accumulators; nothing is
    # appended to them in the lines visible here -- confirm against the
    # rest of the loop.
    x = []
    y = []
    for i in range(cf.k_fold):
        # Progress banner, 1-based fold number.
        print("Fold {}/{} ".format(i + 1, cf.k_fold))
Exemplo n.º 2
0
import time
import config as cf
from code import csv_processor as csv
from code.clustering import Clustering
from code.make_rule import Rule
from code.predict import Predict

if __name__ == "__main__":
    # Script entry point: cluster the training data, derive rules from the
    # clusters, persist both, and set up the prediction pass.

    # Start of the first timed phase. It is taken before the file is read,
    # so the reported clustering time also includes loading the data.
    _1st = time.time()

    # read data
    data = csv.read_file(cf.train_path, 'float')

    # clustering and save
    clusters  = Clustering(data).clusters
    csv.write_file(cf.clusters_path, clusters)
    _2nd = time.time()
    print('  __Clustering time: {:.2f}s'.format(_2nd - _1st))

    # make_rule and save
    rules = Rule(data, clusters).colection_rules()
    csv.write_file(cf.rule_path, rules)
    _3rd = time.time()
    print('  __Making rule time: {:.2f}s'.format(_3rd - _2nd))

    # Build the predictor from the freshly computed clusters and rules.
    x = Predict(data, clusters, rules)

    # Accumulators initialised for the code that follows.
    # NOTE(review): presumably a correct-prediction counter and two lists
    # of candidate edits; they are not used in the lines visible here --
    # confirm against the rest of the script.
    corrects = 0
    edit = []
    editCollection = []
Exemplo n.º 3
0
def predict(data, slug):
    """Classify every record in *data* and print the timing and accuracy.

    The function reads the module-level names ``clusters`` and ``rules``
    (assigned in the ``__main__`` section) to build the predictor.

    Args:
        data: Sequence of records. ``record[0]`` is compared, after
            ``int()``, with the predicted class; ``record[1:]`` is handed
            to ``detect_cluster`` for misclassified records.
        slug: Label inserted into the timing line (e.g. "Train Data").

    Returns:
        None. The results are only printed.
    """
    now = time.time()
    x = Predict(data, clusters, rules)

    corrects = 0
    # NOTE(review): ``edit`` is collected but never read in this function.
    # The detect_cluster call is kept in case it has side effects --
    # confirm, and drop it if it is pure.
    edit = []

    for record in data:
        # Predict once and reuse the result for both the class check and
        # the rule lookup (the rule was previously fetched by a second,
        # identical predict call).
        outcome = x.predict(record, rules)
        if int(record[0]) == outcome["predict"]:
            corrects += 1
        else:
            pr_rules = outcome["rule"]
            cr_rules = x.get_rule_truth(record)["rule"]
            edit += x.detect_cluster(pr_rules, cr_rules, record[1:])

    total = len(data)
    # Guard against an empty data set, which used to raise
    # ZeroDivisionError while formatting the accuracy line.
    accuracy = 100 * corrects / total if total else 0.0

    print('\n  __{} >> Predict time: {:.2f}s'.format(slug, time.time() - now))
    print("  __Correct: {}, Total: {}, Accuracy: {:.2f}%\n".format(
        corrects, total, accuracy))


if __name__ == "__main__":

    # Load both data sets and the stored model in one pass. ``clusters``
    # and ``rules`` must keep these module-level names because predict()
    # reads them as globals.
    data, test_data, clusters, rules = [
        csv.read_file(path, 'float')
        for path in (cf.train_path, cf.test_path,
                     cf.clusters_path, cf.rule_path)
    ]

    # Report on the training set first, then on the held-out set.
    for records, slug in ((data, "Train Data"), (test_data, "Test Data")):
        predict(records, slug)
Exemplo n.º 4
0
            "rule_id": id,
            "rule": r[id][1: self.num_properties +1]
        }
    def num_corrects(self, isPrint=1):
        """Count the records in ``self.data`` that are predicted correctly.

        A record counts as correct when ``int(record[0])`` equals the
        ``"predict"`` entry returned by ``self.predict(record, self.rules)``.

        Args:
            isPrint: When truthy, print a summary line with the count, the
                total, the accuracy percentage and the elapsed time.

        Returns:
            The number of correctly predicted records.
        """
        now = time.time()
        corrects = sum(
            1
            for record in self.data
            if int(record[0]) == self.predict(record, self.rules)["predict"]
        )
        if isPrint:
            total = len(self.data)
            # An empty data set used to raise ZeroDivisionError here;
            # report 0% instead.
            accuracy = 100*corrects / total if total else 0.0
            print("  Correct: {}, Total: {}, Accuracy: {:.2f}%, Time: {:.2f} s".format(corrects, total, accuracy, time.time() - now))
        return corrects

if __name__ == "__main__":
    # Training records first, then the stored model artefacts, read in the
    # same file order as before. ``edit`` is loaded but not used below.
    data = csv.read_file(cf.train_path, 'float')
    clusters, rules, edit, fmc = [
        csv.read_file(path, 'float')
        for path in (cf.clusters_path, cf.rule_path,
                     cf.editFmc_path, cf.fmc_path)
    ]

    # Score the training set.
    x = Predict(data, clusters, rules, fmc)
    print('\n  Train Data >>')
    correct = x.num_corrects()

    # Score the test set with the same clusters, rules and fmc table.
    testData = csv.read_file(cf.test_path, 'float')
    x = Predict(testData, clusters, rules, fmc)
    print('\n  Test Data >>')
    correct = x.num_corrects()
    print('\n')
Exemplo n.º 5
0
        if not loai:
            self.rules.append(rule)

    def colection_rules(self):
        """Select the rules to keep, using a per-class quota.

        Each class receives a quota of
        ``int(class_share * cf.num_rules)`` rules, where ``class_share`` is
        the fraction of ``self.data`` records carrying that class label.
        Rules are taken in descending order of the value stored at index
        ``self.num_properties + 1`` until the quota of their class is used
        up, and the selection is returned ordered by class (``rule[0]``).

        Side effect: ``self.rules`` is sorted in place (descending by the
        value at index ``self.num_properties + 1``).

        Returns:
            The list of selected rules, sorted by ``rule[0]``. Empty when
            ``self.data`` is empty.
        """
        # Class labels are used as 1-based indices (label - 1).
        percent = [0] * cf.num_classes
        for record in self.data:
            percent[int(record[0]) - 1] += 1

        # Compute the total once. With no records every quota is zero,
        # where the share computation used to raise ZeroDivisionError.
        total = sum(percent)
        if total:
            quota = [int(count / total * cf.num_rules) for count in percent]
        else:
            quota = [0] * cf.num_classes

        self.rules.sort(
            reverse=True, key=lambda rule: rule[self.num_properties + 1])

        _rules = []
        for rule in self.rules:
            class_index = int(rule[0]) - 1
            if quota[class_index] > 0:
                _rules.append(rule)
                quota[class_index] -= 1

        _rules.sort(key=lambda rule: rule[0])
        return _rules

if __name__ == "__main__":
    # Load the training records and the previously stored clusters.
    data = csv.read_file(cf.train_path, 'float')
    clusters = csv.read_file(cf.clusters_path, 'float')

    # Build the rule set from them and persist the selected rules.
    x = Rule(data, clusters)
    selected_rules = x.colection_rules()
    csv.write_file(cf.rule_path, selected_rules)
Exemplo n.º 6
0
def train_func(data):
    """Train on *data*, then greedily tune the ``fmc`` table.

    Steps, in order:

    1. Cluster the data and write the clusters to ``cf.clusters_path``.
    2. Build the rules and write them to ``cf.rule_path``.
    3. For every record the ``Predict`` model misclassifies, collect the
       candidate edits returned by ``detect_cluster``.
    4. Write a default ``fmc`` table (every entry 0.5) to ``cf.fmc_path``
       and measure the baseline number of correct predictions with
       ``ha_predict.Predict``.
    5. Try each retained candidate edit; keep it only if the number of
       correct predictions does not drop. Accepted tables are written back
       to ``cf.fmc_path``.

    Args:
        data: Sequence of records. ``record[0]`` is compared, after
            ``int()``, with the predicted class; ``record[1:]`` is handed
            to ``detect_cluster``. ``len(data[0]) - 1`` gives the number of
            rows of the ``fmc`` table.

    Returns:
        A tuple ``(baseline_accuracy, tuned_accuracy)``, both percentages
        over ``len(data)``.
    """
    now = time.time()

    # clustering and save
    clusters = Clustering(data).clusters
    csv.write_file(cf.clusters_path, clusters)

    # make_rule and save
    rules = Rule(data, clusters).colection_rules()
    csv.write_file(cf.rule_path, rules)

    x = Predict(data, clusters, rules)

    # Collect candidate edits from every misclassified record.
    edit = []
    for record in data:
        # Predict once and reuse the result for both the class check and
        # the rule lookup (the rule was previously fetched by a second,
        # identical predict call).
        outcome = x.predict(record, rules)
        if int(record[0]) == outcome["predict"]:
            continue
        pr_rules = outcome["rule"]
        cr_rules = x.get_rule_truth(record)["rule"]
        edit += x.detect_cluster(pr_rules, cr_rules, record[1:])

    # Keep only the candidates whose third field lies strictly between
    # 0.5 and 0.99 and whose third and fifth fields sum to exactly 1, then
    # process them in ascending order of the third field.
    editCollection = [
        e for e in edit
        if 0.5 < e[2] < 0.99 and e[2] + e[4] == 1
    ]
    editCollection.sort(key=lambda e: e[2])

    # default fcm_path
    fmc = [[0.5 for i in range(2*cf.k_mean)] for i in range(len(data[0]) - 1)]
    csv.write_file(cf.fmc_path, fmc)

    # Baseline: number of correct predictions with the default table.
    x = ha_predict.Predict(data, clusters, rules, fmc)
    correct = x.num_corrects()

    train_old = correct*100/len(data)

    for attr, false_idx, u_false, true_idx, u_true in editCollection:
        candidate = (attr, false_idx, u_false, true_idx, u_true)

        # Reload the last accepted table from disk, so that an edit that
        # was rejected in the previous iteration is discarded.
        fmc = csv.read_file(cf.fmc_path, 'float')
        if true_idx > false_idx:
            fmc[attr][2*true_idx] = u_true/u_false*fmc[attr][2*true_idx - 1]
        else:
            fmc[attr][2*true_idx + 1] = u_true/u_false*fmc[attr][2*true_idx + 2]

        x = ha_predict.Predict(data, clusters, rules, fmc)
        y = x.num_corrects()
        if correct <= y:
            # Not worse than the best so far: accept and persist.
            print("  -----> edited", candidate, "\n")
            correct = y
            csv.write_file(cf.fmc_path, fmc)
        else:
            print("  -----> rejected", candidate, "\n")

    print("  Time: {:.2f}s\n  T1FS Train Accuracy: {:.3f}%\n  HA_T2FS Accuracy: {:.3f}%".format(time.time() - now, train_old, correct*100/len(data)))
    return train_old, correct*100/len(data)