Exemplo n.º 1
0
    def test_create_dataset(self):
        """Smoke-run the load -> split -> statistics pipeline on loaded data.

        Starts ``self.klass``, loads its data into a ``DataSet``, splits it
        into training, validation and test sets, and prints every set along
        with the variance and mean of element 1 of the training tuples.
        Nothing is asserted; the test only fails if one of the steps raises.
        """
        self.klass.start()
        data, name = self.klass.get_data()
        dataset = DataSet()
        # NOTE(review): eval() executes whatever expression the loaded data
        # holds. If that data can ever come from an untrusted source, consider
        # ast.literal_eval instead -- confirm the stored format first.
        dataset.set(eval(data.get(name)))

        splits = dataset.split_train_validation_test_data()
        training_set, validation_set, test_set = splits
        print(dataset.get())
        # One line per subset: its size followed by its content.
        for subset in (training_set, validation_set, test_set):
            print("%s %s" % (len(subset.get()), subset.get()))

        training_list = training_set.get()
        variance_value = Variance().calculate(training_list,
                                              is_tuple=True,
                                              index=1)
        mean_value = Mean().calculate(training_list, is_tuple=True, index=1)
        print("Variance: %f" % variance_value)
        print("Mean: %f" % mean_value)

        finished_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(" ".join(["*" * 10, finished_at, "Finish Test", "*" * 10]))
Exemplo n.º 2
0
    def test_dataset(self):
        """Smoke-run split and statistics on a fixed ten-element dataset.

        Loads the pairs ('a', 1) .. ('j', 10) into a ``DataSet``, splits it
        into training, validation and test sets, and prints every set plus
        the variance, standard deviation and mean of element 1 of the
        training tuples. Nothing is asserted.
        """
        dataset = DataSet()
        # Same literal list as [('a', 1), ('b', 2), ..., ('j', 10)].
        dataset.set(list(zip('abcdefghij', range(1, 11))))

        splits = dataset.split_train_validation_test_data()
        training_set, validation_set, test_set = splits
        print(dataset.get())
        # One line per subset: its size followed by its content.
        for subset in (training_set, validation_set, test_set):
            print("%s %s" % (len(subset.get()), subset.get()))

        training_list = training_set.get()
        variance_value = Variance().calculate(training_list,
                                              is_tuple=True,
                                              index=1)
        standart_deviation_value = StandartDeviation().calculate(
            training_list, is_tuple=True, index=1)
        mean_value = Mean().calculate(training_list, is_tuple=True, index=1)
        print("Variance: %f" % variance_value)
        print("Standart Deviation: %f" % standart_deviation_value)
        print("Mean: %f" % mean_value)

        finished_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(" ".join(["*" * 10, finished_at, "Finish Test", "*" * 10]))
Exemplo n.º 3
0
def demo3():
    """Run the z-value anomaly check on a small hard-coded list of numbers.

    The shuffled values are split into training, validation and test sets.
    Mean and standard deviation come from the training set only; every
    validation and test value is then scored with ``ZValue`` and reported as
    an anomaly when the table lookup returns -1.
    """
    data_list = [3, 55, 10000, 2, 100, 104, 23, 1, 22, 20, 303219, 50, 21]
    shuffle(data_list)
    dataset = DataSet()
    dataset.set(data_list)
    train, validation, test = dataset.split_train_validation_test_data()
    training_list = train.get()
    validation_list = validation.get()
    test_list = test.get()
    print("%s %s %s" % (training_list, validation_list, test_list))

    standart_deviation_value = StandartDeviation().calculate(training_list)
    mean_value = Mean().calculate(training_list)
    print("Standart Deviation: %f, Mean Value: %f"
          % (standart_deviation_value, mean_value))

    z_value = ZValue()
    # Validation set first, then test set; each gets its own summary line.
    for candidates in (validation_list, test_list):
        anomaly_count = 0
        for val in candidates:
            z_value.calculate(val,
                              mean=mean_value,
                              standart_deviation=standart_deviation_value)
            if z_value.find_from_table() == -1:
                print("This val is anomaly: %s" % (val,))
                anomaly_count += 1
        # "Dataset Count" is the size of the whole dataset, not of the subset.
        print("Anomaly Count: %d, Dataset Count: %d"
              % (anomaly_count, dataset.__len__()))
Exemplo n.º 4
0
def main():
    """Create the data, then report z-value anomalies for every interface.

    Runs ``CreateData`` once, then for each name in
    ``settings.INTERFACE_NAMES`` reads that interface's records through
    ``ReadCsv``, splits them into training, validation and test sets, derives
    mean and standard deviation from element 1 of the training tuples, and
    prints every validation/test record whose z-value table lookup is -1.

    An interface whose dataset or training set holds at most one record is
    skipped; the remaining interfaces are still processed.
    """
    write_klass = CreateData(log_active=settings.LOG, limit=settings.LIMIT)
    write_klass.start()
    for interface_name in settings.INTERFACE_NAMES:
        print("Interface Name: %s" % interface_name)
        klass = ReadCsv(log_active=False, interface_name=interface_name)
        klass.start()
        data, name = klass.get_data()
        dataset = DataSet()
        # NOTE(review): eval() executes whatever expression the CSV data
        # holds. If that file can ever be untrusted, consider
        # ast.literal_eval instead -- confirm the stored format first.
        dataset.set(eval(data.get(name)))
        if len(dataset) <= 1:
            # Not enough records for this interface; a `return` here would
            # silently drop every interface that comes after it.
            continue
        train, validation, test = dataset.split_train_validation_test_data()
        training_list = train.get()
        validation_list = validation.get()
        test_list = test.get()
        if len(train) <= 1:
            # Skip this interface only: its training set is too small.
            continue
        standart_deviation_value = StandartDeviation().calculate(
            training_list, is_tuple=True, index=1)
        mean_value = Mean().calculate(training_list, is_tuple=True, index=1)
        print("Standart Deviation: %f, Mean Value: %f" % (
            standart_deviation_value, mean_value))

        z_value = ZValue()
        # Validation set first, then test set; each gets its own summary.
        for candidates in (validation_list, test_list):
            counter = 0
            for val in candidates:
                z_value.calculate(val,
                                  mean=mean_value,
                                  standart_deviation=standart_deviation_value,
                                  is_tuple=True,
                                  index=1)
                # A table lookup of -1 is what this code treats as anomalous.
                if z_value.find_from_table() == -1:
                    print("This val is anomaly: %s" % (val,))
                    counter += 1
            # "Dataset Count" is the size of the whole dataset, not the subset.
            print("Anomaly Count: %d, Dataset Count: %d" % (counter,
                                                            len(dataset)))
Exemplo n.º 5
0
    def test_algorithm_with_tuple(self):
        """Mean.calculate averages the tuple element selected by ``index``."""
        mean = Mean()
        pairs = [("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5)]
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(3, mean.calculate(pairs, is_tuple=True, index=1))

        triples = [("a", "a", 1), ("b", "b", 2), ("c", "c", 3),
                   ("d", "d", 4), ("e", "e", 5)]
        self.assertEqual(3.0, mean.calculate(triples,
                                             is_tuple=True,
                                             index=2))
Exemplo n.º 6
0
    def __algorithm(self):
        """Return the variance of ``self._data`` rounded to 4 decimals.

        Computes the sum of squared deviations from the mean and divides it
        by ``self._n - 1``.

        Returns:
            The rounded variance, or ``None`` when the computation fails
            (for example a division by zero when ``self._n`` is 1). In that
            case a blank line is printed, as before.
        """
        try:
            mean_value = Mean().calculate(self._data)
            deviations = [float(x) - mean_value for x in self._data]
            sum_of_powers = SumFormula().calculate(deviations, power=2)

            result = sum_of_powers / (self._n - 1)
            return round(result, 4)
        except Exception:
            # Best-effort by design: ordinary failures still yield None, but
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            print("")
            return None
Exemplo n.º 7
0
 def test_algorithm_with_list(self):
     """Print statistics of a small list and three sample z-value results.

     Nothing is asserted; the test only fails if one of the calls raises.
     """
     data_list = [1, 2, 3, 4, 5]
     standart_deviation_value = StandartDeviation().calculate(data_list)
     mean_value = Mean().calculate(data_list)
     print("%s %s" % (standart_deviation_value, mean_value))

     z_value = ZValue()
     # The three samples are scored against a fixed mean of 100 and a
     # standard deviation of 10, not against the statistics printed above.
     z1, z2, z3 = [
         z_value.calculate(sample, mean=100, standart_deviation=10)
         for sample in (88, 112, 5)
     ]
     print("%s %s %s" % (z1, z2, z3))
Exemplo n.º 8
0
def demo1():
    """Run the z-value anomaly check on the shuffled integers 0..999999.

    The values are split into training, validation and test sets. Mean and
    standard deviation come from the training set only; every validation and
    test value is then scored with ``ZValue`` and reported as an anomaly when
    the table lookup returns -1.
    """
    value_size = 1000000
    data_list = list(range(value_size))
    shuffle(data_list)
    dataset = DataSet()
    dataset.set(data_list)
    train, validation, test = dataset.split_train_validation_test_data()
    training_list = train.get()
    validation_list = validation.get()
    test_list = test.get()

    standart_deviation_value = StandartDeviation().calculate(training_list)
    mean_value = Mean().calculate(training_list)
    print("Standart Deviation: %f, Mean Value: %f" % (standart_deviation_value,
                                                      mean_value))

    z_value = ZValue()
    # Validation set first, then test set; each gets its own summary line.
    for candidates in (validation_list, test_list):
        anomaly_count = 0
        for val in candidates:
            z_value.calculate(val,
                              mean=mean_value,
                              standart_deviation=standart_deviation_value)
            if z_value.find_from_table() == -1:
                print("This val is anomaly: %s" % (val,))
                anomaly_count += 1
        # "Dataset Count" is the size of the whole dataset, not of the subset.
        print("Anomaly Count: %d, Dataset Count: %d"
              % (anomaly_count, dataset.__len__()))
Exemplo n.º 9
0
def demo4(limit):
    """Report z-value anomalies per interface from a demo CSV file.

    Args:
        limit: Value embedded in the file name; the data is read from
            ``../data/demo<limit>.csv`` relative to ``BASE_PATH``.

    For each name in ``INTERFACE_NAMES`` the records are read through
    ``ReadCsv``, split into training, validation and test sets, and every
    validation/test record whose z-value table lookup is -1 is printed. Mean
    and standard deviation come from element 1 of the training tuples.

    An interface whose dataset or training set holds at most one record is
    skipped; the remaining interfaces are still processed.
    """
    path = os.path.join(BASE_PATH, '../data/demo' + str(limit) + '.csv')

    for interface_name in INTERFACE_NAMES:
        print("Interface Name: %s" % interface_name)
        klass = ReadCsv(log_active=False,
                        path=path,
                        interface_name=interface_name)
        klass.start()
        data, name = klass.get_data()
        dataset = DataSet()
        # NOTE(review): eval() executes whatever expression the CSV data
        # holds. If that file can ever be untrusted, consider
        # ast.literal_eval instead -- confirm the stored format first.
        dataset.set(eval(data.get(name)))
        if len(dataset) <= 1:
            # Not enough records for this interface; a `return` here would
            # silently drop every interface that comes after it.
            continue
        train, validation, test = dataset.split_train_validation_test_data()
        training_list = train.get()
        validation_list = validation.get()
        test_list = test.get()
        if len(train) <= 1:
            # Skip this interface only: its training set is too small.
            continue
        standart_deviation_value = StandartDeviation().calculate(
            training_list, is_tuple=True, index=1)
        mean_value = Mean().calculate(training_list, is_tuple=True, index=1)
        print("Standart Deviation: %f, Mean Value: %f"
              % (standart_deviation_value, mean_value))

        z_value = ZValue()
        # Validation set first, then test set; each gets its own summary.
        for candidates in (validation_list, test_list):
            counter = 0
            for val in candidates:
                z_value.calculate(val,
                                  mean=mean_value,
                                  standart_deviation=standart_deviation_value,
                                  is_tuple=True,
                                  index=1)
                # A table lookup of -1 is what this code treats as anomalous.
                if z_value.find_from_table() == -1:
                    print("This val is anomaly: %s" % (val,))
                    counter += 1
            # "Dataset Count" is the size of the whole dataset, not the subset.
            print("Anomaly Count: %d, Dataset Count: %d"
                  % (counter, len(dataset)))
Exemplo n.º 10
0
 def test_algorithm_with_list(self):
     """Mean.calculate returns the arithmetic mean of a plain list."""
     mean = Mean()
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(3, mean.calculate([1, 2, 3, 4, 5]))
     # An even-sized list checks that the result is not truncated to an int.
     self.assertEqual(2.5, mean.calculate([1, 2, 3, 4]))