Exemplo n.º 1
0
class DimensionalityTest(unittest.TestCase):
    """Checks the 'DoFeatures' intent and the feature table stored for it."""

    DELTA = 1e-3
    DECIMAL = 6
    session_id = al.id_session_creator()

    @ignore_warnings
    def setUp(self):
        """Select the Khiva CPU backend and prepare session and workspace."""
        set_backend(KHIVABackend.KHIVA_BACKEND_CPU)

        # Session attributes are stored on the instance; the external
        # ``response(self, ...)`` helper presumably reads them.
        self.project_id, self.language_code = "aljuaritmo", "en"
        self.session_client = dialogflow.SessionsClient()
        self.session = self.session_client.session_path(
            self.project_id, self.session_id)

        workspace = al.Workspace()
        self.workspace = workspace
        workspace.init_current()

    @ignore_warnings
    def test_features(self):
        """Ask for features on 'energy' and compare the stored result."""
        query = response(self, 'Compute features on energy')['queryResult']
        self.assertEqual(query['intent']['displayName'], 'DoFeatures')
        self.assertGreater(query['intentDetectionConfidence'], 0.95)
        self.assertEqual(query['parameters']['Dataset'], 'energy')

        # Two series of five samples each, stored column-wise.
        series = pd.DataFrame([[4, 4, 6, 6, 7], [4, 7, 7, 8, 8]]).transpose()
        self.workspace.save_dataset('energy', series)
        al.do_features(query['parameters'])
        features_results = al.Workspace().get_dataset("energyFeatures0")

        # One (first series, second series) pair per result row.
        expected = (
            (153.0, 242.0),
            (3.0, 4.0),
            (3.0, 4.0),
            (2.0, 1.0),
            (0.800000011920929, 0.6000000238418579),
            (0.0, 0.0),
            (1.0, 1.0),
            (0.0, 1.0),
            (-2.407405376434326, 3.2510287761688232),
            (1.0, 1.0),
            (0.4000000059604645, 0.20000000298023224),
            (1.0, 0.0),
            (3.0, 4.0),
            (2.0, 1.0),
            (7.0, 8.0),
            (0.6000000238418579, 0.800000011920929),
            (4.0, 4.0),
            (0.0, 0.0),
            (5.40000057220459, 6.800000190734863),
            (6.0, 7.0),
            (0.6000000238418579, 0.800000011920929),
            (0.6000000238418579, 0.6000000238418579),
            (-0.1656368374824524, -1.735581874847412),
            (1.1999999284744263, 1.469693899154663),
            (10.0, 15.0),
            (27.0, 34.0),
            (1.4399999380111694, 2.1600000858306885),
            (1.0, 1.0),
        )
        for row, (first, second) in enumerate(expected):
            self.assertAlmostEqual(features_results[0][row], first,
                                   delta=self.DELTA)
            self.assertAlmostEqual(features_results[1][row], second,
                                   delta=self.DELTA)

    @ignore_warnings
    def tearDown(self):
        """Clean the workspace created in ``setUp``."""
        self.workspace.clean_workspace()
Exemplo n.º 2
0
class PrintingTests(unittest.TestCase):
    """Intent-detection check for the "Print <dataset>" order."""

    session_id = al.id_session_creator()

    @ignore_warnings
    def setUp(self):
        """Create the Dialogflow client and the session path for this class."""
        self.project_id, self.language_code = "aljuaritmo", "en"
        client = dialogflow.SessionsClient()
        self.session_client = client
        self.session = client.session_path(self.project_id, self.session_id)

    def test_print(self):
        """'Print energy' must resolve to 'PrintResult' on dataset 'energy'."""
        query = response(self, "Print energy")['queryResult']
        self.assertEqual(query['intent']['displayName'], 'PrintResult')
        self.assertGreater(query['intentDetectionConfidence'], 0.9)
        self.assertEqual(query['parameters']['Dataset'], 'energy')
Exemplo n.º 3
0
class PlottingTests(unittest.TestCase):
    """Intent-detection checks for the plotting orders ('ShowResult')."""

    session_id = al.id_session_creator()

    @ignore_warnings
    def setUp(self):
        """Create the Dialogflow client and the session path for this class."""
        self.project_id, self.language_code = "aljuaritmo", "en"
        client = dialogflow.SessionsClient()
        self.session_client = client
        self.session = client.session_path(self.project_id, self.session_id)

    def _show_result_query(self, order, min_confidence, dataset):
        """Send *order* and check intent name, confidence and dataset.

        :param order: Sentence sent through ``response``
        :param min_confidence: Strict lower bound for the detection confidence
        :param dataset: Expected value of the 'Dataset' parameter
        :return: The 'parameters' mapping of the query result
        """
        query = response(self, order)['queryResult']
        self.assertEqual(query['intent']['displayName'], 'ShowResult')
        self.assertGreater(query['intentDetectionConfidence'], min_confidence)
        parameters = query['parameters']
        self.assertEqual(parameters['Dataset'], dataset)
        return parameters

    def test_plot(self):
        """Plot a whole dataset."""
        self._show_result_query("Plot energy", 0.9, 'energy')

    def test_plot_any_columns(self):
        """Plot a dataset restricted to two named columns."""
        parameters = self._show_result_query(
            "Plot titanic for Fare and Age", 0.9, 'titanic')
        self.assertEqual(parameters['columns'], ['Fare', 'Age'])

    def test_plot_from_to(self):
        """Plot a dataset restricted to a from/to range."""
        parameters = self._show_result_query(
            "Plot titanic from 10 to 30", 0.85, 'titanic')
        self.assertEqual(parameters['from'], 10)
        self.assertEqual(parameters['to'], 30)
Exemplo n.º 4
0
class PrintingTests(unittest.TestCase):
    """Intent-detection checks for the printing orders ('PrintResult')."""

    session_id = al.id_session_creator()

    @ignore_warnings
    def setUp(self):
        """Point the Google client at credentials and open a session.

        The credentials path is only a fallback: an already exported
        ``GOOGLE_APPLICATION_CREDENTIALS`` is left untouched, so the suite
        can run on machines other than the original developer's.
        """
        os.environ.setdefault(
            "GOOGLE_APPLICATION_CREDENTIALS",
            "/home/franco.gonzalez/Desktop/Credentials/"
            "Aljuaritmo-3ac32e58ff41.json")
        self.project_id = "aljuaritmo"
        self.language_code = "en"
        self.session_client = dialogflow.SessionsClient()
        self.session = self.session_client.session_path(
            self.project_id, self.session_id)

    def _assert_print_intent(self, order, dataset):
        """Send *order* and check it resolves to 'PrintResult' on *dataset*.

        :param order: Sentence sent through ``response``
        :param dataset: Expected value of the 'Dataset' parameter
        """
        query = response(self, order)['queryResult']
        self.assertEqual(query['intent']['displayName'], 'PrintResult')
        self.assertGreater(query['intentDetectionConfidence'], 0.9)
        self.assertEqual(query['parameters']['Dataset'], dataset)

    def test_print_default(self):
        """No dataset named: the 'Dataset' parameter comes back empty."""
        self._assert_print_intent("Print it", '')

    def test_print_current_dataset(self):
        """Explicit reference to the current dataset."""
        self._assert_print_intent("Print the current dataset",
                                  'current_dataset')

    def test_print_any_dataset(self):
        """Explicitly named dataset."""
        self._assert_print_intent("Print energy", 'energy')
Exemplo n.º 5
0
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import json
import re
import sys

import click
import dialogflow_v2 as dialogflow
from google.protobuf import json_format as pbjson

import aljuarismi as al

# Module-level settings; the names mirror the parameters of
# detect_intent_text below (presumably passed to it by the caller).
project_id = 'aljuaritmo'
# Generated once, when the module is imported.
session_id = al.id_session_creator()
language_code = 'en'


def detect_intent_text(project_id, session_id, text, language_code):
    """
    Detect the intent of the text and execute an instruction.

    Using the same `session_id` between requests allows continuation of the conversation.

    :param project_id: ID of the project
    :param session_id: ID of the session
    :param text: The input text to analyse
    :param language_code: Code of the language
    """
Exemplo n.º 6
0
class NormalizationTest(unittest.TestCase):
    """Tests the 'DoNormalization' intent and the dataset each operation stores."""

    DELTA = 1e-6
    DECIMAL = 6
    session_id = al.id_session_creator()

    @ignore_warnings
    def setUp(self):
        """Select the Khiva CPU backend and prepare session and workspace."""
        set_backend(KHIVABackend.KHIVA_BACKEND_CPU)
        self.project_id = "aljuaritmo"
        self.language_code = "en"
        self.session_client = dialogflow.SessionsClient()
        self.session = self.session_client.session_path(
            self.project_id, self.session_id)

        self.workspace = al.Workspace()
        self.workspace.init_current()

    def _detect_normalization(self, order, operation):
        """Send *order* and check the intent fields shared by every test.

        :param order: Sentence sent through ``response``
        :param operation: Expected value of the 'operation' parameter
        :return: The 'parameters' mapping of the query result
        """
        query = response(self, order)['queryResult']
        self.assertEqual(query['intent']['displayName'], 'DoNormalization')
        self.assertGreater(query['intentDetectionConfidence'], 0.9)
        parameters = query['parameters']
        self.assertEqual(parameters['operation'], operation)
        self.assertEqual(parameters['Dataset'], 'timeserie')
        return parameters

    def _normalize(self, frame, parameters, result_name):
        """Store *frame* as 'timeserie', run the normalization, fetch the result.

        :param frame: DataFrame saved under the name 'timeserie'
        :param parameters: Parameters mapping handed to ``al.do_normalization``
        :param result_name: Name of the dataset read back from the workspace
        :return: The stored result, converted to numpy and flattened
        """
        self.workspace.save_dataset('timeserie', frame)
        al.do_normalization(parameters)
        return al.Workspace().get_dataset(result_name).to_numpy().flatten()

    def _assert_rows_close(self, flat, first_row, second_row):
        """Compare a flattened two-row result against both expected rows.

        Every element of each row is checked; the second row is read at an
        offset of one full row length inside *flat*.
        """
        width = len(first_row)
        for i, (first, second) in enumerate(zip(first_row, second_row)):
            self.assertAlmostEqual(flat[i], first, delta=self.DELTA)
            self.assertAlmostEqual(flat[i + width], second, delta=self.DELTA)

    @ignore_warnings
    def test_znorm(self):
        """Z-normalization of two series."""
        parameters = self._detect_normalization(
            'Execute znorm on timeserie', 'znorm')
        znorm_result = self._normalize(
            pd.DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]]), parameters, "znorm0")

        expected = [
            -1.341640786499870, -0.447213595499958, 0.447213595499958,
            1.341640786499870
        ]
        self._assert_rows_close(znorm_result, expected, expected)

    # A reason is passed explicitly: a bare ``@unittest.skip`` only skips
    # on Python >= 3.8; on older versions the test silently passes.
    @unittest.skip("in-place Khiva variant disabled in this suite")
    def test_znorm_in_place(self):
        """In-place z-normalization on a Khiva array (skipped)."""
        tss = Array(data=[[0, 1, 2, 3], [4, 5, 6, 7]])
        znorm_in_place(tss)
        tss = tss.to_numpy()
        expected = [
            -1.341640786499870, -0.447213595499958, 0.447213595499958,
            1.341640786499870
        ]
        for row in range(2):
            for col, value in enumerate(expected):
                self.assertAlmostEqual(tss[row][col], value, delta=self.DELTA)

    @ignore_warnings
    def test_max_min_norm(self):
        """Max-min normalization without explicit bounds."""
        parameters = self._detect_normalization(
            'Execute maximal minimal normalization on timeserie',
            'max_min_norm')
        max_min_norm_result = self._normalize(
            pd.DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]]), parameters,
            "max_min_norm0")

        expected = [0.0, 0.3333333333333, 0.66666667, 1.0]
        self._assert_rows_close(max_min_norm_result, expected, expected)

    @ignore_warnings
    def test_max_min_norm_with_param(self):
        """Max-min normalization with explicit max and min values."""
        parameters = self._detect_normalization(
            'Execute maximal minimal normalization on timeserie with max value of 2 and min value of 1',
            'max_min_norm')
        self.assertEqual(parameters['max'], 2)
        self.assertEqual(parameters['min'], 1)

        max_min_norm_result = self._normalize(
            pd.DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]]), parameters,
            "max_min_norm0")

        expected = [1.0, 1.3333333333333, 1.66666667, 2.0]
        self._assert_rows_close(max_min_norm_result, expected, expected)

    @unittest.skip("in-place Khiva variant disabled in this suite")
    def test_max_min_norm_in_place(self):
        """In-place max-min normalization on a Khiva array (skipped)."""
        tss = Array([[0, 1, 2, 3], [4, 5, 6, 7]])
        max_min_norm_in_place(tss, 2.0, 1.0)
        tss = tss.to_numpy()
        expected = np.array([[1.0, 1.3333333333333, 1.66666667, 2.0],
                             [1.0, 1.3333333333333, 1.66666667, 2.0]])
        np.testing.assert_array_almost_equal(tss,
                                             expected,
                                             decimal=self.DECIMAL)

    @ignore_warnings
    def test_decimal_scaling_norm(self):
        """Decimal-scaling normalization; all four values per row are checked."""
        parameters = self._detect_normalization(
            'Execute the decimal scaling normalization on timeserie',
            'decimal_scaling_norm')
        dec_sca_norm_result = self._normalize(
            pd.DataFrame([[0, 1, -2, 3], [40, 50, 60, -70]]), parameters,
            "dec_sca_norm0")

        expected = [[0.0, 0.1, -0.2, 0.3], [0.4, 0.5, 0.6, -0.7]]
        # Iterate over the row contents, not over the number of rows:
        # looping over len(expected) covered only the first two values.
        self._assert_rows_close(dec_sca_norm_result, expected[0], expected[1])

    @unittest.skip("in-place Khiva variant disabled in this suite")
    def test_decimal_scaling_norm_in_place(self):
        """In-place decimal-scaling normalization on a Khiva array (skipped)."""
        tss = Array([[0, 1, -2, 3], [40, 50, 60, -70]])
        decimal_scaling_norm_in_place(tss)
        tss = tss.to_numpy()
        expected = np.array([[0.0, 0.1, -0.2, 0.3], [0.4, 0.5, 0.6, -0.7]])
        np.testing.assert_array_almost_equal(tss,
                                             expected,
                                             decimal=self.DECIMAL)

    @ignore_warnings
    def test_mean_norm(self):
        """Mean normalization of two series."""
        parameters = self._detect_normalization(
            'Execute the mean norm on timeserie', 'mean_norm')
        mean_norm_result = self._normalize(
            pd.DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]]), parameters,
            "mean_norm0")

        expected = [-0.5, -0.166666667, 0.166666667, 0.5]
        self._assert_rows_close(mean_norm_result, expected, expected)

    @unittest.skip("in-place Khiva variant disabled in this suite")
    def test_mean_norm_in_place(self):
        """In-place mean normalization on a Khiva array (skipped)."""
        a = Array([[0, 1, 2, 3], [4, 5, 6, 7]])
        mean_norm_in_place(a)
        expected = np.array([[-0.5, -0.166666667, 0.166666667, 0.5],
                             [-0.5, -0.166666667, 0.166666667, 0.5]])
        np.testing.assert_array_almost_equal(a.to_numpy(),
                                             expected,
                                             decimal=self.DECIMAL)

    @ignore_warnings
    def tearDown(self):
        """Clean the workspace created in ``setUp``."""
        self.workspace.clean_workspace()
Exemplo n.º 7
0
class MatrixTest(unittest.TestCase):
    """Tests the matrix-profile intents (stomp, best motifs, best discords)."""

    DELTA = 1e-6
    DECIMAL = 6
    session_id = al.id_session_creator()

    @ignore_warnings
    def setUp(self):
        """Select the Khiva CPU backend and prepare session and workspace."""
        set_backend(KHIVABackend.KHIVA_BACKEND_CPU)
        self.project_id = "aljuaritmo"
        self.language_code = "en"
        self.session_client = dialogflow.SessionsClient()
        self.session = self.session_client.session_path(self.project_id, self.session_id)

        self.workspace = al.Workspace()
        self.workspace.init_current()

    def _assert_intent(self, query, intent, operation):
        """Check intent name, confidence (> 0.8) and operation of *query*.

        :param query: The 'queryResult' mapping returned by the agent
        :param intent: Expected intent display name
        :param operation: Expected value of the 'operation' parameter
        """
        self.assertEqual(query['intent']['displayName'], intent)
        self.assertGreater(query['intentDetectionConfidence'], 0.8)
        self.assertEqual(query['parameters']['operation'], operation)

    @ignore_warnings
    def test_stomp_self_join(self):
        """Self-join stomp on one series with subsequence length 3."""
        order = "Execute stomp self join on energy with subsequence length of 3"

        query = response(self, order)['queryResult']
        self._assert_intent(query, 'DoMatrix_Stomp', 'stomp_self_join')
        self.assertEqual(query['parameters']['m'], 3)
        self.assertEqual(query['parameters']['Dataset'], 'energy')

        tt1 = pd.DataFrame([10, 10, 11, 11, 10, 11, 10, 10, 11, 11, 10, 11, 10, 10])
        self.workspace.save_dataset('energy', tt1)

        expected_index = [6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5]

        al.do_matrix(query['parameters'])
        stomp_self_join_result = al.Workspace().get_dataset('stomp0')

        # Convert once instead of on every loop iteration.
        profile = stomp_self_join_result['profile'].to_numpy()
        index = stomp_self_join_result['index'].to_numpy()
        # NOTE(review): only the first 6 of the 12 expected indices are
        # compared; confirm whether the remaining entries should be checked.
        for i in range(6):
            self.assertAlmostEqual(profile[i], 0.0, delta=1e-2)
            self.assertEqual(index[i], expected_index[i])

    @ignore_warnings
    def test_stomp(self):
        """Stomp between two series with subsequence length 3."""
        order = "Execute stomp on energy and consumption with a subsequence length of 3"

        query = response(self, order)['queryResult']
        self._assert_intent(query, 'DoMatrix_Stomp', 'stomp')
        self.assertEqual(query['parameters']['m'], 3)
        self.assertEqual(query['parameters']['Dataset'], 'energy')
        self.assertEqual(query['parameters']['Dataset2'], 'consumption')

        tt1 = pd.DataFrame([10, 11, 10, 11])
        tt2 = pd.DataFrame([10, 11, 10, 11, 10, 11, 10, 11])
        self.workspace.save_dataset('energy', tt1)
        self.workspace.save_dataset('consumption', tt2)

        expected_index = [0, 1, 0, 1, 0, 1]

        al.do_matrix(query['parameters'])
        stomp_result = al.Workspace().get_dataset('stomp0')

        profile = stomp_result['profile'].to_numpy()
        index = stomp_result['index'].to_numpy()
        for i in range(6):
            self.assertAlmostEqual(profile[i], 0, delta=1e-2)
            # Indices are compared exactly, as in test_stomp_self_join.
            self.assertEqual(index[i], expected_index[i])

    @ignore_warnings
    def test_find_best_n_motifs(self):
        """Run stomp, then ask for the best motif of the stored profile."""
        order = "Execute stomp on energy and consumption with a subsequence length of 3"

        # The stomp request only prepares 'stomp0'; its intent is not asserted here.
        stomp_query = response(self, order)['queryResult']

        tt1 = pd.DataFrame([10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 10, 10, 11, 10, 9])
        tt2 = pd.DataFrame([10, 11, 10, 9])
        self.workspace.save_dataset('energy', tt1)
        self.workspace.save_dataset('consumption', tt2)

        al.do_matrix(stomp_query['parameters'])

        order = "Find best 1 motif from stomp0"

        query = response(self, order)['queryResult']
        self._assert_intent(query, 'DoMatrix_Best', 'find_best_n_motifs')
        self.assertEqual(query['parameters']['n'], 1)

        al.do_matrix(query['parameters'])

        find_best_n_motifs_result = al.Workspace().get_dataset('motifs0')

        for i in range(3):
            self.assertEqual(find_best_n_motifs_result['col0'][i], 10)

    @ignore_warnings
    def test_find_best_n_discords(self):
        """Run stomp, then ask for the best two discords of the stored profile."""
        order = "Execute stomp on energy and consumption with a subsequence length of 3"

        # The stomp request only prepares 'stomp0'; its intent is not asserted here.
        stomp_query = response(self, order)['queryResult']

        tt1 = pd.DataFrame([11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11])
        tt2 = pd.DataFrame([9, 10.1, 10.2, 10.1, 10.2, 10.1, 10.2, 10.1, 10.2, 10.1, 10.2, 10.1, 9])
        self.workspace.save_dataset('energy', tt1)
        self.workspace.save_dataset('consumption', tt2)

        al.do_matrix(stomp_query['parameters'])

        order = "Find best 2 discords from stomp0"

        query = response(self, order)['queryResult']
        self._assert_intent(query, 'DoMatrix_Best', 'find_best_n_discords')
        self.assertEqual(query['parameters']['n'], 2)

        al.do_matrix(query['parameters'])

        find_best_n_discord_result = al.Workspace().get_dataset('discords0')

        expected_result = [11, 10, 11]

        # NOTE(review): only the first two values of 'col0' are compared,
        # although two discords are requested and three values are listed;
        # confirm the intended coverage before widening the loops.
        col = find_best_n_discord_result['col0'].tolist()
        for j in range(2):
            self.assertEqual(col[j], expected_result[j])

    @ignore_warnings
    def tearDown(self):
        """Clean the workspace created in ``setUp``."""
        self.workspace.clean_workspace()
Exemplo n.º 8
0
class ClusteringTests(unittest.TestCase):
    """Tests the 'DoClustering' intent for the kmeans and kshape operations."""

    DELTA = 1e-3
    DECIMAL = 6
    session_id = al.id_session_creator()

    @ignore_warnings
    def setUp(self):
        """Select the Khiva CPU backend and prepare credentials, session and workspace.

        The credentials path is only a fallback: an already exported
        ``GOOGLE_APPLICATION_CREDENTIALS`` is left untouched, so the suite
        can run on machines other than the original developer's.
        """
        set_backend(KHIVABackend.KHIVA_BACKEND_CPU)
        os.environ.setdefault(
            "GOOGLE_APPLICATION_CREDENTIALS",
            "/home/franco.gonzalez/Desktop/Credentials/"
            "Aljuaritmo-3ac32e58ff41.json")
        self.project_id = "aljuaritmo"
        self.language_code = "en"
        self.session_client = dialogflow.SessionsClient()
        self.session = self.session_client.session_path(
            self.project_id, self.session_id)

        self.workspace = al.Workspace()
        self.workspace.init_current()

    def _detect_clustering(self, order, operation, dataset, min_confidence):
        """Send *order* and check the clustering intent fields.

        :param order: Sentence sent through ``response``
        :param operation: Expected value of the 'operation' parameter
        :param dataset: Expected value of the 'Dataset' parameter
        :param min_confidence: Strict lower bound for the detection confidence
        :return: The 'parameters' mapping of the query result
        """
        query = response(self, order)['queryResult']
        self.assertEqual(query['intent']['displayName'], 'DoClustering')
        self.assertGreater(query['intentDetectionConfidence'], min_confidence)
        parameters = query['parameters']
        self.assertEqual(parameters['operation'], operation)
        self.assertEqual(parameters['number'], 3)
        self.assertEqual(parameters['Dataset'], dataset)
        return parameters

    def _assert_centroids_close(self, centroid, expected_c):
        """Compare columns 0-2 of *centroid* with *expected_c*, row by row.

        NOTE(review): expected_c has more than three columns in every test,
        so the remaining columns are never compared; confirm the layout of
        the centroid result before widening the check.
        """
        for i in range(len(expected_c)):
            for column in range(3):
                self.assertAlmostEqual(centroid[column][i],
                                       expected_c[column][i],
                                       delta=self.DELTA)

    @staticmethod
    def _kmeans_input():
        """Return the six four-sample series used by the kmeans tests."""
        return pd.DataFrame([[0.0, 1.0, 2.0, 3.0], [6.0, 7.0, 8.0, 9.0],
                             [2.0, -2.0, 4.0, -4.0], [8.0, 5.0, 3.0, 1.0],
                             [15.0, 10.0, 5.0, 0.0], [7.0, -7.0, 1.0, -1.0]])

    @staticmethod
    def _kmeans_expected():
        """Return the expected kmeans centroids."""
        return pd.DataFrame([[0.0, 0.1667, 0.3333, 0.5],
                             [1.5, -1.5, 0.8333, -0.8333],
                             [4.8333, 3.6667, 2.6667, 1.6667]])

    @staticmethod
    def _kshape_input():
        """Return the five seven-sample series used by the kshape tests."""
        return pd.DataFrame([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                             [0.0, 10.0, 4.0, 5.0, 7.0, -3.0, 0.0],
                             [-1.0, 15.0, -12.0, 8.0, 9.0, 4.0, 5.0],
                             [2.0, 8.0, 7.0, -6.0, -1.0, 2.0, 9.0],
                             [-5.0, -5.0, -6.0, 7.0, 9.0, 9.0, 0.0]])

    @staticmethod
    def _kshape_expected():
        """Return the expected kshape centroids."""
        return pd.DataFrame(
            [[-0.5234, 0.1560, -0.3627, -1.2764, -0.7781, 0.9135, 1.8711],
             [-0.7825, 1.5990, 0.1701, 0.4082, 0.8845, -1.4969, -0.7825],
             [-0.6278, 1.3812, -2.0090, 0.5022, 0.6278, 0.0000, 0.1256]])

    @ignore_warnings
    def test_kmeans(self):
        """kmeans called directly on a frame, no dataset named in the order."""
        parameters = self._detect_clustering(
            "Do the kmeans of 3 clusters", 'kmeans', '', 0.9)

        (centroid, _labels) = al.kmean(self._kmeans_input(), parameters)

        self._assert_centroids_close(centroid, self._kmeans_expected())

    @ignore_warnings
    def test_kmeans_dataset(self):
        """kmeans on a stored dataset; results are read back from the workspace."""
        parameters = self._detect_clustering(
            "Do the kmeans of 3 clusters for energy", 'kmeans', 'energy', 0.8)

        self.workspace.save_dataset('energy', self._kmeans_input())
        al.do_clustering(parameters)

        centroid = al.Workspace().get_dataset('centroids0')
        # Fetched as in the original test, although the labels are not asserted.
        _labels = al.Workspace().get_dataset('labels0')

        self._assert_centroids_close(centroid, self._kmeans_expected())

    @ignore_warnings
    def test_kshape(self):
        """kshape called directly on a frame, no dataset named in the order."""
        parameters = self._detect_clustering(
            "Do the kshape of 3 clusters", 'kshape', '', 0.9)

        (centroid, _labels) = al.kshape(self._kshape_input(), parameters)

        self._assert_centroids_close(centroid, self._kshape_expected())

    @ignore_warnings
    def test_shape_dataset(self):
        """kshape on a stored dataset; results are read back from the workspace."""
        parameters = self._detect_clustering(
            "Do the kshape of 3 clusters for energy", 'kshape', 'energy', 0.8)

        self.workspace.save_dataset('energy', self._kshape_input())
        al.do_clustering(parameters)

        centroid = al.Workspace().get_dataset('centroids0')
        # Fetched as in the original test, although the labels are not asserted.
        _labels = al.Workspace().get_dataset('labels0')

        self._assert_centroids_close(centroid, self._kshape_expected())

    @ignore_warnings
    def tearDown(self):
        """Clean the workspace created in ``setUp``."""
        self.workspace.clean_workspace()
Exemplo n.º 9
0
class DimensionalityTest(unittest.TestCase):
    """Checks for the ``DoDimensionality`` intent and its four operations.

    Every test passes an English order to ``response``, verifies the intent
    and parameters found in the returned ``queryResult``, stores ``SERIES``
    in the workspace as ``timeserie``, runs ``al.do_dimensionality`` and
    compares the dataset the workspace holds afterwards with the expected
    reduction.
    """

    DELTA = 1e-6
    DECIMAL = 6
    session_id = al.id_session_creator()

    # Input series shared by every test; the position of a value in this
    # tuple is the index the point-selecting reductions report for it.
    SERIES = (0.0, 0.1, -0.1, 5.0, 6.0, 7.0, 8.1, 9.0, 9.0, 9.0)

    @ignore_warnings
    def setUp(self):
        set_backend(KHIVABackend.KHIVA_BACKEND_CPU)
        self.project_id = "aljuaritmo"
        self.language_code = "en"
        self.session_client = dialogflow.SessionsClient()
        self.session = self.session_client.session_path(self.project_id, self.session_id)

        self.workspace = al.Workspace()
        self.workspace.init_current()

    def _detect(self, order, operation, number):
        """Pass *order* to ``response`` and return the detected parameters.

        Asserts that the intent is ``DoDimensionality`` with a confidence
        above 0.85 and that the parameters carry *operation*, the
        ``timeserie`` dataset and *number*.
        """
        query = response(self, order)['queryResult']
        self.assertEqual(query['intent']['displayName'], 'DoDimensionality')
        self.assertGreater(query['intentDetectionConfidence'], 0.85)
        parameters = query['parameters']
        self.assertEqual(parameters['operation'], operation)
        self.assertEqual(parameters['Dataset'], 'timeserie')
        self.assertEqual(parameters['number'], number)
        return parameters

    def _reduce(self, parameters, result_name):
        """Store ``SERIES`` as ``timeserie``, run the operation, fetch *result_name*."""
        self.workspace.save_dataset('timeserie', pd.DataFrame(list(self.SERIES)))
        al.do_dimensionality(parameters)
        return al.Workspace().get_dataset(result_name)

    def _assert_points(self, result, expected_index, expected_values):
        """Compare every (index, value) pair of *result* with the expectation.

        The length is asserted first so that ``zip`` cannot silently hide a
        result that is shorter or longer than expected.
        """
        index = result.index.to_list()
        values = result.values
        self.assertEqual(len(index), len(expected_index))
        self.assertEqual(len(values), len(expected_values))
        pairs = zip(index, values, expected_index, expected_values)
        for position, value, want_position, want_value in pairs:
            self.assertAlmostEqual(position, want_position, delta=self.DELTA)
            self.assertAlmostEqual(value, want_value, delta=self.DELTA)

    @ignore_warnings
    def test_ramer_douglas_peucker(self):
        order = 'Execute ramerDouglasPeucker on timeserie with an epsilon of 1.0'
        parameters = self._detect(order, 'ramer_douglas_peucker', 1.0)

        result = self._reduce(parameters, "RDP0")

        self._assert_points(result,
                            [0, 2, 3, 6, 9],
                            [0, -0.1, 5.0, 8.1, 9.0])

    @ignore_warnings
    def test_visvalingam(self):
        order = 'Execute visvalingam on timeserie at 5 points'
        parameters = self._detect(order, 'visvalingam', 5)

        result = self._reduce(parameters, "visvalingam0")

        # NOTE(review): the fourth index used to read 6, but SERIES holds 8.1
        # there while the expected value is 9.0 (first found at position 7).
        # The mismatch went unnoticed because only two points were compared;
        # confirm 7 against the backend output.
        self._assert_points(result,
                            [0, 2, 3, 7, 9],
                            [0, -0.1, 5.0, 9.0, 9.0])

    @ignore_warnings
    def test_paa(self):
        order = 'Execute paa on timeserie at 5 points'
        parameters = self._detect(order, 'paa', 5)

        result = self._reduce(parameters, "paa0")

        values = result.values
        expected = [0.05, 2.45, 6.5, 8.55, 9.0]
        self.assertEqual(len(values), len(expected))
        for value, want in zip(values, expected):
            self.assertAlmostEqual(value, want, delta=self.DELTA)

    @ignore_warnings
    def test_pip(self):
        order = 'Execute pip on timeserie at 6 points'
        parameters = self._detect(order, 'pip', 6)

        result = self._reduce(parameters, "pip0")

        # NOTE(review): six points are requested and six values were already
        # expected, but only five indices were listed. Index 7 (the first 9.0
        # after the 8.1 at position 6) is assumed for the missing entry;
        # confirm against the backend output.
        self._assert_points(result,
                            [0, 2, 3, 6, 7, 9],
                            [0.0, -0.1, 5.0, 8.1, 9.0, 9.0])

    @ignore_warnings
    def tearDown(self):
        self.workspace.clean_workspace()
Exemplo n.º 10
0
class DatasetTests(unittest.TestCase):
    """Checks for the dataset-management intents.

    Every test passes an English order to ``response``, verifies the intent
    and parameters found in the returned ``queryResult``, calls the matching
    ``al`` function with those parameters and inspects the datasets the
    workspace holds afterwards.
    """

    session_id = al.id_session_creator()

    @ignore_warnings
    def setUp(self):
        self.project_id = "aljuaritmo"
        self.language_code = "en"
        self.session_client = dialogflow.SessionsClient()
        self.session = self.session_client.session_path(
            self.project_id, self.session_id)

        self.workspace = al.Workspace()
        self.workspace.save_dataset_path('titanic', 'datasets')
        self.workspace.init_current()

    def _detect(self, order, intent, min_confidence=0.8):
        """Pass *order* to ``response`` and return the detected parameters.

        Asserts that the intent name equals *intent* and that the detection
        confidence is strictly greater than *min_confidence*.
        """
        query = response(self, order)['queryResult']
        self.assertEqual(query['intent']['displayName'], intent)
        self.assertGreater(query['intentDetectionConfidence'], min_confidence)
        return query['parameters']

    @staticmethod
    def _create_random():
        """Create a 200-row, 10-column dataset through ``al.create_dataset``."""
        # A fresh dict per call, exactly as each test used to pass.
        al.create_dataset({'columns': 10, 'rows': 200, 'values': [0, 1]})

    def _assert_pieces(self, pattern, pieces, columns, rows):
        """Check the datasets whose names match *pattern*.

        Names are extracted from the string form of
        ``Workspace.get_all_dataset()``; exactly *pieces* names must match
        and each dataset must have *columns* columns and *rows* rows.
        """
        names = re.findall(pattern, str(al.Workspace().get_all_dataset()))
        self.assertEqual(len(names), pieces)
        workspace = al.Workspace()
        for name in names:
            piece = workspace.get_dataset(name)
            self.assertEqual(piece.columns.size, columns)
            self.assertEqual(piece.index.size, rows)

    @ignore_warnings
    def test_load_dataset(self):
        order = "load dataset titanic.csv"

        query = response(self, order)['queryResult']

        self.assertEqual(query['intent']['displayName'], 'LoadDataset')
        # This intent is expected to be detected with full confidence.
        self.assertEqual(query['intentDetectionConfidence'], 1.0)
        parameters = query['parameters']
        self.assertEqual(parameters['Dataset'], 'titanic')

        al.load_dataset(parameters)
        self.workspace = al.Workspace()
        loaded = self.workspace.get_dataset('current')
        titanic = pd.read_csv("datasets/titanic.csv")
        self.assertEqual(loaded.to_json(), titanic.to_json())

    @ignore_warnings
    def test_create_random(self):
        order = "create random dataset for 5 row and 10 columns between -12.1 and 80"

        parameters = self._detect(order, 'RandomDataset')

        al.create_dataset(parameters)
        self.workspace = al.Workspace()
        generated = self.workspace.get_dataset('current')
        self.assertEqual(generated.shape,
                         (int(parameters['rows']),
                          int(parameters['columns'])),
                         '(n_row, n_column) do not match')
        self.assertGreaterEqual(generated.values.min(),
                                float(parameters['values'][0]))
        self.assertLessEqual(generated.values.max(),
                             float(parameters['values'][1]))

    @ignore_warnings
    def test_subdataset_rows(self):
        order = "obtain a subset by rows from random0 from 10 to 60"

        parameters = self._detect(order, 'SubDatasetRow')
        self.assertEqual(parameters['Dataset'], 'random0')
        self.assertEqual(parameters['from'], 10)
        self.assertEqual(parameters['to'], 60)

        self._create_random()
        al.get_subdataset_rows(parameters)

        subset = al.Workspace().get_dataset('subrow0random0')
        index = subset.index

        # The upper bound is exclusive: rows 10..59 make 50 rows.
        self.assertEqual(index.min(), parameters['from'])
        self.assertEqual(index.max(), parameters['to'] - 1)
        self.assertEqual(index.size, 50)

    @ignore_warnings
    def test_subdataset_cols(self):
        order = "obtain a subset from random0 by columns at col0, col2, and col7"

        parameters = self._detect(order, 'SubDatasetCols')
        self.assertEqual(parameters['Dataset'], 'random0')
        self.assertEqual(parameters['cols'], ['col0', 'col2', 'col7'])

        self._create_random()
        al.get_subdataset_columns(parameters)

        subset = al.Workspace().get_dataset('subcol0random0')

        self.assertEqual(subset.columns.size, 3)
        self.assertEqual(subset.columns.to_list(), ['col0', 'col2', 'col7'])

    @ignore_warnings
    def test_join_columns(self):
        order = "join by columns the datasets random0 and random1"

        parameters = self._detect(order, 'JoinByCols')
        self.assertEqual(parameters['Dataset'], 'random0')
        self.assertEqual(parameters['Dataset2'], 'random1')

        self._create_random()
        self._create_random()
        al.join_by_cols(parameters)

        joined = al.Workspace().get_dataset('join0')

        self.assertEqual(joined.columns.size, 20)
        self.assertEqual(joined.index.size, 200)

    @ignore_warnings
    def test_join_rows(self):
        order = "join by rows the datasets random0 and random1"

        parameters = self._detect(order, 'JoinByRows')
        self.assertEqual(parameters['Dataset'], 'random0')
        self.assertEqual(parameters['Dataset2'], 'random1')

        self._create_random()
        self._create_random()
        al.join_by_rows(parameters)

        joined = al.Workspace().get_dataset('join0')

        self.assertEqual(joined.columns.size, 10)
        self.assertEqual(joined.index.size, 400)

    @ignore_warnings
    def test_split_rows(self):
        order = "split random0 by 20 rows"

        parameters = self._detect(order, 'SplitByRows')
        self.assertEqual(parameters['Dataset'], 'random0')
        self.assertEqual(parameters['split'], 20)

        self._create_random()
        al.split_by_rows(parameters)

        self._assert_pieces('random0r[0-9]*', pieces=10, columns=10, rows=20)

    @ignore_warnings
    def test_split_cols(self):
        order = "split the dataset random0 by 2 columns"

        parameters = self._detect(order, 'SplitByCols')
        self.assertEqual(parameters['Dataset'], 'random0')
        self.assertEqual(parameters['split'], 2)

        self._create_random()
        al.split_by_cols(parameters)

        self._assert_pieces('random0c[0-9]*', pieces=5, columns=2, rows=200)

    @ignore_warnings
    def test_change_name(self):
        order = "rename random0 to r_a_n_d_o_m_0"

        parameters = self._detect(order, 'ChangeName', min_confidence=0.95)
        self.assertEqual(parameters['Dataset'], 'random0')
        self.assertEqual(parameters['NameInto'], 'r_a_n_d_o_m_0')

        self._create_random()

        # Before the rename only the old name resolves.
        self.assertIsNotNone(al.Workspace().get_dataset('random0'))
        self.assertIsNone(al.Workspace().get_dataset('r_a_n_d_o_m_0'))
        before = al.Workspace().get_dataset('random0')

        al.change_name(parameters)

        # After the rename only the new name resolves, with the same content.
        self.assertIsNone(al.Workspace().get_dataset('random0'))
        self.assertIsNotNone(al.Workspace().get_dataset('r_a_n_d_o_m_0'))
        after = al.Workspace().get_dataset('r_a_n_d_o_m_0')

        self.assertTrue(before.equals(after))

    @ignore_warnings
    def tearDown(self):
        self.workspace.clean_workspace()
Exemplo n.º 11
0
class LibraryTest(unittest.TestCase):
    """Checks for the ``GetBackend`` and ``SetBackend`` intents.

    Every test passes an English order to ``response`` and verifies the
    intent and parameters found in the returned ``queryResult``;
    ``test_set_backend`` additionally applies the parameters through
    ``al.set_library_backend`` and checks the active backend.
    """

    session_id = al.id_session_creator()

    def setUp(self):
        set_backend(KHIVABackend.KHIVA_BACKEND_CPU)
        self.project_id = "aljuaritmo"
        self.language_code = "en"
        self.session_client = dialogflow.SessionsClient()
        self.session = self.session_client.session_path(
            self.project_id, self.session_id)

    def _detect(self, order, intent):
        """Pass *order* to ``response`` and return the detected parameters.

        Asserts that the intent name equals *intent* and that the detection
        confidence is strictly greater than 0.85.
        """
        query = response(self, order)['queryResult']
        self.assertEqual(query['intent']['displayName'], intent)
        self.assertGreater(query['intentDetectionConfidence'], 0.85)
        return query['parameters']

    def test_get_backend(self):
        parameters = self._detect('give me the current backend', 'GetBackend')
        self.assertEqual(parameters['library'], 'backend')

    def test_get_backends(self):
        parameters = self._detect('get all backends', 'GetBackend')
        self.assertEqual(parameters['library'], 'backends')

    def test_set_backend(self):
        """Switch to each backend ``get_backends()`` reports as available.

        Backends whose bit is not set in the value returned by
        ``get_backends()`` are skipped. The backend that was active when
        the test started is restored after every case, including when an
        assertion fails, so a failure cannot leave the process on another
        backend.
        """
        available = get_backends()
        original = get_backend()

        # (backend, order, expected entity value, compare case-insensitively)
        # Only the CUDA comparison ignores case; the other two expect the
        # exact entity value.
        cases = (
            (KHIVABackend.KHIVA_BACKEND_CUDA, 'set CUDA backend', 'CUDA', True),
            (KHIVABackend.KHIVA_BACKEND_OPENCL, 'set OpenCL backend', 'OpenCL', False),
            (KHIVABackend.KHIVA_BACKEND_CPU, 'set CPU backend', 'CPU', False),
        )

        for backend, order, expected_name, ignore_case in cases:
            if not available & backend.value:
                continue

            parameters = self._detect(order, 'SetBackend')
            self.assertEqual(parameters['library'], 'backend')
            detected_name = parameters['backend']
            if ignore_case:
                detected_name = detected_name.upper()
            self.assertEqual(detected_name, expected_name)

            try:
                al.set_library_backend(parameters)
                self.assertEqual(get_backend(), backend)
            finally:
                set_backend(original)