Exemplo n.º 1
0
class TestSAX(object):
    """Exercise SAX letter encoding and string comparison on small inputs."""

    def setUp(self):
        # Every test shares one encoder: 6-letter words over a 5-letter
        # alphabet, with 1e-6 passed as the third constructor argument.
        word_size, alphabet_size = 6, 5
        self.sax = SAX(word_size, alphabet_size, 1e-6)

    def test_to_letter_rep(self):
        """A six-value series is encoded as the word 'eacccc'."""
        encoded, _ = self.sax.to_letter_rep([7, 1, 4, 4, 4, 4])
        assert encoded == 'eacccc'

    def test_long_to_letter_rep(self):
        """A series longer than the word length still yields six letters."""
        series = [
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 10,
            100
        ]
        encoded, _ = self.sax.to_letter_rep(series)
        assert encoded == 'bbbbce'

    def test_compare_strings(self):
        """A near-identical word must score lower than a very different one."""
        reference = 'aaabbc'
        near_score = self.sax.compare_strings(reference, 'aabbbc')
        far_score = self.sax.compare_strings(reference, 'ccddbc')
        assert near_score < far_score
Exemplo n.º 2
0
class TestSAX(object):
    """Exercise SAX encoding, comparison and normalisation, including NaNs."""

    def setUp(self):
        # Every test shares one encoder: 6-letter words over a 5-letter
        # alphabet, with 1e-6 passed as the third constructor argument.
        word_size, alphabet_size = 6, 5
        self.sax = SAX(word_size, alphabet_size, 1e-6)

    def test_to_letter_rep(self):
        """A six-value series is encoded as the word 'eacccc'."""
        encoded, _ = self.sax.to_letter_rep([7, 1, 4, 4, 4, 4])
        assert encoded == 'eacccc'

    def test_to_letter_rep_missing(self):
        """A NaN sample is expected to show up as '-' in the word."""
        encoded, _ = self.sax.to_letter_rep([7, 1, 4, 4, np.nan, 4])
        assert encoded == 'eacc-c'

    def test_long_to_letter_rep(self):
        """A series longer than the word length still yields six letters."""
        series = [
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 10,
            100
        ]
        encoded, _ = self.sax.to_letter_rep(series)
        assert encoded == 'bbbbce'

    def test_long_to_letter_rep_missing(self):
        """A NaN inside a long series is expected to yield a '-' letter."""
        series = [
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, np.nan, 1, 1, 6, 6, 6, 6,
            10, 100
        ]
        encoded, _ = self.sax.to_letter_rep(series)
        assert encoded == 'bbb-ce'

    def test_compare_strings(self):
        """A near-identical word must score lower than a very different one."""
        reference = 'aaabbc'
        near_score = self.sax.compare_strings(reference, 'aabbbc')
        far_score = self.sax.compare_strings(reference, 'ccddbc')
        assert near_score < far_score

    def test_compare_strings_missing(self):
        """These two words containing '-' are expected to compare as 0."""
        score = self.sax.compare_strings('a-b-c-', 'b-c-d-')
        assert score == 0

    def test_normalize_missing(self):
        """A trailing NaN must stay NaN and leave the other values unchanged."""
        # Same data twice, once with an extra NaN appended: the normalised
        # prefix has to match the NaN-free result exactly.
        with_gap = self.sax.normalize([1, 0, 0, 0, 0, 1, np.nan])
        without_gap = self.sax.normalize([1, 0, 0, 0, 0, 1])
        assert np.array_equal(with_gap[:-1], without_gap)
        assert np.isnan(with_gap[-1])

    def test_normalize_under_epsilon(self):
        """These tiny inputs (all around 1e-7) are expected to become zeros."""
        tiny_values = [1e-7, 2e-7, 1.5e-7]
        normalized = self.sax.normalize(tiny_values)
        assert np.array_equal(normalized, [0, 0, 0])
Exemplo n.º 3
0
class TestSAX(object):
    """Exercise SAX encoding, decoding, breakpoints and interval centres."""

    def setUp(self):
        # Every test shares one encoder: 6-letter words over a 5-letter
        # alphabet, with 1e-6 passed as the third constructor argument.
        word_size, alphabet_size = 6, 5
        self.sax = SAX(word_size, alphabet_size, 1e-6)

    def test_to_letter_rep(self):
        """A six-value series is encoded as the word 'eacccc'."""
        encoded, _, _ = self.sax.to_letter_rep([7, 1, 4, 4, 4, 4])
        assert encoded == 'eacccc'

    def test_long_to_letter_rep(self):
        """A series longer than the word length still yields six letters."""
        series = [
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 10,
            100
        ]
        encoded, _, _ = self.sax.to_letter_rep(series)
        assert encoded == 'bbbbce'

    def test_compare_strings(self):
        """A near-identical word must score lower than a very different one."""
        reference = 'aaabbc'
        near_score = self.sax.compare_strings(reference, 'aabbbc')
        far_score = self.sax.compare_strings(reference, 'ccddbc')
        assert near_score < far_score

    def test_from_letter_rep(self):
        """Encoding then decoding approximately recovers the input series."""
        encoded, positions, boundaries = self.sax.to_letter_rep(
            [7, 1, 4, 4, 4, 4])
        decoded = self.sax.from_letter_rep(encoded, positions, boundaries)
        expected = [6.21, 1.78, 4.0, 4.0, 4.0, 4.0]
        assert allclose(decoded, expected, atol=0.01)

    def test_breakpoints(self):
        """Breakpoints for 3, 2 and 20 letters match the reference values."""
        tolerance = 0.01
        assert allclose(self.sax.breakpoints(3), [-0.43, 0.43],
                        atol=tolerance)
        assert allclose(self.sax.breakpoints(2), [0], atol=tolerance)
        expected_20 = [
            -1.64, -1.28, -1.04, -0.84, -0.67, -0.52, -0.39, -0.25, -0.13, 0,
            0.13, 0.25, 0.39, 0.52, 0.67, 0.84, 1.04, 1.28, 1.64
        ]
        assert allclose(self.sax.breakpoints(20), expected_20,
                        atol=tolerance)

    def test_interval_centres(self):
        """Centres for 2, 3 and 30 letters match the reference values."""
        tolerance = 0.01
        assert allclose(self.sax.interval_centres(2), [-0.67, 0.67],
                        atol=tolerance)
        assert allclose(self.sax.interval_centres(3), [-0.96, 0.0, 0.96],
                        atol=tolerance)
        expected_30 = [
            -2.12, -1.64, -1.38, -1.19, -1.03, -0.90, -0.78, -0.67, -0.57,
            -0.47, -0.38, -0.29, -0.21, -0.12, -0.04, 0.04, 0.12, 0.21, 0.29,
            0.38, 0.47, 0.57, 0.67, 0.78, 0.90, 1.03, 1.19, 1.38, 1.64, 2.12
        ]
        assert allclose(self.sax.interval_centres(30), expected_30,
                        atol=tolerance)
Exemplo n.º 4
0
def ComputeSax(sample_data, sample_data2):
    """Compare two recordings column by column using SAX strings.

    SAX (Symbolic Aggregate approXimation) is described in
    http://www.cs.ucr.edu/~eamonn/SAX.pdf. Columns 1 to 4 of each input are
    encoded as SAX words and the words at the same column index are compared.

    Args:
        sample_data: First recording. Must expose ``.values`` yielding a 2-D
            array with at least five columns (presumably a pandas DataFrame
            whose columns 1-4 are microphone channels; verify against caller).
        sample_data2: Second recording, in the same layout.

    Returns:
        A list of four ``compare_strings`` scores, one per column 1..4.
    """
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` returns
    # the same ndarray and also works on the older pandas versions.
    sample_data = sample_data.values
    sample_data2 = sample_data2.values

    # SAX parameters:
    #   word length   - number of PAA segments, i.e. the length of the string
    #                   that represents the series (dimensionality reduction)
    #   alphabet size - number of distinct letters, e.g. {a, b, c} -> 3
    downsample_ratio = 200
    # Floor division keeps the word length an integer on Python 3, matching
    # what ``/`` produced for two ints on Python 2.
    word_length = len(sample_data[:, 1]) // downsample_ratio
    alphabet_size = 7

    s = SAX(word_length, alphabet_size)

    mic_distances = []
    for mic in range(1, 5):
        # to_letter_rep also returns the segment indices, which are not needed.
        first_word, _ = s.to_letter_rep(sample_data[:, mic])
        second_word, _ = s.to_letter_rep(sample_data2[:, mic])
        mic_distances.append(s.compare_strings(first_word, second_word))
    return mic_distances
Exemplo n.º 5
0
class TestSAX(object):
    """Exercise SAX encoding, comparison and normalisation, including NaNs."""

    def setUp(self):
        # Every test shares one encoder: 6-letter words over a 5-letter
        # alphabet, with 1e-6 passed as the third constructor argument.
        word_size, alphabet_size = 6, 5
        self.sax = SAX(word_size, alphabet_size, 1e-6)

    def test_to_letter_rep(self):
        """A six-value series is encoded as the word 'eacccc'."""
        encoded, _ = self.sax.to_letter_rep([7,1,4,4,4,4])
        assert encoded == 'eacccc'

    def test_to_letter_rep_missing(self):
        """A NaN sample is expected to show up as '-' in the word."""
        encoded, _ = self.sax.to_letter_rep([7,1,4,4,np.nan,4])
        assert encoded == 'eacc-c'

    def test_long_to_letter_rep(self):
        """A series longer than the word length still yields six letters."""
        series = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,6,6,6,6,10,100]
        encoded, _ = self.sax.to_letter_rep(series)
        assert encoded == 'bbbbce'

    def test_long_to_letter_rep_missing(self):
        """A NaN inside a long series is expected to yield a '-' letter."""
        series = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,np.nan,1,1,6,6,6,6,10,100]
        encoded, _ = self.sax.to_letter_rep(series)
        assert encoded == 'bbb-ce'

    def test_compare_strings(self):
        """A near-identical word must score lower than a very different one."""
        reference = 'aaabbc'
        near_score = self.sax.compare_strings(reference, 'aabbbc')
        far_score = self.sax.compare_strings(reference, 'ccddbc')
        assert near_score < far_score

    def test_compare_strings_missing(self):
        """These two words containing '-' are expected to compare as 0."""
        score = self.sax.compare_strings('a-b-c-', 'b-c-d-')
        assert score == 0

    def test_normalize_missing(self):
        """A trailing NaN must stay NaN and leave the other values unchanged."""
        # Same data twice, once with an extra NaN appended: the normalised
        # prefix has to match the NaN-free result exactly.
        with_gap = self.sax.normalize([1,0,0,0,0,1,np.nan])
        without_gap = self.sax.normalize([1,0,0,0,0,1])
        assert np.array_equal(with_gap[:-1], without_gap)
        assert np.isnan(with_gap[-1])

    def test_normalize_under_epsilon(self):
        """These tiny inputs (all around 1e-7) are expected to become zeros."""
        tiny_values = [1e-7, 2e-7, 1.5e-7]
        normalized = self.sax.normalize(tiny_values)
        assert np.array_equal(normalized, [0,0,0])
Exemplo n.º 6
0
class TestSAX(object):
    """Exercise SAX letter encoding and string comparison on small inputs."""

    def setUp(self):
        # Every test shares one encoder: 6-letter words over a 5-letter
        # alphabet, with 1e-6 passed as the third constructor argument.
        word_size, alphabet_size = 6, 5
        self.sax = SAX(word_size, alphabet_size, 1e-6)

    def test_to_letter_rep(self):
        """A six-value series is encoded as the word "eacccc"."""
        encoded, _ = self.sax.to_letter_rep([7, 1, 4, 4, 4, 4])
        assert encoded == "eacccc"

    def test_long_to_letter_rep(self):
        """A series longer than the word length still yields six letters."""
        series = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 10, 100]
        encoded, _ = self.sax.to_letter_rep(series)
        assert encoded == "bbbbce"

    def test_compare_strings(self):
        """A near-identical word must score lower than a very different one."""
        reference = "aaabbc"
        near_score = self.sax.compare_strings(reference, "aabbbc")
        far_score = self.sax.compare_strings(reference, "ccddbc")
        assert near_score < far_score
def min_dist_sax(t1String, t2String, word, alpha, eps=0.000001):
    """Return the SAX comparison score between two SAX strings.

    A fresh ``SAX(word, alpha, eps)`` instance is built on every call and its
    ``compare_strings`` result for the two strings is returned unchanged.
    """
    return SAX(word, alpha, eps).compare_strings(t1String, t2String)
Exemplo n.º 8
0
def min_dist_sax(t1String, t2String, word, alpha, eps=0.000001):
    """Compare two SAX strings with a throwaway ``SAX`` instance.

    ``word``, ``alpha`` and ``eps`` are forwarded positionally to the ``SAX``
    constructor; the value of ``compare_strings`` is returned as-is.
    """
    comparer = SAX(word, alpha, eps)
    score = comparer.compare_strings(t1String, t2String)
    return score