示例#1
0
 def test_DTWKmeans_inertia_positive(self):
     """Check that a fitted DTWKmeans reports a strictly positive inertia.

     The model is fitted for a single iteration with three clusters on a
     noisy flat dataset, then its inertia on that same dataset is checked.
     """
     series = make_flat_dataset(
         [-1.0, 0, 0.5],
         10,
         additive_noise_factor=0.3,
         level_noise_factor=0.3,
         lengths=[5],
     )
     model = DTWKmeans(num_clust=3, num_iter=1, random_seed=101)
     model.fit(series)
     inertia = model._inertia(series)
     assert inertia > 0
示例#2
0
 def test_random_seed(self):
     """Check that the same random seed yields equal generated datasets."""
     levels = [-1.0, 0, 0.5]
     sizes = [10, 5, 20]
     # Both calls must receive exactly the same arguments, seed included.
     shared_kwargs = {
         "additive_noise_factor": 0.3,
         "level_noise_factor": 0.3,
         "lengths": [5],
         "random_seed": 101,
     }
     first_run = make_flat_dataset(levels, sizes, **shared_kwargs)
     second_run = make_flat_dataset(levels, sizes, **shared_kwargs)
     assert lists_of_series_are_equal(first_run, second_run)
示例#3
0
 def test_flat_unbalanced_clusters(self):
     """Check the dataset size when a different size is given per level.

     With a list of sizes, the number of generated series must equal the
     sum of those sizes.
     """
     levels = [-1.0, 0, 0.5]
     sizes = [10, 5, 20]
     series = make_flat_dataset(
         levels,
         sizes,
         additive_noise_factor=0.3,
         level_noise_factor=0.3,
         lengths=[5],
     )
     expected_count = sum(sizes)
     assert len(series) == expected_count
示例#4
0
 def test_flat_equal_clusters(self):
     """Check the dataset size when one scalar size is shared by all levels.

     With a scalar size, the number of generated series must equal that
     size multiplied by the number of levels.
     """
     levels = [-1.0, 0, 0.5]
     size_per_level = 10
     series = make_flat_dataset(
         levels,
         size_per_level,
         additive_noise_factor=0.0,
         level_noise_factor=0.0,
         lengths=[5],
     )
     expected_count = size_per_level * len(levels)
     assert len(series) == expected_count
示例#5
0
def flat_dataset(random_seed=101):
    """Build a noisy flat dataset around three levels.

    The levels are 1.5, 0 and -1.5, requested with 15, 30 and 10 elements
    respectively, so the clusters have different sizes.

    Args:
        random_seed: Seed forwarded to ``make_flat_dataset`` so that the
            generated data is reproducible.

    Returns:
        The value returned by ``make_flat_dataset``.
    """
    cluster_levels = [1.5, 0, -1.5]
    cluster_sizes = [15, 30, 10]
    return make_flat_dataset(
        cluster_levels,
        cluster_sizes,
        additive_noise_factor=0.4,
        level_noise_factor=0.4,
        lengths=[10],
        random_seed=random_seed,
    )
示例#6
0
 def test_DTWKmeans_inertia_decrease_with_iteration_increase(self):
     """Check that one more fitting iteration does not increase inertia.

     Two DTWKmeans models that differ only in ``num_iter`` (1 versus 2) are
     fitted on the same dataset with the same random seed; the inertia of
     the two-iteration model must not exceed that of the one-iteration one.
     """
     list_of_series = make_flat_dataset(
         [-1.0, 0, 0.5],
         10,
         additive_noise_factor=0.3,
         level_noise_factor=0.3,
         lengths=[5],
     )
     num_clusters = 3
     random_seed = 101

     clts_1 = DTWKmeans(num_clust=num_clusters, num_iter=1, random_seed=random_seed)
     clts_1.fit(list_of_series)
     clts_2 = DTWKmeans(num_clust=num_clusters, num_iter=2, random_seed=random_seed)
     clts_2.fit(list_of_series)

     # Compute each inertia once; a failing assert reports both values, so
     # no debug printing is needed.
     inertia_one_iter = clts_1._inertia(list_of_series)
     inertia_two_iter = clts_2._inertia(list_of_series)
     assert inertia_one_iter >= inertia_two_iter
示例#7
0
 def test_DTWKmeans_fit_is_reproduceable_using_random_seed(self):
     """Check that fitting twice with the same seed gives the same centers.

     Two identically configured DTWKmeans models are fitted on the same
     dataset and their ``cluster_centers_`` are compared element-wise.
     """
     series = make_flat_dataset(
         [-1.0, 0, 1.0],
         10,
         additive_noise_factor=0.1,
         level_noise_factor=0.1,
         lengths=[5],
     )

     def fitted_centers():
         # Fresh model every time: only the shared seed may link the two runs.
         model = DTWKmeans(num_clust=3, num_iter=1, random_seed=101)
         model.fit(series)
         return pd.DataFrame(model.cluster_centers_)

     centers_first = fitted_centers()
     centers_second = fitted_centers()
     assert np.all(centers_first.values == centers_second.values)