Exemplo n.º 1
0
def augment_train_set(x_train, y_train, classes, N, dba_iters=5,
                      weights_method_name='as', distance_algorithm='DTW',
                      limit_N=True):
    """
    Augment a labelled time-series set with synthetic, DBA-averaged series.

    For every label in ``classes`` the series carrying that label are
    selected from ``x_train``; for each requested prototype a weight vector
    is drawn and a weighted DBA average of the class is appended to the
    synthetic set together with the class label.

    :param x_train: The original train set (indexed along its first axis).
    :param y_train: The original labels set, compared element-wise with each
        class label to select the series of that class.
    :param classes: The class labels to augment (must support ``len``).
    :param N: The number of synthetic time series requested per class.
    :param dba_iters: The number of dba iterations, forwarded to ``dba``.
    :param weights_method_name: Only used to decide whether a pairwise
        distance matrix is needed (``'aa'`` skips it). The weights function
        itself is fixed to ``get_weights_average_selected``.
    :param distance_algorithm: The name of the distance algorithm, forwarded
        to the weights function.
    :param limit_N: When truthy, at most ``len(class series)`` prototypes are
        generated per class; otherwise ``N + (N - len(class series))`` are
        generated (a non-positive count yields no prototypes).
    :return: ``(synthetic_x_train, synthetic_y_train)`` as numpy arrays.
    """
    # The lookup through a WEIGHTS_METHODS table was disabled in the original
    # code; the weights function is hard-wired.
    weights_fun = get_weights_average_selected

    # synthetic train set and labels
    synthetic_x_train = []
    synthetic_y_train = []

    for k, c in enumerate(classes, start=1):
        print('class {} of {}'.format(k, len(classes)))
        # get the MTS for this class
        c_x_train = x_train[np.where(y_train == c)]

        # Averaging needs at least two series. This also skips labels that
        # have no sample at all in y_train, which would otherwise reach the
        # distance-matrix and weight computations with an empty set.
        if len(c_x_train) < 2:
            continue

        if limit_N:
            # limit the nb_prototypes
            nb_prototypes_per_class = min(N, len(c_x_train))
        else:
            # number of added prototypes will re-balance classes
            nb_prototypes_per_class = N + (N - len(c_x_train))

        if weights_method_name == 'aa':
            # then no need for dist_matrix
            dist_pair_mat = None
        else:
            # dist_fun / dist_fun_params are not defined in this function;
            # they are expected to exist in the enclosing module scope.
            dist_pair_mat = calculate_dist_matrix(c_x_train, dist_fun, dist_fun_params)

        # loop through the number of synthetic examples needed
        for t in range(1, nb_prototypes_per_class + 1):
            print('nb_prototypes_per_class {} of {}'.format(t, nb_prototypes_per_class))
            # The weights function also returns an initial average, but it is
            # not forwarded to dba here, so it is discarded.
            weights, _ = weights_fun(c_x_train, dist_pair_mat,
                                     distance_algorithm=distance_algorithm)
            # get the synthetic data
            synthetic_mts = dba(c_x_train, dba_iters, weights=weights)
            # store the synthetic series together with its class label
            synthetic_x_train.append(synthetic_mts)
            synthetic_y_train.append(c)

    # return the synthetic set
    return np.array(synthetic_x_train), np.array(synthetic_y_train)
Exemplo n.º 2
0
        # get the pairwise matrix 
        if weights_method_name == 'aa': 
            # then no need for dist_matrix 
            dist_pair_mat = None 
        else:
            dist_pair_mat = calculate_dist_matrix(c_x_train, dist_fun, dist_fun_params)

        t = 0
        # loop through the number of synthetic examples needed
        for n in range(nb_prototypes_per_class):
            t += 1
            print('nb_prototypes_per_class {} of {}'.format(t, nb_prototypes_per_class))
            # get the weights and the init for avg method 
            weights, init_avg = weights_fun(c_x_train, dist_pair_mat, distance_algorithm=distance_algorithm)
            # get the synthetic data 
            synthetic_mts = dba(c_x_train, dba_iters, weights=weights)  
            # add the synthetic data to the synthetic train set
            synthetic_x_train.append(synthetic_mts)
            # add the corresponding label 
            synthetic_y_train.append(c)
    # return the synthetic set 
    return np.array(synthetic_x_train), np.array(synthetic_y_train)


if __name__ == '__main__':
    # Smoke run: compute the pairwise distance matrix of the sample series
    # exposed by utils, derive the weights and the initial average from it,
    # and print both (one per line).
    tseries = utils.tseries
    dist_pair_mat = calculate_dist_matrix(tseries, dist_fun, dist_fun_params)
    weights, init_dba = get_weights_average_selected(tseries, dist_pair_mat)
    print(weights, init_dba, sep='\n')
Exemplo n.º 3
0
    def augment_train_set(x_train,
                          y_train,
                          N,
                          dba_iters=1,
                          weights_method_name='aa',
                          distance_algorithm='dtw',
                          limit_N=True,
                          subset_size=20):
        """
        Augment a dataset with weighted-DBA synthetic series and averaged targets.

        Each synthetic sample is built from a random subset of the train set:
        the subset is averaged with weighted DBA to give the new series, and
        the subset's targets are averaged with the same (per-series) weights
        to give the new target.

        :param x_train: The original train set (numpy array, samples on axis 0).
        :param y_train: The original labels set. It is indexed as
            ``y_train[indices, :]``, so it must be at least 2-D.
        :param N: The number of synthetic time series.
        :param dba_iters: The number of dba iterations to converge.
        :param weights_method_name: Key into ``utils.constants.WEIGHTS_METHODS``;
            ``'aa'`` skips the pairwise distance matrix.
        :param distance_algorithm: Key into ``utils.constants.DISTANCE_ALGORITHMS``
            and ``utils.constants.DISTANCE_ALGORITHMS_PARAMS``.
        :param limit_N: When truthy, at most ``len(x_train)`` samples are
            generated; otherwise ``N + (N - len(x_train))`` are generated
            (a non-positive count yields no samples).
        :param subset_size: How many train samples are drawn (with possible
            repeats) for each synthetic sample. Defaults to 20, the value
            that used to be hard-coded.
        :return: ``(synthetic_x_train, synthetic_y_train)`` as numpy arrays.
        """
        # get the weights function
        weights_fun = utils.constants.WEIGHTS_METHODS[weights_method_name]
        # get the distance function
        dist_fun = utils.constants.DISTANCE_ALGORITHMS[distance_algorithm]
        # get the distance function params
        dist_fun_params = utils.constants.DISTANCE_ALGORITHMS_PARAMS[
            distance_algorithm]
        # synthetic train set and labels
        synthetic_x_train = []
        synthetic_y_train = []

        if limit_N:
            # limit the nb_prototypes
            nb_prototypes_per_class = min(N, len(x_train))
        else:
            # number of added prototypes will re-balance classes
            nb_prototypes_per_class = N + (N - len(x_train))

        # loop through the number of synthetic examples needed
        for n in range(nb_prototypes_per_class):
            # draw the random subset this synthetic sample is built from
            indices = np.random.randint(x_train.shape[0], size=subset_size)
            x_train_random = x_train[indices, :]
            y_train_random = y_train[indices, :]

            # The pairwise matrix depends on the drawn subset, so it has to
            # be recomputed on every iteration.
            if weights_method_name == 'aa':
                # then no need for dist_matrix
                dist_pair_mat = None
            else:
                dist_pair_mat = calculate_dist_matrix(x_train_random, dist_fun,
                                                      dist_fun_params)
            # get the weights and the init for avg method
            weights, init_avg = weights_fun(
                x_train_random,
                dist_pair_mat,
                distance_algorithm=distance_algorithm)

            # get the synthetic data
            synthetic_mts_x = dba(x_train_random,
                                  dba_iters,
                                  verbose=False,
                                  distance_algorithm=distance_algorithm,
                                  weights=weights,
                                  init_avg_method='manual',
                                  init_avg_series=init_avg)

            # add the synthetic data to the synthetic train set
            synthetic_x_train.append(synthetic_mts_x)

            # Collapse the weights to one scalar per series, then take the
            # weighted average of the subset's targets.
            weights_y = np.mean(weights, axis=1)
            new_y = 0
            sum_weights = 0
            for label, weight in zip(y_train_random, weights_y):
                new_y += label * weight
                sum_weights += weight
            new_y = new_y / sum_weights
            synthetic_y_train.append(new_y)

            # NOTE(review): `ind` is not a parameter or local of this
            # function; it must be provided by an enclosing scope.
            print('File', ind, 'new_data_number', n)
        # return the synthetic set
        return np.array(synthetic_x_train), np.array(synthetic_y_train)

        # In[90]:

        # Driver: build N synthetic samples from (X_p, y), append them to the
        # original data and save both the augmented and the original set as
        # .mat files for session `ind`.
        # NOTE(review): these statements are indented like the body of
        # augment_train_set above and follow its `return`, so as written they
        # look unreachable -- confirm the intended indentation.
        # X_p, y, ind, res_addr, io and start_time are defined outside this view.

        n = 100
        syn_train_set = augment_train_set(X_p,
                                          y,
                                          n,
                                          weights_method_name='as',
                                          distance_algorithm='dtw')

        # get the synthetic train and labels
        syn_x_train, syn_y_train = syn_train_set
        # concat the synthetic with the reduced random train and labels
        aug_x_train = np.array(X_p.tolist() + syn_x_train.tolist())
        aug_y_train = np.array(y.tolist() + syn_y_train.tolist())

        # In[ ]:

        aug_data = {'aug_x': aug_x_train, 'aug_y': aug_y_train}

        # `ind` is concatenated with str literals, so it has to be a string here.
        aug_res_address = res_addr + 'session_' + ind + '/aug_power_data_session_' + ind + '.mat'
        # presumably `io` is scipy.io -- verify against the file's imports
        io.savemat(aug_res_address, aug_data)

        # also save the un-augmented data next to the augmented file
        original_data = {'x': X_p, 'y': y}
        res_address = res_addr + 'session_' + ind + '/power_data_session_' + ind + '.mat'

        io.savemat(res_address, original_data)
        # NOTE(review): time.clock() was removed in Python 3.8. Switching to
        # time.perf_counter() must be done together with the statement that
        # sets start_time, which is outside this view.
        print(time.clock() - start_time)