def get_subdataset_columns(parameters):
    """
    Obtains a subset of the dataset by its columns and saves it in the workspace.

    :param parameters: The parameters of the function (dataset name and,
        optionally, the list of column names under the 'cols' key).
    """
    workspace = al.Workspace()
    data_name = parameters['Dataset']
    dataset = workspace.get_dataset(data_name)
    # Use the columns given in the parameters; otherwise ask the user for
    # them one by one until the answer is 'no'.
    cols = parameters.get('cols')
    if not cols:
        cols = []
        while True:
            cols.append(al.obtain_column(dataset))
            print('Do you want to continue? yes or no?')
            if al.query_input() == 'no':
                break
    dataset = dataset[cols]
    num = workspace.get_counter('sub')
    name = 'subcol' + str(num) + data_name
    workspace.save_dataset(name, dataset)
    # Fixed message: this function subsets by columns, not by rows.
    txt = 'The sub-dataset by the columns is saved as ' + name
    print(txt)
def paa(dataset, parameters):
    """
    Executes the function paa of khiva (Piecewise Aggregate Approximation).

    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (number of points).
    :return: The timeserie with the reduced points.
    """
    points = al.get_int_number(parameters)
    # With several columns, ask the user which single one to reduce.
    if dataset.columns.size > 1:
        chosen = al.obtain_column(dataset)
        dataset = dataset[chosen]
    reduced = kv.paa(kv.Array(dataset), points)
    return reduced.to_pandas()
def ramer_douglas_peucker(dataset, parameters):
    """
    Executes the function Ramer-Douglas-Peucker of khiva.

    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (epsilon).
    :return: The timeserie with the reduced points, indexed by the surviving
        original positions.
    """
    eps = al.get_float_number(parameters)
    if dataset.columns.size > 1:
        dataset = dataset[al.obtain_column(dataset)]
    # Pair each value with its positional index so the simplified result
    # keeps the original x coordinates.
    pairs = kv.Array([range(dataset.size), dataset.to_numpy().flatten()])
    simplified = kv.ramer_douglas_peucker(pairs, eps).transpose().to_pandas()
    simplified.set_index(0, inplace=True)
    simplified.set_index(simplified.index.astype('int32'), inplace=True)
    return simplified
def pip(dataset, parameters):
    """
    Executes the function pip of khiva (Perceptually Important Points).

    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (number of pip).
    :return: The timeserie with the reduced points, indexed by the surviving
        original positions.
    """
    pips = al.get_int_number(parameters)
    if dataset.columns.size > 1:
        dataset = dataset[al.obtain_column(dataset)]
    # Pair each value with its positional index so the reduced result keeps
    # the original x coordinates.
    pairs = kv.Array([range(dataset.size), dataset.to_numpy().flatten()])
    reduced = kv.pip(pairs, pips).transpose().to_pandas()
    reduced.set_index(0, inplace=True)
    reduced.set_index(reduced.index.astype('int32'), inplace=True)
    return reduced
def plot_dataset(dataset, parameters):
    """
    Plots graphically one or more columns of the dataset.

    :param dataset: The current dataset.
    :param parameters: The parameters for the graphic ('from', 'to',
        'columns' and 'Dataset').
    :raises ValueError: If the starting row number is greater than the
        last row number.
    """
    start = int(parameters['from'] if parameters['from'] else 0)
    end = int(parameters['to'] if parameters['to'] else dataset.index.size)
    if end < start:
        msg = ('This operation cannot be done.\n'
               'The starting row number is greater than the last row number.')
        print(msg)
        # ValueError (a subclass of Exception) keeps existing handlers
        # working while being more precise than a bare Exception().
        raise ValueError(msg)
    if end:
        dataset = dataset.iloc[:end]
    if start:
        dataset = dataset.iloc[start:]
    if dataset.columns.size > 1:
        # Plot either the user-selected columns or, when none were given,
        # a single column chosen interactively.
        columns = parameters["columns"] or [al.obtain_column(dataset)]
        for column_name in columns:
            _plot_series(dataset[column_name], column_name)
    else:
        _plot_series(dataset, parameters["Dataset"])


def _plot_series(series, title):
    # Render a single series in its own non-blocking figure window.
    plt.figure()
    plt.plot(series)
    plt.title(title)
    plt.show(block=False)
def do_matrix(parameters):
    """
    Performs a matrix-profile operation (stomp, self join, best discords
    or best motifs) and stores the result in the workspace.

    :param parameters: The parameters of the function (name of the
        operation, dataset name(s), n, ...).
    """
    op = parameters.pop("operation")
    workspace = al.Workspace()
    # A "stomp" request without a second dataset is really a self join.
    if op == "stomp" and not parameters.get('Dataset2'):
        op = 'stomp_self_join'
    if op == "stomp":
        data_name = parameters["Dataset"]
        data_name2 = parameters["Dataset2"]
        dataset1 = workspace.get_dataset(data_name)
        dataset2 = workspace.get_dataset(data_name2)
        if dataset2 is None:
            # Report the missing name only when the user actually gave one.
            if not data_name2 == "":
                print("The object " + data_name2 + " does not exist.")
                al.voice("The object " + data_name2 + " does not exist.")
            print("Please, provide the two datasets that should be stomped.")
            al.voice("Please, provide the two datasets that should be stomped.")
            return
        # stomp works on a single column; ask the user when there are several.
        col = ''
        if dataset1.columns.size > 1:
            col = al.obtain_column(dataset1)
            dataset1 = dataset1[col]
        if dataset2.columns.size > 1:
            dataset2 = dataset2[al.obtain_column(dataset2)]
        (stomp, m) = al.stomp(dataset1.values, dataset2.values, parameters)
        number = workspace.get_counter('matrix_stomp')
        # Saved tuple: (profile as JSON, subsequence length m, chosen column
        # name, first series as JSON) — consumed by the find_best_* branches.
        workspace.save_dataset('stomp' + str(number), (stomp.to_json(), m, col, dataset1.to_json()))
        print("The stomp is stored as stomp" + str(number))
    elif op == "stomp_self_join":
        data_name = parameters["Dataset"]
        dataset1 = workspace.get_dataset(data_name)
        col = ''
        if dataset1.columns.size > 1:
            col = al.obtain_column(dataset1)
            dataset1 = dataset1[col]
        (stomp, m) = al.stomp_self_join(dataset1.values, parameters)
        number = workspace.get_counter('matrix_stomp')
        # Same tuple layout as the "stomp" branch above.
        workspace.save_dataset('stomp' + str(number), (stomp.to_json(), m, col, dataset1.to_json()))
        print("The stomp is stored as stomp" + str(number))
    elif op == "find_best_n_discords":
        stomp_name = parameters['Dataset']
        stomp = workspace.get_dataset(stomp_name)
        # Unpack (m, col, dataset-JSON) saved alongside the stomp profile.
        m, col, dataset = workspace.get_value(stomp_name)[1:]
        discords = al.find_best_n_discords(stomp, m, parameters, col, pd.read_json(dataset).sort_index())
        number = workspace.get_counter('matrix_best_d')
        workspace.save_dataset('discords' + str(number), discords)
        print('The best ' + str(int(parameters['n'])) + ' discord segments are stored as discords' + str(number))
    elif op == "find_best_n_motifs":
        stomp_name = parameters['Dataset']
        stomp = workspace.get_dataset(stomp_name)
        # Unpack (m, col, dataset-JSON) saved alongside the stomp profile.
        m, col, dataset = workspace.get_value(stomp_name)[1:]
        motifs = al.find_best_n_motifs(stomp, m, parameters, col, pd.read_json(dataset).sort_index())
        number = workspace.get_counter('matrix_best_m')
        workspace.save_dataset('motifs' + str(number), motifs)
        print('The best ' + str(int(parameters['n'])) + ' motifs segments are stored as motifs' + str(number))