def create_dataset(parameters):
    """
    Create a random dataset and save it in the workspace.

    The dataset is saved twice: under an auto-numbered name ('random<N>')
    and as 'current'.

    :param parameters: The parameters for the creation (number of rows,
        number of columns, value range).
    """
    workspace = al.Workspace()
    num_rows, num_col, values = rand_param(parameters)
    print('Creating the random dataset')
    al.voice('Creating the random dataset')
    # One float32 column of uniform noise per requested column.
    columns = {
        'col' + str(idx): np.random.uniform(values[0], values[1],
                                            size=num_rows).astype('float32')
        for idx in range(num_col)
    }
    frame = pd.DataFrame(columns, index=range(num_rows))
    rand = workspace.get_counter('rand')
    workspace.save_dataset('random' + str(rand), frame)
    workspace.save_dataset('current', frame)
    summary = ('Created and saved as random{} which has {} columns, {} rows and values '
               'between {} and {}'.format(str(rand), num_col, num_rows,
                                          values[0], values[1]))
    print(summary)
    al.voice(summary)
def join_by_rows(parameters):
    """
    Concatenate two datasets with the same number of columns, row-wise.

    The result is saved in the workspace under an auto-numbered name
    ('join<N>').

    :param parameters: The parameters of the function (dataset names under
        the keys 'Dataset' and 'Dataset2').
    """
    workspace = al.Workspace()
    name_data1 = parameters['Dataset']
    name_data2 = parameters['Dataset2']
    dataset1 = workspace.get_dataset(name_data1)
    dataset2 = workspace.get_dataset(name_data2)
    # Fix: the original only validated the second dataset; a missing first
    # dataset crashed later with an AttributeError on `.columns`.
    for name, data in ((name_data1, dataset1), (name_data2, dataset2)):
        if data is None:
            # Only mention the name when the user actually supplied one.
            if not name == "":
                print("The object " + name + " does not exist.")
                al.voice("The object " + name + " does not exist.")
            print("Please, provide the two datasets that should be joined.")
            al.voice("Please, provide the two datasets that should be joined.")
            return
    if dataset1.columns.size != dataset2.columns.size:
        # Row-wise concat requires an identical column layout.
        print('Not able to execute.\nThe datasets have different number of columns')
        al.voice('Not able to execute.\nThe datasets have different number of columns')
        return
    dataset = pd.concat([dataset1, dataset2], ignore_index=True)
    num = workspace.get_counter('join')
    name = 'join' + str(num)
    workspace.save_dataset(name, dataset)
    print('The resulting dataset between ' + name_data1 + ' and ' + name_data2 +
          ' is saved as ' + name)
def split_by_rows(parameters):
    """
    Split a dataset into consecutive chunks with a fixed number of rows.

    Each chunk is saved in the workspace as '<name>r<N>'.

    :param parameters: The parameters of the function (dataset name, and
        the chunk size under the key 'split').
    """
    workspace = al.Workspace()
    name_data = parameters['Dataset']
    dataset = workspace.get_dataset(name_data)
    if parameters['split']:
        chunk_size = int(parameters['split'])
    else:
        print('How many rows will each dataset have?')
        al.voice('How many rows will each dataset have?')
        answer = al.query_input()
        while not al.isnumber(answer):
            print('Incorrect input.\nIt is not a number.\nPlease introduce one:')
            al.voice('Incorrect input.\nIt is not a number.\nPlease introduce one.')
            answer = al.query_input()
        chunk_size = int(answer)
    names = []
    start = 0
    # Walk the dataset in chunk_size steps; iloc clips the final slice.
    while start < dataset.index.size:
        piece = dataset.iloc[start:start + chunk_size]
        counter = workspace.get_counter('split')
        chunk_name = name_data + 'r' + str(counter)
        names.append(chunk_name)
        workspace.save_dataset(chunk_name, piece)
        start += chunk_size
    print('The splits of ' + name_data + ' are saved as: ' + str(names)[1:-1])
def get_subdataset_columns(parameters):
    """
    Obtain a subset of the dataset by its columns and save it.

    The columns come from the 'cols' parameter or, when absent, are asked
    for interactively one at a time.

    :param parameters: The parameter of the function (dataset name, and
        optionally the column list under 'cols').
    """
    workspace = al.Workspace()
    data_name = parameters['Dataset']
    dataset = workspace.get_dataset(data_name)
    cols = []
    if parameters["cols"]:
        cols = parameters['cols']
    else:
        stop = False
        while not stop:
            cols.append(al.obtain_column(dataset))
            print('Do you want to continue? yes or no?')
            al.voice('Do you want to continue? yes or no?')
            response = al.query_input()
            if response == 'no':
                stop = True
    dataset = dataset[cols]
    num = workspace.get_counter('sub')
    name = 'subcol' + str(num) + data_name
    workspace.save_dataset(name, dataset)
    # Fix: the message wrongly said "by the rows" for a column subset.
    txt = 'The sub-dataset by the columns is saved as ' + name
    print(txt)
    al.voice(txt)
def ask_for_dataset_extension(files):
    """
    Ask the user for the dataset file extension.

    :param files: All files that were found with the same name.
    :return: The extension the user typed.
    """
    print('What is the file extension (txt, csv)?')
    al.voice('What is the file extension (txt, csv)?')
    # Fix: subject-verb agreement in the user-facing message ("has" -> "have").
    print('All the files that have been found with the same name: ' + str(files))
    return al.query_input()
def ask_for_dataset_path():
    """
    Ask the user where the dataset file is located.

    Answering 'here' selects the current working directory.

    :return: The path introduced by the user.
    """
    print('Where is it located?')
    al.voice('Where is it located?')
    print('Current path is ' + os.getcwd())
    answer = al.query_input()
    return os.getcwd() if answer == 'here' else answer
def do_normalization(parameters):
    """
    Execute a normalization operation on a dataset.

    The normalized copy is stored in the workspace under an auto-numbered
    name; the '_in_place' variants are currently no-ops.

    :param parameters: The parameters of the function (name of the
        operation, dataset, ...).
    """
    op = parameters.pop("operation")
    workspace = al.Workspace()
    data_name = parameters["Dataset"]
    dataset = workspace.get_dataset(data_name)
    if op.endswith("_in_place"):
        # In-place variants are not implemented yet.
        return
    # Map each operation to its result-name prefix and its computation.
    dispatch = {
        'decimal_scaling_norm': ('dec_sca_norm', lambda d, p: al.decimal_scaling_norm(d)),
        'max_min_norm': ('max_min_norm', lambda d, p: al.max_min_norm(d, p)),
        'mean_norm': ('mean_norm', lambda d, p: al.mean_norm(d)),
        'znorm': ('znorm', lambda d, p: al.znorm(d, p)),
    }
    name = ''
    if op in dispatch:
        prefix, compute = dispatch[op]
        norm = compute(dataset, parameters)
        name = prefix + str(workspace.get_counter('norm'))
        workspace.save_dataset(name, norm)
    print('The normalization is stored as ' + name)
    al.voice('The normalization is stored as ' + name)
def do_features(parameters):
    """
    Compute the features of a dataset and save them in the workspace.

    :param parameters: The parameters for this function (dataset name).
    """
    workspace = al.Workspace()
    source_name = parameters['Dataset']
    source = workspace.get_dataset(source_name)
    extracted = al.features(source)
    suffix = str(workspace.get_counter('feat'))
    result_name = source_name + 'Features' + suffix
    workspace.save_dataset(result_name, extracted)
    message = 'The features are stored as ' + result_name
    print(message)
    al.voice(message)
def get_int_number(parameters):
    """
    Obtain an integer from the parameters or ask the user for one.

    :param parameters: the parameters of the function; the value is read
        from the 'number' key when present.
    :return: An integer number.
    """
    if parameters["number"]:
        num_points = int(parameters["number"])
    else:
        # Fix: grammar of the user-facing prompt ("How many point ... it?").
        print('How many points do you want to reduce it to?')
        al.voice('How many points do you want to reduce it to?')
        query = al.query_input()
        while not query.isnumeric():
            print('Incorrect input.\nIt is not a number.\nPlease introduce one.')
            al.voice('Incorrect input.\nIt is not a number.\nPlease introduce one.')
            query = al.query_input()
        num_points = int(query)
    return num_points
def get_epsilon(parameters):
    """
    Obtain the epsilon value from the parameters or ask the user for it.

    Although the function asks for epsilon, the value is carried under the
    'number' key of the parameters.

    :param parameters: the parameters of the function (number, ...).
    :return: The epsilon value as a float.
    """
    if parameters["number"]:
        # Fix: coerce to float here too; the original returned the raw
        # (possibly string) parameter value in this branch only.
        num_points = float(parameters["number"])
    else:
        print('What is the value of epsilon?')
        al.voice('What is the value of epsilon?')
        query = al.query_input()
        # Fix: epsilon is a float, but str.isnumeric() rejected decimal
        # input such as "0.5"; use al.isnumber like the other float asks.
        while not al.isnumber(query):
            print('Incorrect input.\nIt is not a number.\nPlease introduce one.')
            al.voice('Incorrect input.\nIt is not a number.\nPlease introduce one.')
            query = al.query_input()
        num_points = float(query)
    return num_points
def exiting_yes(response):
    """
    Exit the program, keeping the saved workspace.

    :param response: The response text from Dialogflow, spoken before exit.
    """
    for message in ('Saving the workspace', 'Saved workspace'):
        print(message)
        al.voice(message)
    print('DEBUG: Fulfillment text: {}'.format(response))
    al.voice(response)
    print('Closing program')
    al.voice('Closing program')
    exit()
def exiting_no(response):
    """
    Exit the program after deleting the workspace.

    :param response: The response text from Dialogflow, spoken before exit.
    """
    def announce(text):
        # Every status update is both printed and spoken.
        print(text)
        al.voice(text)

    announce('Deleting workspace')
    al.Workspace().clean_workspace()
    announce('Deleted workspace')
    print('DEBUG: Fulfillment text: {}'.format(response))
    al.voice(response)
    announce('Closing program')
    exit()
def _ask_row_number(prompt):
    """Ask the user for a row number, re-prompting until a number is given."""
    print(prompt)
    al.voice(prompt)
    query = al.query_input()
    while not al.isnumber(query):
        print('Incorrect input.\nIt is not a number.\nPlease introduce one:')
        al.voice('Incorrect input.\nIt is not a number.\nPlease introduce one.')
        query = al.query_input()
    return int(query)


def get_subdataset_rows(parameters):
    """
    Obtain a subset of the dataset by its rows and save it.

    :param parameters: The parameter of the function (dataset name, and
        optional 'from'/'to' row indices).
    :raises ValueError: If the starting row is greater than the last row.
    """
    workspace = al.Workspace()
    data_name = parameters['Dataset']
    dataset = workspace.get_dataset(data_name)
    # Decomposed: the ask-until-numeric loop was duplicated for both bounds.
    index_a = int(parameters["from"]) if parameters["from"] \
        else _ask_row_number('From what row number?')
    index_b = int(parameters["to"]) if parameters["to"] \
        else _ask_row_number('To what row number?')
    if index_b < index_a:
        msg = ('This operation cannot be done.\nThe starting row number is '
               'greater than the last row number.')
        print(msg)
        al.voice(msg)
        # Fix: raise a specific exception instead of a bare Exception();
        # the caller catches Exception, so this stays backward-compatible.
        raise ValueError(msg)
    dataset = dataset.iloc[index_a:index_b]
    num = workspace.get_counter('sub')
    name = 'subrow' + str(num) + data_name
    workspace.save_dataset(name, dataset)
    txt = 'The sub-dataset by the rows is saved as ' + name
    print(txt)
    # Consistency with get_subdataset_columns: also speak the result.
    al.voice(txt)
def do_matrix(parameters):
    """
    Execute a matrix-profile operation (STOMP, self-join STOMP, or a
    best-N discords/motifs query over a previously stored profile).

    :param parameters: The parameters of the function: the operation name
        under 'operation', dataset name(s) under 'Dataset'/'Dataset2', and
        for the best-N queries the count under 'n'.
    """
    op = parameters.pop("operation")
    workspace = al.Workspace()
    # A "stomp" request without a second dataset degrades to a self join.
    if op == "stomp" and not parameters.get('Dataset2'):
        op = 'stomp_self_join'
    if op == "stomp":
        data_name = parameters["Dataset"]
        data_name2 = parameters["Dataset2"]
        dataset1 = workspace.get_dataset(data_name)
        dataset2 = workspace.get_dataset(data_name2)
        if dataset2 is None:
            # Only mention the name when the user actually supplied one.
            if not data_name2 == "":
                print("The object " + data_name2 + " does not exist.")
                al.voice("The object " + data_name2 + " does not exist.")
            print("Please, provide the two datasets that should be stomped.")
            al.voice("Please, provide the two datasets that should be stomped.")
            return
        col = ''
        # STOMP operates on a single series; ask which column to use when
        # a dataset has more than one.
        if dataset1.columns.size > 1:
            col = al.obtain_column(dataset1)
            dataset1 = dataset1[col]
        if dataset2.columns.size > 1:
            dataset2 = dataset2[al.obtain_column(dataset2)]
        (stomp, m) = al.stomp(dataset1.values, dataset2.values, parameters)
        number = workspace.get_counter('matrix_stomp')
        # Store the profile together with m, the chosen column name and the
        # source series (as JSON) so the best-N operations can recover them.
        workspace.save_dataset('stomp' + str(number),
                               (stomp.to_json(), m, col, dataset1.to_json()))
        print("The stomp is stored as stomp" + str(number))
    elif op == "stomp_self_join":
        data_name = parameters["Dataset"]
        dataset1 = workspace.get_dataset(data_name)
        col = ''
        if dataset1.columns.size > 1:
            col = al.obtain_column(dataset1)
            dataset1 = dataset1[col]
        (stomp, m) = al.stomp_self_join(dataset1.values, parameters)
        number = workspace.get_counter('matrix_stomp')
        # Same storage layout as the two-dataset stomp above.
        workspace.save_dataset('stomp' + str(number),
                               (stomp.to_json(), m, col, dataset1.to_json()))
        print("The stomp is stored as stomp" + str(number))
    elif op == "find_best_n_discords":
        stomp_name = parameters['Dataset']
        stomp = workspace.get_dataset(stomp_name)
        # Recover (m, column, source-series JSON) saved next to the profile.
        m, col, dataset = workspace.get_value(stomp_name)[1:]
        discords = al.find_best_n_discords(stomp, m, parameters, col,
                                           pd.read_json(dataset).sort_index())
        number = workspace.get_counter('matrix_best_d')
        workspace.save_dataset('discords' + str(number), discords)
        print('The best ' + str(int(parameters['n'])) +
              ' discord segments are stored as discords' + str(number))
    elif op == "find_best_n_motifs":
        stomp_name = parameters['Dataset']
        stomp = workspace.get_dataset(stomp_name)
        m, col, dataset = workspace.get_value(stomp_name)[1:]
        motifs = al.find_best_n_motifs(stomp, m, parameters, col,
                                       pd.read_json(dataset).sort_index())
        number = workspace.get_counter('matrix_best_m')
        workspace.save_dataset('motifs' + str(number), motifs)
        print('The best ' + str(int(parameters['n'])) +
              ' motifs segments are stored as motifs' + str(number))
def detect_intent_text(project_id, session_id, text, language_code):
    """
    Detect the intent of the text with Dialogflow and dispatch the matching
    workspace operation.

    Using the same `session_id` between requests allows continuation of the
    conversation.

    :param project_id: ID of the Dialogflow project.
    :param session_id: ID of the session.
    :param text: The text input to analyse.
    :param language_code: Code of the language.
    """
    session_client = dialogflow.SessionsClient()
    session = session_client.session_path(project_id, session_id)
    print('Session path: {}\n'.format(session))
    text_input = dialogflow.types.TextInput(text=text, language_code=language_code)
    query_input = dialogflow.types.QueryInput(text=text_input)
    response = session_client.detect_intent(session=session, query_input=query_input)
    """Conversion of Protocol Buffer to JSON"""
    response_json = pbjson.MessageToJson(response)
    data = json.loads(response_json)
    # Dialogflow slot values for the matched intent.
    parameters = data['queryResult']['parameters']
    print(parameters)
    print('=' * 20)
    print('DEBUG: Query text: {}'.format(response.query_result.query_text))
    print('DEBUG: Detected intent: {} (confidence: {})\n'.format(
        response.query_result.intent.display_name,
        response.query_result.intent_detection_confidence))
    try:
        # Intents that do not need an existing dataset are handled first.
        if response.query_result.intent.display_name == 'RandomDataset':
            al.create_dataset(parameters)
        elif response.query_result.intent.display_name == 'LoadDataset':
            al.load_dataset(parameters)
        elif response.query_result.intent.display_name == 'ShowWorkspace':
            workspace = al.Workspace()
            print(list(workspace.get_all_dataset()))
        elif response.query_result.intent.display_name == 'GetBackend':
            al.get_library_backend(parameters['library'])
        elif response.query_result.intent.display_name == 'SetBackend':
            al.set_library_backend(parameters)
        elif response.query_result.intent.display_name == 'Exit - yes':
            al.exiting_yes(response.query_result.fulfillment_text)
        elif response.query_result.intent.display_name == 'Exit - no':
            al.exiting_no(response.query_result.fulfillment_text)
        # Everything else (except Dialogflow's Default/Exit intents)
        # operates on a dataset, defaulting to 'current' when none given.
        elif not re.search("^Default|Exit",
                           response.query_result.intent.display_name):
            if not parameters.get("Dataset"):
                parameters['Dataset'] = 'current'
            if al.check_dataset(parameters):
                if response.query_result.intent.display_name == 'ChangeName':
                    al.change_name(parameters)
                elif response.query_result.intent.display_name == 'ShowResult':
                    al.execute_plot(parameters)
                elif response.query_result.intent.display_name == 'PrintResult':
                    al.execute_print(parameters)
                elif response.query_result.intent.display_name == 'SubDatasetRow':
                    al.get_subdataset_rows(parameters)
                elif response.query_result.intent.display_name == 'SubDatasetCols':
                    al.get_subdataset_columns(parameters)
                elif response.query_result.intent.display_name == 'JoinByCols':
                    al.join_by_cols(parameters)
                elif response.query_result.intent.display_name == 'JoinByRows':
                    al.join_by_rows(parameters)
                elif response.query_result.intent.display_name == 'SplitByCols':
                    al.split_by_cols(parameters)
                elif response.query_result.intent.display_name == 'SplitByRows':
                    al.split_by_rows(parameters)
                elif response.query_result.intent.display_name == 'DoDimensionality':
                    al.do_dimensionality(parameters)
                elif response.query_result.intent.display_name == 'DoClustering':
                    al.do_clustering(parameters)
                elif response.query_result.intent.display_name == 'DoMatrix_Stomp':
                    al.do_matrix(parameters)
                elif response.query_result.intent.display_name == 'DoMatrix_Best':
                    al.do_matrix(parameters)
                elif response.query_result.intent.display_name == 'DoNormalization':
                    al.do_normalization(parameters)
                elif response.query_result.intent.display_name == 'DoFeatures':
                    al.do_features(parameters)
            else:
                # The requested dataset is missing; tell the user and abort.
                if parameters["Dataset"] != 'current':
                    print("The object " + parameters["Dataset"] + " does not exist.")
                    al.voice("The object " + parameters["Dataset"] + " does not exist.")
                else:
                    print("There is no loaded dataset.")
                    al.voice("There is no loaded dataset.")
                print("Please, load a dataset or use a previously stored one before using any function.")
                al.voice("Please, load a dataset or use a previously stored one before using any function.")
                return
        print('DEBUG: Fulfillment text: {}\n'.format(response.query_result.fulfillment_text))
        if response.query_result.fulfillment_text:
            al.voice(response.query_result.fulfillment_text)
    except Exception as e:
        # Keep the conversation loop alive on any handler failure.
        print('An error in the execution has been raised.')
        print(e)
        return
def _ask_count(question):
    """Ask for a non-negative integer count, re-prompting until valid."""
    print(question)
    al.voice(question)
    query = al.query_input()
    while not query.isnumeric():
        print('Incorrect input.\nIt is not a number.\nPlease introduce one.')
        al.voice('Incorrect input.\nIt is not a number.\nPlease introduce one.')
        query = al.query_input()
    return int(query)


def _ask_value(question):
    """Ask for a numeric value (decimals allowed), re-prompting until valid."""
    print(question)
    al.voice(question)
    query = al.query_input()
    while not al.isnumber(query):
        print('Incorrect input.\nIt is not a number.\nPlease introduce one:')
        al.voice('Incorrect input.\nIt is not a number.\nPlease introduce one.')
        query = al.query_input()
    return float(query)


def rand_param(parameters):
    """
    Obtain the parameters for the random dataset generator.

    Missing entries are asked for interactively.

    :param parameters: The parameters for the creation ('columns', 'rows'
        and 'values').
    :return: A tuple (num_rows, num_col, values).
    """
    # Decomposed: the original repeated the ask-until-numeric loop four times.
    num_col = int(parameters["columns"]) if parameters["columns"] \
        else _ask_count('How many columns?')
    num_rows = int(parameters["rows"]) if parameters["rows"] \
        else _ask_count('How many rows?')
    if parameters["values"]:
        values = parameters["values"]
    else:
        values = [_ask_value('What is the minimum value?'),
                  _ask_value('And the maximum?')]
    return num_rows, num_col, values