# NOTE(review): this chunk begins mid-expression — the string list below is the
# tail of an argument (activity label names) to a dataset method whose opening
# parenthesis lies outside this view; confirm the enclosing call upstream.
        'labelSittingDown', 'labelSitting', 'labelStandingFromLying', 'labelOnAllFours',
        'labelSittingOnTheGround', 'labelStandingFromSitting', 'labelStandingFromSittingOnTheGround'],
    'max', '')

# Get the resulting pandas data table
dataset = DataSet.data_table

# Create the object used to plot the data
DataViz = VisualizeDataset()

# Boxplot of the per-axis ankle/belt/chest sensor columns
DataViz.plot_dataset_boxplot(dataset, ['ankle_l_x', 'ankle_l_y', 'ankle_l_z',
                                       'ankle_r_x', 'ankle_r_y', 'ankle_r_z',
                                       'belt_x', 'belt_y', 'belt_z',
                                       'chest_x', 'chest_y', 'chest_z'])

# Plot all data: three parallel lists — column prefixes to select, the matching
# mode per entry ('like' = prefix match), and the display style per entry.
DataViz.plot_dataset(dataset, ['ankle_l_', 'ankle_r_', 'belt_', 'chest_', 'label'],
                     ['like', 'like', 'like', 'like', 'like'],
                     ['line', 'line', 'line', 'line', 'points'])

# And print a summary of the dataset
util.print_statistics(dataset)
# Keep a deep copy of this granularity's dataset so the cross-granularity
# comparison table below can use both versions.
datasets.append(copy.deepcopy(dataset))

# And print the table that has been included in the book
# (assumes at least two datasets have been accumulated — TODO confirm loop above)
util.print_latex_table_statistics_two_datasets(datasets[0], datasets[1])

# Finally, store the last dataset we have generated (250 ms).
#dataset.to_csv(result_dataset_path + 'chapter2_result.csv')
# Get the resulting pandas data tables. The commented-out "_cs" lines mirror the
# own-data pipeline for a second (CrowdSignals) dataset and are currently disabled.
dataset_own = DataSetOwn.data_table
# dataset_cs = DataSetCS.data_table

# Create the object used to plot the data
DataViz = VisualizeDataset()

# Boxplot of the phone accelerometer axes
DataViz.plot_dataset_boxplot(dataset_own, ['acc_phone_x', 'acc_phone_y', 'acc_phone_z'])
# DataViz.plot_dataset_boxplot(dataset_cs, ['acc_phone_x','acc_phone_y','acc_phone_z'])

# Plot all data: three parallel lists — column prefixes, match mode per entry
# ('like' = prefix match), and display style per entry.
DataViz.plot_dataset(dataset_own,
                     ['acc_', 'gyr_', 'mag_', 'press_', 'pedom_phone_', 'label'],
                     ['like', 'like', 'like', 'like', 'like', 'like'],
                     ['line', 'line', 'line', 'line', 'points', 'points'])
# DataViz.plot_dataset(dataset_cs, ['acc_phone', 'gyr_phone', 'mag_phone', 'press_phone_', 'label'],
#                      ['like', 'like', 'like', 'like', 'like'],
#                      ['line', 'line', 'line', 'line', 'points'])

# And print a summary of the dataset
util.print_statistics(dataset_own)
datasets_own.append(copy.deepcopy(dataset_own))
# util.print_statistics(dataset_cs)
# datasets_cs.append(copy.deepcopy(dataset_cs))

# And print the table that has been included in the book
util.print_latex_table_statistics_two_datasets(datasets_own[0], datasets_own[1])
# BUG FIX: the "_cs" pipeline is disabled above, so datasets_cs is never
# populated and this call would raise at runtime; keep it disabled with the
# rest of the "_cs" lines until that pipeline is re-enabled.
# util.print_latex_table_statistics_two_datasets(datasets_cs[0], datasets_cs[1])

# Finally, store the last dataset we have generated (250 ms).
dataset_own.to_csv(result_dataset_path + 'chapter2_result_own.csv')
# dataset_cs.to_csv(result_dataset_path + 'chapter2_result_cs.csv')
def main():
    """Build numerical datasets from the raw sensor files at two granularities,
    visualize them, print summary statistics, and store the fine-grained result.

    Uses the module-level DATASET_PATH / RESULT_PATH / RESULT_FNAME constants
    and the CreateDataset / VisualizeDataset / util helpers defined elsewhere
    in this project.
    """
    # Set a granularity (the discrete step size of our time series data) and choose if all resulting datasets should
    # be saved. A coarse-grained granularity of one instance per minute, and a fine-grained one with four instances
    # per second are used.
    GRANULARITIES = [60000, 250]
    SAVE_VERSIONS = False

    # Make any required directories if they don't already exist (a plain loop —
    # a list comprehension used only for side effects is discouraged).
    for path in [DATASET_PATH, RESULT_PATH]:
        path.mkdir(exist_ok=True, parents=True)

    # Create object to visualize the data and save figures
    DataViz = VisualizeDataset(module_path=__file__)

    datasets = []
    for milliseconds_per_instance in GRANULARITIES:
        print(
            f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.')

        # Create an initial dataset object with the base directory for our data and a granularity and add selected
        # measurements to it
        data_engineer = CreateDataset(base_dir=DATASET_PATH, granularity=milliseconds_per_instance)

        # Add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='accelerometer_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='acc_phone_')
        data_engineer.add_numerical_dataset(file='accelerometer_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='acc_watch_')

        # Add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='gyroscope_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='gyr_phone_')
        data_engineer.add_numerical_dataset(file='gyroscope_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='gyr_watch_')

        # Add the heart rate (continuous numerical measurements) and aggregate by averaging the values
        data_engineer.add_numerical_dataset(file='heart_rate_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['rate'], aggregation='avg', prefix='hr_watch_')

        # Add the labels provided by the users as binary attributes (i.e. add a one to the attribute representing the
        # specific value for a label if it occurs within an interval). These are categorical events that might overlap.
        data_engineer.add_event_dataset(file='labels.csv', start_timestamp_col='label_start',
                                        end_timestamp_col='label_end', value_col='label', aggregation='binary')

        # Add the amount of light sensed by the phone (continuous numerical measurements) and aggregate by averaging
        data_engineer.add_numerical_dataset(file='light_phone.csv', timestamp_col='timestamps',
                                            value_cols=['lux'], aggregation='avg', prefix='light_phone_')

        # Add the magnetometer data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='magnetometer_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='mag_phone_')
        data_engineer.add_numerical_dataset(file='magnetometer_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='mag_watch_')

        # Add the pressure sensed by the phone (continuous numerical measurements) and aggregate by averaging again
        data_engineer.add_numerical_dataset(file='pressure_phone.csv', timestamp_col='timestamps',
                                            value_cols=['pressure'], aggregation='avg', prefix='press_phone_')

        # Get the resulting pandas data table
        dataset = data_engineer.data_table

        # Create boxplots
        DataViz.plot_dataset_boxplot(dataset=dataset, cols=['acc_phone_x', 'acc_phone_y', 'acc_phone_z',
                                                            'acc_watch_x', 'acc_watch_y', 'acc_watch_z'])

        # Plot all data.
        # BUG FIX: columns has 7 entries, but match/display previously had 8 —
        # the three lists are parallel (one match mode and one display style per
        # column selector), so they must be the same length. One 'like' and one
        # 'line' were dropped; only the 'label' columns are drawn as points.
        DataViz.plot_dataset(data_table=dataset,
                             columns=['acc_', 'gyr_', 'hr_watch_rate', 'light_phone_lux', 'mag_',
                                      'press_phone_', 'label'],
                             match=['like', 'like', 'like', 'like', 'like', 'like', 'like'],
                             display=['line', 'line', 'line', 'line', 'line', 'line', 'points'])

        # Print a summary of the dataset
        util.print_statistics(dataset=dataset)
        datasets.append(copy.deepcopy(dataset))

        # Save the various versions of the created datasets with logical filenames if needed
        # NOTE(review): this filename has no '.csv' extension — confirm whether that is intended.
        if SAVE_VERSIONS:
            dataset.to_csv(RESULT_PATH / f'chapter2_result_{milliseconds_per_instance}')

    # Make a table like the one shown in the book, comparing the two datasets produced
    util.print_latex_table_statistics_two_datasets(dataset1=datasets[0], dataset2=datasets[1])

    # Finally, store the last dataset we generated (250 ms)
    dataset.to_csv(RESULT_PATH / RESULT_FNAME)
# NOTE(review): this chunk begins mid-call — the column list below is the tail
# of a DataViz.plot_dataset_boxplot(...) invocation whose opening lies outside
# this view (presumably on one of the per-activity datasets; confirm upstream).
    'acc_phone_x',
    'acc_phone_y',
    'acc_phone_z',
])
# Boxplot of the phone accelerometer axes for the running subset
DataViz.plot_dataset_boxplot(dataset_running, [
    'acc_phone_x',
    'acc_phone_y',
    'acc_phone_z',
])

# # Plot all data
# DataViz.plot_dataset(dataset, ['acc_', 'gyr_', 'label'],
#                      ['like', 'like', 'like', ],
#                      ['line', 'line', 'line', ])

# And print a summary of the dataset.
print(dataset.columns)
# print(dataset['acc_phone_x'].max(), dataset['acc_phone_y'].max(), dataset['acc_phone_z'].max())
# Per-axis min / mean / std of the phone accelerometer for the walking subset
print(dataset_walking['acc_phone_x'].min(), dataset_walking['acc_phone_y'].min(),
      dataset_walking['acc_phone_z'].min())
print(dataset_walking['acc_phone_x'].mean(), dataset_walking['acc_phone_y'].mean(),
      dataset_walking['acc_phone_z'].mean())
print(dataset_walking['acc_phone_x'].std(), dataset_walking['acc_phone_y'].std(),
      dataset_walking['acc_phone_z'].std())
# Full descriptive statistics for each per-activity subset
util.print_statistics(dataset_walking, describe=True)
util.print_statistics(dataset_sitting, describe=True)
util.print_statistics(dataset_running, describe=True)