def main():
    """Build aggregated sensor datasets at each configured granularity.

    For every granularity in GRANULARITIES, a CreateDataset object reads the
    raw CSV files from DATASET_PATH, aggregates the numerical sensor streams
    (average per timestep) and the label events (binary per timestep), plots
    the result, prints summary statistics, and finally writes the last
    (finest-grained) dataset to RESULT_PATH / RESULT_FNAME.
    """
    # Set a granularity (the discrete step size of our time series data) and choose if all resulting datasets
    # should be saved. A coarse-grained granularity of one instance per minute, and a fine-grained one with
    # four instances per second are used.
    GRANULARITIES = [60000, 250]
    SAVE_VERSIONS = False

    # Make any required directories if they don't already exist.
    # (Plain loop rather than a list comprehension: mkdir is called purely for its side effect.)
    for path in [DATASET_PATH, RESULT_PATH]:
        path.mkdir(exist_ok=True, parents=True)

    # Create object to visualize the data and save figures
    DataViz = VisualizeDataset(module_path=__file__)

    datasets = []
    for milliseconds_per_instance in GRANULARITIES:
        print(
            f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.')

        # Create an initial dataset object with the base directory for our data and a granularity and add
        # selected measurements to it
        data_engineer = CreateDataset(base_dir=DATASET_PATH, granularity=milliseconds_per_instance)

        # Add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='accelerometer_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='acc_phone_')
        data_engineer.add_numerical_dataset(file='accelerometer_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='acc_watch_')

        # Add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='gyroscope_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='gyr_phone_')
        data_engineer.add_numerical_dataset(file='gyroscope_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='gyr_watch_')

        # Add the heart rate (continuous numerical measurements) and aggregate by averaging the values
        data_engineer.add_numerical_dataset(file='heart_rate_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['rate'], aggregation='avg', prefix='hr_watch_')

        # Add the labels provided by the users as binary attributes (i.e. add a one to the attribute representing
        # the specific value for a label if it occurs within an interval). These are categorical events that might
        # overlap.
        data_engineer.add_event_dataset(file='labels.csv', start_timestamp_col='label_start',
                                        end_timestamp_col='label_end', value_col='label', aggregation='binary')

        # Add the amount of light sensed by the phone (continuous numerical measurements) and aggregate by averaging
        data_engineer.add_numerical_dataset(file='light_phone.csv', timestamp_col='timestamps',
                                            value_cols=['lux'], aggregation='avg', prefix='light_phone_')

        # Add the magnetometer data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='magnetometer_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='mag_phone_')
        data_engineer.add_numerical_dataset(file='magnetometer_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='mag_watch_')

        # Add the pressure sensed by the phone (continuous numerical measurements) and aggregate by averaging again
        data_engineer.add_numerical_dataset(file='pressure_phone.csv', timestamp_col='timestamps',
                                            value_cols=['pressure'], aggregation='avg', prefix='press_phone_')

        # Get the resulting pandas data table
        dataset = data_engineer.data_table

        # Create boxplots
        DataViz.plot_dataset_boxplot(dataset=dataset, cols=['acc_phone_x', 'acc_phone_y', 'acc_phone_z',
                                                            'acc_watch_x', 'acc_watch_y', 'acc_watch_z'])

        # Plot all data. BUGFIX: `match` and `display` previously held 8 entries against 7 `columns`; the
        # trailing extras were dead weight, so they are trimmed — the per-column pairing is unchanged.
        DataViz.plot_dataset(data_table=dataset,
                             columns=['acc_', 'gyr_', 'hr_watch_rate', 'light_phone_lux', 'mag_',
                                      'press_phone_', 'label'],
                             match=['like', 'like', 'like', 'like', 'like', 'like', 'like'],
                             display=['line', 'line', 'line', 'line', 'line', 'line', 'points'])

        # Print a summary of the dataset
        util.print_statistics(dataset=dataset)
        datasets.append(copy.deepcopy(dataset))

        # Save the various versions of the created datasets with logical filenames if needed.
        # BUGFIX: append '.csv' so intermediate files carry the same extension as RESULT_FNAME.
        if SAVE_VERSIONS:
            dataset.to_csv(RESULT_PATH / f'chapter2_result_{milliseconds_per_instance}.csv')

    # Make a table like the one shown in the book, comparing the two datasets produced
    util.print_latex_table_statistics_two_datasets(dataset1=datasets[0], dataset2=datasets[1])

    # Finally, store the last dataset we generated (250 ms)
    dataset.to_csv(RESULT_PATH / RESULT_FNAME)
# Gyroscope measurements of the phone and the smartwatch (averaged per timestep)
# are intentionally disabled for this run:
# dataset.add_numerical_dataset('gyroscope_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_phone_')
# dataset.add_numerical_dataset('gyroscope_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_watch_')
# Heart rate (averaged per timestep) is likewise disabled:
# dataset.add_numerical_dataset('heart_rate_smartwatch.csv', 'timestamps', ['rate'], 'avg', 'hr_watch_')

# The user-provided labels are categorical events that may overlap, so each label value becomes a
# binary attribute that is set to one whenever the label's interval covers the timestep.
dataset.add_event_dataset('labels.csv', 'label_start', 'label_end', 'label', 'binary')

# Device-motion measurements: the value-column list is assembled per sensor group instead of being
# written out literally — the resulting list is identical to the explicit twelve-element one.
motion_value_cols = [
    f'{group}.{axis}'
    for group, axes in (
        ('attitude', ('roll', 'pitch', 'yaw')),
        ('gravity', ('x', 'y', 'z')),
        ('rotationRate', ('x', 'y', 'z')),
        ('userAcceleration', ('x', 'y', 'z')),
    )
    for axis in axes
]
dataset.add_numerical_dataset('added_timestamps_result.csv', 'timestamps', motion_value_cols)

# Light, magnetometer and pressure sensors are also left disabled:
# dataset.add_numerical_dataset('light_phone.csv', 'timestamps', ['lux'], 'avg', 'light_phone_')
# dataset.add_numerical_dataset('magnetometer_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'mag_phone_')
# dataset.add_numerical_dataset('magnetometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'mag_watch_')
# dataset.add_numerical_dataset('pressure_phone.csv', 'timestamps', ['pressure'], 'avg', 'press_phone_')
# Get the resulting pandas data table
granularities = [60000, 250] datasets_own = [] datasets_cs = [] for milliseconds_per_instance in granularities: # Create an initial dataset object with the base directory for our data and a granularity DataSetOwn = CreateDataset(dataset_path_own, milliseconds_per_instance) # DataSetCS = CreateDataset(dataset_path_cs, milliseconds_per_instance) # Add the selected measurements to it. # We add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values/ DataSetOwn.add_numerical_dataset('acc_custom.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_phone_') # DataSetCS.add_numerical_dataset('accelerometer_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_phone_') # DataSetCS.add_numerical_dataset('accelerometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_watch_') # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values/ DataSetOwn.add_numerical_dataset('gyro_custom.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_phone_') # DataSetCS.add_numerical_dataset('gyroscope_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_phone_') # DataSetCS.add_numerical_dataset('gyroscope_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_watch_') # We add the heart rate (continuous numerical measurements) and aggregate by averaging again # DataSetCS.add_numerical_dataset('heart_rate_smartwatch.csv', 'timestamps', ['rate'], 'avg', 'hr_watch_') # We add the labels provided by the users. These are categorical events that might overlap. We add them # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it # occurs within an interval).
# per second granularities = [250] datasets = [] for milliseconds_per_instance in granularities: # Create an initial dataset object with the base directory for our data and a granularity DataSet = CreateDataset(dataset_path, milliseconds_per_instance) # Add the selected measurements to it. # We add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values/ DataSet.add_numerical_dataset('accelerometer-kx023.csv_out.csv', 'timestamp', ['x', 'y', 'z'], 'avg', 'acc_phone_') print("first set") # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values/ DataSet.add_numerical_dataset('orientation.csv_out.csv', 'timestamp', ['x', 'y', 'z'], 'avg', 'gyr_phone_') print("second set") # We add the labels provided by the users. These are categorical events that might overlap. We add them # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it # occurs within an interval). DataSet.add_event_dataset('status.csv', 'timestampBeg', 'timestampEnd', 'label', 'binary') # We add the amount of light sensed by the phone (continuous numerical measurements) and aggregate by averaging again DataSet.add_numerical_dataset('light-bh1745.csv_out.csv', 'timestamp',
# Build a 250 ms-granularity dataset from the crowdsignals phone/watch sensor
# streams, attach the binary activity labels, and split the resulting table
# into per-activity frames.
from pathlib import Path
import copy
import os
import sys

from Chapter2.CreateDataset import CreateDataset
from util import util
from util.VisualizeDataset import VisualizeDataset

DATASET_PATH = './datasets/crowdsignals/csv-participant-one/'
RESULT_PATH = './intermediate_datafiles/'
# The output file name may be overridden from the command line (third argument).
RESULT_FNAME = sys.argv[2] if len(sys.argv) > 2 else 'chapter2_result.csv'

# Aggregate every sensor stream into one table with a 250 ms step size.
dataset = CreateDataset(DATASET_PATH, 250)
for sensor_file, column_prefix in (
    ('accelerometer_phone.csv', 'acc_phone_'),
    ('accelerometer_smartwatch.csv', 'acc_watch_'),
    ('gyroscope_phone.csv', 'gyr_phone_'),
    ('gyroscope_smartwatch.csv', 'gyr_watch_'),
):
    # Average the x/y/z samples that fall inside each timestep.
    dataset.add_numerical_dataset(sensor_file, 'timestamps', ['x', 'y', 'z'],
                                  'avg', column_prefix, None)
# Overlapping categorical label events become one binary column per label value.
dataset.add_event_dataset('labels.csv', 'label_start', 'label_end', 'label', 'binary')
dataset = dataset.data_table

# One frame per activity of interest, selected via the binary label columns.
dataset_walking = dataset[dataset['labelWalking'] == 1]
dataset_sitting = dataset[dataset['labelSitting'] == 1]
dataset_running = dataset[dataset['labelRunning'] == 1]
# print os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) folders = os.listdir(path) # labels milliseconds_per_instance = 50 samples_dataframe = pd.DataFrame() frames = [] for label in folders: samples = os.listdir(path + "\\" + label) for sample in samples: sensors = os.listdir(path + "\\" + label + "\\" + sample) dataSet = CreateDataset(path + "\\" + label + "\\" + sample + "\\", milliseconds_per_instance) # for sensor in sensors: dataSet.add_numerical_dataset("Accelerometer.csv", 'Time (s)', ['X (m/s^2)', 'Y (m/s^2)', 'Z (m/s^2)'], 'avg', "Accelerometer") dataSet.add_numerical_dataset("Gyroscope.csv", 'Time (s)', ['X (rad/s)', 'Y (rad/s)', 'Z (rad/s)'], 'avg', "Gyroscope") dataSet.data_table = dataSet.data_table[~(np.isnan(dataSet.data_table['GyroscopeZ (rad/s)']))] # todo: useful? length = len(dataSet.data_table) dataSet.data_table = dataSet.data_table[(length - 53): (length - 1)] # same length for every sample FreqAbs = FourierTransformation() transformations = [] number_frequencies = 50 for column in list(dataSet.data_table.columns):
# Set a granularity (i.e. how big are our discrete time steps). We start very # coarse grained, namely one measurement per minute, and secondly use four measurements # per second granularities = [60000, 250] datasets = [] for milliseconds_per_instance in granularities: # Create an initial dataset object with the base directory for our data and a granularity DataSet = CreateDataset(dataset_path, milliseconds_per_instance) # Add the selected measurements to it. # Add numerical measurements DataSet.add_numerical_dataset('accelerometer.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'acc_') DataSet.add_numerical_dataset('linear_acceleration.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'lin_acc_') DataSet.add_numerical_dataset('magnetometer.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'mag_') DataSet.add_numerical_dataset('Gyroscope.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'gyr_') DataSet.add_numerical_dataset('light.csv', 'timestamps', ['illuminance'], 'avg', 'light_') DataSet.add_numerical_dataset('location.csv', 'timestamps', ['latitude', 'height', 'velocity'], 'avg',
'Location.csv': 'loc_', 'Magnetometer.csv': 'mag_' } time_column_name = 'Time (s)' granularities = [60000, 1000, 250] task = 'final_plot' if __name__ == '__main__': if task == '1_1': sensor = 'Gyroscope.csv' dataset = CreateDataset('datasets/Running_2020-06-04_12-40-48/', 250) dataset.add_numerical_dataset('Gyroscope.csv', time_column_name, sensors[sensor], 'avg', axis_abbreviations[sensor]) dataset = dataset.data_table fig = plt.figure(figsize=(5, 3.5)) ax = fig.add_subplot(111) ax.boxplot([ dataset['gyr_Gyroscope x (rad/s)'], dataset['gyr_Gyroscope y (rad/s)'], dataset['gyr_Gyroscope z (rad/s)'] ], widths=0.6) xlabels = ["gyr_x", "gyr_y", 'gyr_z'] ax.set_xticklabels(xlabels) plt.ylim([-5, 5]) plt.savefig('figures/1_1_selected/running_gyr_250.png')
for path in [DATASET_PATH, RESULT_PATH] ] datasets = [] for personid in os.listdir(DATASET_PATH): DATASET_PATH_ = DATASET_PATH / personid print(f'Creating numerical datasets from files in {DATASET_PATH_}.') # Create an initial dataset object with the base directory for our data and a granularity dataset = CreateDataset(DATASET_PATH_, milliseconds_per_instance) # Add the selected measurements to it. # We add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values dataset.add_numerical_dataset('motion.csv', 'time', ['acc_x', 'acc_y', 'acc_z'], 'avg', '') #dataset.add_numerical_dataset('accelerometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_watch_') # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values dataset.add_numerical_dataset('heart_rate.csv', 'time', ['heartrate'], 'avg', '') dataset.add_numerical_dataset('steps.csv', 'time', ['steps'], 'avg', '') #dataset.add_numerical_dataset('labels.csv', 'time', ['label'], 'avg', '') # We add the heart rate (continuous numerical measurements) and aggregate by averaging again #dataset.add_numerical_dataset('heart_rate_smartwatch.csv', 'timestamps', ['rate'], 'avg', 'hr_watch_') # We add the labels provided by the users. These are categorical events that might overlap. We add them # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it # occurs within an interval).
# coarse grained, namely one measurement per minute, and secondly use four measurements # per second granularities = [2000] datasets = [] for milliseconds_per_instance in granularities: # Create an initial dataset object with the base directory for our data and a granularity DataSet = CreateDataset(dataset_path, milliseconds_per_instance) # Add the selected measurements to it. # We add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values/ DataSet.add_numerical_dataset('accelerometer_phone.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'acc_phone_') # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values/ DataSet.add_numerical_dataset('gyroscope_phone.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'gyr_phone_') # We add the heart rate (continuous numerical measurements) and aggregate by averaging again #DataSet.add_numerical_dataset('heart_rate_smartwatch.csv', 'timestamps', ['rate'], 'avg', 'hr_watch_') # We add the labels provided by the users. These are categorical events that might overlap. We add them # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it # occurs within an interval). DataSet.add_event_dataset('labels.csv', 'label_start', 'label_end', 'label', 'binary')
# Linear acceleration stream (Phyphox-style column naming).
prefix5 = "linacc_"
file5 = "LinearAcceleration.csv"
cols5 = [
    "Linear Acceleration x (m/s^2)",
    "Linear Acceleration y (m/s^2)",
    "Linear Acceleration z (m/s^2)"
]
cols_pre5 = [prefix5 + x for x in cols5]

# Heart rate stream.
prefix6 = "hr_"
file6 = "heartdf2.csv"
cols6 = ["Heart Rate"]
# BUGFIX: the prefixed heart-rate column names were built from cols5 (the linear
# acceleration columns); they must be derived from cols6.
cols_pre6 = [prefix6 + x for x in cols6]

# DataSet = CreateDataset(dataset_path, 60000)
DataSet = CreateDataset(dataset_path, 500)  # 500 ms granularity

# Average each numerical stream per timestep; 'Time (s)' is the shared timestamp column.
DataSet.add_numerical_dataset(file1, 'Time (s)', cols1, 'avg', prefix1)
DataSet.add_numerical_dataset(file2, 'Time (s)', cols2, 'avg', prefix2)
DataSet.add_numerical_dataset(file3, 'Time (s)', cols3, 'avg', prefix3)
DataSet.add_numerical_dataset(file4, 'Time (s)', cols4, 'avg', prefix4)
DataSet.add_numerical_dataset(file5, 'Time (s)', cols5, 'avg', prefix5)
DataSet.add_numerical_dataset(file6, 'Time (s)', cols6, 'avg', prefix6)
# Labels are overlapping categorical events -> one binary attribute per label value.
DataSet.add_event_dataset('labels.csv', 'label_start', 'label_end', 'label', 'binary')

# Extract the pandas table and persist it.
dataset = DataSet.data_table
dataset.to_csv(result_dataset_path + "chapter2_final2s.csv")
# per second granularities = [8] datasets = [] for milliseconds_per_instance in granularities: # Create an initial dataset object with the base directory for our data and a granularity DataSet = CreateDataset(dataset_path, milliseconds_per_instance) # Add the selected measurements to it. # We add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values/ DataSet.add_numerical_dataset('EMG_data.csv', 'timestamps', ['a', 'b', 'c', 'd', 'e', 'g', 'h', 'i'], 'avg', 'EMG_') # DataSet.add_numerical_dataset('accelerometer_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'acc_watch_') # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values/ # DataSet.add_numerical_dataset('gyroscope_phone.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_phone_') # DataSet.add_numerical_dataset('gyroscope_smartwatch.csv', 'timestamps', ['x','y','z'], 'avg', 'gyr_watch_') # We add the heart rate (continuous numerical measurements) and aggregate by averaging again # DataSet.add_numerical_dataset('heart_rate_smartwatch.csv', 'timestamps', ['rate'], 'avg', 'hr_watch_') # We add the labels provided by the users. These are categorical events that might overlap. We add them # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it # occurs within an interval). DataSet.add_event_dataset('labels_EMG.csv', 'label_start', 'label_end',
path.mkdir(exist_ok=True, parents=True) for path in [DATASET_PATH, RESULT_PATH] ] datasets = [] for milliseconds_per_instance in GRANULARITIES: print( f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.' ) # Create an initial dataset object with the base directory for our data and a granularity dataset = CreateDataset(DATASET_PATH, milliseconds_per_instance) # Add the selected measurements to it. if user == 'user_2': dataset.add_numerical_dataset('accelerometer_phone.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'acc_phone_') dataset.add_numerical_dataset('proximity_phone.csv', 'timestamps', ['distance'], 'avg', 'prox_phone_') dataset.add_event_dataset('labels.csv', 'label_start', 'label_end', 'label', 'binary') dataset = dataset.data_table # Plot the data DataViz = VisualizeDataset(__file__, user) # Boxplot DataViz.plot_dataset_boxplot( dataset, ['acc_phone_x', 'acc_phone_y', 'acc_phone_z']) print(dataset) print(dataset.shape) # Plot all data
for path in [DATASET_PATH, RESULT_PATH] ] datasets = [] for milliseconds_per_instance in GRANULARITIES: print( f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.' ) # Create an initial dataset object with the base directory for our data and a granularity dataset = CreateDataset(DATASET_PATH, milliseconds_per_instance) # Add the selected measurements to it. # We add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values dataset.add_numerical_dataset('Accelerometer.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'acc_') # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values dataset.add_numerical_dataset('Gyroscope.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'gyr_') # We add the heart rate (continuous numerical measurements) and aggregate by averaging again # We add the labels provided by the users. These are categorical events that might overlap. We add them # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it # occurs within an interval). dataset.add_event_dataset('labels.csv', 'label_start', 'label_end', 'label', 'binary') # We add the magnetometer data (continuous numerical measurements) of the phone and the smartwatch
# coarse grained, namely one measurement per minute, and secondly use four measurements # per second granularities = [60000, 250] datasets = [] for milliseconds_per_instance in granularities: # Create an initial dataset object with the base directory for our data and a granularity DataSet = CreateDataset(dataset_path, milliseconds_per_instance) # Add the selected measurements to it. # We add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values/ DataSet.add_numerical_dataset('accelerometer_phone.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'acc_phone_') DataSet.add_numerical_dataset('accelerometer_smartwatch.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'acc_watch_') # We add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch # and aggregate the values per timestep by averaging the values/ DataSet.add_numerical_dataset('gyroscope_phone.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'gyr_phone_') DataSet.add_numerical_dataset('gyroscope_smartwatch.csv', 'timestamps', ['x', 'y', 'z'], 'avg', 'gyr_watch_') # We add the heart rate (continuous numerical measurements) and aggregate by averaging again DataSet.add_numerical_dataset('heart_rate_smartwatch.csv', 'timestamps', ['rate'], 'avg', 'hr_watch_') # We add the labels provided by the users. These are categorical events that might overlap. We add them
# We can call Path.mkdir(exist_ok=True) to make any required directories if they don't already exist. [path.mkdir(exist_ok=True, parents=True) for path in [DATASET_PATH, RESULT_PATH]] datasets = [] for milliseconds_per_instance in GRANULARITIES: print(f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.') # Create an initial dataset object with the base directory for our data and a granularity dataset = CreateDataset(DATASET_PATH, milliseconds_per_instance) # Add the selected measurements to it. # if user == 'user_2': try: dataset.add_numerical_dataset('activity.csv', 'time', ['value'], 'avg', 'act') except: pass try: dataset.add_numerical_dataset('appCat_builtin.csv', 'time', ['value'], 'avg', 'built') except: pass try: dataset.add_numerical_dataset('appCat_communication.csv', 'time', ['value'], 'avg', 'comm') except: pass try: dataset.add_numerical_dataset('appCat_entertainment.csv', 'time', ['value'], 'avg', 'ent') except: pass try:
# coarse grained, namely one measurement per minute, and secondly use four measurements # per second granularities = [60000, 250] datasets = [] for milliseconds_per_instance in granularities: # Create an initial dataset object with the base directory for our data and a granularity DataSet = CreateDataset(dataset_path, milliseconds_per_instance) # Add the selected measurements to it. # Add numerical measurements DataSet.add_numerical_dataset('A01_parsed_raw_data.csv', 'timestamp', ['ankle_l_x', 'ankle_l_y', 'ankle_l_z', 'ankle_r_x', 'ankle_r_y', 'ankle_r_z', 'belt_x', 'belt_y', 'belt_z', 'chest_x', 'chest_y', 'chest_z'], 'avg', '') # We add the labels provided by the users. These are categorical events that might overlap. We add them # as binary attributes (i.e. add a one to the attribute representing the specific value for the label if it # occurs within an interval). DataSet.add_binary_labels_dataset('A01_parsed_raw_data.csv', 'timestamp', ['labelWalking', 'labelFalling', 'labelLyingDown', 'labelLying', 'labelSittingDown', 'labelSitting', 'labelStandingFromLying', 'labelOnAllFours', 'labelSittingOnTheGround', 'labelStandingFromSitting', 'labelStandingFromSittingOnTheGround'], 'max', '') # Get the resulting pandas data table dataset = DataSet.data_table
path.mkdir(exist_ok=True, parents=True) for path in [DATASET_PATH, RESULT_PATH] ] datasets = [] for milliseconds_per_instance in GRANULARITIES: print( f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.' ) # Create an initial dataset object with the base directory for our data and a granularity dataset = CreateDataset(DATASET_PATH, milliseconds_per_instance) # Add the selected measurements to it. dataset.add_numerical_dataset('accelerometer.csv', 'timestamp', ['x', 'y', 'z'], 'avg', 'acc_phone_', True) dataset.add_event_dataset('labels.csv', 'label_start', 'label_end', 'label', 'binary', True) dataset.add_numerical_dataset('gyroscope.csv', 'timestamp', ['x', 'y', 'z'], 'avg', 'gyr_phone_', True) dataset.add_numerical_dataset('barometer.csv', 'timestamp', ['x'], 'avg', 'bar_phone_', True) dataset.add_numerical_dataset('linear_accelerometer.csv', 'timestamp', ['x', 'y', 'z'], 'avg', 'lin_acc_phone_', True) # dataset.add_numerical_dataset('location.csv', 'timestamp', ['latitude','longitude','height', 'velocity', 'direction', 'horizontal_accuracy', 'vertical_accuracy'], 'avg', 'loc_phone_', True) dataset.add_numerical_dataset('magnetometer.csv', 'timestamp', ['x', 'y', 'z'], 'avg', 'mag_phone_', True) dataset.add_numerical_dataset('proximity.csv', 'timestamp', ['distance'], 'avg', 'prox_phone_', True)