# NOTE(review): this chunk begins mid-expression — the string list below is the
# tail of an argument (activity label names) to a dataset method whose opening
# parenthesis lies outside this view; confirm the enclosing call upstream.
        'labelSittingDown', 'labelSitting', 'labelStandingFromLying', 'labelOnAllFours',
        'labelSittingOnTheGround', 'labelStandingFromSitting', 'labelStandingFromSittingOnTheGround'],
    'max', '')

# Get the resulting pandas data table
dataset = DataSet.data_table

# Create the object used to plot the data
DataViz = VisualizeDataset()

# Boxplot of the per-axis ankle/belt/chest sensor columns
DataViz.plot_dataset_boxplot(dataset, ['ankle_l_x', 'ankle_l_y', 'ankle_l_z',
                                       'ankle_r_x', 'ankle_r_y', 'ankle_r_z',
                                       'belt_x', 'belt_y', 'belt_z',
                                       'chest_x', 'chest_y', 'chest_z'])

# Plot all data: three parallel lists — column prefixes to select, the matching
# mode per entry ('like' = prefix match), and the display style per entry.
DataViz.plot_dataset(dataset, ['ankle_l_', 'ankle_r_', 'belt_', 'chest_', 'label'],
                     ['like', 'like', 'like', 'like', 'like'],
                     ['line', 'line', 'line', 'line', 'points'])

# And print a summary of the dataset
util.print_statistics(dataset)
# Keep a deep copy of this granularity's dataset so the cross-granularity
# comparison table below can use both versions.
datasets.append(copy.deepcopy(dataset))

# And print the table that has been included in the book
# (assumes at least two datasets have been accumulated — TODO confirm loop above)
util.print_latex_table_statistics_two_datasets(datasets[0], datasets[1])

# Finally, store the last dataset we have generated (250 ms).
#dataset.to_csv(result_dataset_path + 'chapter2_result.csv')
# Get the resulting pandas data tables. The commented-out "_cs" lines mirror the
# own-data pipeline for a second (CrowdSignals) dataset and are currently disabled.
dataset_own = DataSetOwn.data_table
# dataset_cs = DataSetCS.data_table

# Create the object used to plot the data
DataViz = VisualizeDataset()

# Boxplot of the phone accelerometer axes
DataViz.plot_dataset_boxplot(dataset_own, ['acc_phone_x', 'acc_phone_y', 'acc_phone_z'])
# DataViz.plot_dataset_boxplot(dataset_cs, ['acc_phone_x','acc_phone_y','acc_phone_z'])

# Plot all data: three parallel lists — column prefixes, match mode per entry
# ('like' = prefix match), and display style per entry.
DataViz.plot_dataset(dataset_own,
                     ['acc_', 'gyr_', 'mag_', 'press_', 'pedom_phone_', 'label'],
                     ['like', 'like', 'like', 'like', 'like', 'like'],
                     ['line', 'line', 'line', 'line', 'points', 'points'])
# DataViz.plot_dataset(dataset_cs, ['acc_phone', 'gyr_phone', 'mag_phone', 'press_phone_', 'label'],
#                      ['like', 'like', 'like', 'like', 'like'],
#                      ['line', 'line', 'line', 'line', 'points'])

# And print a summary of the dataset
util.print_statistics(dataset_own)
datasets_own.append(copy.deepcopy(dataset_own))
# util.print_statistics(dataset_cs)
# datasets_cs.append(copy.deepcopy(dataset_cs))

# And print the table that has been included in the book
util.print_latex_table_statistics_two_datasets(datasets_own[0], datasets_own[1])
# BUG FIX: the "_cs" pipeline is disabled above, so datasets_cs is never
# populated and this call would raise at runtime; keep it disabled with the
# rest of the "_cs" lines until that pipeline is re-enabled.
# util.print_latex_table_statistics_two_datasets(datasets_cs[0], datasets_cs[1])

# Finally, store the last dataset we have generated (250 ms).
dataset_own.to_csv(result_dataset_path + 'chapter2_result_own.csv')
# dataset_cs.to_csv(result_dataset_path + 'chapter2_result_cs.csv')
def main():
    """Build numerical datasets from the raw sensor files at two granularities,
    visualize them, print summary statistics, and store the fine-grained result.

    Uses the module-level DATASET_PATH / RESULT_PATH / RESULT_FNAME constants
    and the CreateDataset / VisualizeDataset / util helpers defined elsewhere
    in this project.
    """
    # Set a granularity (the discrete step size of our time series data) and choose if all resulting datasets should
    # be saved. A coarse-grained granularity of one instance per minute, and a fine-grained one with four instances
    # per second are used.
    GRANULARITIES = [60000, 250]
    SAVE_VERSIONS = False

    # Make any required directories if they don't already exist (a plain loop —
    # a list comprehension used only for side effects is discouraged).
    for path in [DATASET_PATH, RESULT_PATH]:
        path.mkdir(exist_ok=True, parents=True)

    # Create object to visualize the data and save figures
    DataViz = VisualizeDataset(module_path=__file__)

    datasets = []
    for milliseconds_per_instance in GRANULARITIES:
        print(
            f'Creating numerical datasets from files in {DATASET_PATH} using granularity {milliseconds_per_instance}.')

        # Create an initial dataset object with the base directory for our data and a granularity and add selected
        # measurements to it
        data_engineer = CreateDataset(base_dir=DATASET_PATH, granularity=milliseconds_per_instance)

        # Add the accelerometer data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='accelerometer_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='acc_phone_')
        data_engineer.add_numerical_dataset(file='accelerometer_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='acc_watch_')

        # Add the gyroscope data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='gyroscope_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='gyr_phone_')
        data_engineer.add_numerical_dataset(file='gyroscope_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='gyr_watch_')

        # Add the heart rate (continuous numerical measurements) and aggregate by averaging the values
        data_engineer.add_numerical_dataset(file='heart_rate_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['rate'], aggregation='avg', prefix='hr_watch_')

        # Add the labels provided by the users as binary attributes (i.e. add a one to the attribute representing the
        # specific value for a label if it occurs within an interval). These are categorical events that might overlap.
        data_engineer.add_event_dataset(file='labels.csv', start_timestamp_col='label_start',
                                        end_timestamp_col='label_end', value_col='label', aggregation='binary')

        # Add the amount of light sensed by the phone (continuous numerical measurements) and aggregate by averaging
        data_engineer.add_numerical_dataset(file='light_phone.csv', timestamp_col='timestamps',
                                            value_cols=['lux'], aggregation='avg', prefix='light_phone_')

        # Add the magnetometer data (continuous numerical measurements) of the phone and the smartwatch
        # and aggregate the values per timestep by averaging the values
        data_engineer.add_numerical_dataset(file='magnetometer_phone.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='mag_phone_')
        data_engineer.add_numerical_dataset(file='magnetometer_smartwatch.csv', timestamp_col='timestamps',
                                            value_cols=['x', 'y', 'z'], aggregation='avg', prefix='mag_watch_')

        # Add the pressure sensed by the phone (continuous numerical measurements) and aggregate by averaging again
        data_engineer.add_numerical_dataset(file='pressure_phone.csv', timestamp_col='timestamps',
                                            value_cols=['pressure'], aggregation='avg', prefix='press_phone_')

        # Get the resulting pandas data table
        dataset = data_engineer.data_table

        # Create boxplots
        DataViz.plot_dataset_boxplot(dataset=dataset, cols=['acc_phone_x', 'acc_phone_y', 'acc_phone_z',
                                                            'acc_watch_x', 'acc_watch_y', 'acc_watch_z'])

        # Plot all data.
        # BUG FIX: columns has 7 entries, but match/display previously had 8 —
        # the three lists are parallel (one match mode and one display style per
        # column selector), so they must be the same length. One 'like' and one
        # 'line' were dropped; only the 'label' columns are drawn as points.
        DataViz.plot_dataset(data_table=dataset,
                             columns=['acc_', 'gyr_', 'hr_watch_rate', 'light_phone_lux', 'mag_',
                                      'press_phone_', 'label'],
                             match=['like', 'like', 'like', 'like', 'like', 'like', 'like'],
                             display=['line', 'line', 'line', 'line', 'line', 'line', 'points'])

        # Print a summary of the dataset
        util.print_statistics(dataset=dataset)
        datasets.append(copy.deepcopy(dataset))

        # Save the various versions of the created datasets with logical filenames if needed
        # NOTE(review): this filename has no '.csv' extension — confirm whether that is intended.
        if SAVE_VERSIONS:
            dataset.to_csv(RESULT_PATH / f'chapter2_result_{milliseconds_per_instance}')

    # Make a table like the one shown in the book, comparing the two datasets produced
    util.print_latex_table_statistics_two_datasets(dataset1=datasets[0], dataset2=datasets[1])

    # Finally, store the last dataset we generated (250 ms)
    dataset.to_csv(RESULT_PATH / RESULT_FNAME)
# NOTE(review): this chunk begins mid-call — the column list below is the tail
# of a DataViz.plot_dataset_boxplot(...) invocation whose opening lies outside
# this view (presumably on one of the per-activity datasets; confirm upstream).
    'acc_phone_x',
    'acc_phone_y',
    'acc_phone_z',
])
# Boxplot of the phone accelerometer axes for the running subset
DataViz.plot_dataset_boxplot(dataset_running, [
    'acc_phone_x',
    'acc_phone_y',
    'acc_phone_z',
])

# # Plot all data
# DataViz.plot_dataset(dataset, ['acc_', 'gyr_', 'label'],
#                      ['like', 'like', 'like', ],
#                      ['line', 'line', 'line', ])

# And print a summary of the dataset.
print(dataset.columns)
# print(dataset['acc_phone_x'].max(), dataset['acc_phone_y'].max(), dataset['acc_phone_z'].max())
# Per-axis min / mean / std of the phone accelerometer for the walking subset
print(dataset_walking['acc_phone_x'].min(), dataset_walking['acc_phone_y'].min(),
      dataset_walking['acc_phone_z'].min())
print(dataset_walking['acc_phone_x'].mean(), dataset_walking['acc_phone_y'].mean(),
      dataset_walking['acc_phone_z'].mean())
print(dataset_walking['acc_phone_x'].std(), dataset_walking['acc_phone_y'].std(),
      dataset_walking['acc_phone_z'].std())
# Full descriptive statistics for each per-activity subset
util.print_statistics(dataset_walking, describe=True)
util.print_statistics(dataset_sitting, describe=True)
util.print_statistics(dataset_running, describe=True)