Пример #1
0
    def feature_processing(self, filename, trip=1, workers=8):
        """Read the data records of one trip and build averaged features.

        The records are loaded with ``dm.trip_data_to_df`` and split with
        ``self.split_segments``; ``self.features`` is then applied to the
        segments in a process pool.  The results are concatenated, sorted by
        ``'Time'``, split again using ``self.AVG_FEATURES_INTERVALS`` and
        reduced with ``self.average_features`` (again in a process pool).

        Args:
            filename: Data source name, forwarded to ``dm.trip_data_to_df``.
            trip: Trip identifier, forwarded to ``dm.trip_data_to_df``.
            workers: Size of the process pool; the segments are divided into
                at most this many chunks.

        Returns:
            The concatenated ``self.average_features`` results, sorted by
            the ``'Time'`` column.

        Raises:
            ValueError: If either splitting step produces no segments.
        """
        accelerometer_data = dm.trip_data_to_df(filename, trip=trip)
        segments = self.split_segments(accelerometer_data)
        segment_features = self._process_in_chunks(
            self.features, segments, workers)

        segments_df = pd.concat(segment_features, ignore_index=True)
        segments_df = segments_df.sort_values('Time')

        # Average features over self.AVG_FEATURES_INTERVALS
        # (1 minute intervals according to the original comment).
        segments = self.split_segments(
            segments_df, time_intervals=self.AVG_FEATURES_INTERVALS)
        average_features = self._process_in_chunks(
            self.average_features, segments, workers)

        avg_features_df = pd.concat(average_features, ignore_index=True)
        avg_features_df = avg_features_df.sort_values('Time')

        return avg_features_df

    @staticmethod
    def _process_in_chunks(func, segments, workers):
        """Apply ``func`` to contiguous chunks of ``segments`` in a process pool.

        Args:
            func: Callable submitted to the pool once per chunk.
            segments: Sized, sliceable collection of segments.
            workers: Pool size and maximum number of chunks.

        Returns:
            List with one ``func`` result per chunk, in chunk order.

        Raises:
            ValueError: If ``segments`` is empty.
        """
        total = len(segments)
        if total == 0:
            # Without this guard the chunk size would be 0 and ``range``
            # would fail with an unrelated "arg 3 must not be zero" error.
            raise ValueError("no segments to process")

        chunk_size = ceil(total / workers)
        chunks = (
            segments[start:start + chunk_size]
            for start in range(0, total, chunk_size)
        )

        with ProcessPoolExecutor(max_workers=workers) as executor:
            futures = [executor.submit(func, chunk) for chunk in chunks]

        # Leaving the ``with`` block waits for every task to finish;
        # ``result()`` re-raises any exception raised in a worker.
        return [future.result() for future in futures]
Пример #2
0
    def get_joined_trips_data(self, trips):
        """Build one joined sensor/classification result per trip.

        For every trip, the accelerometer and gyro sources are run through
        ``self.feature_processing``.  Their ``msm`` and ``variance`` columns
        are suffixed with the name returned by ``self.clean_file_name``; the
        frames are then concatenated column-wise and passed through
        ``self.remove_dup_columns``.  The trip's ``"Pixel_activity"`` data is
        split using ``self.AVG_FEATURES_INTERVALS``, reduced with
        ``self.average_classification`` and combined with the sensor
        features by ``self.join_sensors_classification_data``.

        Args:
            trips: Iterable of trip identifiers.

        Returns:
            List with one joined result per trip, in iteration order.
        """

        def sensor_features(source, trip):
            # Tag the generic feature columns with the source they came from.
            frame = self.feature_processing(source, trip=trip)
            file = self.clean_file_name(source)
            frame.rename(
                columns={
                    'msm': f'msm_{file}',
                    'variance': f'variance_{file}',
                },
                inplace=True)
            return frame

        joined_per_trip = []

        for trip in trips:
            print(f"processing trip {trip}")
            sources = ['Pixel_accelerometer', f'Pixel_gyro_{trip}']
            sensor_frames = [
                sensor_features(source, trip) for source in sources
            ]

            combined = self.remove_dup_columns(
                pd.concat(sensor_frames, axis=1))

            activity = dm.trip_data_to_df("Pixel_activity", trip=trip)
            # Average features to 1 minute intervals
            activity_segments = self.split_segments(
                activity, time_intervals=self.AVG_FEATURES_INTERVALS)
            classification = self.average_classification(activity_segments)

            joined_per_trip.append(
                self.join_sensors_classification_data(
                    combined, classification))

        return joined_per_trip