예제 #1
0
    def uci_hapt(self):
        """
        Load the UCI HAPT raw accelerometer recordings as labelled windows.

        Sampling rate = 50
        1 WALKING
        2 WALKING_UPSTAIRS
        3 WALKING_DOWNSTAIRS
        4 SITTING
        5 STANDING
        6 LAYING
        7 STAND_TO_SIT
        8 SIT_TO_STAND
        9 SIT_TO_LIE
        10 LIE_TO_SIT
        11 STAND_TO_LIE
        12 LIE_TO_STAND

        Every (experiment, user) pair listed in ``labels.txt`` is read from
        its ``acc_expXX_userYY.txt`` file, a per-sample activity column is
        appended, and the result is handed to ``rolling_window`` with window
        ``(self.n_samples, 0)`` and step ``self.step``. Each window is given
        the activity id found at its middle sample. Samples outside every
        labelled interval keep id 0, and windows labelled 0 are NOT dropped.

        :return: tuple ``(data_array, y, name, users, stats)``;
            ``data_array`` and ``stats`` are whatever ``self.add_features``
            returns, ``y`` is cast to int and ``users`` holds one user id
            per window
        """
        self.name = "UCI HAPT"
        subfolder = 'UCI/HAPT Data Set/RawData/'
        if not self.simple_labels:
            activity_map = {
                1: 'WALKING',
                2: 'WALKING_UPSTAIRS',
                3: 'WALKING_DOWNSTAIRS',
                4: 'SITTING',
                5: 'STANDING',
                6: 'LAYING',
                7: 'STAND_TO_SIT',
                8: 'SIT_TO_STAND',
                9: 'SIT_TO_LIE',
                10: 'LIE_TO_SIT',
                11: 'STAND_TO_LIE',
                12: 'LIE_TO_STAND'
            }
        else:
            activity_map = {
                1: 'WALKING',
                2: 'STAIRS',
                3: 'STAIRS',
                4: 'INACTIVE',
                5: 'INACTIVE',
                6: 'INACTIVE',
                7: 'TRANSITION',
                8: 'TRANSITION',
                9: 'TRANSITION',
                10: 'TRANSITION',
                11: 'TRANSITION',
                12: 'TRANSITION'
            }

        # Load the label intervals: one row per labelled span of a recording
        labels = pd.read_csv(self.root_folder + subfolder + 'labels.txt',
                             names=['exp', 'user', 'activity', 'start', 'end'],
                             header=None,
                             sep=' ')

        # Extract signals from the files and split them into segments. UCI HAR V1 uses 128 window length with
        # a step size of 64.
        # The lists are seeded with empty arrays so that the final
        # concatenation yields the same shapes/dtypes even when labels.txt
        # lists no recordings; chunks are joined once after the loop instead
        # of re-copying the growing arrays for every recording.
        window_chunks = [np.empty((0, self.n_samples, 3))]
        label_chunks = [np.empty((0))]
        user_chunks = [np.empty((0))]

        # Index of the sample whose activity id labels the whole window
        t_idx = int(self.n_samples / 2)

        for exp, user in labels[['exp', 'user']].drop_duplicates().values:
            path = (self.root_folder + subfolder +
                    'acc_exp%02d_user%02d.txt' % (exp, user))
            print("Loading %s" % path)
            # NOTE(review): no header=None is passed, so pandas treats the
            # first line of the recording as a header row - confirm that this
            # is intended for these files.
            values = pd.read_csv(path, sep=' ').values
            recording_mask = ((labels['exp'] == exp) &
                              (labels['user'] == user))

            # Per-sample activity id; 0 marks samples without a label
            activities = np.zeros((values.shape[0], 1))
            spans = labels[['activity', 'start', 'end']][recording_mask].values
            for activity, start, end in spans:
                activities[start:end] = activity

            # Carry the activity id through windowing as an extra last column
            values = np.concatenate((values, activities), axis=1)

            # Segment into windows with overlap
            segmented = rolling_window(values, (self.n_samples, 0),
                                       self.step).swapaxes(1, 2)

            # Window label = activity id at the middle sample of the window
            t = np.asarray(segmented[:, t_idx, -1]).astype('int')

            # Collect data (signal columns only; the label column is dropped)
            label_chunks.append(t)
            window_chunks.append(segmented[:, :, :-1])
            user_chunks.append([user] * len(t))

        y = np.concatenate(label_chunks)
        data_array = np.concatenate(window_chunks)
        users = np.concatenate(user_chunks)

        if self.expand:
            y = expand_target(y, data_array.shape[1])

        # Add features to data
        data_array, stats = self.add_features(
            data_array,
            normalise=self.normalize,
            add_roll=self.add_roll,
            add_pitch=self.add_pitch,
            add_filter=self.add_filter,
            comp_magnitude=self.comp_magnitude)

        # Convert to common labels; otherwise the raw 0..12 ids are returned
        if self.common_labels:
            y = self.map_to_common_activities(y, activity_map)

        return data_array, y.astype('int'), self.name, users, stats
예제 #2
0
    def idash(self):
        """
        Load the iDASH activity dataset as windows resampled to n_samples.

        This dataset contains motion sensor data of 16 physical activities
        (walking, jogging, stair climbing, etc.) collected on 16 adults using
        an iPod touch device (Apple Inc.). The data sampling rate was 30 Hz.
        The collection time for an activity varied from 20 seconds to 17 minutes.

        Recordings are windowed at the native rate with a window of
        ``int(self.n_samples * sr / SR)`` rows, and every window is then
        resampled to ``self.n_samples`` rows. The label of a window is the
        position of its file in the subject's sorted file list.

        :return: tuple ``(data_array, y, name, users, stats)``;
            ``data_array`` and ``stats`` are whatever ``self.add_features``
            returns, ``y`` is the mapped label cast to int and ``users``
            holds one subject id per window
        """
        self.name = "IDASH"
        sub_folder = 'Physical Activity Sensor Data-Public/Public/iDASH_activity_dataset/'
        sr = 30.
        # Not read by the code below. Presumably documents which activity each
        # file index (the keys of activity_map) stands for - TODO confirm
        # against the sorted file listing.
        labels = [
            '400m_brisk_walk_pocket', '400m_jog_pocket',
            '400m_normal_walk_pocket', '400m_slow_walk_pocket',
            'Sit_and_walk_test_pocket', 'Stair_ascend_brisk_pocket',
            'Stair_ascend_normal_pocket', 'Stair_descend_brisk_pocket',
            'Stair_descend_normal_pocket', 'Step_test_brisk_pocket',
            'Step_test_normal_pocket', 'Treadmill_jog_5.5mph_pocket',
            'Treadmill_walk_1.5mph_pocket', 'Treadmill_walk_3.0mph_pocket',
            'Treadmill_walk_4.0mph_pocket', 'Walking_pocket'
        ]
        activity_map = {
            0: 'WALKING',
            1: 'JOGGING',
            2: 'WALKING',
            3: 'WALKING',
            4: 'WALKING',
            5: 'STAIRS',
            6: 'STAIRS',
            7: 'STAIRS',
            8: 'STAIRS',
            9: 'STEP',
            10: 'STEP',
            11: 'JOGGING',
            12: 'WALKING',
            13: 'WALKING',
            14: 'WALKING',
            15: 'WALKING'
        }

        # Window and step length at the native rate. sr is a float, so the
        # products are floats; array dimensions must be integers, hence the
        # explicit truncation.
        native_window = int(self.n_samples * sr / SR)
        native_step = int(self.step * sr / SR)

        # Load data
        subjects = list(range(1, 17))
        cols = [0, 1, 2]
        # Seeded with an empty array so the stacked result keeps its 3-D
        # shape even if no file is found
        window_chunks = [np.empty((0, native_window, len(cols)))]
        y = []
        users = []
        for subject in subjects:
            files = sorted(
                glob.glob(self.root_folder + sub_folder + '%d/*' % subject))
            # idx counts every file, including skipped "blank" ones, so the
            # label depends on the position in the sorted listing
            for idx, csv_file in enumerate(files):
                if "blank" not in csv_file:
                    tmp = pd.read_csv(csv_file, sep=',', usecols=cols).values
                    tmp = rolling_window(tmp,
                                         window=(native_window, 0),
                                         step=native_step).swapaxes(1, 2)
                    window_chunks.append(tmp)
                    # np.int was removed from NumPy; the builtin int is the
                    # same dtype
                    y.append(np.ones((tmp.shape[0], ), dtype=int) * idx)
                    users.append(
                        np.ones((tmp.shape[0], ), dtype=int) * subject)

        # Stack once instead of re-copying the growing array per file
        tmp_seg = np.vstack(window_chunks)
        users = np.asarray(list(itertools.chain.from_iterable(users)))
        y = list(itertools.chain.from_iterable(y))

        # Resample every channel of every window to n_samples rows
        n_windows, sequence_length, n_features = tmp_seg.shape
        data_array = np.empty((n_windows, self.n_samples, n_features))
        for window_idx in range(n_windows):
            for feature_idx in range(n_features):
                data_array[window_idx, :, feature_idx] = resample(
                    tmp_seg[window_idx, :, feature_idx], self.n_samples)

        data_array, stats = self.add_features(
            data_array,
            normalise=self.normalize,
            add_roll=self.add_roll,
            add_pitch=self.add_pitch,
            add_filter=self.add_filter,
            comp_magnitude=self.comp_magnitude)
        y = self.map_to_common_activities(y, activity_map)
        return data_array, y.astype('int'), self.name, users, stats
예제 #3
0
    def wisdm2(self):
        """
        Load the WISDM v2.0 raw recordings as windows resampled to n_samples.

        Sampling rate: 20hz
        Activity: Walking, jogging, sitting, standing, upstairs, downstairs

        Rows are grouped by user (in order of first appearance), windowed at
        the native rate with a window of ``int(self.n_samples * sr / SR)``
        rows, and each window is resampled to ``self.n_samples`` rows. The
        label and user of a window are the most frequent values inside it.

        :return: tuple of data ``(data_array, y, name, users)``
        """
        self.name = "WISDM2"
        sub_folder = 'WISDM/Real/'
        filename = 'WISDM_at_v2.0_raw.txt'
        columns = ['user', 'labels', 'timestamp', 'x', 'y', 'z']
        dtypes = {
            'user': np.int32,
            # np.str was an alias of the builtin str and has been removed
            # from NumPy
            'labels': str,
            'timestamp': np.float64,
            'x': np.float32,
            'y': np.float32,
            'z': np.float32
        }
        # Records in the raw file are terminated by ';'
        df = pd.read_csv(self.root_folder + sub_folder + filename,
                         names=columns,
                         lineterminator=';',
                         dtype=dtypes)
        df = df.dropna()
        activity_map = {
            'Walking': 'WALKING',
            'Upstairs': 'STAIRS',
            'Stairs': 'STAIRS',
            'Downstairs': 'STAIRS',
            'Sitting': 'INACTIVE',
            'Standing': 'INACTIVE',
            'LyingDown': 'INACTIVE',
            'Jogging': 'JOGGING'
        }
        # Two-step mapping: raw label -> common activity name -> whatever
        # MAP_ACTIVITY stores for that name. dict.get yields None for keys
        # that are missing from either mapping.
        df['labels'] = df['labels'].apply(activity_map.get)
        df['labels'] = df['labels'].apply(MAP_ACTIVITY.get)
        sr = 20.

        # Gather each user's rows contiguously. The chunks are joined once;
        # the empty seed keeps the result well-defined when there are no rows.
        signal_frame = df[['x', 'y', 'z', 'labels', 'user']]
        per_user = [
            signal_frame[df['user'] == user].values
            for user in df['user'].unique()
        ]
        tmp = np.concatenate([np.empty((0, 5))] + per_user, axis=0)

        # window samples into the equivalent of n_samples at 50 Hz.
        # sr is a float, so the sizes are truncated to integers explicitly.
        native_window = int(self.n_samples * sr / SR)
        native_step = int(self.step * sr / SR)
        tmp = rolling_window(tmp,
                             window=(native_window, 0),
                             step=native_step).swapaxes(1, 2)
        n_windows, sequence_length, n_features = tmp.shape
        y = np.zeros(n_windows)
        users = np.zeros(n_windows)
        data_array = np.empty((n_windows, self.n_samples, 3))
        for idx in range(n_windows):
            # Majority vote over the window (lowest value wins on ties)
            y[idx] = np.argmax(np.bincount(tmp[idx, :, -2].astype('int')))
            users[idx] = np.argmax(np.bincount(tmp[idx, :, -1].astype('int')))
            # Resample the x, y, z channels to n_samples rows
            for f in range(3):
                data_array[idx, :, f] = resample(tmp[idx, :, f],
                                                 self.n_samples)

        # NOTE(review): the sibling loaders unpack the add_features result as
        # (data_array, stats) and return stats as a fifth element; here the
        # result is passed through unchanged - confirm which form callers of
        # wisdm2 expect.
        data_array = self.add_features(data_array,
                                       normalise=self.normalize,
                                       ratio=0,
                                       add_roll=self.add_roll,
                                       add_pitch=self.add_pitch,
                                       add_filter=self.add_filter,
                                       comp_magnitude=self.comp_magnitude)
        return data_array, y.astype('int'), self.name, users
예제 #4
0
    def uci_mhealth(self):
        '''
        Load the UCI mHealth logs of subjects 1-10 as labelled windows.

        #Activities: 12
        #Sensor devices: 3 (chest, left ankle, right arm)
        #Subjects: 10
        L1: Standing still (1 min)
        L2: Sitting and relaxing (1 min)
        L3: Lying down (1 min)
        L4: Walking (1 min)
        L5: Climbing stairs (1 min)
        L6: Waist bends forward (20x)
        L7: Frontal elevation of arms (20x)
        L8: Knees bending (crouching) (20x)
        L9: Cycling (1 min)
        L10: Jogging (1 min)
        L11: Running (1 min)
        L12: Jump front & back (20x)

        Only file columns 0, 1, 2 (used as x, y, z) and 23 (used as the
        label) are read. Each log is passed to ``rolling_window`` with window
        ``(self.n_samples, 0)`` and step ``self.step``; a window is labelled
        with the most frequent label value inside it.

        :return: data, as the tuple ``(data_array, y, name, users, stats)``
        '''
        self.name = "UCI mHealth"
        sub_folder = 'UCI/mHealth/'
        activity_map = {
            0: 'NULL',
            1: 'INACTIVE',
            2: 'INACTIVE',
            3: 'INACTIVE',
            4: 'WALKING',
            5: 'STAIRS',
            6: 'BEND_FORWARD',
            7: 'ARM_ELEVATION',
            8: 'KNEE_BEND',
            9: 'CYCLING',
            10: 'JOGGING',
            11: 'RUNNING',
            12: 'JUMP'
        }

        # Load and window every subject with the same code path, then join
        # the per-subject chunks once.
        window_chunks = []
        user_chunks = []
        for subject in range(1, 11):
            raw = pd.read_csv(self.root_folder + sub_folder +
                              'mHealth_subject%d.log' % subject,
                              sep='\t',
                              usecols=[0, 1, 2, 23],
                              names=['x', 'y', 'z', 'labels']).values
            windows = rolling_window(raw,
                                     window=(self.n_samples, 0),
                                     step=self.step).swapaxes(1, 2)
            window_chunks.append(windows)
            # One (float) subject id per window
            user_chunks.append(subject * np.ones(windows.shape[0]))

        data_array = np.concatenate(window_chunks)
        users = np.concatenate(user_chunks)

        # Majority vote over the label column of each window (lowest value
        # wins on ties); y stays a float array like users
        y = np.empty(users.shape)
        for idx in range(users.shape[0]):
            y[idx] = np.argmax(
                np.bincount(data_array[idx, :, -1].astype('int')))
        # Drop the label column, keeping only x, y, z
        data_array = data_array[:, :, :3]

        data_array, stats = self.add_features(
            data_array,
            normalise=self.normalize,
            ratio=0,
            add_roll=self.add_roll,
            add_pitch=self.add_pitch,
            add_filter=self.add_filter,
            comp_magnitude=self.comp_magnitude)
        y = self.map_to_common_activities(y, activity_map)
        return data_array, y.astype('int'), self.name, users, stats
 def concat_sequence(x, window, step):
     """
     Merge the leading axes of ``x`` and re-window the result.

     ``x`` is reshaped to two dimensions, keeping its last axis, and then
     transposed so that the last axis comes first before it is handed to
     ``rolling_window``. The first axis of the result is afterwards moved to
     the last position.

     :param x: array-like offering ``reshape``, ``shape`` and ``swapaxes``
     :param window: forwarded unchanged to ``rolling_window``
     :param step: forwarded unchanged to ``rolling_window``
     :return: the re-ordered output of ``rolling_window``
     """
     flattened = x.reshape(-1, x.shape[-1])
     channel_first = flattened.swapaxes(0, 1)
     windows = rolling_window(channel_first, window, step)
     # Two successive swaps rotate axis 0 to the end: (a, b, c) -> (b, c, a)
     return windows.swapaxes(0, 1).swapaxes(1, 2)