Пример #1
0
 def shuffle(self):
     """
         Shuffle feature rows across the chunk files stored on disk

         For every chunk id from 2 up to self.chunk_count, a random partner
         chunk with a smaller id is picked; both files
         (self.features_path + <id> + '.csv', ';' separated) are loaded,
         their rows are merged and randomly permuted, and the result is split
         in two halves that are written back to the two files.
     :return: self
     """
     shuffle_count = self.chunk_count - 1
     for iteration in range(shuffle_count):
         # chunk1 walks over ids 2..chunk_count; the modulo keeps the id in
         # range should shuffle_count ever exceed the number of such chunks
         chunk1_id = 2 + iteration % (self.chunk_count - 1)
         # randint is inclusive on both ends: partner id is in 1..chunk1_id-1
         chunk2_id = random.randint(1, chunk1_id - 1)
         print(' Shuffling files {id1} <-> {id2} ..'.format(id1=chunk1_id,
                                                            id2=chunk2_id))
         chunk1_path = self.features_path + str(chunk1_id) + '.csv'
         chunk2_path = self.features_path + str(chunk2_id) + '.csv'
         chunk1 = pd.read_csv(chunk1_path, sep=";", low_memory=False)
         chunk2 = pd.read_csv(chunk2_path, sep=";", low_memory=False)
         # merge two chunks and shuffle the merged rows
         # (pd.concat replaces DataFrame.append, which pandas 2.0 removed)
         merged = pd.concat([chunk1, chunk2], ignore_index=True)
         merged = merged.sample(frac=1).reset_index(drop=True)
         # save shuffled chunks; positional iloc slices are half-open, so the
         # row at `border` goes to the second file only. The former
         # label-based .ix[0:border] was inclusive and wrote that row to both
         # files, duplicating one row on every swap.
         border = len(merged.index) // 2
         util.write(merged.iloc[:border], address=chunk1_path)
         util.write(merged.iloc[border:], address=chunk2_path)
         print(
             ' Feature files {id1} <-> {id2} shuffled ({c} of {t})'.format(
                 id1=chunk1_id,
                 id2=chunk2_id,
                 c=iteration + 1,
                 t=shuffle_count))
     return self
 def save(self):
     """
         Write self.features to the address configured under const.FEATURE
         and print how many feature vectors were written
     :return:
     """
     destination = self.config[const.FEATURE]
     util.write(self.features, address=destination)
     vector_count = len(self.features.index)
     print(vector_count, 'feature vectors are written to file')
 def save_features(self):
     """
         Write self.features to self.features_path and print how many
         feature vectors were written
     :return:
     """
     destination = self.features_path
     util.write(self.features, address=destination)
     vector_count = len(self.features.index)
     print(vector_count, 'feature vectors are written to file')
Пример #4
0
 def save_features(self, chunk_id=1):
     """
         Write self.features to the csv file of one chunk and print how many
         feature vectors were written
     :param chunk_id: id appended to self.features_path (followed by '.csv')
         to form the file address
     :return:
     """
     chunk_address = self.features_path + str(chunk_id) + '.csv'
     util.write(self.features, address=chunk_address)
     vector_count = len(self.features.index)
     print(vector_count, 'feature vectors are written to file')
    def save(self):
        """
            Write the pre-processed tables to the addresses given in config
        :return: self
        """
        settings = self.config

        # observed data and its missing records go through the project writer
        util.write(self.obs, settings[const.OBSERVED])
        util.write(self.missing, settings[const.OBSERVED_MISSING])

        # station and grid tables are written directly as ';' separated csv
        csv_options = dict(sep=';', index=False)
        self.stations.to_csv(settings[const.STATIONS], **csv_options)
        self.grids.to_csv(settings[const.GRIDS], **csv_options)

        print('Data saved.')
        return self
    def process(self):
        """
            Load the forecast, live and historical grid files, accumulate them
            into per-time statistics and write one coarsened table per measure

            Every reader is consumed chunk by chunk; each chunk is grouped by
            time and every group is passed to self.add together with the grid
            id map and the shared collection. Afterwards, for each measure,
            the collected values are divided by their counts, put in a
            time-sorted data frame and written to
            self.config[const.GRID_COARSE] % measure.
        :return: self
        """
        shared_options = dict(iterator=True, low_memory=False, float_precision='round_trip')
        iterators = {
            'forecast': pd.read_csv(self.config[const.GRID_FORECAST], sep=';', **shared_options),
            'live': pd.read_csv(self.config[const.GRID_LIVE], sep=';', **shared_options),
            # history is the only source read with an explicit chunk size
            # and without the ';' separator
            'history': pd.read_csv(self.config[const.GRID_DATA], chunksize=1500000, **shared_options),
        }

        id_map = self.get_grid_id_maps()
        # id_grid is for visualizing purpose using data-frame viewer
        # id_list = [id_map[grid_id] for grid_id in sorted(id_map.keys())]
        # id_grid = np.flipud(np.array(id_list).reshape((self.row, self.column), order='F'))
        collection = {measure: dict() for measure in self.get_measures()}

        # Add historical / live / forecast grid data to coarsened statistics
        for category, grid in iterators.items():
            for chunk_number, chunk in enumerate(grid, start=1):
                print(' merge grid chunk (%s, %d) ..' % (category, chunk_number))
                chunk.rename(columns={'stationName': const.GID, 'wind_speed/kph': const.WSPD}, inplace=True)
                # convert wind speed and direction to polar values (x, y)
                chunk[const.WSPD], chunk[const.WDIR] = reform.wind_transform(
                    speed=chunk[const.WSPD], direction=chunk[const.WDIR])
                # each time is a square of points.
                # Group by the scalar key rather than a one-element list:
                # pandas >= 2.0 yields 1-tuples as group keys for list keys,
                # which would leak tuples into self.add and the time column.
                for timestamp, group in chunk.groupby(const.TIME):
                    self.add(timestamp, group, id_map, collection)

        # Build and write the final coarsened table of every measure
        for measure in collection:
            collect = collection[measure]
            # build final data table sorted by time ascending;
            # a non-positive count yields 0 instead of a division
            data = [
                [timestamp] + [v / c if c > 0 else 0
                               for v, c in zip(stats['values'], stats['counts'])]
                for timestamp, stats in sorted(collect.items())
            ]
            columns = self.get_columns()
            df = pd.DataFrame(data=data, columns=columns)
            df[const.TIME] = pd.to_datetime(df[const.TIME], utc=True)
            # group_hours = self.config[const.GROUP_HOURS]
            # run_average_df = times.running_average_df(df=df, time_key=const.TIME, value_keys=columns[1:],
            #                                           group_hours=group_hours, direction=1, whole_group=False)
            print('%d x (%d, %d) coarsened grid generated for (%s, %s)' % (
                len(collect), self.sample_row, self.sample_column, self.city, measure))
            util.write(df, self.config[const.GRID_COARSE] % measure)

        return self
Пример #7
0
 def save_test(self, predicted_values):
     """
         Add the predicted values to self._test as extra columns (name
         prefix 'f'), write the result to self.test_path and print the
         number of rows written
     :param predicted_values: passed to util.add_columns as the new columns
     :return:
     """
     with_predictions = util.add_columns(
         self._test, columns=predicted_values, name_prefix='f')
     util.write(with_predictions, address=self.test_path)
     row_total = len(with_predictions.index)
     print(row_total, 'predicted tests are written to file')
 def save_test(self, predicted_values):
     """
         Add the predicted values to self._test as extra columns (name
         prefix 'f'), write the result to the '_lstm_tests.csv' file of the
         current feature indicator and time steps, and print the number of
         rows written
     :param predicted_values: passed to util.add_columns as the new columns
     :return:
     """
     with_predictions = util.add_columns(
         self._test,
         columns=predicted_values,
         name_prefix='f')
     # <feature dir><feature indicator><time steps>_lstm_tests.csv
     directory = self.config[const.FEATURE_DIR]
     destination = directory + self.feature_indicator + str(self.time_steps) + '_lstm_tests.csv'
     util.write(with_predictions, address=destination)
     row_total = len(with_predictions.index)
     print(row_total, 'predicted tests are written to file')