# Imports assumed by the methods below (module paths assumed from nilmtk's
# layout; each function is a method of a nilmtk Results subclass):
import pandas as pd
from nilmtk.timeframe import TimeFrame
from nilmtk.utils import get_tz, tz_localize_naive


def import_from_cache(self, cached_stat, sections):
    # we (deliberately) use duplicate indices to cache GoodSectionResults
    grouped_by_index = cached_stat.groupby(level=0)
    tz = get_tz(cached_stat)
    for tf_start, df_grouped_by_index in grouped_by_index:
        grouped_by_end = df_grouped_by_index.groupby('end')
        for tf_end, sections_df in grouped_by_end:
            end = tz_localize_naive(tf_end, tz)
            timeframe = TimeFrame(tf_start, end)
            if timeframe in sections:
                timeframes = []
                for _, row in sections_df.iterrows():
                    # Positional access into the cached row layout:
                    # column 2 is 'section_start', column 1 is 'section_end'.
                    section_start = tz_localize_naive(row.iloc[2], tz)
                    section_end = tz_localize_naive(row.iloc[1], tz)
                    timeframes.append(TimeFrame(section_start, section_end))
                self.append(timeframe, {'sections': [timeframes]})
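# The positional access above relies on a fixed column order in the cached
# frame. A tiny standalone check of that mapping (the column order
# ['end', 'section_end', 'section_start'] is an assumption, not verified
# against nilmtk's cache writer):
import pandas as pd

row = pd.Series({'end': 't2', 'section_end': 't1', 'section_start': 't0'})
assert row.iloc[2] == row['section_start']
assert row.iloc[1] == row['section_end']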
def import_from_cache(self, cached_stat, sections):
    # we (deliberately) use duplicate indices to cache GoodSectionResults
    grouped_by_index = cached_stat.groupby(level=0)
    tz = get_tz(cached_stat)
    for name, group in grouped_by_index:
        # Every row in a group shares the same timeframe end.
        assert group['end'].unique().size == 1
        end = tz_localize_naive(group['end'].iloc[0], tz)
        timeframe = TimeFrame(name, end)
        if timeframe in sections:
            timeframes = []
            for _, row in group.iterrows():
                section_start = tz_localize_naive(row['section_start'], tz)
                section_end = tz_localize_naive(row['section_end'], tz)
                timeframes.append(TimeFrame(section_start, section_end))
            self.append(timeframe, {'sections': [timeframes]})
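# A minimal, self-contained illustration (invented values, not nilmtk data)
# of why the duplicate indices work: `groupby(level=0)` collapses repeated
# timeframe starts, yielding one group per start with one row per section.
import pandas as pd

idx = pd.to_datetime(['2014-01-01', '2014-01-01', '2014-01-02'])
demo = pd.DataFrame(
    {'end': ['2014-01-02', '2014-01-02', '2014-01-03'],
     'section_start': ['2014-01-01 00:00', '2014-01-01 12:00',
                       '2014-01-02 00:00'],
     'section_end': ['2014-01-01 06:00', '2014-01-01 18:00',
                     '2014-01-02 06:00']},
    index=idx)
for start, group in demo.groupby(level=0):
    print(start, '->', len(group), 'section row(s)')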
def import_from_cache(self, cached_stat, sections):
    # we (deliberately) use duplicate indices to cache GoodSectionResults
    grouped_by_index = cached_stat.groupby(level=0)
    tz = get_tz(cached_stat)
    for tf_start, df_grouped_by_index in grouped_by_index:
        grouped_by_end = df_grouped_by_index.groupby('end')
        for tf_end, sections_df in grouped_by_end:
            end = tz_localize_naive(tf_end, tz)
            timeframe = TimeFrame(tf_start, end)
            if timeframe in sections:
                timeframes = []
                for _, row in sections_df.iterrows():
                    section_start = tz_localize_naive(row['section_start'], tz)
                    section_end = tz_localize_naive(row['section_end'], tz)
                    timeframes.append(TimeFrame(section_start, section_end))
                self.append(timeframe, {'sections': [timeframes]})
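# A rough sketch (an assumption, not nilmtk's actual `export_to_cache`) of
# the inverse operation the parser above expects: each timeframe is flattened
# to one row per good section, all rows sharing the timeframe start as their
# index value. `results` is a hypothetical dict mapping each timeframe start
# to {'end': timestamp, 'sections': [objects with .start and .end]};
# timezone stripping is omitted for brevity.
import pandas as pd

def export_to_cache_sketch(results):
    rows, index = [], []
    for tf_start, result in results.items():
        for section in result['sections']:
            index.append(tf_start)
            rows.append({'end': result['end'],
                         'section_start': section.start,
                         'section_end': section.end})
    return pd.DataFrame(rows, index=index)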
def import_from_cache(self, cached_stat, sections): """ Parameters ---------- cached_stat : DataFrame of cached data sections : list of nilmtk.TimeFrame objects describing the sections we want to load stats for. """ tz = get_tz(cached_stat) usable_sections_from_cache = [] def append_row(row, section): # Save, disable, then re-enable `pd.SettingWithCopyWarning` # from http://stackoverflow.com/a/20627316/732596 chained_assignment = pd.options.mode.chained_assignment pd.options.mode.chained_assignment = None # We stripped off the timezone when exporting to cache # so now we must put the timezone back. row['end'] = tz_localize_naive(row['end'], tz) pd.options.mode.chained_assignment = chained_assignment if row['end'] == section.end: usable_sections_from_cache.append(row) for section in sections: if not section: continue try: rows_matching_start = cached_stat.loc[section.start] except KeyError: pass else: if isinstance(rows_matching_start, pd.Series): append_row(rows_matching_start, section) else: for row_i in range(rows_matching_start.shape[0]): row = rows_matching_start.iloc[row_i] append_row(row, section) self._data = pd.DataFrame(usable_sections_from_cache) self._data.sort_index(inplace=True)
def import_from_cache(self, cached_stat, sections): """ Converts the data from the cache back into the original data form used during runtime. Parameters ---------- cached_stat : DataFrame of cached data sections : list of nilmtk.TimeFrame objects describing the sections we want to load stats for. """ if cached_stat.empty: return tz = get_tz(cached_stat) usable_sections_from_cache = [] def append_row(row, section): row = row.astype(object) # We stripped off the timezone when exporting to cache # so now we must put the timezone back. row['end'] = tz_localize_naive(row['end'], tz) if row['end'] == section.end: usable_sections_from_cache.append(row) for section in sections: if not section: continue try: rows_matching_start = cached_stat.loc[section.start] except KeyError: pass else: if isinstance(rows_matching_start, pd.Series): append_row(rows_matching_start, section) else: for row_i in range(rows_matching_start.shape[0]): row = rows_matching_start.iloc[row_i] append_row(row, section) self._data = pd.DataFrame(usable_sections_from_cache) self._data.sort_index(inplace=True)
def import_from_cache(self, cached_stat, sections): """ Parameters ---------- cached_stat : DataFrame of cached data sections : list of nilmtk.TimeFrame objects describing the sections we want to load stats for. """ if cached_stat.empty: return tz = get_tz(cached_stat) usable_sections_from_cache = [] def append_row(row, section): row = row.astype(object) # We stripped off the timezone when exporting to cache # so now we must put the timezone back. row['end'] = tz_localize_naive(row['end'], tz) if row['end'] == section.end: usable_sections_from_cache.append(row) for section in sections: if not section: continue try: rows_matching_start = cached_stat.loc[section.start] except KeyError: pass else: if isinstance(rows_matching_start, pd.Series): append_row(rows_matching_start, section) else: for row_i in range(rows_matching_start.shape[0]): row = rows_matching_start.iloc[row_i] append_row(row, section) self._data = pd.DataFrame(usable_sections_from_cache) self._data.sort_index(inplace=True)
def import_from_cache(self, cached_stat, sections):
    '''
    As explained in 'export_to_cache', the sections have to be stored
    row-wise. This function parses the rows and rearranges them into a
    proper AboveFreqSectionsResults again.
    '''
    # we (deliberately) use duplicate indices to cache AboveFreqSectionsResults
    grouped_by_index = cached_stat.groupby(level=0)
    tz = get_tz(cached_stat)
    for tf_start, df_grouped_by_index in grouped_by_index:
        grouped_by_end = df_grouped_by_index.groupby('end')
        for tf_end, sections_df in grouped_by_end:
            end = tz_localize_naive(tf_end, tz)
            timeframe = TimeFrame(tf_start, end)
            if timeframe in sections:
                timeframes = []
                for _, row in sections_df.iterrows():
                    section_start = tz_localize_naive(row['section_start'], tz)
                    section_end = tz_localize_naive(row['section_end'], tz)
                    timeframes.append(TimeFrame(section_start, section_end))
                self.append(timeframe, {'sections': [timeframes]})
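# Neither helper used throughout this file is defined here. A plausible
# sketch of their behaviour, inferred only from how they are called above
# (assumptions, not nilmtk's actual implementations):
import pandas as pd

def get_tz_sketch(df):
    # One plausible convention: the timezone was stashed on the frame when
    # it was exported to the cache.
    return getattr(df, 'timezone', 'UTC')

def tz_localize_naive_sketch(timestamp, tz):
    # Re-attach a timezone to a timestamp that was cached as naive.
    if pd.isnull(timestamp):
        return timestamp
    return pd.Timestamp(timestamp).tz_localize(tz)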