def read_timeseries_data(self, data_column_names='value', hide_exceptions=False, **filters):  # This function needs to be sped up
    """Reads timeseries data into a DataFrame from the database. Stored in self.raw_values."""
    # rowmap is used to order the data when read from the sql table
    headers = util.sql_read_headers(self.sql_data_table)
    rowmap = [headers.index(self.column_names[level]) for level in self.index_levels]
    data_col_ind = [headers.index(data_col) for data_col in util.put_in_list(data_column_names)]
    # read each row of the data table matching this object's id and assign the values to self.raw_values
    data = []
    if len(filters):
        merged_dict = dict({self.data_id_key: self.id}, **filters)
        read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **merged_dict)
    else:
        read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **{self.data_id_key: self.id})
    if read_data:
        unit_prefix = self.unit_prefix if hasattr(self, 'unit_prefix') else 1
        for row in read_data:
            try:
                data.append([row[i] for i in rowmap] +
                            [row[i] * unit_prefix for i in data_col_ind])
            except Exception:
                if not hide_exceptions:
                    print (self.id, row)
        column_names = self.df_index_names + util.put_in_list(data_column_names)
        self.raw_values = pd.DataFrame(data, columns=column_names).set_index(keys=self.df_index_names).sort_index()
    else:
        self.raw_values = None
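# A minimal, standalone sketch of what the method above produces: rows come back from
# SQL as tuples, rowmap/data_col_ind pick out the index and value positions, and the
# result is a sorted MultiIndex frame. The headers, ids, and values here are invented
# for illustration only, not the project's actual schema.
import pandas as pd

headers = ['parent_id', 'gau_id', 'year', 'value']            # assumed table layout
read_data = [(1, 'east', 2015, 10.0), (1, 'west', 2015, 12.5)]
rowmap = [headers.index(c) for c in ('gau_id', 'year')]       # index columns
data_col_ind = [headers.index('value')]                       # data columns

data = [[row[i] for i in rowmap] + [row[i] for i in data_col_ind] for row in read_data]
raw_values = (pd.DataFrame(data, columns=['gau_id', 'year', 'value'])
              .set_index(['gau_id', 'year'])
              .sort_index())
print(raw_values)   # one 'value' column indexed by (gau_id, year)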
def read_timeseries_data(self, **filters):  # This function needs to be sped up
    """Reads timeseries data into a DataFrame from the database. Stored in self.raw_values."""
    # rowmap is used to order the data when read from the sql table
    headers = util.sql_read_headers(self.sql_data_table)
    rowmap = [headers.index(self.column_names[level]) for level in self.index_levels]
    data_col_ind = headers.index('value')
    # read each row of the data table matching this object's id and assign the values to self.raw_values
    data = []
    if len(filters):
        merged_dict = dict({self.primary_key: self.id}, **filters)
        read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **merged_dict)
    else:
        read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **{self.primary_key: self.id})
    if read_data:
        unit_prefix = self.unit_prefix if hasattr(self, 'unit_prefix') else 1
        for row in read_data:
            try:
                data.append([row[i] for i in rowmap] +
                            [row[data_col_ind] * unit_prefix])
            except Exception:
                print (self.id, row)
        column_names = self.df_index_names + ['value']
        self.raw_values = pd.DataFrame(data, columns=column_names).set_index(keys=self.df_index_names).sort_index()
        self.data = True
    else:
        self.raw_values = None
        self.data = False
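# Both variants above are flagged as needing a speed-up. One obvious direction is to
# let the database and pandas do the work in a single round trip instead of iterating
# over rows in Python. This is only a sketch under assumptions: `con` is some
# DB-API/SQLAlchemy connection, the `%s` paramstyle matches a psycopg2-style driver,
# and the function name and parameters are hypothetical, not the project's actual API.
import pandas as pd

def read_timeseries_fast(con, table, id_key, id_value, index_cols, value_col='value'):
    # one parameterized query replaces the per-row Python loop
    query = 'SELECT {cols} FROM "{table}" WHERE "{id_key}" = %s'.format(
        cols=', '.join('"%s"' % c for c in index_cols + [value_col]),
        table=table, id_key=id_key)
    df = pd.read_sql_query(query, con, params=(id_value,))
    return df.set_index(index_cols).sort_index() if len(df) else None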
def read_geography_data(self):
    # example lookup: df2.loc[('kentucky', 'total', 'east north central', 'western interconnection'), 'households']
    headers = util.sql_read_headers('GeographyMap')
    # colmap and rowmap are used to order the data when read from the sql table
    colmap = [headers.index(col) for col in self.map_keys]
    rowmap = [headers.index(row) for row in self.geographies.keys()]
    for row in util.sql_read_table('GeographyMap'):
        self.values.loc[tuple([row[i] for i in rowmap]), tuple(self.map_keys)] = [row[i] for i in colmap]
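# Assigning one .loc row at a time, as above, is one of the slowest ways to fill a
# DataFrame; the whole GeographyMap table can be framed in a single pass. A hedged
# sketch with toy data -- the column names here are invented, not the real
# GeographyMap schema.
import pandas as pd

headers = ['census_division', 'state', 'households', 'employment']  # assumed layout
rows = [('east north central', 'ohio', 4.5e6, 5.4e6),
        ('east south central', 'kentucky', 1.7e6, 1.9e6)]
geography_cols = ['census_division', 'state']   # playing the role of self.geographies
map_keys = ['households', 'employment']         # playing the role of self.map_keys

values = (pd.DataFrame(rows, columns=headers)
          .set_index(geography_cols)[map_keys])
print(values.loc[('east south central', 'kentucky'), 'households'])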
def read_index_levels(self):
    """Creates a dictionary to store level headings (for database lookup) and level elements.
    Stored as attr 'index_levels'.
    """
    data_table_columns = [x for x in util.sql_read_headers(self.sql_data_table) if x != self.data_id_key]
    for id, index_level, column_name in util.sql_read_table('IndexLevels'):
        if column_name not in data_table_columns:
            continue
        elements = util.sql_read_table(self.sql_data_table, column_names=column_name,
                                       return_iterable=True, return_unique=True,
                                       **{self.data_id_key: self.id})
        if len(elements):
            self.index_levels[index_level] = elements
            self.column_names[index_level] = column_name
def read_index_levels(self):
    """Creates a dictionary to store level headings (for database lookup) and level elements.
    Stored as attr 'index_levels'.
    """
    data_table_columns = [x for x in util.sql_read_headers(self.sql_data_table) if x != self.primary_key]
    for id, index_level, column_name in util.sql_read_table('IndexLevels'):
        if column_name not in data_table_columns:
            continue
        elements = util.sql_read_table(self.sql_data_table, column_names=column_name,
                                       return_iterable=True, return_unique=True,
                                       **{self.primary_key: self.id})
        if len(elements):
            self.index_levels[index_level] = elements
            self.column_names[index_level] = column_name
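# What the two read_index_levels variants build, isolated with toy rows standing in
# for the IndexLevels table and the data table: a pair of dicts keyed by level name.
# All names and values here are hypothetical.
index_levels, column_names = {}, {}
index_level_rows = [(1, 'geography', 'gau_id'), (2, 'technology', 'technology_id')]
data_table_columns = ['gau_id', 'year', 'value']    # assume technology_id is absent
elements_by_column = {'gau_id': ['east', 'west']}   # assumed distinct values per column

for id, index_level, column_name in index_level_rows:
    if column_name not in data_table_columns:
        continue  # the data table does not carry this level
    elements = elements_by_column.get(column_name, [])
    if len(elements):
        index_levels[index_level] = elements
        column_names[index_level] = column_name

print(index_levels)   # {'geography': ['east', 'west']}
print(column_names)   # {'geography': 'gau_id'}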
def read_timeseries_data(self, data_column_names='value', **filters):  # This function needs to be sped up
    """Reads timeseries data into a DataFrame from the database. Stored in self.raw_values."""
    # rowmap is used to order the data when read from the sql table
    headers = util.sql_read_headers(self.sql_data_table)
    filters[self.data_id_key] = self.id
    # Check for a sensitivity specification for this table and id. If there is no relevant sensitivity specified
    # but the data table has a sensitivity column, we set the sensitivity filter to None, which will filter
    # the data table rows down to those where sensitivity is NULL -- the default, no-sensitivity condition.
    if 'sensitivity' in headers:
        filters['sensitivity'] = None
        if hasattr(self, 'scenario'):
            # Note that this will return None if the scenario doesn't specify a sensitivity for this table and id
            filters['sensitivity'] = self.scenario.get_sensitivity(self.sql_data_table, self.id)

    # read each row of the data table matching this object's id and assign the values to self.raw_values
    read_data = util.sql_read_table(self.sql_data_table, return_iterable=True, **filters)
    self.inspect_index_levels(headers, read_data)
    self._validate_other_indexes(headers, read_data)
    rowmap = [headers.index(self.column_names[level]) for level in self.index_levels]
    data_col_ind = [headers.index(data_col) for data_col in util.put_in_list(data_column_names)]
    unit_prefix = self.unit_prefix if hasattr(self, 'unit_prefix') else 1
    if read_data:
        data = []
        for row in read_data:
            try:
                data.append([row[i] for i in rowmap] +
                            [row[i] * unit_prefix for i in data_col_ind])
            except Exception:
                logging.warning('error reading table: {}, row: {}'.format(self.sql_data_table, row))
                raise
        column_names = self.df_index_names + util.put_in_list(data_column_names)
        self.raw_values = pd.DataFrame(data, columns=column_names).set_index(keys=self.df_index_names).sort_index()
        # warn about duplicate indices; keep=False flags every member of a duplicated index group
        duplicate_index = self.raw_values.index.duplicated(keep=False)
        if any(duplicate_index):
            logging.warning('Duplicate indices in table: {}, parent id: {}; by default the first row will be kept.'.format(self.sql_data_table, self.id))
            logging.warning(self.raw_values[duplicate_index])
            self.raw_values = self.raw_values.groupby(level=self.raw_values.index.names).first()
    else:
        self.raw_values = None
        # We didn't find any timeseries data for this object, so now we want to let the user know if that
        # might be a problem. We only expect to find timeseries data if self actually existed in the database
        # (as opposed to being a placeholder). The existence of self in the database is flagged by self.data.
        if self.data:
            if getattr(self, 'reference_tech_id', None):
                logging.debug('No {} found for {} with id {}; using reference technology values instead.'.format(
                    self.sql_data_table, self.sql_id_table, self.id))
            else:
                msg = 'No {} or reference technology found for {} with id {}.'.format(
                    self.sql_data_table, self.sql_id_table, self.id)
                if re.search("Cost(New|Replacement)?Data$", self.sql_data_table):
                    # The model can run fine without cost data, and this is sometimes useful during model
                    # development, so we just gently note if cost data is missing.
                    logging.debug(msg)
                else:
                    # Any other missing data is likely to be a real problem, so we complain
                    logging.critical(msg)
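# The duplicate-index handling at the end of the method above, isolated: keep=False
# marks every member of a duplicated index group, and groupby(level=...).first() keeps
# the first row per index value. Standalone toy example, not project code.
import pandas as pd

df = pd.DataFrame({'value': [1.0, 2.0, 3.0]},
                  index=pd.MultiIndex.from_tuples(
                      [('east', 2015), ('east', 2015), ('west', 2015)],
                      names=['gau_id', 'year']))
dup = df.index.duplicated(keep=False)
print(df[dup])                                   # both ('east', 2015) rows are flagged
deduped = df.groupby(level=df.index.names).first()
print(deduped)                                   # ('east', 2015) keeps value 1.0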