Exemplos de AttributeCache._get_column_names_and_years em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: opus_core.store.attribute_cache

Classe / Tipo: AttributeCache

Método / Função: _get_column_names_and_years

Exemplos em hotexamples.com: 2

AttributeCache._get_column_names_and_years em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de opus_core.store.attribute_cache.AttributeCache._get_column_names_and_years em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

AttributeCache(30)

get_flt_storage_for_year(12)

get_storage_location(4)

write_table(4)

_get_sorted_list_of_years(3)

get_table_names(3)

_get_column_names_and_years(2)

delete_computed_tables(1)

get_column_names(1)

load_table(1)

Métodos Frequentes

AttributeCache (30)

get_flt_storage_for_year (12)

get_storage_location (4)

write_table (4)

_get_sorted_list_of_years (3)

get_table_names (3)

_get_column_names_and_years (2)

delete_computed_tables (1)

get_column_names (1)

load_table (1)

Exemplo n.º 1

0

Exibir arquivo

class DatasetSizeModel(Model):
    """Check that, within each cached table, all attributes have the same size.

    For every table the attributes are collected across all years found in
    the cache (each attribute is taken once, from the first
    ``(column, year)`` pair returned by the cache for that column), and the
    number of elements of each attribute is compared.
    """

    def __init__(self, directory=None):
        """Create the model for the cache located in ``directory``.

        If ``directory`` is None, the cache directory of the current
        ``SimulationState`` is used.
        """
        if directory is None:
            directory = SimulationState().get_cache_directory()
        self.cache = AttributeCache(directory)

    def run(self):
        """Report attributes whose size deviates from the most common size.

        Returns a pandas Series indexed by table name. Each value is the
        number of attributes in that table whose size differs from the most
        frequent attribute size (0 for consistent tables).

        The current simulation time is temporarily set to the first year
        returned by ``years_in_cache()`` and is restored before returning,
        even if an error occurs while reading the cache.
        """
        year_orig = SimulationState().get_current_time()
        years = self.years_in_cache()
        SimulationState().set_current_time(years[0])
        try:
            # One flt storage per cached year, so that each attribute can be
            # loaded from the year the cache reports for it.
            storages = {}
            for year in years:
                storages[year] = flt_storage(
                    os.path.join(self.cache.get_storage_location(), '%s' % year))

            tables = self.cache.get_table_names()
            counts = pd.Series(np.zeros(len(tables), dtype="int32"), index=tables)

            for table in tables:
                names = []
                colyears = []
                values = []
                seen = set()  # O(1) lookup of columns that were already measured
                for col, year in self.cache._get_column_names_and_years(table):
                    if col in seen:
                        continue  # keep only the first occurrence of a column
                    seen.add(col)
                    data = storages[year].load_table(table, column_names=col)
                    values.append(data[col].size)
                    names.append(col)
                    colyears.append(year)

                values = np.array(values)
                # A table without columns has nothing to compare (and
                # values[0] would fail); otherwise skip consistent tables.
                if values.size == 0 or (values == values[0]).all():
                    continue

                # There is an inconsistency in attribute lengths: treat the
                # most frequent size as the reference and list the deviations.
                names = np.array(names)
                colyears = np.array(colyears)
                sizes, frequencies = np.unique(values, return_counts=True)
                common_size = sizes[np.argmax(frequencies)]
                idx = np.where(values != common_size)[0]
                df = pd.DataFrame({"column": names[idx],
                                   "year": colyears[idx],
                                   "size": values[idx]})
                reference_row = pd.DataFrame({"column": np.array(["all other columns"]),
                                              "year": np.array([years[0]]),
                                              "size": np.array([common_size])})
                # DataFrame.append is not available in recent pandas
                # versions; concat yields the same stacked frame.
                df = pd.concat([df, reference_row])
                logger.log_status("Inconsistency in table ", table, ":\n", df)
                # The last row is the reference row, not an inconsistency.
                counts[table] = df.shape[0] - 1
        finally:
            SimulationState().set_current_time(year_orig)

        logger.log_status("Model total:", counts.sum(), ' size inconsistencies found.')
        return counts

    def years_in_cache(self):
        """Return the cache's sorted list of years (not starting with the current year)."""
        return self.cache._get_sorted_list_of_years(start_with_current_year=False)

Exemplo n.º 2

0

Exibir arquivo

class DataStructureModel(Model):
    """
    Checks the structure of datasets in a given cache (or run cache) when compared to a reference cache.
    It writes out all columns that are missing as well as those that are not present in the reference cache.
    It can also compare the sizes of the datasets.
    """

    def __init__(self, reference_location=None):
        """
        "reference_location" is the directory of the reference cache and should include the year.
        If it is None, the simulation directory in its start year is taken.
        """
        if reference_location is None:
            reference_location = os.path.join(
                SimulationState().get_cache_directory(),
                "%s" % SimulationState().get_start_time())
        self.reference_storage = flt_storage(reference_location)

    def run(self, directory=None, check_size=True):
        """
        "directory" is the cache to be compared to the reference. It should not include the year
        as the model checks all years. If it is None, the cache directory of the
        current SimulationState is used.
        Set "check_size" to False if no size check of the datasets is required.
        The size check compares the size of the first column of each table
        in the cache with the size of the first column in the reference.

        Returns a pandas DataFrame with one row per table that differs from
        the reference. The columns are "Table", "Less-than-ref" (columns only
        in the reference), "More-than-ref" (columns only in the cache) and
        "Year" (years in which the additional columns are found), plus "Size"
        and "Size-ref" when a size difference was detected.

        The current simulation time is temporarily set to the first year
        returned by years_in_cache() and is restored before returning.
        """
        if directory is None:
            directory = SimulationState().get_cache_directory()
        self.cache = AttributeCache(directory)
        year_orig = SimulationState().get_current_time()
        years = self.years_in_cache()
        SimulationState().set_current_time(years[0])
        try:
            df = pd.DataFrame(columns=["Table", "Less-than-ref", "More-than-ref",
                                       "Year", "Size", "Size-ref"])
            for table in self.cache.get_table_names():
                columns_list = self.cache.get_column_names(table)
                ref_columns_list = self.reference_storage.get_column_names(table, lowercase=True)
                columns = set(columns_list)
                ref_columns = set(ref_columns_list)
                more = columns.difference(ref_columns)
                less = ref_columns.difference(columns)

                samesize = True
                table_size = reftable_size = 0
                # The sizes can only be compared if both sides have at least
                # one column to load; a missing side is already reported
                # through the column differences.
                if check_size and len(columns_list) > 0 and len(ref_columns_list) > 0:
                    first_col = columns_list[0]
                    first_ref_col = ref_columns_list[0]
                    table_size = self.cache.load_table(table, first_col)[first_col].size
                    reftable_size = self.reference_storage.load_table(
                        table, first_ref_col)[first_ref_col].size
                    samesize = table_size == reftable_size

                if len(more) == 0 and len(less) == 0 and samesize:
                    continue  # the table matches the reference

                # Sorted so that the report is reproducible between runs.
                df.loc[df.shape[0]] = [table, ', '.join(sorted(less)),
                                       ', '.join(sorted(more)), '', 0, 0]
                if len(more) == 0 and samesize:
                    continue

                row = df.shape[0] - 1  # index of the row that was just added
                # if there are columns in the "more" column, write out the corresponding years
                more_years = [year
                              for col, year in self.cache._get_column_names_and_years(table)
                              if col in more]
                df.loc[row, "Year"] = ', '.join(np.unique(np.array(more_years).astype("str")))
                if not samesize:  # there is difference in table sizes
                    df.loc[row, "Size"] = table_size
                    df.loc[row, "Size-ref"] = reftable_size
        finally:
            # Do not leave the simulation clock at the first cached year.
            SimulationState().set_current_time(year_orig)

        if not check_size or (df['Size'].sum() == 0 and df['Size-ref'].sum() == 0):
            # remove the size columns if not used
            del df['Size']
            del df['Size-ref']

        if df.shape[0] > 0:
            logger.log_status("Differences in data structure relative to %s:"
                              % self.reference_storage.get_storage_location())
            logger.log_status(df)
        else:
            logger.log_status("Data structure corresponds to the one in %s"
                              % self.reference_storage.get_storage_location())
        return df

    def years_in_cache(self):
        """Return the cache's sorted list of years (not starting with the current year)."""
        return self.cache._get_sorted_list_of_years(start_with_current_year=False)