def warehouse_install(args, w, config):
    """Install partitions, and optionally manifest SQL and views, into a warehouse.

    If ``args.term`` names an existing file it is loaded as a manifest, and
    the manifest's partitions, SQL and views are installed. Otherwise
    ``args.term`` is treated as a single partition reference and only that
    partition is installed.

    :param args: Parsed command arguments; ``term`` and ``library_name`` are read.
    :param w: The warehouse to install into. It is created first if
        ``w.exists()`` is false, and its ``logger`` attribute is replaced.
    :param config: Run configuration; ``config.library(...)`` is called with
        ``args.library_name``.
    :return: None
    """
    from ..library import new_library
    import os.path
    from ambry.util import init_log_rate
    from ..dbexceptions import NotFoundError

    if not w.exists():
        w.create()

    # NOTE(review): the library object is not used below; the call is kept
    # in case constructing it has side effects -- confirm and remove if not.
    l = new_library(config.library(args.library_name))

    w.logger = Logger('Warehouse Install', init_log_rate(prt, N=2000))

    # Stays None when the term is a plain partition reference, so the SQL
    # step below can be skipped instead of failing on an unbound name.
    manifest = None

    if os.path.isfile(args.term):
        # Assume it is a Manifest file.
        from ..warehouse.manifest import Manifest

        manifest = Manifest(args.term)
        partitions = manifest.partitions
        views = manifest.views
    else:
        partitions = [args.term]
        views = []

    for p in partitions:
        try:
            w.install(p)
        except NotFoundError:
            err("Partition {} not found in external library".format(p))

    # Only a manifest can carry SQL; it is keyed by database driver name.
    if manifest is not None and manifest.sql:
        w.run_sql(manifest.sql[w.database.driver])

    for view in views:
        w.install_view(view)
def warehouse_remove(args, w, config):
    """Remove the item named by ``args.term`` from the warehouse.

    :param args: Parsed command arguments; only ``term`` is read.
    :param w: The warehouse to remove from. Its ``logger`` attribute is
        replaced before the removal runs.
    :param config: Run configuration; not used here.
    :return: None
    """
    from functools import partial
    from ambry.util import init_log_rate

    # Rate-limited progress reporter handed to the warehouse logger.
    rate_reporter = init_log_rate(prt, N=2000)
    w.logger = Logger('Warehouse Remove', rate_reporter)

    term = args.term
    w.remove(term)
def tables_list(self, add_columns = True):
    """Build table and column descriptions from the ``table_sequence`` dependency.

    Rows are read from ``self.dep('table_sequence').datafile.reader`` and
    handled as follows:

    * Rows whose ``year`` / ``release`` differ from ``self.year`` /
      ``self.release`` are reported on stdout and skipped.
    * Rows with a ``sequence_number`` above 117 are skipped.
    * A row with a non-empty ``start`` opens a table. If the same table id
      opens a second time, that row and every later row for the table are
      skipped, and the entry built so far is kept.
    * A row whose title contains ``Universe`` sets the table's universe.
    * When ``add_columns`` is true, a row with ``is_column == 'Y'`` adds two
      columns: the value column and its ``_m90`` margin-of-error column.

    When ``self.limited_run`` is set, reading stops at the first table
    header found after more than 1000 rows have been accepted.

    :param add_columns: When false, only table-level entries are built and
        every ``columns`` list stays empty.
    :return: A ``defaultdict`` mapping table id to a dict with the keys
        ``name``, ``universe``, ``description``, ``columns`` and ``data``.
    :raises Exception: If a column name is generated twice for one table.
    """
    from collections import defaultdict

    tables = defaultdict(lambda: dict(table=None, universe=None, columns=[]))

    # Column names already emitted, per table, so the duplicate check does
    # not have to rebuild a list of names for every column row.
    column_names = defaultdict(set)

    year = self.year
    release = self.release

    seen = set()
    ignore = set()

    accepted_rows = 0

    with self.dep('table_sequence').datafile.reader as r:
        for row in r:

            if int(row['year']) != int(year) or int(row['release']) != int(release):
                # Single-argument call form prints identically on Python 2
                # and also parses on Python 3.
                print("Ignore {} {} != {} {} ".format(
                    row['year'], row['release'], year, release))
                continue

            if row['table_id'] in ignore:
                continue

            if int(row['sequence_number']) > 117:
                # Not sure where the higher sequence numbers are, but they
                # aren't in this distribution.
                continue

            accepted_rows += 1

            table_name = row['table_id']

            if row['start']:
                # Breaking here ensures we've loaded all of the columns for
                # the previous tables.
                if self.limited_run and accepted_rows > 1000:
                    break

                if table_name in seen:
                    ignore.add(table_name)
                    continue

                seen.add(table_name)

                tables[table_name] = dict(
                    name=row['table_id'],
                    universe=None,
                    description=row['title'].title(),
                    columns=[],
                    data=dict(
                        sequence=int(row['sequence_number']),
                        start=int(float(row['start'])),
                        length=int(row['table_cells']),
                    )
                )

            elif 'Universe' in row['title']:
                tables[table_name]['universe'] = \
                    row['title'].replace('Universe: ', '').strip()

            elif add_columns and row['is_column'] == 'Y':
                col_name = table_name + "{:03d}".format(int(row['line']))
                moe_name = col_name + '_m90'

                columns = tables[table_name]['columns']
                known = column_names[table_name]

                if col_name in known:
                    # The list is built only on failure; the message is the
                    # same as if it had been built for every row.
                    col_names = [c['name'] for c in columns]
                    raise Exception("Already have {} in {}".format(col_name, col_names))

                columns.append(dict(
                    name=col_name,
                    description=row['title'],
                    datatype='float',
                    data=dict(start=row['segment_column'])))

                # Add the margin of error column
                columns.append(dict(
                    name=moe_name,
                    description="Margin of error for: " + col_name,
                    datatype='float',
                    data=dict(start=row['segment_column'])))

                known.add(col_name)
                known.add(moe_name)

    return tables