def create_table(params, db, task_table_name):
    """Create the task table from the result file of run 1.

    Column names are taken from the tab-separated header line of the result
    file and each column's SQL type is inferred from the first data row via
    ``convert_strictest`` and ``type_map`` (``TEXT`` when no mapping exists).
    Returns without doing anything when a table named
    ``params.task_table_name`` (brackets stripped) already exists.

    Args:
        params: settings object; ``run_prefix``, ``task_table_name`` and
            ``key_params`` are read here.
        db: database connection exposing ``cursor()``; the existence check
            queries SQLite's ``sqlite_master``.
        task_table_name: table identifier used in the CREATE TABLE statement.

    Exits the process with status 3 when a key parameter is missing from the
    result file header.
    """
    with open(get_result_file(params, params.run_prefix, 1), "r") as res:
        result_params = [
            name.strip() for name in res.readline().rstrip().split("\t")
        ]
        result_params_sample = [
            value.strip() for value in res.readline().rstrip().split("\t")
        ]
    # a short (or missing) sample row is padded with empty strings
    missing = len(result_params) - len(result_params_sample)
    result_params_sample.extend([""] * missing)

    # check if table name already exists; the name is a value here, so it can
    # be bound as a parameter instead of being formatted into the SQL text
    cursor = db.cursor()
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
        (params.task_table_name.strip("[]"),),
    )
    if cursor.fetchone():
        return

    # run column first, then parse time in seconds since epoch
    columns = ["{} INTEGER".format(params.run_prefix), "parsed_date REAL"]
    # one column for each result parameter
    for name, sample in zip(result_params, result_params_sample):
        col_type = type_map.get(type(convert_strictest(sample)), "TEXT")
        columns.append('"{}" {}'.format(name, col_type))

    # run is always part of the primary key
    key_columns = [params.run_prefix]
    for primary_key in params.key_params:
        if primary_key not in result_params:
            print("{} does not exist in result file of run 1".format(primary_key))
            sys.exit(3)
        # quoted exactly like the column definition above so that names
        # needing quotes resolve to the same column
        key_columns.append('"{}"'.format(primary_key))
    columns.append("PRIMARY KEY({})".format(",".join(key_columns)))

    # table identifiers cannot be parameterized, so have to use string formatting
    create_table_command = "CREATE TABLE IF NOT EXISTS {}({})".format(
        task_table_name, ", ".join(columns)
    )
    print(create_table_command)
    cursor = db.cursor()
    cursor.execute(create_table_command)
    # reinitialize tracked columns for newly created table
    initialize_tracked_columns(params, db)
def add_run_to_db(params, run):
    """Insert every data row of one run's result file into the task table.

    The tab-separated header line names the columns; ``run`` and
    ``parsed_date`` are always prepended. Any column not yet in
    ``params.tracked_columns`` is added to the table before rows are
    inserted. Values equal to ``nullval`` are stored as ``None``; all other
    values go through ``convert_strictest``.

    ``db`` is not a parameter here; it is resolved from the enclosing scope.

    Args:
        params: settings object; ``tracked_columns`` and ``task_table_name``
            are read here.
        run: run identifier passed to ``get_result_file`` and
            ``get_trailing_num``.

    Returns:
        None. The run is skipped (after printing a message) when the result
        file cannot be stat'ed or a row has more values than the header.
    """
    resfilename = get_result_file(params, run)
    run_number = get_trailing_num(run)
    try:
        parsed_date = os.path.getmtime(resfilename)
    except OSError:
        print("file {} not found; skipping".format(resfilename))
        return

    with open(resfilename, 'r') as res:
        # make sure table is compatible with run data by inserting any new columns
        # always called "run" in table (converted from whatever prefix users use)
        result_params = ["run", "parsed_date"]
        result_params.extend(res.readline().split('\t'))
        if result_params[-1] == '\n':
            result_params.pop()
        result_params = ['"{}"'.format(p.strip()) for p in result_params]

        pre_sample_pos = res.tell()
        # Keep the sample row aligned with result_params by prepending the
        # run number and parse date; indexing the raw row with an offset
        # wrapped around to the last columns for "run"/"parsed_date".
        result_params_sample = [run_number, parsed_date]
        result_params_sample.extend(res.readline().split('\t'))
        # go back to presample location for normal line iteration
        res.seek(pre_sample_pos, os.SEEK_SET)
        # a short or missing first data row must not break type inference
        while len(result_params_sample) < len(result_params):
            result_params_sample.append('')

        # add new columns to table
        for column, sample in zip(result_params, result_params_sample):
            if column not in params.tracked_columns:
                print("ADDING {} as new column".format(column))
                add_column_to_table(params, db, column, sample)

        # add value rows
        rows_to_add = []
        for line in res:
            # run number and parsed_date are always recorded
            result_params_val = [run_number, parsed_date]
            result_params_val.extend(line.split('\t'))
            if result_params_val[-1] == '\n':
                result_params_val.pop()
            # something must be wrong here
            if len(result_params_val) > len(result_params):
                print(
                    "There are {} values for only {} parameters in run {}; "
                    "skipping run".format(
                        len(result_params_val), len(result_params), run_number
                    )
                )
                # skip this run
                return
            # for when the last column value is the empty string
            while len(result_params_val) < len(result_params):
                result_params_val.append('')
            rows_to_add.append(tuple(
                None if value == nullval else convert_strictest(value)
                for value in result_params_val
            ))

    param_placeholders = ",".join("?" * len(result_params))
    # specify columns to insert into since ordering of columns in file may not match table
    insert_rows_command = "INSERT OR IGNORE INTO {} ({}) VALUES ({})".format(
        params.task_table_name,
        ','.join(result_params),
        param_placeholders)
    cursor = db.cursor()
    cursor.executemany(insert_rows_command, rows_to_add)
def add_column_to_table(params, db, column_name, sample_val):
    """Add ``column_name`` to the task table and start tracking it.

    The SQL type is looked up in ``type_map`` from the type of
    ``convert_strictest(sample_val)``, defaulting to ``TEXT``. After the
    ALTER TABLE runs, the name is added to ``params.tracked_columns``.
    """
    cur = db.cursor()
    inferred = convert_strictest(sample_val)
    sql_type = type_map.get(type(inferred), "TEXT")
    # identifiers cannot be bound as parameters, hence the formatted statement
    statement = "ALTER TABLE {table} ADD COLUMN {col} {type}".format(
        table=params.task_table_name,
        col=column_name,
        type=sql_type,
    )
    cur.execute(statement)
    params.tracked_columns.add(column_name)
def add_column_to_table(params, db, column_name, sample_val):
    """Extend the task table with a new column and record it as tracked.

    ``sample_val`` is run through ``convert_strictest``; the resulting
    Python type selects the SQL type from ``type_map`` (``TEXT`` when the
    type has no entry). ``params.tracked_columns`` gains ``column_name``
    once the ALTER TABLE statement has executed.
    """
    cur = db.cursor()
    fields = {
        "table": params.task_table_name,
        "col": column_name,
        "type": type_map.get(type(convert_strictest(sample_val)), "TEXT"),
    }
    # the statement is assembled by formatting because identifiers cannot
    # be passed as bound parameters
    cur.execute("ALTER TABLE {table} ADD COLUMN {col} {type}".format(**fields))
    params.tracked_columns.add(column_name)
def create_table(params, db, task_table_name):
    """Create the task table from the result file of run 1.

    Column names are taken from the tab-separated header line of the result
    file and each column's SQL type is inferred from the first data row via
    ``convert_strictest`` and ``type_map`` (``TEXT`` when no mapping exists).
    Returns without doing anything when a table named
    ``params.task_table_name`` (brackets stripped) already exists.

    Args:
        params: settings object; ``run_prefix``, ``task_table_name`` and
            ``key_params`` are read here.
        db: database connection exposing ``cursor()``; the existence check
            queries SQLite's ``sqlite_master``.
        task_table_name: table identifier used in the CREATE TABLE statement.

    Exits the process with status 3 when a key parameter is missing from the
    result file header.
    """
    with open(get_result_file(params, params.run_prefix + "1"), 'r') as res:
        # Strip names once so the key check below compares against the same
        # names that end up in the schema.
        result_params = [
            name.strip() for name in res.readline().rstrip().split('\t')
        ]
        result_params_sample = res.readline().rstrip().split('\t')
    # a short (or missing) sample row is padded with empty strings
    missing = len(result_params) - len(result_params_sample)
    result_params_sample.extend([''] * missing)

    # check if table name already exists; the name is a value here, so it can
    # be bound as a parameter instead of being formatted into the SQL text
    cursor = db.cursor()
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
        (params.task_table_name.strip("[]"),))
    if cursor.fetchone():
        return

    # run column first, then parse time in seconds since epoch
    columns = ["{} INTEGER".format(params.run_prefix), "parsed_date REAL"]
    # one column for each result parameter
    for name, sample in zip(result_params, result_params_sample):
        col_type = type_map.get(type(convert_strictest(sample)), "TEXT")
        columns.append('"{}" {}'.format(name, col_type))

    # run is always part of the primary key
    key_columns = [params.run_prefix]
    for primary_key in params.key_params:
        if primary_key not in result_params:
            print("{} does not exist in result file of run 1".format(primary_key))
            sys.exit(3)
        # quoted exactly like the column definition above so that names
        # needing quotes resolve to the same column
        key_columns.append('"{}"'.format(primary_key))
    columns.append("PRIMARY KEY({})".format(','.join(key_columns)))

    # table identifiers cannot be parameterized, so have to use string formatting
    create_table_command = "CREATE TABLE IF NOT EXISTS {}({})".format(
        task_table_name, ", ".join(columns))
    print(create_table_command)
    cursor = db.cursor()
    cursor.execute(create_table_command)
    # reinitialize tracked columns for newly created table
    initialize_tracked_columns(params, db)
def add_run_to_db(params, run):
    """Insert the rows of one run's result file into the task table.

    A run is ignored when its file is not newer than
    ``params.last_parsed_date`` and its number does not exceed
    ``params.last_run``. Otherwise ``params.last_run`` is incremented and
    used as the stored run number; the increment is undone when the run is
    skipped. Columns missing from ``params.tracked_columns`` are added to the
    table first. Values equal to ``nullval`` are stored as ``None``; all
    other values go through ``convert_strictest``.

    ``db`` is not a parameter here; it is resolved from the enclosing scope.

    Args:
        params: settings object; ``last_parsed_date``, ``last_run``,
            ``delimiter``, ``tracked_columns`` and ``task_table_name`` are
            read here, and ``last_run`` is updated.
        run: run identifier passed to ``get_result_file`` and
            ``get_trailing_num``.
    """
    resfilename = get_result_file(params, run)
    run_number = get_trailing_num(run)
    try:
        parsed_date = os.path.getmtime(resfilename)
    except OSError:
        print("file {} not found; skipping".format(resfilename))
        return
    # throw away unless newer than latest or run number greater than maximum
    if parsed_date <= params.last_parsed_date and run_number <= params.last_run:
        return

    params.last_run += 1
    print("run {} added ({}) ({})".format(run_number, params.last_run, parsed_date))
    # NOTE(review): binary mode matches the Python 2 csv module; under
    # Python 3 csv.reader expects a text-mode file - confirm the target
    # interpreter before changing the mode.
    with open(resfilename, 'rb') as res:
        csvreader = csv.reader(res, delimiter=params.delimiter)
        try:
            # next() works on both Python 2.6+ and Python 3 iterators
            header = next(csvreader)
            first_row = next(csvreader)
        except StopIteration:
            # nothing to insert; give the run number back
            print("file {} has no header or no data rows; skipping".format(resfilename))
            params.last_run -= 1
            return

        # make sure table is compatible with run data by inserting any new columns
        # always called "run" in table (converted from whatever prefix users use)
        result_params = ["run", "parsed_date"]
        result_params.extend(header)
        empty_end = not result_params[-1]   # header ended with a delimiter
        if empty_end:
            result_params.pop()
        result_params = ['"{}"'.format(p.strip()) for p in result_params]
        print(result_params)

        def complete_row(fields):
            # run number and parsed_date are always recorded
            row = [params.last_run, parsed_date]
            row.extend(fields)
            # Drop the field produced by a trailing delimiter only when it is
            # actually there; rows lacking it (or blank rows) would otherwise
            # lose a real value.
            if empty_end and len(row) > len(result_params):
                row.pop()
            return row

        result_params_sample = complete_row(first_row)
        while len(result_params_sample) < len(result_params):
            result_params_sample.append('')

        # add new columns to table
        for column, sample in zip(result_params, result_params_sample):
            if column not in params.tracked_columns:
                print("ADDING {} as new column".format(column))
                add_column_to_table(params, db, column, sample)

        # add value rows; the first data row is validated like every other row
        all_rows = [first_row]
        all_rows.extend(csvreader)
        rows_to_add = []
        for fields in all_rows:
            result_params_val = complete_row(fields)
            # something must be wrong here
            if len(result_params_val) > len(result_params):
                print(
                    "There are {} values for only {} parameters in run {}; "
                    "skipping run".format(
                        len(result_params_val), len(result_params), run_number
                    )
                )
                # skip this run
                params.last_run -= 1
                return
            # for padding when columns have unequal depth
            while len(result_params_val) < len(result_params):
                result_params_val.append('')
            rows_to_add.append(tuple(
                None if value == nullval else convert_strictest(value)
                for value in result_params_val
            ))

    print("rows to add")
    print(rows_to_add)
    param_placeholders = ",".join("?" * len(result_params))
    # specify columns to insert into since ordering of columns in file may not match table
    insert_rows_command = "INSERT OR IGNORE INTO {} ({}) VALUES ({})".format(
        params.task_table_name,
        ','.join(result_params),
        param_placeholders)
    cursor = db.cursor()
    cursor.executemany(insert_rows_command, rows_to_add)
def create_table(params, db):
    """Create the task table from the result file of the first run.

    Runs come from ``get_runs`` and are ordered with ``natural_sort``; the
    first one supplies the header (column names) and the first data row
    (used to infer SQL types through ``convert_strictest`` and ``type_map``,
    defaulting to ``TEXT``). Returns without doing anything when a table
    named ``params.task_table_name`` (brackets stripped) already exists.

    Args:
        params: settings object; ``delimiter``, ``task_table_name``,
            ``run_prefix`` and ``key_params`` are read here.
        db: database connection exposing ``cursor()``; the existence check
            queries SQLite's ``sqlite_master``.

    Exits the process with status 2 when there are no runs and with status 3
    when a key parameter is missing from the header.
    """
    runs = get_runs(params)
    # select how to and which runs to use for a certain range
    natural_sort(runs)
    if len(runs) < 1:
        print("No runs in this directory, cannot create table")
        sys.exit(2)

    # NOTE(review): binary mode matches the Python 2 csv module; under
    # Python 3 csv.reader expects a text-mode file - confirm the target
    # interpreter before changing the mode.
    with open(get_result_file(params, runs[0]), 'rb') as res:
        csvreader = csv.reader(res, delimiter=params.delimiter)
        # next() works on both Python 2.6+ and Python 3 iterators
        result_params = next(csvreader)
        # a file with only a header yields an empty sample, padded below
        result_params_sample = next(csvreader, [])

    # empty end: the header finished with a delimiter
    if result_params and not result_params[-1]:
        result_params.pop()
        # drop the matching trailing field only when the sample row has it;
        # a row without the trailing delimiter would lose a real value
        if len(result_params_sample) > len(result_params):
            result_params_sample.pop()
    while len(result_params_sample) < len(result_params):
        result_params_sample.append('')
    result_params = [param.strip() for param in result_params]

    # check if table name already exists; the name is a value here, so it can
    # be bound as a parameter instead of being formatted into the SQL text
    cursor = db.cursor()
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
        (params.task_table_name.strip("[]"),))
    if cursor.fetchone():
        return

    # run column first, then parse time in seconds since epoch
    columns = ["{} INTEGER".format(params.run_prefix), "parsed_date REAL"]
    # one column for each result parameter
    for name, sample in zip(result_params, result_params_sample):
        col_type = type_map.get(type(convert_strictest(sample)), "TEXT")
        columns.append('"{}" {}'.format(name, col_type))

    # run is always part of the primary key
    key_columns = [params.run_prefix]
    for primary_key in params.key_params:
        if primary_key not in result_params:
            print("{} does not exist in result file of run 1".format(primary_key))
            sys.exit(3)
        key_columns.append('"{}"'.format(primary_key))
    columns.append("PRIMARY KEY({})".format(','.join(key_columns)))

    # table identifiers cannot be parameterized, so have to use string formatting
    create_table_command = "CREATE TABLE IF NOT EXISTS {}({})".format(
        params.task_table_name, ", ".join(columns))
    print(create_table_command)
    cursor = db.cursor()
    cursor.execute(create_table_command)
    # reinitialize tracked columns for newly created table
    initialize_tracked_columns(params, db)