def test_rewrite_multiple(self):
    """Every @file reference in a multi-statement string is rewritten."""
    statement = """SELECT * FROM @./data/remap.csv WHERE frm = 'y' SELECT * FROM @./data/test1.csv WHERE foo = 'bar';"""
    rewritten, table_map = rewrite_sql([statement], {})
    self.assertEqual(
        """SELECT * FROM "remap" WHERE frm = 'y' SELECT * FROM "test1" WHERE foo = 'bar';""",
        rewritten)
    self.assertDictEqual(
        {
            'remap': expand_path_and_exists('./data/remap.csv')[0],
            'test1': expand_path_and_exists('./data/test1.csv')[0],
        },
        table_map)
def test_rewrite_dquotes(self):
    """A double-quoted @"file" reference is rewritten to a bare table name."""
    statement = """SELECT * FROM @"./data/remap.csv" WHERE frm = 'y';"""
    rewritten, table_map = rewrite_sql([statement], {})
    self.assertEqual("""SELECT * FROM "remap" WHERE frm = 'y';""", rewritten)
    self.assertEqual(
        {'remap': expand_path_and_exists('./data/remap.csv')[0]},
        table_map)
def rewrite_sql(sql, table_remap=None):
    """
    Re-write the SQL, replacing @filenames with table names.

    Leave non-@ prefixed table names as-is. Handle stdin as ``-`` and ``@-``
    (mapped to the table name ``stdin``).

    :param sql: iterable of SQL statement strings
    :param table_remap: optional mapping keyed by full path, filename, or
        derived table name, giving the replacement table name
    :return: (rewritten SQL string, {tablename: path} dict)
    :raises FileNotFoundError: if a referenced file does not exist
    :raises Error: if the FROM pattern matched but captured no path group
    """
    table_remap = table_remap or {}
    tables, rewrite = {}, []
    for s in sql:
        # BUG FIX: the copy cursor must restart at 0 for every statement.
        # It was previously initialized once outside this loop, so a second
        # statement was sliced from the prior statement's stale offset.
        i = 0
        s = apply_char_replacements(s)
        for m in FROM_PATTERN.finditer(s):
            # Groups 2/3/4 are the alternative quoting forms of the pattern;
            # exactly one should have matched.
            if m.group(2):
                grp, path = 2, m.group(2)
            elif m.group(3):
                grp, path = 3, m.group(3)
            elif m.group(4):
                grp, path = 4, m.group(4)
            else:
                raise Error("Path parsing error.")
            if path != '-':
                path, exists = expand_path_and_exists(path)
                if not exists:
                    raise FileNotFoundError(f"File not found: {path}")
            # Copy text up to the reference, skipping the prefix characters
            # (2 for group 2, 1 for group 3, 0 for group 4), then advance the
            # cursor past the path (plus closing quote for group 2).
            rewrite.append(s[i:m.start(grp) - (2 if grp == 2 else 1 if grp == 3 else 0)])
            i = m.end(grp) + (1 if grp == 2 else 0)
            if path != '-':
                filename = os.path.basename(path)
                tablename = os.path.splitext(filename)[0]
            else:
                filename = '-'
                tablename = 'stdin'
            # Remapping may be keyed by full path, filename, or table name,
            # checked in that order of precedence.
            if path in table_remap:
                tablename = table_remap[path]
            elif filename in table_remap:
                tablename = table_remap[filename]
            elif tablename in table_remap:
                tablename = table_remap[tablename]
            rewrite.append(f'"{tablename}"')
            tables[tablename] = path
        rewrite.append(s[i:])
    return ''.join(rewrite), tables
def execute(sql: str, headers=None, filters=None, output='-', output_format='csv',
            skip_lines=0, output_delimiter=',', column_remapping=None,
            table_remapping=None, auto_filter=False, save_db=None, load_db=None,
            dialect='unix', input_delimiter=',', input_quotechar='"', debug_=False):
    """
    Load each CSV/TSV file referenced by the SQL into a SQLite table, run the
    SQL, and write the result via do_output().

    :param sql: SQL statement(s) containing @filename table references
    :param headers: column headers; a list or a comma-separated string
    :param filters: {"col": [["filter", ...args...], ...]
    :param output: output destination ('-' for stdout)
    :param output_format: output format name
    :param skip_lines: number of leading lines to skip in each input file
    :param output_delimiter: output field delimiter
    :param column_remapping: {"col": "map_to_col", ...}
    :param table_remapping: {"table": "map_to_col", ...}
    :param auto_filter: add a default ['num'] filter to every unfiltered column
    :param save_db: path to persist the database to (must not already exist)
    :param load_db: path of an existing database to open (must exist)
    :param dialect: csv module dialect name
    :param input_delimiter: input field delimiter
    :param input_quotechar: input quote character
    :param debug_: enable debug output
    :return: None
    :raises Error: if save_db already exists or table creation fails
    :raises FileNotFoundError: if load_db does not exist
    """
    global DEBUG
    DEBUG = debug_
    column_remapping = column_remapping or {}
    headers = headers or []
    if headers and isinstance(headers, str):
        headers = [h.strip() for h in headers.split(',')]
    filters = filters or {}

    # Re-write the SQL, replacing filenames with table names and apply table re-mapping(s)
    sql, tables = rewrite_sql(sql, table_remapping)
    debug(sql, 'sql=')
    debug(tables, 'tables=')

    # Open the database
    if save_db:
        path, exists = expand_path_and_exists(save_db)
        if exists:
            # BUG FIX: the 'f' prefix was inside the string literal
            # ("fDatabase file {path}..."), so the path never interpolated.
            raise Error(f"Database file {path} already exists.")
        con = sqlite3.connect(path)
    elif load_db:
        path, exists = expand_path_and_exists(load_db)
        if not exists:
            raise FileNotFoundError(f"Database file {path} not found.")
        con = sqlite3.connect(path)
    else:
        con = sqlite3.connect(":memory:")
    cur = con.cursor()

    # Read each CSV or TSV file and insert into a SQLite table based on the
    # filename of the file.
    for tablename, path in tables.items():
        with open(path) as f:
            # Plain loop instead of a side-effect-only list comprehension.
            for _ in range(skip_lines):
                f.readline()
            reader = csv.reader(f, dialect=dialect, delimiter=input_delimiter,
                                quotechar=input_quotechar)
            first, colnames = True, []
            for row in reader:
                row = [n.strip() for n in row if n]
                if first:
                    # First row establishes the schema: explicit headers win,
                    # otherwise the first row supplies the column names.
                    placeholders = ', '.join(['?'] * len(row))
                    col_src = headers if headers else row
                    colnames = [
                        column_remapping.get(n.strip()) or n.strip()
                        for n in col_src
                    ]
                    # Apply auto filtering
                    if auto_filter:
                        for col in colnames:
                            if col not in filters:
                                filters[col] = [['num']]
                        debug(filters, 'filters (auto)=')
                    debug(colnames, 'colnames=')
                    colnames_str = ','.join(f'"{c}"' for c in colnames)
                    s = f"""CREATE TABLE "{tablename}" ({colnames_str});"""
                    debug(s)
                    try:
                        cur.execute(s)
                    except sqlite3.OperationalError as e:
                        # Chain the underlying error for easier diagnosis.
                        raise Error(
                            "Failed to create table. Most likely cause is missing headers. "
                            "Use --headers/-r and/or --skip-lines/-k to setup headers."
                        ) from e
                    first = False
                    continue
                filtered_row = apply_filters(filters, colnames, row)
                s = f"""INSERT INTO "{tablename}" ({colnames_str}) VALUES ({placeholders});"""
                cur.execute(s, filtered_row)
        con.commit()

    debug(sql, 'sql=')
    do_output(sql, cur, output, output_format, output_delimiter)
    con.close()
def execute(sql: str, headers=None, filters=None, output='-', output_format='table',
            skip_lines=0, output_delimiter=',', column_remapping=None,
            table_remapping=None, auto_filter=False, save_db=None, load_db=None,
            input_format='csv', input_delimiter=',', input_encoding='utf-8',
            input_compression=None, debug_=False):
    """
    Load each input referenced by the SQL into a SQLite table via a tabulator
    Stream (local files, http/https, s3, gs), run the SQL, and write the
    result via do_output().

    :param sql: SQL statement(s) containing @filename table references
    :param headers: column headers; a list or a comma-separated string
    :param filters: {"col": [["filter", ...args...], ...]
    :param output: output destination ('-' for stdout)
    :param output_format: output format name
    :param skip_lines: number of leading rows to skip in each input
    :param output_delimiter: output field delimiter
    :param column_remapping: {"col": "map_to_col", ...}
    :param table_remapping: {"table": "map_to_col", ...}
    :param auto_filter: add a default ['num'] filter to every unfiltered column
    :param save_db: path to persist the database to (must not already exist)
    :param load_db: path of an existing database to open (must exist)
    :param input_format: stream format passed to tabulator (e.g. 'csv')
    :param input_delimiter: input field delimiter
    :param input_encoding: input text encoding
    :param input_compression: input compression type, if any
    :param debug_: enable debug output
    :return: None
    :raises Error: on pre-existing save_db, duplicate columns, or failed CREATE
    :raises FileNotFoundError: if load_db does not exist
    """
    global DEBUG
    DEBUG = debug_
    column_remapping = column_remapping or {}
    headers = headers or []
    if headers and isinstance(headers, str):
        headers = [h.strip() for h in headers.split(',')]
    filters = filters or {}

    # Re-write the SQL, replacing filenames with table names and apply table re-mapping(s)
    sql, tables = rewrite_sql(sql, table_remapping)
    debug(sql, 'sql=')
    debug(tables, 'tables=')

    # Open the database
    if save_db:
        path, exists = expand_path_and_exists(save_db)
        if exists:
            # BUG FIX: the 'f' prefix was inside the string literal, so the
            # path never interpolated.
            raise Error(f"Database file {path} already exists.")
        con = sqlite3.connect(path)
    elif load_db:
        path, exists = expand_path_and_exists(load_db)
        if not exists:
            raise FileNotFoundError(f"Database file {path} not found.")
        con = sqlite3.connect(path)
    else:
        con = sqlite3.connect(":memory:")
    cur = con.cursor()

    # Read each input and insert into a SQLite table named after the file.
    for tablename, path in tables.items():
        # BUG FIX: stray debugging print(path) replaced with debug().
        debug(path, 'path=')
        with Stream(
                path,
                format=input_format,
                delimiter=input_delimiter,
                skip_rows=range(1, skip_lines + 1),
                custom_parsers={},
                custom_loaders={
                    's3': S3Loader,
                    'gs': GSLoader
                },
                custom_writers={},
                ignore_blank_headers=True,
                encoding=input_encoding,
                compression=input_compression,
                # Explicit headers override; otherwise row 1 supplies them.
                headers=headers if headers else 1,
        ) as stream:
            debug(stream.headers, "headers=")
            debug(stream.encoding, "encoding=")
            first, colnames, line_num = True, [], 0
            for row in stream:
                debug(row, "row=")
                if not row:
                    error(f"Skipping blank line num. {line_num}\n")
                    continue
                # Strip string cells and drop empty strings; keep non-strings.
                row = [
                    n.strip() if isinstance(n, str) else n
                    for n in row
                    if not isinstance(n, str) or (isinstance(n, str) and n)
                ]
                if first:
                    # First row: create the table from the stream's headers.
                    placeholders = ','.join(['?'] * len(row))
                    debug(placeholders, "placeholders=")
                    colnames = [
                        column_remapping.get(n.strip()) or n.strip()
                        for n in stream.headers
                    ]
                    # Check for duplicate column names
                    dups = set(x for x in colnames if colnames.count(x) > 1)
                    if dups:
                        raise Error(
                            f"Invalid duplicate column name(s): {', '.join(dups)}"
                        )
                    # Apply auto filtering
                    if auto_filter:
                        for col in colnames:
                            if col not in filters:
                                filters[col] = [['num']]
                        debug(filters, 'filters (auto)=')
                    debug(colnames, 'colnames=')
                    colnames_str = ','.join(f'"{c}"' for c in colnames)
                    check_filters_against_columns(filters, colnames)
                    s = f"""CREATE TABLE "{tablename}" ({colnames_str});"""
                    debug(s)
                    try:
                        cur.execute(s)
                    except sqlite3.OperationalError as e:
                        # Chain the underlying error for easier diagnosis.
                        raise Error(
                            "Failed to create table. Most likely cause is missing headers. "
                            "Use --headers/-r and/or --skip-lines/-k to setup headers."
                        ) from e
                    first = False
                    # NOTE: deliberately no `continue` here — headers come from
                    # the stream, so the first row is data and is inserted too.
                filtered_row = apply_filters(filters, colnames, row)
                if len(filtered_row) != len(colnames):
                    error(
                        f"Warning: Invalid row: {row!r} (line={line_num}). Skipping...\n"
                    )
                    continue
                s = f"""INSERT INTO "{tablename}" ({colnames_str}) VALUES ({placeholders});"""
                debug(f"{s}, {filtered_row}")
                cur.execute(s, filtered_row)
                line_num += 1
        con.commit()

    debug(sql, 'sql=')
    do_output(sql, cur, output, output_format, output_delimiter)
    con.close()
def rewrite_sql(sql, table_remap=None):
    """
    Re-write the SQL, replacing @filenames with table names.

    Leave non-@ prefixed table names as-is. Handle stdin as ``-`` and ``@-``
    (mapped to the table name ``stdin``). Paths may also be http/https, s3,
    gs, or file URLs; remote schemes are passed through untouched for the
    stream loader to fetch.

    :param sql: iterable of SQL statement strings
    :param table_remap: optional mapping keyed by full path, filename, or
        derived table name, giving the replacement table name
    :return: (rewritten SQL string, {tablename: path} dict)
    :raises FileNotFoundError: if a referenced local file does not exist
    :raises Error: on an unsupported URL scheme or a path parsing failure
    """
    table_remap = table_remap or {}
    tables, rewrite = {}, []
    for s in sql:
        # BUG FIX: the copy cursor must restart at 0 for every statement.
        # It was previously initialized once outside this loop, so a second
        # statement was sliced from the prior statement's stale offset.
        i = 0
        s = apply_char_replacements(s)
        for m in FROM_PATTERN.finditer(s):
            # Groups 2/3/4 are the alternative quoting forms of the pattern;
            # exactly one should have matched.
            if m.group(2):
                grp, path = 2, m.group(2)
            elif m.group(3):
                grp, path = 3, m.group(3)
            elif m.group(4):
                grp, path = 4, m.group(4)
            else:
                raise Error("Path parsing error.")
            if path != '-':
                parse_result = urlparse(path)
                scheme = parse_result.scheme
                if scheme in {'http', 'https', 's3', 'gs'}:
                    # Remote sources: leave the URL as-is for the loader.
                    pass
                elif scheme in {'file', ''}:
                    path = parse_result.path
                    path, exists = expand_path_and_exists(path)
                    if not exists:
                        raise FileNotFoundError(f"File not found: {path}")
                else:
                    # BUG FIX: message was missing the f-string prefix, so
                    # {scheme} was emitted literally.
                    raise Error(f"Invalid URL scheme: {scheme}")
            # Copy text up to the reference, skipping the prefix characters
            # (2 for group 2, 1 for group 3, 0 for group 4), then advance the
            # cursor past the path (plus closing quote for group 2).
            rewrite.append(s[i:m.start(grp) - (2 if grp == 2 else 1 if grp == 3 else 0)])
            i = m.end(grp) + (1 if grp == 2 else 0)
            if path != '-':
                filename = os.path.basename(path)
                tablename = os.path.splitext(filename)[0]
            else:
                filename = '-'
                tablename = 'stdin'
            # Remapping may be keyed by full path, filename, or table name,
            # checked in that order of precedence.
            if path in table_remap:
                tablename = table_remap[path]
            elif filename in table_remap:
                tablename = table_remap[filename]
            elif tablename in table_remap:
                tablename = table_remap[tablename]
            if tablename.upper() in RESERVED_WORDS:
                # BUG FIX: warning lacked a trailing newline.
                sys.stderr.write(
                    f"Warning: Table name {tablename} is a SQLite reserved word.\n"
                )
            rewrite.append(f'"{tablename}"')
            tables[tablename] = path
        rewrite.append(s[i:])
    return ''.join(rewrite), tables