示例#1
0
 def test_rewrite_multiple(self):
     """Check that two @file references in one SQL string are both rewritten.

     The rewritten SQL must reference the quoted table names, and the
     returned table map must point each table name at the path reported by
     ``expand_path_and_exists`` for the corresponding file.
     """
     statement = """SELECT * FROM @./data/remap.csv WHERE frm = 'y' SELECT * FROM @./data/test1.csv WHERE foo = 'bar';"""
     rewritten, tables = rewrite_sql([statement], {})

     expected_sql = """SELECT * FROM "remap" WHERE frm = 'y' SELECT * FROM "test1" WHERE foo = 'bar';"""
     self.assertEqual(expected_sql, rewritten)

     # Build the expected mapping only after the SQL comparison has passed.
     expected_tables = {}
     expected_tables['remap'] = expand_path_and_exists('./data/remap.csv')[0]
     expected_tables['test1'] = expand_path_and_exists('./data/test1.csv')[0]
     self.assertDictEqual(expected_tables, tables)
示例#2
0
 def test_rewrite_dquotes(self):
     """Check that a double-quoted @"path" reference is rewritten to its table name."""
     statement = """SELECT * FROM @"./data/remap.csv" WHERE frm = 'y';"""
     rewritten, tables = rewrite_sql([statement], {})

     expected_sql = """SELECT * FROM "remap" WHERE frm = 'y';"""
     self.assertEqual(expected_sql, rewritten)

     # Resolve the expected path only after the SQL comparison has passed.
     expected_tables = {'remap': expand_path_and_exists('./data/remap.csv')[0]}
     self.assertEqual(expected_tables, tables)
示例#3
0
def rewrite_sql(sql, table_remap=None):
    """
    Re-write the SQL, replacing @filenames with table names.
    Leave non-@ prefixed table names as-is.
    Handle stdin - and @-

    Every statement is first passed through ``apply_char_replacements`` and
    then scanned with ``FROM_PATTERN``. Each matched path is replaced by a
    double-quoted table name: the file's base name without its extension, or
    ``stdin`` when the path is ``-``. The rewritten statements are
    concatenated without any separator.

    :param sql: iterable of SQL statement strings
    :param table_remap: optional mapping used to override the derived table
        name; looked up by expanded path first, then by file name, then by
        the derived table name
    :return: tuple ``(rewritten_sql, tables)`` where ``tables`` maps each
        table name to its expanded path (or ``'-'`` for stdin)
    :raises Error: if ``FROM_PATTERN`` matches but groups 2-4 are all empty
    :raises FileNotFoundError: if a referenced file does not exist
    """
    table_remap = table_remap or {}
    tables, rewrite = {}, []
    for s in sql:
        s = apply_char_replacements(s)
        # The copy offset indexes into the current statement only, so it has
        # to restart at 0 for every statement. Carrying it over from the
        # previous statement dropped the leading text of later statements.
        i = 0
        for m in FROM_PATTERN.finditer(s):
            if m.group(2):
                grp, path = 2, m.group(2)
            elif m.group(3):
                grp, path = 3, m.group(3)
            elif m.group(4):
                grp, path = 4, m.group(4)
            else:
                raise Error("Path parsing error.")

            if path != '-':
                path, exists = expand_path_and_exists(path)
                if not exists:
                    raise FileNotFoundError(f"File not found: {path}")

            # Copy the text up to the reference, excluding its prefix:
            # two characters before group 2, one before group 3, none before
            # group 4 (presumably `@"`, `@` and a bare path -- confirm
            # against FROM_PATTERN).
            rewrite.append(s[i:m.start(grp) -
                             (2 if grp == 2 else 1 if grp == 3 else 0)])
            # Group 2 also has one trailing character to skip (presumably the
            # closing quote).
            i = m.end(grp) + (1 if grp == 2 else 0)

            if path != '-':
                filename = os.path.basename(path)
                tablename = os.path.splitext(filename)[0]
            else:
                filename = '-'
                tablename = 'stdin'

            # Most specific remapping key wins: path, then file name, then
            # the derived table name.
            if path in table_remap:
                tablename = table_remap[path]
            elif filename in table_remap:
                tablename = table_remap[filename]
            elif tablename in table_remap:
                tablename = table_remap[tablename]

            rewrite.append(f'"{tablename}"')
            tables[tablename] = path

        # Remainder of the statement after the last reference.
        rewrite.append(s[i:])

    return ''.join(rewrite), tables
示例#4
0
def execute(sql: str,
            headers=None,
            filters=None,
            output='-',
            output_format='csv',
            skip_lines=0,
            output_delimiter=',',
            column_remapping=None,
            table_remapping=None,
            auto_filter=False,
            save_db=None,
            load_db=None,
            dialect='unix',
            input_delimiter=',',
            input_quotechar='"',
            debug_=False):
    """
    Load the files referenced in the SQL into SQLite tables, then run the SQL.

    The SQL is rewritten by ``rewrite_sql`` so that file references become
    table names. Each referenced file is parsed with ``csv.reader`` and
    inserted into a table of that name, and the rewritten SQL is handed to
    ``do_output``.

    The first row read from each input is always consumed when the table is
    created and is never inserted, even when ``headers`` is supplied.

    :param sql: passed unchanged to ``rewrite_sql``, which iterates over it
        as a sequence of statements (NOTE(review): the annotation says
        ``str`` -- confirm what callers actually pass)
    :param headers: column names, as a list or a comma-separated string;
        when given they are used instead of the first row read
    :param filters:  {"col": [["filter", ...args...], ...]
    :param output: forwarded to ``do_output``
    :param output_format: forwarded to ``do_output``
    :param skip_lines: number of leading lines discarded from each input
        before CSV parsing starts
    :param output_delimiter: forwarded to ``do_output``
    :param column_remapping: {"col": "map_to_col", ...}
    :param table_remapping:  {"table": "map_to_col", ...}
    :param auto_filter: when true, every column without an explicit filter
        gets the filter list ``[['num']]``
    :param save_db: path of a new SQLite database file; takes precedence
        over ``load_db`` and must not exist yet
    :param load_db: path of an existing SQLite database file to open
    :param dialect: ``csv.reader`` dialect
    :param input_delimiter: ``csv.reader`` delimiter
    :param input_quotechar: ``csv.reader`` quote character
    :param debug_: value assigned to the module-level ``DEBUG`` flag
    :return: None
    :raises Error: if ``save_db`` already exists or a table cannot be created
    :raises FileNotFoundError: if ``load_db`` does not exist
    """
    global DEBUG
    # Local import so this block does not rely on a module-level import.
    import sys

    DEBUG = debug_
    column_remapping = column_remapping or {}
    headers = headers or []
    if headers and isinstance(headers, str):
        headers = [h.strip() for h in headers.split(',')]
    # Work on a copy: auto-filtering adds entries below and must not leak
    # into the dict owned by the caller.
    filters = dict(filters) if filters else {}

    # Re-write the SQL, replacing filenames with table names and apply table re-mapping(s)
    sql, tables = rewrite_sql(sql, table_remapping)
    debug(sql, 'sql=')
    debug(tables, 'tables=')

    # Open the database
    if save_db:
        path, exists = expand_path_and_exists(save_db)
        if exists:
            # The `f` prefix used to sit inside the quotes, so the message
            # was emitted literally as "fDatabase file {path} ...".
            raise Error(f"Database file {path} already exists.")
        con = sqlite3.connect(path)
    elif load_db:
        path, exists = expand_path_and_exists(load_db)
        if not exists:
            raise FileNotFoundError(f"Database file {path} not found.")
        con = sqlite3.connect(path)
    else:
        con = sqlite3.connect(":memory:")

    try:
        cur = con.cursor()

        # Read each CSV or TSV file and insert into a SQLite table based on the filename of the file
        for tablename, path in tables.items():
            # rewrite_sql records stdin as the path '-', which cannot be
            # passed to open().
            from_stdin = path == '-'
            f = sys.stdin if from_stdin else open(path)
            try:
                for _ in range(skip_lines):
                    f.readline()

                reader = csv.reader(f,
                                    dialect=dialect,
                                    delimiter=input_delimiter,
                                    quotechar=input_quotechar)
                first, colnames = True, []

                for row in reader:
                    # NOTE(review): empty cells are dropped here, so a row
                    # with a blank field ends up shorter than the header --
                    # confirm this is intended.
                    row = [n.strip() for n in row if n]

                    if first:
                        placeholders = ', '.join(['?'] * len(row))
                        col_src = headers if headers else row
                        colnames = [
                            column_remapping.get(n.strip()) or n.strip()
                            for n in col_src
                        ]

                        # Apply auto filtering
                        if auto_filter:
                            for col in colnames:
                                if col not in filters:
                                    filters[col] = [['num']]
                            debug(filters, 'filters (auto)=')

                        debug(colnames, 'colnames=')
                        colnames_str = ','.join(f'"{c}"' for c in colnames)

                        s = f"""CREATE TABLE "{tablename}" ({colnames_str});"""
                        debug(s)
                        try:
                            cur.execute(s)
                        except sqlite3.OperationalError as e:
                            # Keep the SQLite error as the cause for debugging.
                            raise Error(
                                "Failed to create table. Most likely cause is missing headers. "
                                "Use --headers/-r and/or --skip-lines/-k to setup headers."
                            ) from e

                        first = False
                        continue

                    filtered_row = apply_filters(filters, colnames, row)

                    s = f"""INSERT INTO "{tablename}" ({colnames_str}) VALUES ({placeholders});"""
                    cur.execute(s, filtered_row)
            finally:
                # Never close stdin; only close files opened here.
                if not from_stdin:
                    f.close()

        con.commit()

        debug(sql, 'sql=')
        do_output(sql, cur, output, output_format, output_delimiter)
    finally:
        # Release the connection even when loading or output fails.
        con.close()
示例#5
0
文件: __init__.py 项目: jwelch92/tql
def execute(
        sql: str,
        headers=None,
        filters=None,
        output='-',
        output_format='table',
        skip_lines=0,
        output_delimiter=',',
        column_remapping=None,
        table_remapping=None,
        auto_filter=False,
        save_db=None,
        load_db=None,
        # dialect='unix',
        input_format='csv',
        input_delimiter=',',
        input_encoding='utf-8',
        input_compression=None,
        #input_quotechar='"',
        debug_=False):
    """
    Load the sources referenced in the SQL into SQLite tables, then run the SQL.

    The SQL is rewritten by ``rewrite_sql`` so that file references become
    table names. Each referenced source is read through ``Stream`` and
    inserted into a table of that name, and the rewritten SQL is handed to
    ``do_output``. Rows whose length differs from the column count are
    reported through ``error`` and skipped.

    :param sql: passed unchanged to ``rewrite_sql``, which iterates over it
        as a sequence of statements (NOTE(review): the annotation says
        ``str`` -- confirm what callers actually pass)
    :param headers: column names, as a list or a comma-separated string;
        when empty, ``Stream`` is given ``headers=1``
    :param filters:  {"col": [["filter", ...args...], ...]
    :param output: forwarded to ``do_output``
    :param output_format: forwarded to ``do_output``
    :param skip_lines: passed to ``Stream`` as
        ``skip_rows=range(1, skip_lines + 1)``
    :param output_delimiter: forwarded to ``do_output``
    :param column_remapping: {"col": "map_to_col", ...}
    :param table_remapping:  {"table": "map_to_col", ...}
    :param auto_filter: when true, every column without an explicit filter
        gets the filter list ``[['num']]``
    :param save_db: path of a new SQLite database file; takes precedence
        over ``load_db`` and must not exist yet
    :param load_db: path of an existing SQLite database file to open
    :param input_format: passed to ``Stream`` as ``format``
    :param input_delimiter: passed to ``Stream`` as ``delimiter``
    :param input_encoding: passed to ``Stream`` as ``encoding``
    :param input_compression: passed to ``Stream`` as ``compression``
    :param debug_: value assigned to the module-level ``DEBUG`` flag
    :return: None
    :raises Error: if ``save_db`` already exists, column names are
        duplicated, or a table cannot be created
    :raises FileNotFoundError: if ``load_db`` does not exist
    """

    global DEBUG
    DEBUG = debug_
    column_remapping = column_remapping or {}
    headers = headers or []
    if headers and isinstance(headers, str):
        headers = [h.strip() for h in headers.split(',')]
    # Work on a copy: auto-filtering adds entries below and must not leak
    # into the dict owned by the caller.
    filters = dict(filters) if filters else {}

    # Re-write the SQL, replacing filenames with table names and apply table re-mapping(s)
    sql, tables = rewrite_sql(sql, table_remapping)
    debug(sql, 'sql=')
    debug(tables, 'tables=')

    # Open the database
    if save_db:
        path, exists = expand_path_and_exists(save_db)
        if exists:
            # The `f` prefix used to sit inside the quotes, so the message
            # was emitted literally as "fDatabase file {path} ...".
            raise Error(f"Database file {path} already exists.")
        con = sqlite3.connect(path)
    elif load_db:
        path, exists = expand_path_and_exists(load_db)
        if not exists:
            raise FileNotFoundError(f"Database file {path} not found.")
        con = sqlite3.connect(path)
    else:
        con = sqlite3.connect(":memory:")

    try:
        cur = con.cursor()

        # Read each source and insert into a SQLite table based on the filename of the file
        for tablename, path in tables.items():
            # Routed through debug() instead of print() so the path does not
            # end up on stdout.
            debug(path, 'path=')
            with Stream(
                    path,
                    format=input_format,
                    delimiter=input_delimiter,
                    skip_rows=range(1, skip_lines + 1),
                    custom_parsers={},
                    custom_loaders={
                        's3': S3Loader,
                        'gs': GSLoader
                    },
                    custom_writers={},
                    ignore_blank_headers=True,
                    encoding=input_encoding,
                    compression=input_compression,
                    headers=headers if headers else 1,
            ) as stream:

                debug(stream.headers, "headers=")
                debug(stream.encoding, "encoding=")

                first, colnames = True, []
                # line_num is the 1-based position of the row in the stream.
                # It advances for skipped rows too, so warnings point at the
                # offending row.
                for line_num, row in enumerate(stream, start=1):
                    debug(row, "row=")
                    if not row:
                        error(f"Skipping blank line num. {line_num}\n")
                        continue
                    # Drop empty-string cells and strip the remaining strings;
                    # non-string values pass through untouched.
                    row = [
                        n.strip() if isinstance(n, str) else n for n in row
                        if not isinstance(n, str) or n
                    ]
                    if first:
                        colnames = [
                            column_remapping.get(n.strip()) or n.strip()
                            for n in stream.headers
                        ]

                        # Only rows with exactly len(colnames) values are
                        # inserted, so the placeholder count must follow the
                        # columns, not the length of the first row.
                        placeholders = ','.join(['?'] * len(colnames))
                        debug(placeholders, "placeholders=")

                        # Check for duplicate column names
                        dups = set(x for x in colnames if colnames.count(x) > 1)
                        if dups:
                            raise Error(
                                f"Invalid duplicate column name(s): {', '.join(dups)}"
                            )

                        # Apply auto filtering
                        if auto_filter:
                            for col in colnames:
                                if col not in filters:
                                    filters[col] = [['num']]
                            debug(filters, 'filters (auto)=')

                        debug(colnames, 'colnames=')
                        colnames_str = ','.join(f'"{c}"' for c in colnames)

                        check_filters_against_columns(filters, colnames)

                        s = f"""CREATE TABLE "{tablename}" ({colnames_str});"""
                        debug(s)
                        try:
                            cur.execute(s)
                        except sqlite3.OperationalError as e:
                            # Keep the SQLite error as the cause for debugging.
                            raise Error(
                                "Failed to create table. Most likely cause is missing headers. "
                                "Use --headers/-r and/or --skip-lines/-k to setup headers."
                            ) from e

                        # No `continue`: the first row from the stream is
                        # data and is inserted below.
                        first = False

                    filtered_row = apply_filters(filters, colnames, row)
                    if len(filtered_row) != len(colnames):
                        error(
                            f"Warning: Invalid row: {row!r} (line={line_num}). Skipping...\n"
                        )
                        continue
                    s = f"""INSERT INTO "{tablename}" ({colnames_str}) VALUES ({placeholders});"""
                    debug(f"{s}, {filtered_row}")
                    cur.execute(s, filtered_row)

        con.commit()

        debug(sql, 'sql=')
        do_output(sql, cur, output, output_format, output_delimiter)
    finally:
        # Release the connection even when loading or output fails.
        con.close()
示例#6
0
def rewrite_sql(sql, table_remap=None):
    """
    Re-write the SQL, replacing @filenames with table names.
    Leave non-@ prefixed table names as-is.
    Handle stdin - and @-

    Every statement is first passed through ``apply_char_replacements`` and
    then scanned with ``FROM_PATTERN``. Each matched path is replaced by a
    double-quoted table name: the base name without its extension, or
    ``stdin`` when the path is ``-``. Paths with an ``http``, ``https``,
    ``s3`` or ``gs`` scheme are kept as-is. ``file`` and scheme-less paths
    are expanded and must exist. The rewritten statements are concatenated
    without any separator.

    :param sql: iterable of SQL statement strings
    :param table_remap: optional mapping used to override the derived table
        name; looked up by path first, then by file name, then by the
        derived table name
    :return: tuple ``(rewritten_sql, tables)`` where ``tables`` maps each
        table name to its path, URL, or ``'-'`` for stdin
    :raises Error: if ``FROM_PATTERN`` matches but groups 2-4 are all empty,
        or the path uses an unsupported URL scheme
    :raises FileNotFoundError: if a referenced local file does not exist
    """
    table_remap = table_remap or {}
    tables, rewrite = {}, []
    for s in sql:
        s = apply_char_replacements(s)
        # The copy offset indexes into the current statement only, so it has
        # to restart at 0 for every statement. Carrying it over from the
        # previous statement dropped the leading text of later statements.
        i = 0
        for m in FROM_PATTERN.finditer(s):
            if m.group(2):
                grp, path = 2, m.group(2)
            elif m.group(3):
                grp, path = 3, m.group(3)
            elif m.group(4):
                grp, path = 4, m.group(4)
            else:
                raise Error("Path parsing error.")

            if path != '-':
                parse_result = urlparse(path)
                scheme = parse_result.scheme
                if scheme in {'http', 'https', 's3', 'gs'}:
                    # Remote sources are not checked here; the URL is kept
                    # unchanged.
                    pass
                elif scheme in {'file', ''}:
                    path = parse_result.path
                    path, exists = expand_path_and_exists(path)
                    if not exists:
                        raise FileNotFoundError(f"File not found: {path}")
                else:
                    # The `f` prefix was missing, so the scheme was never
                    # interpolated into the message.
                    raise Error(f"Invalid URL scheme: {scheme}")

            # Copy the text up to the reference, excluding its prefix:
            # two characters before group 2, one before group 3, none before
            # group 4 (presumably `@"`, `@` and a bare path -- confirm
            # against FROM_PATTERN).
            rewrite.append(s[i:m.start(grp) -
                             (2 if grp == 2 else 1 if grp == 3 else 0)])
            # Group 2 also has one trailing character to skip (presumably the
            # closing quote).
            i = m.end(grp) + (1 if grp == 2 else 0)

            if path != '-':
                filename = os.path.basename(path)
                tablename = os.path.splitext(filename)[0]
            else:
                filename = '-'
                tablename = 'stdin'

            # Most specific remapping key wins: path, then file name, then
            # the derived table name.
            if path in table_remap:
                tablename = table_remap[path]
            elif filename in table_remap:
                tablename = table_remap[filename]
            elif tablename in table_remap:
                tablename = table_remap[tablename]

            if tablename.upper() in RESERVED_WORDS:
                # Trailing newline so the warning does not run into whatever
                # is written to stderr next.
                sys.stderr.write(
                    f"Warning: Table name {tablename} is a SQLite reserved word.\n"
                )

            rewrite.append(f'"{tablename}"')
            tables[tablename] = path

        # Remainder of the statement after the last reference.
        rewrite.append(s[i:])

    return ''.join(rewrite), tables