Example #1
0
    def test_table_records_reimport(self):
        """Round-trip records through SQLite and verify they are unchanged."""
        with TemporaryDirectory() as tmp:
            tmp = Path(tmp)

            # Build a schema whose column names are sanitized for SQLite use
            safe_schema = {}
            for col, dtype in get_schema().items():
                safe_schema[_safe_column_name(col)] = dtype

            db_path = tmp / "tmp.sqlite"
            data_folder = SRC / "test" / "data"
            with create_sqlite_database(db_file=db_path) as conn:
                for csv_path in data_folder.glob("*.csv"):
                    name = _safe_table_name(csv_path.stem)
                    table_import_from_file(conn, csv_path, schema=safe_schema)

                    # First pass: export all records from the imported table
                    first_pass = list(table_select_all(conn, name))

                    # Import that record list into a brand-new table
                    copy_name = name + "_new"
                    table_import_from_records(
                        conn, copy_name, first_pass, schema=safe_schema)

                    # Second pass: export the records from the new table
                    second_pass = list(table_select_all(conn, copy_name))

                    # Both passes must yield identical records, in order
                    for rec_a, rec_b in zip(first_pass, second_pass):
                        self.assertDictEqual(rec_a, rec_b)
Example #2
0
    def test_table_file_reimport(self):
        """Import CSV files into SQLite and verify exports match the inputs."""
        with TemporaryDirectory() as tmp:
            tmp = Path(tmp)

            db_path = tmp / "tmp.sqlite"
            data_folder = SRC / "test" / "data"

            # Verify that all tables were imported
            with create_sqlite_database(db_file=db_path) as conn:
                for csv_path in data_folder.glob("*.csv"):
                    name = _safe_table_name(csv_path.stem)
                    table_import_from_file(conn, csv_path, table_name=name)
                    self._check_table_not_empty(conn, name)

                    # Dirty hack used to compare appropriate column names. Ideally this would be
                    # handled by the SQL module, which should convert the table and column names to
                    # whatever they were prior to sanitizing them.
                    renamed_csv = tmp / f"{name}.1.csv"
                    adapter = {}
                    for col in get_table_columns(csv_path):
                        safe = _safe_column_name(col)
                        adapter[col] = safe.replace("[", "").replace("]", "")
                    table_rename(csv_path, renamed_csv, adapter)

                    # Export the imported table and compare against the renamed original
                    exported_csv = tmp / f"{name}.2.csv"
                    table_export_csv(conn, name, exported_csv)
                    _compare_tables_equal(self, renamed_csv, exported_csv)
Example #3
0
    def test_import_tables_into_sqlite(self):
        """Verify that SQLite import preserves the content of every source CSV."""
        with temporary_directory() as workdir:
            staging = workdir / "intermediate"
            staging.mkdir(parents=True, exist_ok=True)

            # Copy all test tables into the temporary directory
            publish_global_tables(
                SRC / "test" / "data", staging, use_table_names=V3_TABLE_LIST)

            # Create the SQLite file and open it
            db_path = workdir / "database.sqlite"
            csv_paths = list(staging.glob("*.csv"))
            import_tables_into_sqlite(csv_paths, db_path)
            with create_sqlite_database(db_path) as conn:

                # Verify that each table contains all the data
                for csv_path in csv_paths:
                    roundtrip = workdir / f"{csv_path.stem}.csv"
                    table_export_csv(
                        conn, _safe_table_name(csv_path.stem), roundtrip)
                    _compare_tables_equal(self, csv_path, roundtrip)
Example #4
0
def merge_output_tables_sqlite(
    tables_folder: Path,
    output_path: Path,
    sqlite_file: Path = None,
    drop_empty_columns: bool = False,
    use_table_names: List[str] = None,
) -> None:
    """
    Build a flat view of all tables combined, joined by <key> or <key, date>. This function
    requires index.csv to be present under `tables_folder`.

    Arguments:
        tables_folder: Folder containing the CSV files to join into a single table.
        output_path: Output path for the resulting CSV file.
        sqlite_file: Path for the SQLite database to use for importing data, defaults to a
            temporary database on disk.
        drop_empty_columns: Flag determining whether columns with null values only should be
            removed from the output.
        use_table_names: Names of the tables to merge, defaults to V2_TABLE_LIST.
    """
    # NOTE(review): drop_empty_columns is accepted but never used in this body — confirm
    # whether column pruning was meant to happen before export.

    # Default to a known list of tables to use when none is given
    table_paths = _get_tables_in_folder(tables_folder, use_table_names
                                        or V2_TABLE_LIST)

    # Use a temporary directory for intermediate files
    with temporary_directory() as workdir:

        # Use two temporary tables as I/O for intermediate operations
        temp_table_input, temp_table_output = "tmp_table_name_1", "tmp_table_name_2"

        # Start with all combinations of <location key x date>
        keys_and_dates_table_path = workdir / f"{temp_table_input}.csv"
        _logger.log_info("Creating keys and dates table")
        index_table = [
            table for table in table_paths if table.stem == "index"
        ][0]
        _make_location_key_and_date_table(index_table,
                                          keys_and_dates_table_path)

        # Create an SQLite database
        _logger.log_info("Importing all tables into SQLite")
        database_file = sqlite_file or workdir / "database.sqlite"
        import_tables_into_sqlite([keys_and_dates_table_path] + table_paths,
                                  database_file)

        with create_sqlite_database(database_file) as conn:

            _logger.log_info("Merging all tables into a flat output")
            for table in table_paths:
                _logger.log_info(f"Merging {table.stem}")

                # Read the table's header to determine how to merge it
                table_name = _safe_table_name(table.stem)
                table_columns = get_table_columns(table)
                join_on = [
                    col for col in ("key", "location_key", "date")
                    if col in table_columns
                ]

                # Join with the current intermediate table
                sql_table_join(
                    conn,
                    left=temp_table_input,
                    right=table_name,
                    on=join_on,
                    how="left outer",
                    into_table=temp_table_output,
                )

                # Flip-flop the I/O tables to avoid a copy
                temp_table_input, temp_table_output = temp_table_output, temp_table_input

            # Export and cleanup must happen while the connection is still open; previously
            # these statements sat outside the `with` block and used a closed connection.
            sort_values = ("location_key", "date")
            _logger.log_info("Exporting output as CSV")
            sql_export_csv(conn,
                           temp_table_input,
                           output_path=output_path,
                           sort_by=sort_values)

            # Remove the intermediate tables from the SQLite database
            sql_table_drop(conn, temp_table_input)
            sql_table_drop(conn, temp_table_output)