Exemplo n.º 1
0
    def test_table_grouped_tail_real_data(self):
        """Compare `table_grouped_tail` with a pandas group-by on each test data CSV.

        For every `*.csv` file under `SRC / "test" / "data"`, the table is
        reduced once with `table_grouped_tail` (grouping by `"key"`) and once
        with a pandas `groupby("key")` aggregated by `agg_last_not_null`. Both
        outputs are written into a temporary directory and their sorted lines
        are asserted to be equal.
        """
        with temporary_directory() as workdir:
            data_folder = SRC / "test" / "data"

            for csv_path in data_folder.glob("*.csv"):
                source_table = read_table(csv_path, schema=SCHEMA)
                actual_path = workdir / f"latest_{csv_path.name}"
                expected_path = workdir / f"latest_pandas_{csv_path.name}"

                # Output produced by the function under test
                table_grouped_tail(csv_path, actual_path, ["key"])

                # Reference output produced by grouping with pandas
                grouped = source_table.groupby("key")
                reference_table = grouped.aggregate(agg_last_not_null).reset_index()
                export_csv(reference_table, path=expected_path, schema=SCHEMA)

                # Row order may differ between the two outputs, so the lines
                # are sorted before they are compared
                with open_file_like(actual_path) as actual_fd:
                    with open_file_like(expected_path) as expected_fd:
                        actual_lines = sorted(actual_fd)
                        expected_lines = sorted(expected_fd)

                self.assertEqual(len(actual_lines), len(expected_lines))
                for actual_line, expected_line in zip(actual_lines, expected_lines):
                    self.assertEqual(actual_line, expected_line)
Exemplo n.º 2
0
    def test_open_file_like_handle(self):
        """Write to a file through `open_file_like` given an already-open handle.

        An open handle is passed to `open_file_like` twice in a row, and each
        time text is appended at the end of the file. The file is then
        expected to contain `"hello world"`, which requires the writes made
        through both wraps to reach the file.
        """
        with temporary_directory() as workdir:
            temp_file_path = workdir / "temp.txt"

            # Manage the handle with `with` so it is closed even when one of
            # the seeks or writes below raises, instead of leaking it
            with open(temp_file_path, "w") as fd1:
                fd1.write("hello")
                with open_file_like(fd1, "w") as fd2:
                    fd2.seek(0, SEEK_END)
                    fd2.write(" ")
                with open_file_like(fd1, "w") as fd2:
                    fd2.seek(0, SEEK_END)
                    fd2.write("world")

            # The handle is closed at this point, so buffered data is flushed
            self._assert_file_contents_equal(temp_file_path, "hello world")
Exemplo n.º 3
0
    def test_open_file_like_file(self):
        """Write to a file through `open_file_like` given a path and check its contents."""
        expected_text = "hello world"

        with temporary_directory() as scratch_dir:
            target_path = scratch_dir / "temp.txt"

            with open_file_like(target_path, "w") as handle:
                handle.write(expected_text)

            self._assert_file_contents_equal(target_path, expected_text)
Exemplo n.º 4
0
def _compare_tables_equal(test_case: ProfiledTestCase, table1: Path, table2: Path) -> None:
    """Assert that two table files hold the same columns and the same records.

    The column names are compared as sets. The non-empty lines of both files
    are then parsed as CSV and compared pairwise, in file order, after each
    row is mapped to a `{column: value}` dictionary.

    Arguments:
        test_case: Test case whose `assertEqual` is used for every check.
        table1: Path of the first table.
        table2: Path of the second table.
    """
    columns_a = get_table_columns(table1)
    columns_b = get_table_columns(table2)
    test_case.assertEqual(set(columns_a), set(columns_b))

    with open_file_like(table1) as handle_a, open_file_like(table2) as handle_b:
        lines_a = list(line_reader(handle_a, skip_empty=True))
        lines_b = list(line_reader(handle_b, skip_empty=True))
        test_case.assertEqual(len(lines_a), len(lines_b))

        # Key every row by column name so that two tables whose columns are
        # written in a different order still compare equal
        for row_a, row_b in zip(csv.reader(lines_a), csv.reader(lines_b)):
            mapped_a = dict(zip(columns_a, row_a))
            mapped_b = dict(zip(columns_b, row_b))
            test_case.assertEqual(mapped_a, mapped_b)
Exemplo n.º 5
0
    def parse(self, sources: Dict[str, str], aux: Dict[str, DataFrame], **parse_opts) -> DataFrame:
        """Load each JSON source into a DataFrame and delegate to `parse_dataframes`.

        Arguments:
            sources: Mapping of source name to the location of a JSON file.
            aux: Auxiliary dataframes, forwarded unchanged to `parse_dataframes`.
            parse_opts: Extra options, forwarded unchanged to `parse_dataframes`.

        Returns:
            Whatever `self.parse_dataframes` returns for the loaded dataframes.
        """
        # Top-level key of the JSON document which holds the list of records
        records_key = "integrated_county_timeseries_external_data"

        loaded = {}
        for source_name, source_path in sources.items():
            with open_file_like(source_path, "r") as handle:
                payload = json.load(handle)
            loaded[source_name] = DataFrame.from_records(payload[records_key])

        return self.parse_dataframes(loaded, aux=aux, **parse_opts)