Exemplo n.º 1
0
    def test_list_tables(self):
        """Check ``Catalog.listTables`` for the current and a custom database.

        The test first asserts that both listings start out empty, then
        creates a temp view, a table in the current database, a table in
        ``some_db`` and a table built through ``Catalog.createTable``.  The
        listings, sorted by name, are compared entry by entry.  The expected
        values encode that the temp view appears in both listings with
        ``database=None``, and that ``listTables()`` equals
        ``listTables("default")``.  Finally, listing an unknown database must
        raise ``AnalysisException`` mentioning the database name.
        """
        from pyspark.sql.catalog import Table
        spark = self.spark
        # NOTE(review): self.database / self.table / self.tempView are helper
        # context managers defined outside this block; presumably they drop
        # the named objects on exit -- confirm against the test base class.
        with self.database("some_db"):
            spark.sql("CREATE DATABASE some_db")
            with self.table("tab1", "some_db.tab2", "tab3_via_catalog"):
                with self.tempView("temp_tab"):
                    # Nothing has been created yet, so both listings are empty.
                    self.assertEqual(spark.catalog.listTables(), [])
                    self.assertEqual(spark.catalog.listTables("some_db"), [])
                    spark.createDataFrame([(1, 1)]).createOrReplaceTempView("temp_tab")
                    spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet")
                    spark.sql("CREATE TABLE some_db.tab2 (name STRING, age INT) USING parquet")

                    schema = StructType([StructField("a", IntegerType(), True)])
                    description = "this a table created via Catalog.createTable()"
                    spark.catalog.createTable(
                        "tab3_via_catalog", schema=schema, description=description)

                    # Sort by name so the positional assertions below are stable.
                    tables = sorted(spark.catalog.listTables(), key=lambda t: t.name)
                    tablesDefault = \
                        sorted(spark.catalog.listTables("default"), key=lambda t: t.name)
                    tablesSomeDb = \
                        sorted(spark.catalog.listTables("some_db"), key=lambda t: t.name)
                    self.assertEqual(tables, tablesDefault)
                    self.assertEqual(len(tables), 3)
                    self.assertEqual(len(tablesSomeDb), 2)
                    self.assertEqual(tables[0], Table(
                        name="tab1",
                        database="default",
                        description=None,
                        tableType="MANAGED",
                        isTemporary=False))
                    self.assertEqual(tables[1], Table(
                        name="tab3_via_catalog",
                        database="default",
                        description=description,
                        tableType="MANAGED",
                        isTemporary=False))
                    self.assertEqual(tables[2], Table(
                        name="temp_tab",
                        database=None,
                        description=None,
                        tableType="TEMPORARY",
                        isTemporary=True))
                    self.assertEqual(tablesSomeDb[0], Table(
                        name="tab2",
                        database="some_db",
                        description=None,
                        tableType="MANAGED",
                        isTemporary=False))
                    self.assertEqual(tablesSomeDb[1], Table(
                        name="temp_tab",
                        database=None,
                        description=None,
                        tableType="TEMPORARY",
                        isTemporary=True))
                    # assertRaisesRegex replaces the alias assertRaisesRegexp,
                    # which no longer exists on Python 3.12+.
                    with self.assertRaisesRegex(AnalysisException, "does_not_exist"):
                        spark.catalog.listTables("does_not_exist")
 def test_scrape_all_tables(self) -> None:
     """Scraping a list of two table entries must yield two results."""
     # Both fixtures differ only in their name; everything else is shared.
     shared_fields = dict(database="test_schema1", description=None,
                          tableType="delta", isTemporary=False)
     candidates = [Table(name=table_name, **shared_fields)
                   for table_name in ("test_table1", "test_table3")]
     scraped = self.dExtractor.scrape_all_tables(candidates)
     self.assertEqual(2, len(scraped))
    def test_scrape_tables(self) -> None:
        """Scrape ``test_schema1.test_table1`` and compare with expected metadata.

        The expected metadata has two columns, ``a`` (string) and ``b`` (int),
        in that sort order.  Schema, table, columns, ``failed_to_scrape`` and
        ``is_view`` must match, and ``table_detail`` must be populated.
        """
        scraped = self.dExtractor.scrape_table(
            Table(name="test_table1", database="test_schema1", description=None,
                  tableType="delta", isTemporary=False))

        wanted = ScrapedTableMetadata(schema="test_schema1", table="test_table1")
        column_specs = (("a", "string"), ("b", "int"))
        wanted.set_columns([
            ScrapedColumnMetadata(name=column_name,
                                  description=None,
                                  data_type=column_type,
                                  sort_order=position)
            for position, (column_name, column_type) in enumerate(column_specs)
        ])

        if scraped is None:
            # Fail with the standard assertIsNotNone message when nothing was
            # scraped; the return keeps the Optional narrowed below.
            self.assertIsNotNone(scraped)
            return

        # Compared in a fixed order so the first mismatch is the one reported.
        for attribute in ("schema", "table", "columns", "failed_to_scrape", "is_view"):
            self.assertEqual(getattr(wanted, attribute), getattr(scraped, attribute))
        self.assertIsNotNone(scraped.table_detail)
 def test_table_does_not_exist(self) -> None:
     """Scraping ``test_schema1.test_table5`` is expected to return ``None``."""
     # NOTE(review): presumably the fixture never creates test_table5 -- confirm.
     missing = Table(name="test_table5", database="test_schema1", description=None,
                     tableType="delta", isTemporary=False)
     self.assertIsNone(self.dExtractor.scrape_table(missing))
 def test_create_last_updated(self) -> None:
     """A last-updated record must be produced for a successfully scraped table."""
     target = Table("test_table1", "test_schema1", None, "delta", False)
     scraped = self.dExtractor.scrape_table(target)
     # A falsy scrape result leaves the value as None, which fails the assertion.
     last_updated = (
         self.dExtractor.create_table_last_updated(scraped) if scraped else None
     )
     self.assertIsNotNone(last_updated)
Exemplo n.º 6
0
 def makeTable(
     name,
     database,
     description,
     tableType,
     isTemporary,
 ):
     """Build a ``Table`` with no catalog from a single database name.

     *database* is wrapped in a one-element list and passed as ``namespace``;
     a ``None`` database is passed through as a ``None`` namespace.
     """
     if database is None:
         table_namespace = None
     else:
         table_namespace = [database]
     return Table(
         name=name,
         catalog=None,
         namespace=table_namespace,
         description=description,
         tableType=tableType,
         isTemporary=isTemporary,
     )