def test_create_database(self):
        """Create the data-types Glue database, extend it, and query both tables.

        The database is built from the ``data/data_types/`` metadata folder
        next to this test file, queried through Athena, then re-read from the
        Glue catalogue so that a second table with a nested struct column can
        be added and queried as well.
        """
        self.skip_test_if_no_creds()

        # Build (or rebuild) the database from the local metadata folder.
        meta_dir = os.path.join(os.path.dirname(__file__), "data/data_types/")
        read_database_folder(meta_dir).create_glue_database(
            delete_if_exists=True)

        run_athena_sql("""
        select * from test_data_types.test_table
        """)

        # Work on the definition as it is stored in the Glue catalogue.
        catalogue_db = get_existing_database_from_glue_catalogue(
            "test_data_types")

        # Nested struct type, split over several literals for readability.
        nested_type = (
            "struct<arr_key:array<character>,"
            "dict_key:struct<nest_arr:array<long>,"
            "nest_dict:struct<a_key:character,b_key:character>>>"
        )
        extra_table = TableMeta(
            name="test_table_2",
            location="database/test/test_table/",
            data_format="json",
        )
        extra_table.add_column(
            "robin_entity_id",
            nested_type,
            description="an ID for each entity",
        )
        catalogue_db.add_table(extra_table)
        catalogue_db.update_glue_database()

        run_athena_sql("""
        select * from test_data_types.test_table_2
        """)
示例#2
0
    def test_glue_database_creation(self):
        """Create the example database in Glue, check its tables, then delete it.

        The test needs AWS credentials. If no access key can be read from the
        default boto3 session, a notice is printed and the test returns
        without touching Glue.
        """
        credentials = boto3.Session().get_credentials()
        try:
            # Reading the attribute fails when no credentials object is
            # available. Catch Exception rather than using a bare except so
            # KeyboardInterrupt and SystemExit still propagate.
            _ = credentials.access_key
        except Exception:
            has_access_key = False
        else:
            has_access_key = True

        if not has_access_key:
            print(
                "\n***\nCANNOT RUN THIS UNIT TEST AS DO NOT HAVE ACCESS TO AWS.\n***\nskipping ..."
            )
            return

        db = read_database_folder('example/meta_data/db1/')
        # Suffix the name so the test database is distinct from the example's.
        db.name = db.name + '_unit_test_'
        db.create_glue_database()

        resp = _glue_client.get_tables(DatabaseName=db.name)
        test_created = all(
            r['Name'] in db.table_names for r in resp['TableList'])
        self.assertTrue(
            test_created,
            msg=
            "Note this requires user to have correct credentials to create a glue database"
        )

        # The first delete removes the database; the second finds nothing.
        self.assertEqual(db.delete_glue_database(), 'database deleted')
        self.assertEqual(
            db.delete_glue_database(),
            'Cannot delete as database not found in glue catalogue')
示例#3
0
 def test_db_value_properties(self):
     """Each settable database property returns the value assigned to it."""
     db = read_database_folder('example/meta_data/db1/')
     updates = (
         ('name', 'new_name'),
         ('description', 'new description'),
         ('bucket', 'new-bucket'),
         ('base_folder', 'new/folder/location'),
     )
     # Set then read back one property at a time, in the listed order.
     for attr, new_value in updates:
         setattr(db, attr, new_value)
         self.assertEqual(getattr(db, attr), new_value)
示例#4
0
 def test_db_value_properties(self):
     """Assigning name, description, bucket and base_folder round-trips."""
     db = read_database_folder("example/meta_data/db1/")
     assignments = [
         ("name", "new_name"),
         ("description", "new description"),
         ("bucket", "new-bucket"),
         ("base_folder", "new/folder/location"),
     ]
     for prop, value in assignments:
         # Write the property, then check the getter reports the same value.
         setattr(db, prop, value)
         self.assertEqual(getattr(db, prop), value)
示例#5
0
    def test_add_remove_table(self):
        """Tables can be removed from and re-added to a database.

        Also checks that removing an unknown table, adding a non-table
        object, and adding an already-present table each raise ValueError.
        """
        db = read_database_folder('example/meta_data/db1/')

        with self.assertRaises(ValueError):
            db.remove_table('not_a_table')

        db.remove_table('employees')
        self.assertEqual(set(db.table_names), {'teams', 'pay'})

        emp_table = read_table_json('example/meta_data/db1/employees.json')
        db.add_table(emp_table)
        # Compare the whole set: a subset-only check would still pass if
        # add_table silently did nothing.
        self.assertEqual(set(db.table_names), {'teams', 'employees', 'pay'})

        with self.assertRaises(ValueError):
            db.add_table('not a table obj')
        # The table was re-added above, so adding it again must fail.
        with self.assertRaises(ValueError):
            db.add_table(emp_table)
示例#6
0
    def test_add_remove_table(self):
        """Removing and re-adding a table updates ``table_names`` accordingly.

        ValueError is expected when removing a table that does not exist,
        when adding something that is not a table object, and when adding a
        table that is already in the database.
        """
        db = read_database_folder("example/meta_data/db1/")
        self.assertRaises(ValueError, db.remove_table, "not_a_table")

        db.remove_table("employees")
        self.assertEqual(set(db.table_names), {"teams", "pay"})

        emp_table = read_table_json("example/meta_data/db1/employees.json")
        db.add_table(emp_table)
        # Require the exact set of names; checking only that every name is
        # one of the three would not notice a missing "employees" table.
        self.assertEqual(set(db.table_names), {"teams", "employees", "pay"})

        self.assertRaises(ValueError, db.add_table, "not a table obj")
        self.assertRaises(ValueError, db.add_table, emp_table)
示例#7
0
    def test_db_test_column_types_align(self):
        """Column-type alignment passes until a column type is changed.

        After ``pay.employee_id`` is changed to ``character`` the check is
        expected to pass only while the ``pay`` table is excluded.
        """
        db = read_database_folder("example/meta_data/db1/")

        # Unmodified metadata: expected to pass.
        db.test_column_types_align()

        pay_table = db.table("pay")
        pay_table.update_column(column_name="employee_id", type="character")

        # Excluding the modified table: expected to pass.
        db.test_column_types_align(exclude_tables=["pay"])

        # Including the modified table: expected to raise.
        self.assertRaises(MetaColumnTypeMismatch, db.test_column_types_align)
示例#8
0
    def test_glue_database_creation(self):
        """Create the example database in Glue, check its tables, delete it twice.

        The first delete is expected to report success and the second to
        report that the database is no longer in the Glue catalogue.
        """
        self.skip_test_if_no_creds()

        db = read_database_folder("example/meta_data/db1/")
        db.name += "_unit_test_"
        db.create_glue_database()

        # Every table Glue lists for the database must be one defined locally.
        glue_tables = _glue_client.get_tables(DatabaseName=db.name)["TableList"]
        all_known = all(
            [entry["Name"] in db.table_names for entry in glue_tables])
        self.assertTrue(
            all_known,
            msg="Note this requires user to have correct credentials to create a glue database",
        )

        first_delete = db.delete_glue_database()
        self.assertEqual(first_delete, "database deleted")
        second_delete = db.delete_glue_database()
        self.assertEqual(second_delete, "database not found in glue catalogue")
示例#9
0
    def test_table_to_dict(self):
        """``to_dict`` reproduces the JSON files for the teams and pay tables.

        ``$schema`` is blanked on both sides before comparing, because it may
        need changing when on a branch.
        """
        db = read_database_folder("example/meta_data/db1/")

        # "pay" is the file that contains glue-specific content.
        cases = (
            ("teams", "example/meta_data/db1/teams.json"),
            ("pay", "example/meta_data/db1/pay.json"),
        )
        for table_name, json_path in cases:
            expected = read_json(json_path)
            actual = db.table(table_name).to_dict()

            expected["$schema"] = ""
            actual["$schema"] = ""

            self.assertDictEqual(actual, expected)
示例#10
0
 def test_db_table(self):
     """``table`` returns a TableMeta for a known name, ValueError otherwise."""
     db = read_database_folder('example/meta_data/db1/')
     employees = db.table('employees')
     self.assertIsInstance(employees, TableMeta)
     with self.assertRaises(ValueError):
         db.table('not_a_table_object')
示例#11
0
 def test_db_s3_database_path(self):
     """The example database reports the expected S3 database path."""
     expected_path = 's3://my-bucket/database/database1'
     db = read_database_folder('example/meta_data/db1/')
     self.assertEqual(db.s3_database_path, expected_path)
示例#12
0
    def test_db_glue_name(self):
        """Reading the example folder yields the name 'workforce' both times."""
        folder = 'example/meta_data/db1/'
        # Read the folder twice, checking the name after each read.
        for _ in range(2):
            loaded = read_database_folder(folder)
            self.assertEqual(loaded.name, 'workforce')
示例#13
0
 def test_db_name_validation(self):
     """Assigning the name 'bad-name' to a database raises ValueError."""
     db = read_database_folder('example/meta_data/db1/')
     # setattr performs the same assignment as ``db.name = 'bad-name'``.
     self.assertRaises(ValueError, setattr, db, 'name', 'bad-name')
示例#14
0
 def test_db_table_names(self):
     """The example database exposes exactly the teams, employees and pay tables."""
     db = read_database_folder('example/meta_data/db1/')
     # Require the exact set: checking only that each name is one of the
     # three would pass vacuously for an empty or incomplete table list.
     self.assertEqual(set(db.table_names), {'teams', 'employees', 'pay'})
示例#15
0
 def test_read_json(self):
     """The example folder loads with the expected top-level properties."""
     db = read_database_folder('example/meta_data/db1/')
     expected = (
         ('name', 'workforce'),
         ('description', 'Example database'),
         ('bucket', 'my-bucket'),
         ('base_folder', 'database/database1'),
     )
     for attr, value in expected:
         self.assertEqual(getattr(db, attr), value)
def main():
    """Recreate the curated Glue database and refresh all table partitions."""
    curated_db = read_database_folder('meta_data/curated/')

    # Delete before creating, then refresh the partitions of every table.
    curated_db.delete_glue_database()
    curated_db.create_glue_database()
    curated_db.refresh_all_table_partitions()
 def test_can_create_glue_table(self, mock_client_create_table):
     """Creating the data-types database calls the mocked create-table client.

     NOTE(review): ``mock_client_create_table`` is presumably injected by a
     patch decorator that is not visible here - confirm.
     """
     self.skip_test_if_no_creds()
     here = os.path.dirname(__file__)
     data_types_db = read_database_folder(
         os.path.join(here, "data/data_types/"))
     data_types_db.create_glue_database(delete_if_exists=True)
     was_called = mock_client_create_table.called
     self.assertTrue(was_called)
示例#18
0
 def test_location(self):
     """The teams table's Glue definition points at the expected S3 location."""
     db = read_database_folder('example/meta_data/db1/')
     definition = db.table('teams').glue_table_definition()
     storage = definition["StorageDescriptor"]
     self.assertEqual(storage["Location"],
                      's3://my-bucket/database/database1/teams/')
示例#19
0
 def test_table_to_dict(self):
     """``to_dict`` on the teams table matches the teams JSON file."""
     db = read_database_folder('example/meta_data/db1/')
     from_file = read_json('example/meta_data/db1/teams.json')
     from_table = db.table('teams').to_dict()
     self.assertDictEqual(from_file, from_table)
示例#20
0
 def test_db_table_names(self):
     """``table_names`` lists exactly the three tables of the example database."""
     db = read_database_folder("example/meta_data/db1/")
     # Set equality catches missing tables; a check that every name is one
     # of the three is vacuously true when the list is empty.
     self.assertEqual(set(db.table_names), {"teams", "employees", "pay"})
示例#21
0
if __name__ == "__main__":
    # Imported here so the dependency is only needed when run as a script.
    import etl_manager.meta as meta

    # Load the database definition from its metadata folder and create it
    # in Glue.
    db_meta = meta.read_database_folder(
        "glue/meta_data/occupeye_db/"
    )
    db_meta.create_glue_database()
示例#22
0
 def test_read_json(self):
     """Name, description, bucket and base folder are read from the folder."""
     db = read_database_folder("example/meta_data/db1/")
     checks = [
         ("name", "workforce"),
         ("description", "Example database"),
         ("bucket", "my-bucket"),
         ("base_folder", "database/database1"),
     ]
     for prop, wanted in checks:
         self.assertEqual(getattr(db, prop), wanted)