def helper_execute_select_into(query) -> pd.DataFrame:
    """Run a SELECT INTO against the configured testing database.

    The destination is the second testing table; the warehouse instance is
    built from the local configuration.
    """
    test_config = TstConfig()
    database = str(test_config.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))
    destination_table = str(
        test_config.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING2"))
    warehouse = SqliteDataWarehouse(LocalConfig())
    return warehouse.select_into(query, database, destination_table)
def __init__(self):
    """Build the local-development configuration from environment variables.

    Every setting falls back to a test-friendly default (paths under the
    test base folder, "no-commit" CI marker, etc.) so the object works
    outside CI.
    """
    # Idiomatic single-inheritance form of PlatformConfig.__init__(self).
    super().__init__()
    test_config = TstConfig()

    # Data-lake locations.
    self.data_lake_path = self.get_env("HM_LAKE_PATH")
    self.sqlite_db_path = self.get_env("HM_SQLITE_WAREHOUSE_DBPATH")
    self.lake_bucket = self.get_env(
        "LAKE_BUCKET",
        os.path.join(test_config.get_base_folder(), "sqlite_lake_bucket"))
    self.lake_path = self.get_env("LAKE_PATH")

    # Warehouse settings.
    self.warehouse_dataset = self.get_env("WAREHOUSE_DATASET", "hyper_model")
    self.warehouse_location = self.get_env("WAREHOUSE_LOCATION", "./data")

    # Kubernetes / Kubeflow Pipelines settings.
    self.k8s_namespace = self.get_env("K8S_NAMESPACE")
    self.k8s_cluster = self.get_env("K8S_CLUSTER")
    self.kfp_artifact_path = self.get_env(
        "KFP_ARTIFACT_PATH",
        os.path.join(test_config.get_base_folder(), "data"))

    # CI / GitLab settings; absence of a commit SHA marks local development.
    self.ci_commit = self.get_env("CI_COMMIT_SHA", "no-commit")
    self.is_local_dev = self.ci_commit == "no-commit"
    self.gitlab_token = self.get_env("GITLAB_TOKEN", None)
    self.gitlab_project = self.get_env("GITLAB_PROJECT", None)
    self.gitlab_url = self.get_env("GITLAB_URL", None)

    # Default SQLite database file inside the warehouse location.
    self.default_sql_lite_db_file = f"{self.warehouse_location}/default.db"
def test_dataframe_from_table_size() -> None:
    """The dataframe loaded from table1 must match the table's dimensions."""
    config = TstConfig()
    db = str(config.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))
    table1 = str(config.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING1"))
    frame = helper_dataframe_from_table(db, table1)
    expected_rows, expected_columns = get_row_column_count(db, table1)
    assert frame.shape == (expected_rows, expected_columns)
def helper_execute_import_csv() -> bool:
    """Import the configured testing CSV into table1 and return the result flag."""
    config = TstConfig()
    # Resolve the testing database, table and CSV locations from configuration.
    db = str(config.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))
    table = str(config.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING1"))
    csv_path = str(config.get("FLAT_FILE_LOCATION", "CSV_DATA_FILE"))
    warehouse = SqliteDataWarehouse(LocalConfig())
    return warehouse.import_csv(csv_path, db, table)
def test_file_md5():
    """file_md5 should return an MD5 hex digest for the dummy file.

    The exact digest differs between operating systems (e.g.
    "8348f98f61cf58d6e7921c173bd0286d" was observed on Windows, presumably
    due to line-ending differences), so instead of pinning one value —
    which the original test could not do and degenerated to `assert True` —
    we assert the result has the shape of an MD5 hex digest.
    """
    config = TstConfig()
    dummy_file = config.get("FLAT_FILE_LOCATION", "DUMMY_FILE_LOCATION")
    digest = file_md5(dummy_file)
    # An MD5 hex digest is exactly 32 hexadecimal characters.
    assert isinstance(digest, str)
    assert len(digest) == 32
    assert all(c in "0123456789abcdef" for c in digest.lower())
def test_dataframe_from_query_size() -> None:
    """A query over the populated table2 must return a frame of its full size.

    Fix: the original assigned `table2` twice with identical expressions;
    the duplicate (dead) assignment is removed.
    """
    config = TstConfig()
    local_config = LocalConfig()
    table2 = str(config.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING2"))
    db = local_config.default_sql_lite_db_file
    # Seed table2 and capture the dimensions the helper should reproduce.
    expected_row_count, expected_column_count = populate_table_in_database(db, table2)
    result = helper_dataframe_from_query(f"select * from {table2}")
    assert (expected_row_count, expected_column_count) == result.shape
def test_import_csv_length_of_db() -> None:
    """Importing the test CSV should load exactly 5129 rows into table1.

    Fixes: the import's unused `retVal` binding is dropped (the call is kept
    for its side effect), and the deprecated chained indexing `.iloc[0][0]`
    is replaced by the single-call form `.iloc[0, 0]`.
    """
    config = TstConfig()
    db = str(config.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))
    table = str(config.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING1"))
    expected_table_length = 5129
    # Run the import for its side effect; the success flag is not asserted here.
    helper_execute_import_csv()
    tbl_len_query_result = execute_query(db, f'SELECT count(*) as c FROM {table}')
    actual_table_length = tbl_len_query_result.iloc[0, 0]
    assert expected_table_length == actual_table_length
def test_select_into_row_count() -> None:
    """select_into with LIMIT 30 must leave exactly 30 rows in table2."""
    config = TstConfig()
    db = str(config.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))
    table1 = str(config.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING1"))
    table2 = str(config.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING2"))
    # Confine the copy to 30 rows to limit size; this assumes table1 holds
    # more than 30 rows.
    query = f"select * from {table1} LIMIT 30"
    helper_execute_select_into(query)
    row_count, _ = get_row_column_count(db, table2)
    assert row_count == 30
def test_download():
    """download() should copy the configured dummy file into the temp folder.

    Fixes: the unused `from_folder` computation is removed, and the
    comparison `retVal == True` (PEP 8 / E712) becomes a plain truthiness
    assertion.
    """
    lake = LocalDataLake(LocalConfig())
    config = TstConfig()
    from_location = str(
        config.get("FLAT_FILE_LOCATION", "DUMMY_FILE_LOCATION2"))
    to_folder = str(config.get("FLAT_FILE_LOCATION", "TEMPERORY_FILES_FOLDER"))
    file_name = general.get_filename_from_path(from_location)
    to_file_path = os.path.join(to_folder, file_name)
    result = lake.download(from_location, to_file_path)
    # download presumably returns a bool success flag — TODO confirm.
    assert result
def test_table_schema():
    """table_schema should describe table2 exactly as the known test schema."""
    config = TstConfig()
    table2 = str(config.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING2"))
    db = str(config.get("DATABASE_LOCATION", "SQLITE_DB_FOR_TESTING"))
    # Seed table2 with the known test rows before inspecting its schema.
    populate_table_in_database(db, table2)
    warehouse = SqliteDataWarehouse(LocalConfig())
    actual_schema = warehouse.table_schema(db, table2)
    expected_columns = [
        SqlColumn("id", "int64", True),
        SqlColumn("title", "object", True),
        SqlColumn("author", "object", True),
        SqlColumn("price", "object", True),
        SqlColumn("year", "object", True),
    ]
    expected_schema = SqlTable(table2, table2, expected_columns)
    # Table equality covers name and the full column list.
    assert expected_schema == actual_schema
def test_upload():
    """upload() should copy the dummy file from its folder into the temp folder.

    Fixes: a stale block of commented-out alternative code is deleted, the
    redundant `to_folder = to_location` aliasing is collapsed, and the
    comparison `retVal == True` (PEP 8 / E712) becomes a plain truthiness
    assertion.
    """
    lake = LocalDataLake(LocalConfig())
    config = TstConfig()
    from_location = str(config.get("FLAT_FILE_LOCATION", "DUMMY_FILE_LOCATION"))
    to_folder = str(
        config.get("FLAT_FILE_LOCATION", "TEMPERORY_FILES_FOLDER"))
    file_name = general.get_filename_from_path(from_location)
    # Strip the file name (and its path separator) to get the source folder.
    from_folder = from_location[0:len(from_location) - len(file_name) - 1]
    result = lake.upload(to_folder, from_folder, file_name)
    # upload presumably returns a bool success flag — TODO confirm.
    assert result
def test_dry_run():
    """dry_run should report exactly the known columns of the populated table2."""
    config = TstConfig()
    local_config = LocalConfig()
    table2 = str(config.get("DATABASE_LOCATION", "SQLITE_TABLE_FOR_TESTING2"))
    db = local_config.default_sql_lite_db_file
    # Seed table2 with the known test rows first.
    populate_table_in_database(db, table2)
    warehouse = SqliteDataWarehouse(LocalConfig())
    reported_columns = warehouse.dry_run(f"select * from {table2} ")
    expected_columns = [
        SqlColumn("id", "int64", True),
        SqlColumn("title", "object", True),
        SqlColumn("author", "object", True),
        SqlColumn("price", "object", True),
        SqlColumn("year", "object", True),
    ]
    # Every reported column must be one of the expected ones...
    for column in reported_columns:
        assert column in expected_columns
    # ...and the counts must match, so the column sets are identical.
    assert len(expected_columns) == len(reported_columns)
def test_test_configuration_nagative():
    """Looking up a missing section/key should yield None rather than raise.

    Fix: compare against the None singleton with `is None` instead of
    `== None` (PEP 8 / E711). The misspelled test name is kept because the
    function name is the test's discovery interface.
    """
    test_config = TstConfig()
    assert test_config.get("NOT THERE", "NOT THERE") is None
def test_test_configuration_positive():
    """A key present in the test configuration file is returned as its value."""
    value = TstConfig().get("TESTING_THE_TEST_FILE", "TEST_ENTRY")
    assert value == "1"
import pandas import numpy from pandas import DataFrame import os.path from hypermodel.tests.utilities import create_test_data import logging from hypermodel.tests.utilities.configurations import TstConfig from typing import List, Dict, Any import math import random config = TstConfig() def prepare_csv_file() -> str: csvLocation = config.get("FLAT_FILE_LOCATION", "CSV_DATA_FILE") if not os.path.exists(csvLocation): create_test_data.create_csv_file() return csvLocation def get_test_dataframe() -> pandas.DataFrame: return pandas.read_csv(prepare_csv_file()) def get_test_dataframe_feature_names() -> List[str]: prepare_csv_file() features = [ "REGISTER_NAME", #text "CRED_LIC_NUM", #int "CRED_LIC_NAME", #text