def validate(spark):
    """Validate the data loaded into the data warehouse.

    Reads the fact table (i94) and every dimension table back from their
    parquet locations and checks that each one contains at least one row.

    Args:
        spark: Active SparkSession used by the Validator to read parquet files.

    Raises:
        ValueError: If any loaded table is empty (fails the row-count check).
    """
    parquet_paths = {
        'i94': 'parquets/i94.parquet',
        'airports': 'parquets/airports.parquet',
        'cities': 'parquets/cities.parquet',
        'visa': 'parquets/visas.parquet',
        'countries': 'parquets/countries.parquet',
        # NOTE(review): 'trasnp_modes' looks like a typo for 'transp_modes' —
        # confirm against the actual file written by the ETL step before renaming.
        'transport_modes': 'parquets/trasnp_modes.parquet',
        'ports': 'parquets/ports.parquet',
        'states': 'parquets/states.parquet',
    }

    validator = Validator(spark)

    df_i94 = validator.get_facts(parquet_paths['i94'])
    (df_airports, df_cities, df_visas, df_transp_modes,
     df_countries, df_states, df_ports) = validator.get_dimensions(
        parquet_paths['airports'],
        parquet_paths['cities'],
        parquet_paths['visa'],
        parquet_paths['transport_modes'],
        parquet_paths['countries'],
        parquet_paths['states'],
        parquet_paths['ports'])

    # Use explicit raises rather than `assert`: asserts are stripped under
    # `python -O`, which would silently disable all of this validation.
    tables = {
        'i94': df_i94,
        'airports': df_airports,
        'cities': df_cities,
        'visas': df_visas,
        'transport_modes': df_transp_modes,
        'countries': df_countries,
        'states': df_states,
        'ports': df_ports,
    }
    for name, df in tables.items():
        if not validator.contain_data(df):
            raise ValueError(
                f"Data validation failed: table '{name}' contains no data")