def test_for_week_ending_column_date_format(self):
     """Every 'Week ending' value must be a non-empty ``%Y-%m-%d`` string."""
     expected_format = "%Y-%m-%d"
     sbc_csv_paths = ['unittests_resources/file_with_sbc_report.csv']

     loaded_df = clean_and_load_all_sbc_data_to_df_from(sbc_csv_paths)

     for week_ending in loaded_df['Week ending']:
         self.assertNotEqual(week_ending, "")
         # No explicit assertion is needed for the format: strptime raises
         # when the value does not match, which fails the test.
         datetime.strptime(week_ending, expected_format)
 def test_for_loading_csv_that_not_contains_soybean_condition_report(self):
     """Loading a CSV without Soybean Condition reports gives an empty frame."""
     failure_message = (
         'returns empty data frame if csv file not contains Soybean Condition reports'
     )
     csv_paths_without_sbc = ['unittests_resources/file_without_sbc_report.csv']

     loaded_df = clean_and_load_all_sbc_data_to_df_from(csv_paths_without_sbc)

     self.assertTrue(loaded_df.empty, failure_message)
Пример #3
0
 def test_for_transformation_for_columns_order_in_df(self):
     """The transformed frame lists its columns in the expected order."""
     expected_order = ('Week ending', 'State', 'Condition', 'Percent')
     sbc_csv_paths = ['unittests_resources/file_with_sbc_report.csv']

     loaded_df = clean_and_load_all_sbc_data_to_df_from(sbc_csv_paths)
     result_df = transform_soybean_condition_report(loaded_df)

     # Each expected column must sit at the position it has in expected_order.
     for position, column_name in enumerate(expected_order):
         self.assertEqual(result_df.columns.get_loc(column_name), position)
Пример #4
0
 def test_for_transformation_for_state_col_clarity(self):
     """Check that the loaded 'State' column contains only clean values.

     The column must have no missing entries, no value may contain a
     double-quote character, and no value may equal "Previous week" or
     "Previous year".
     """
     # arrange
     paths_list_that_contains_sbc_data = [
         'unittests_resources/file_with_sbc_report.csv'
     ]
     # act
     base_df = clean_and_load_all_sbc_data_to_df_from(
         paths_list_that_contains_sbc_data)
     state_cols = base_df['State']
     # assert
     # Missing values are checked first so that a null entry is reported as
     # such rather than surfacing through the per-value checks below.
     self.assertEqual(state_cols.isnull().sum(), 0)
     for c in state_cols:
         # The quote check is done per value: `in` applied to a pandas Series
         # tests the index labels, not the stored values, and the previous
         # assertIsNot(bool, str) form could never fail.
         self.assertNotIn('"', str(c),
                          'there is no " character in state column')
         # assertNotEqual replaces the deprecated assertNotEquals alias.
         self.assertNotEqual(c, "Previous week")
         self.assertNotEqual(c, "Previous year")
Пример #5
0
 def test_for_transformation_of_not_empty_df(self):
     """Transforming a loaded Soybean Condition report gives a 4-column frame.

     The result must be non-empty and contain exactly the columns
     'Week ending', 'State', 'Condition' and 'Percent'.
     """
     # arrange
     paths_list_that_contains_sbc_data = [
         'unittests_resources/file_with_sbc_report.csv'
     ]
     # act
     transformed_df = transform_soybean_condition_report(
         clean_and_load_all_sbc_data_to_df_from(
             paths_list_that_contains_sbc_data))
     # assert
     # assertFalse actually checks emptiness; the previous assertIsNot
     # compared the bool with the message string by identity and so
     # always passed.
     self.assertFalse(
         transformed_df.empty,
         'returns not empty data frame if csv file contains Soybean Condition reports'
     )
     self.assertEqual(len(transformed_df.columns), 4)
     for column_name in ('Week ending', 'State', 'Condition', 'Percent'):
         # assertIn names the missing column in the failure output.
         self.assertIn(column_name, transformed_df.columns)
 def test_for_loading_csv_that_contains_soybean_condition_report(self):
     """Loading a CSV with Soybean Condition reports gives a 7-column frame.

     The result must be non-empty and contain exactly the columns
     'Week ending', 'State', 'Very poor', 'Poor', 'Fair', 'Good' and
     'Excellent'.
     """
     # arrange
     paths_list_that_contains_sbc_data = [
         'unittests_resources/file_with_sbc_report.csv'
     ]
     # act
     base_df = clean_and_load_all_sbc_data_to_df_from(
         paths_list_that_contains_sbc_data)
     # assert
     # assertFalse actually checks emptiness; the previous assertIsNot
     # compared the bool with the message string by identity and so
     # always passed.
     self.assertFalse(
         base_df.empty,
         'returns not empty data frame if csv file contains Soybean Condition reports'
     )
     self.assertEqual(len(base_df.columns), 7)
     expected_columns = (
         'Week ending', 'State',
         'Very poor', 'Poor', 'Fair', 'Good', 'Excellent',
     )
     for column_name in expected_columns:
         # assertIn names the missing column in the failure output.
         self.assertIn(column_name, base_df.columns)
Пример #7
0
def run(cfg):
    """Download, load, transform and save the 2016 soybean condition reports.

    Reads ``cfg.RAW_DATA_MAIN_PATH`` (where downloaded files are looked up)
    and ``cfg.COOKED_DATA_MAIN_PATH`` (where the resulting CSV is written),
    then prints the saved location and the transformed data frame.
    """
    year = 2016
    csv_name = 'soybean_condition_{}.csv'.format(year)
    csv_path = cfg.COOKED_DATA_MAIN_PATH + '/' + csv_name

    # Step 1: make sure the raw-data directory exists, then download.
    _create_directories_if_missing(cfg.RAW_DATA_MAIN_PATH)
    download_all_reports_for(cfg, year)

    # Step 2: locate the downloaded files and load them into one frame.
    raw_file_paths = get_downloaded_files_paths_in(cfg.RAW_DATA_MAIN_PATH)
    loaded_df = clean_and_load_all_sbc_data_to_df_from(raw_file_paths)

    # Step 3: transform the loaded frame.
    result_df = transform_soybean_condition_report(loaded_df)

    # Step 4: make sure the output directory exists, then write the CSV.
    _create_directories_if_missing(cfg.COOKED_DATA_MAIN_PATH)
    result_df.to_csv(csv_path, index=False)

    print('saved data frame as csv to ' + csv_path)
    print(result_df)