def get_report_data(prefix="reports/I", suffix=".csv"):
    """Load every matching report CSV and stack them into one DataFrame.

    Keys come from ``helpers.get_matching_s3_keys(prefix=..., suffix=...)``;
    each one is fetched with ``helpers.get_object_s3`` and parsed as a
    latin-1 encoded CSV.

    Args:
        prefix: Key prefix forwarded to ``helpers.get_matching_s3_keys``.
        suffix: Key suffix forwarded to ``helpers.get_matching_s3_keys``.

    Returns:
        A DataFrame holding the rows of all files, re-indexed from 0.
        An empty DataFrame when no key matched.

    Raises:
        Exception: Whatever fetching or parsing raised; the error is printed
            first and then propagated unchanged.
    """
    objects = list(helpers.get_matching_s3_keys(prefix=prefix, suffix=suffix))
    if not objects:
        # pd.concat() raises ValueError on an empty list, so handle the
        # "nothing to load" case explicitly.
        return pd.DataFrame()

    try:
        frames = [
            pd.read_csv(helpers.get_object_s3(obj), encoding='latin-1')
            for obj in objects
        ]
        return pd.concat(frames, ignore_index=True)
    except Exception as e:
        # Previously the error was printed and execution fell through to a
        # `return` of an unassigned variable, which masked the real failure
        # behind an UnboundLocalError. Keep the print, surface the real cause.
        print(e)
        raise
def get_health_data_file():
    """Read the health schedule CSV named in the local secrets file.

    The object name is looked up under ``health_schedule`` -> ``name`` in
    ``./gma_secrets.json``, fetched through ``helpers.get_object_s3`` and
    parsed as CSV.

    Returns:
        The parsed DataFrame with every row containing a missing value
        removed.
    """
    with open('./gma_secrets.json') as secrets_file:
        secrets = json.load(secrets_file)

    schedule_key = secrets['health_schedule']['name']
    schedule = pd.read_csv(helpers.get_object_s3(schedule_key))
    return schedule.dropna()
def check_data_from_db(file_name_s3_source, file_name_s3_target, var):
    """Join a source CSV with the matching ``enrolment_record`` rows.

    Args:
        file_name_s3_source: Name passed to ``helpers.get_object_s3`` to
            fetch the source CSV.
        file_name_s3_target: Value matched against the ``file_name_s3``
            column of ``enrolment_record``.
        var: Column expression selected alongside ``id`` from the table.

    Returns:
        A tuple ``(merged, db_rows, source_rows)``: the inner join of the
        source CSV and the database rows on ``id`` (with a ``_merge``
        indicator column), the database rows, and the source CSV rows.
    """
    connection = helpers.db_engine()
    source_rows = pd.read_csv(helpers.get_object_s3(file_name_s3_source))

    # NOTE(review): the statement is assembled by string interpolation, so
    # both arguments must come from trusted callers. Consider bound
    # parameters for the file name once the driver's paramstyle is confirmed.
    query = "SELECT id, {} FROM enrolment_record where file_name_s3 = '{}'".format(
        var, file_name_s3_target
    )
    db_rows = pd.read_sql(query, con=connection)

    merged = source_rows.merge(db_rows, how='inner', on=['id'], indicator=True)
    return merged, db_rows, source_rows
def check_data_from_s3(file_name_s3_source, file_name_s3_target):
    """Report rows whose tracked fields differ between two CSV files.

    Both files are fetched with ``helpers.get_object_s3``, inner-joined on
    ``id``, and every tracked variable is compared between the source
    (``_x`` suffix) and target (``_y`` suffix) side of the join.

    Args:
        file_name_s3_source: Name of the source CSV object.
        file_name_s3_target: Name of the target CSV object.

    Returns:
        A DataFrame containing only the joined rows with at least one
        mismatch. Besides the merged columns and the ``_merge`` indicator it
        carries one ``<var>_check_failed`` 0/1 column per tracked variable,
        a ``check`` column with the number of failed variables, and the two
        file names in ``file_name_source`` / ``file_name_target``.
    """
    df_source = pd.read_csv(helpers.get_object_s3(file_name_s3_source))
    df_target = pd.read_csv(helpers.get_object_s3(file_name_s3_target))

    variables_list = [
        'phone',
        'jc_status',
        'time_pref',
        'time_pref_label',
        'breastfeeding',
        'pregnant',
        'children_under6',
        'teenage_girls',
        'nocategory',
    ]

    df_check = pd.merge(df_source, df_target, how='inner', on=['id'], indicator=True)

    for var in variables_list:
        source_col = df_check[var + '_x']
        target_col = df_check[var + '_y']
        # NaN != NaN evaluates to True, so a value missing on BOTH sides
        # would otherwise be reported as a mismatch even though the files
        # agree. Only flag real differences.
        both_missing = source_col.isna() & target_col.isna()
        mismatch = (source_col != target_col) & ~both_missing
        df_check[var + '_check_failed'] = mismatch.astype(int)

    check_columns = [var + '_check_failed' for var in variables_list]
    df_check['check'] = df_check[check_columns].sum(axis=1)

    # Take an explicit copy of the filtered rows so the column assignments
    # below do not trigger SettingWithCopyWarning.
    df_check = df_check.loc[df_check['check'] != 0].copy()
    df_check['file_name_source'] = file_name_s3_source
    df_check['file_name_target'] = file_name_s3_target
    return df_check
def get_script_data_file(script_data_file):
    """Fetch the named script data object and parse it as CSV.

    Args:
        script_data_file: Name passed to ``helpers.get_object_s3``.

    Returns:
        The parsed DataFrame.
    """
    script_object = helpers.get_object_s3(script_data_file)
    return pd.read_csv(script_object)
def get_camp_data_file(camp_data_file):
    """Fetch the named camp data object and parse it as CSV.

    Args:
        camp_data_file: Name passed to ``helpers.get_object_s3``.

    Returns:
        The parsed DataFrame.
    """
    camp_object = helpers.get_object_s3(camp_data_file)
    return pd.read_csv(camp_object)