# Slack, in days, allowed on either side of the scheduled immunisation
# date for a visit to still count as good.
immun_buffer = 7

# Filename pattern of the case-export files to pick up in each folder.
case_data_regex = re.compile(r'Cases_\d\d\d.csv')

# Location columns attached to each case row.
location_columns = ['doc_id', 'block_name', 'district_name', 'state_name']

# States included in the analysis.
real_state_list = [
    'Madhya Pradesh',
    'Chhattisgarh',
    'Andhra Pradesh',
    'Bihar',
    'Jharkhand',
    'Rajasthan',
]  # , 'Uttar Pradesh', 'Maharashtra']

# ------------- don't edit below here -----------------------------

# Send log output to the output directory and work from there.
gf.start_logging(output_dir)
os.chdir(output_dir)

# Define immune schedule dates: offset (presumably days — verify against
# downstream use) for each pregnancy immunisation task.
_due_at_zero = ['ANC 1 (immuns)', 'ANC 2 (immuns)']
_due_at_42 = ['ANC 3 (immuns)', 'ANC 4 (immuns)',
              'TT 1 (immuns)', 'TT 2 (immuns)', 'TT Booster (immuns)']
preg_tasks = dict(
    [(task, 0) for task in _due_at_zero] +
    [(task, 42) for task in _due_at_42]
)

# get tasks df
logging.info('Getting task case data')
# NOTE(review): this span is the TAIL of a column list whose opening
# bracket (presumably `cols_to_use = [` or similar) lies before the
# visible chunk — confirm against the full file.
'has_rch', 'rch_id', 'closed', 'owner_id', 'opened_date', 'dob', 'sex', 'caseid' ]

# States included in the analysis.
real_state_list = [ 'Madhya Pradesh', 'Chhattisgarh', 'Andhra Pradesh', 'Bihar', 'Jharkhand', 'Rajasthan' ]

# Practice Use Case on small dataset
#target_dir = (r'C:\Users\theism\Documents\Dimagi\Data\person_phone_aadhar-ap-anantapur2')
#output_dir = (r'C:\Users\theism\Documents\Dimagi\Data\person_phone_aadhar-ap-anantapur2\test')
#case_data_regex = re.compile(r'cases_\d\d\d.csv')

# ------------- don't edit below here -----------------------------

# Start a log file in the output directory.
gen_func.start_logging(output_dir)
logging.info('Starting scripts to analyze aadhar data...')

# combine all csv into one dataframe
case_df = gen_func.csv_files_to_df(target_dir, case_data_regex, case_date_cols, cols_to_use)

# clean case data and start to get age distribution information
output_dict = {}
case_clean_df, output_dict = case_func.clean_case_data(case_df, output_dict)
case_clean_df = case_func.add_age_info(case_clean_df)

# Attach district/location names by joining on the case's owner_id.
location_column_names = ['doc_id', 'district_name']
case_clean_df = gen_func.add_locations(case_clean_df, 'owner_id', location_column_names)

# NOTE(review): the statement below is truncated in this chunk — the
# boolean filter inside .loc[( ... continues past the visible source.
case_clean_df = case_clean_df.loc[(
""" Created on Sat Jan 27 10:50:49 2018 @author: theism """ import os import pandas as pd import gen_func as gf import re import logging data_dir = r'C:\Users\theism\Downloads\[DA] Post Natal Care\[DA] Post Natal Care' data_regex = re.compile(r'Forms_\d\d\d.csv') output_df = pd.DataFrame() output_df = output_df.fillna('') output_name = 'combined_file.csv' file_list = gf.data_file_list(data_dir, data_regex) gf.start_logging(data_dir) for data_file in file_list: # get data logging.info('going through %' % data_file) input_df = pd.read_csv(os.path.join(data_dir, data_file), infer_datetime_format=True, low_memory=False) output_df = pd.concat([output_df, input_df], axis=1) output_df.to_csv(os.path.join(data_dir, output_name)) logging.info('all files combined, output saved to directory')