else: model_suffix = time.strftime("%d_%m_%Y") directory = 'model_' + model_suffix if not os.path.exists(directory): os.makedirs(directory) ########################## ### Load the data ########################## if args.input_data: print('Loading data from {0}'.format(args.input_data)) df = pd.read_csv(args.input_data, parse_dates='Full_date', low_memory=False) df['Full_date'] = rd.date_lookup(df['Full_date']) else: print('Reading and loading data. Saving to {}'.format(directory + '/all_data.csv')) df = rd.read_data(read_weather_station=False, read_water_sensor=False, add_each_beach_data=True) df.to_csv(directory + '/all_data.csv', index=False) ############################### ### Prepare Predictors ############################### if args.input_processed: print('Using Preprocessed data from {0} and {1}'.format( args.input_processed, args.input_meta)) datafilename = args.input_processed
if __name__ == '__main__':
    # Command-line argument parsing.
    parser = argparse.ArgumentParser(description='Process beach data.')
    parser.add_argument('-i', '--input', type=str,
                        metavar='input_filename', help='input CSV filename')
    parser.add_argument('-v', '--verbose', action='count', default=0)
    args = parser.parse_args()

    # Load the data.
    if args.input:
        # BUG FIX: parse_dates must be a list of column names; pandas
        # rejects a bare string with a TypeError.
        df = pd.read_csv(args.input, parse_dates=['Full_date'])
        df['Full_date'] = rd.date_lookup(df['Full_date'])
    else:
        df = rd.read_data(read_weather_station=False, read_water_sensor=False)

    # Extract the EPA (technically USGS) model performance from the data.
    epa_model_df = df[['Drek_Prediction', 'Escherichia.coli']].dropna()

    # Prepare the data.
    predictors, meta_info = prepare_data(df)
    timestamps = meta_info['Full_date']
    # Binary target: exceedance of 235 — presumably the CFU/100 mL beach
    # advisory threshold; TODO confirm against project documentation.
    classes = meta_info['Escherichia.coli'] > 235

    print('Using the following columns as predictors:')
    for c in predictors.columns:
        print('\t' + str(c))
if args.suffix:
    # Explicit suffix wins; otherwise tag with today's date (DD_MM_YYYY).
    model_suffix = args.suffix
else:
    model_suffix = time.strftime("%d_%m_%Y")

directory = 'model_' + model_suffix
if not os.path.exists(directory):
    os.makedirs(directory)

##########################
### Load the data
##########################

if args.input_data:
    # Re-use a previously saved CSV instead of re-reading raw sources.
    print('Loading data from {0}'.format(args.input_data))
    # BUG FIX: parse_dates must be a list of column names; pandas rejects a
    # bare string with a TypeError.
    df = pd.read_csv(args.input_data, parse_dates=['Full_date'],
                     low_memory=False)
    df['Full_date'] = rd.date_lookup(df['Full_date'])
else:
    # Full read from the raw sources; cache the result for later runs.
    print('Reading and loading data. Saving to {}'.format(
        os.path.join(directory, 'all_data.csv')))
    df = rd.read_data(read_weather_station=False, read_water_sensor=False,
                      add_each_beach_data=True)
    df.to_csv(os.path.join(directory, 'all_data.csv'), index=False)

###############################
### Prepare Predictors
###############################

if args.input_processed:
    # Skip preprocessing entirely when a processed matrix + metadata were given.
    print('Using Preprocessed data from {0} and {1}'.format(
        args.input_processed, args.input_meta))
    datafilename = args.input_processed
    metadatafilename = args.input_meta
    data_processed = pd.read_csv(datafilename)
    # BUG FIX: same parse_dates list requirement as above.
    meta_info = pd.read_csv(metadatafilename, parse_dates=['Full_date'])
    meta_info['Full_date'] = rd.date_lookup(meta_info['Full_date'])
return predictors, meta_info if __name__ == '__main__': # Command Line Argument parsing parser = argparse.ArgumentParser(description='Process beach data.') parser.add_argument('-i', '--input', type=str, metavar='input_filename', help='input CSV filename') parser.add_argument('-v', '--verbose', action='count', default=0) args = parser.parse_args() # Load the data if args.input: df = pd.read_csv(args.input, parse_dates='Full_date') df['Full_date'] = rd.date_lookup(df['Full_date']) else: df = rd.read_data(read_weather_station=False, read_water_sensor=False) # Extract the EPA (technically USGS) model performance from the data epa_model_df = df[['Drek_Prediction', 'Escherichia.coli']].dropna() # Prepare the data predictors, meta_info = prepare_data(df) timestamps = meta_info['Full_date'] classes = meta_info['Escherichia.coli'] > 235 print('Using the following columns as predictors:') for c in predictors.columns: print('\t' + str(c))