def ingest2db(df_, fsc, params, run_id, model_name):
    # Add metadata object to DB
    meta = Metadata(run_id=run_id,
                    model=model_name,
                    raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.csv",
                    run_label=f"{model_name} run for {params['shocked_region']} region.",
                    point_resolution_meters=100000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for pp, vv in params.items():
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=pp,
                           parameter_value=vv,
                           parameter_type="string")
        db_session.add(param)
        db_session.commit()

    # Ingest outputs to DB
    feature_name = fsc['outputs'][0]['name']
    feature_description = fsc['outputs'][0]['description']
    df_['datetime'] = datetime(year=2018, month=1, day=1)
    df_['run_id'] = run_id
    df_['model'] = model_name
    df_['feature_name'] = feature_name
    df_['feature_description'] = feature_description
    df_['feature_value'] = df_[feature_name].apply(lambda x: int(x))

    # perform bulk insert of the entire DF
    db_session.bulk_insert_mappings(Output, df_.to_dict(orient="records"))
    db_session.commit()
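# NOTE (illustrative sketch, not from the original source): the ingest functions
# in this file repeatedly build Metadata and Parameters rows and bulk-insert into
# Output via db_session, but those SQLAlchemy models are defined elsewhere. The
# declarative models below are an assumption reconstructed from the columns
# referenced here, included only to make the ingest code easier to follow. (The
# Parameters class used by get_parameters/set_parameters and the training
# snippet further down is a different, unrelated configuration class.)
from sqlalchemy import Column, DateTime, Float, Integer, String
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class Metadata(Base):
    __tablename__ = 'metadata'
    id = Column(Integer, primary_key=True)
    run_id = Column(String)
    model = Column(String)
    run_label = Column(String)
    run_description = Column(String)
    raw_output_link = Column(String)
    point_resolution_meters = Column(Integer)


class Parameters(Base):
    __tablename__ = 'parameters'
    id = Column(Integer, primary_key=True)
    run_id = Column(String)
    model = Column(String)
    parameter_name = Column(String)
    parameter_value = Column(String)
    parameter_type = Column(String)


class Output(Base):
    __tablename__ = 'output'
    id = Column(Integer, primary_key=True)
    run_id = Column(String)
    model = Column(String)
    latitude = Column(Float)
    longitude = Column(Float)
    datetime = Column(DateTime)
    country = Column(String)
    state = Column(String)
    admin1 = Column(String)
    admin2 = Column(String)
    feature_name = Column(String)
    feature_description = Column(String)
    feature_value = Column(Float)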
def ingest_to_db(InRaster, run_id, *, model_name, start, included_months,
                 total_months, params, basin):
    # Add metadata object to DB
    meta = Metadata(run_id=run_id,
                    model=model_name,
                    raw_output_link=f"https://model-service.worldmodelers.com/results/PIHM_results/{run_id}.tif",
                    run_label=f"{model_name} run for {basin} Basin.",
                    point_resolution_meters=200)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    print("Storing parameters...")
    for pp, vv in params.items():
        if pp == 'basin':
            p_type = 'string'
        else:
            p_type = 'float'
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=pp,
                           parameter_value=vv,
                           parameter_type=p_type)
        db_session.add(param)
        db_session.commit()

    # iterate over the bands that should be included (1 per month)
    for month in range(1, included_months + 2):
        date_ = start + relativedelta(months=month - 1)
        date_str = date_.strftime("%m/%d/%Y")
        print(f"Processing {model_name} {date_str}")

        # Convert Raster to GeoPandas
        feature_name = m['outputs'][0]['name']
        feature_description = m['outputs'][0]['description']
        gdf = raster2gpd(InRaster, feature_name, band=month)

        print("Performing spatial merge")
        # Spatial merge on GADM to obtain admin areas
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

        # Iterate over years for each band to ensure that there is continuous
        # annual data
        # Set run fields: datetime, run_id, model
        gdf['datetime'] = date_
        gdf['run_id'] = run_id
        gdf['model'] = model_name
        gdf['feature_description'] = feature_description
        if 'geometry' in gdf:
            del gdf['geometry']
            del gdf['index_right']

        # perform bulk insert of entire geopandas DF
        print(f"Ingesting {date_str} of {model_name} for basin {basin} to database\n")
        db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
        db_session.commit()
def ingest_to_db(InRaster, run_id, *, model_name, params, m):
    # Add metadata object to DB
    meta = Metadata(run_id=run_id,
                    model=model_name,
                    raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.tif",
                    run_label=f"{model_name} run.",
                    point_resolution_meters=1000000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    print("Storing parameters...")
    for pp, vv in params.items():
        if pp == 'year' or pp == 'month':
            p_type = 'integer'
        else:
            p_type = 'string'
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=pp,
                           parameter_value=vv,
                           parameter_type=p_type)
        db_session.add(param)
        db_session.commit()

    band = bands[params['commodity']]

    # Convert Raster to GeoPandas
    feature_name = m['outputs'][0]['name']
    feature_description = m['outputs'][0]['description']
    gdf = raster2gpd(InRaster, feature_name, band=band)

    print(f"GDF size is {gdf.shape[0]} before rounding lat/lon")
    gdf = gdf.drop_duplicates()
    print(f"GDF size is {gdf.shape[0]} after rounding lat/lon")

    print("Performing spatial merge")
    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Iterate over years for each band to ensure that there is continuous
    # annual data
    # Set run fields: datetime, run_id, model
    gdf['datetime'] = datetime(year=params['year'], month=params['month'], day=1)
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    gdf['feature_description'] = feature_description
    if 'geometry' in gdf:
        del gdf['geometry']
        del gdf['index_right']

    # perform bulk insert of entire geopandas DF
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()
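# NOTE (illustrative sketch, not the original implementation): raster2gpd is
# called by several of the ingest functions here but is defined elsewhere. Based
# on how its return value is used (sjoin against GADM admin2, then bulk insert
# of feature_name / feature_value rows), it plausibly reads one raster band with
# rasterio and returns a GeoDataFrame of cell-center points; the nodataval
# default below is an assumption.
import numpy as np
import rasterio
import geopandas as gpd
from shapely.geometry import Point


def raster2gpd(InRaster, feature_name, band=1, nodataval=-9999):
    """Convert one band of a raster into a point GeoDataFrame (sketch)."""
    with rasterio.open(InRaster) as src:
        arr = src.read(band)
        transform = src.transform

    # keep only cells that carry data
    rows, cols = np.where(arr != nodataval)
    # cell-center coordinates for each remaining raster cell
    xs, ys = rasterio.transform.xy(transform, rows, cols)

    gdf = gpd.GeoDataFrame(
        {'longitude': xs,
         'latitude': ys,
         'feature_name': feature_name,
         'feature_value': arr[rows, cols]},
        geometry=[Point(x, y) for x, y in zip(xs, ys)])
    return gdf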
def process_dssat(df, params, dssat, model_name, file):
    """
    Primary function for processing DSSAT
    """
    run_id, model_config, run_obj = gen_run(model_name, params, file)

    # generate temp CSV and push it to S3
    df.to_csv("tmp.csv", index=False)
    time.sleep(1)
    try:
        s3_bucket.upload_file("tmp.csv",
                              run_obj['key'],
                              ExtraArgs={'ACL': 'public-read'})
    except Exception as e:
        print(e)
        print("Retrying file upload...")
        try:
            s3_bucket.upload_file("tmp.csv",
                                  run_obj['key'],
                                  ExtraArgs={'ACL': 'public-read'})
        except Exception:
            pass

    # Add metadata object to DB
    meta = Metadata(run_id=run_id,
                    model=model_name,
                    raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{file}",
                    run_label=df.RUN_NAME.iloc[0],
                    point_resolution_meters=10000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for p_name, p_value in params.items():
        if p_name == 'rainfall':
            p_value = float(p_value)
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=p_name,
                           parameter_value=p_value,
                           parameter_type=param_types[p_name])
        db_session.add(param)
        db_session.commit()

    gdf = gpd.GeoDataFrame(df)
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    if 'geometry' in gdf:
        del gdf['geometry']
        del gdf['index_right']

    return gdf, run_id
def process(df, params, m, model_name, file):
    """
    Primary function for processing DSSAT
    """
    run_id, model_config, run_obj = gen_run(model_name, params, file)

    try:
        s3_bucket.upload_file(file,
                              run_obj['key'],
                              ExtraArgs={'ACL': 'public-read'})
    except Exception as e:
        print(e)
        print("Retrying file upload...")
        try:
            s3_bucket.upload_file(file,
                                  run_obj['key'],
                                  ExtraArgs={'ACL': 'public-read'})
        except Exception:
            pass

    # Add metadata object to DB
    meta = Metadata(run_id=run_id,
                    model=model_name,
                    raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{file}",
                    run_label=f"Run for {model_name}",
                    point_resolution_meters=m.get("point_resolution_meters", 1000))
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for p_name, p_value in params.items():
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=p_name,
                           parameter_value=p_value,
                           parameter_type=get_type(p_value))
        db_session.add(param)
        db_session.commit()

    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
    gdf = gpd.GeoDataFrame(df)
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    if 'geometry' in gdf:
        del gdf['geometry']
        del gdf['index_right']

    return gdf, run_id
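# NOTE (illustrative sketch): get_type, used in process() above, is not defined
# in these snippets. A plausible implementation maps a raw parameter value to
# the parameter_type labels used throughout this file ('integer', 'float',
# 'string'); the exact labels and fallbacks below are assumptions.
def get_type(value):
    """Infer a parameter_type label from a parameter value (sketch)."""
    if isinstance(value, bool):
        # booleans are a subclass of int, so check them first and store as strings
        return 'string'
    if isinstance(value, int):
        return 'integer'
    if isinstance(value, float):
        return 'float'
    return 'string'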
def get_parameters(access_token):
    map_type = get_map_type()
    country_param = get_parameter(api.get_countries, access_token, None,
                                  "countryID", "countryName", "Country")
    state_param = get_parameter(api.get_states, access_token,
                                str(country_param.id),
                                "stateID", "stateName", "State")
    district_param = get_parameter(api.get_districts, access_token,
                                   str(state_param.id),
                                   "districtID", "districtName", "District")
    parameters = Parameters(map_type, district_param)
    parameters = update_parameters_by_map_type(map_type, parameters,
                                               access_token,
                                               str(country_param.id))
    if confirm_parameter_selection(parameters):
        return parameters
    else:
        print("\n\nRe-Enter Parameters")
        return get_parameters(access_token)
def ingest2db(self):
    init_db()

    # Load Admin2 shape from GADM
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f'{self.gadm}/gadm36_2.shp')
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_description=self.features[self._type]['run_description'],
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
                    point_resolution_meters=5000)
    logging.info("Storing metadata...")
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config.items():
        if param_name != 'run_id':
            if param_name == 'year':
                param_type = 'integer'
            elif param_name == 'bbox':
                param_type = 'array'
                param_val = json.dumps(param_val)
            elif param_name == 'dekad':
                param_type = 'integer'
                param_val = int(param_val)
            else:
                param_type = 'string'

            param = Parameters(run_id=self.run_id,
                               model=self.name,
                               parameter_name=param_name,
                               parameter_value=param_val,
                               parameter_type=param_type)
            db_session.add(param)
            db_session.commit()

    # Process tiff file into point data
    logging.info("Processing tiff...")
    InRaster = f"{self.result_path}/{self.result_name}.tiff"
    feature_name = self.features[self._type]['feature_name']
    feature_description = self.features[self._type]['feature_description']
    gdf = raster2gpd(InRaster, feature_name)

    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Set run fields: datetime, run_id, model.
    # First convert dekad of year to a date: a dekad is a 10-day period, so
    # dekad 01 covers days 1-10 of the year (and should yield Jan 1) and
    # dekad 25 ends on the 250th day of the year.
    gdf['datetime'] = datetime(self.year, 1, 1) + timedelta(days=(int(self.dekad) - 1) * 10)
    gdf['run_id'] = self.run_id
    gdf['model'] = self.name
    gdf['feature_description'] = feature_description
    del gdf['geometry']
    del gdf['index_right']

    # perform bulk insert of entire geopandas DF
    logging.info("Storing point data output...")
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()
def set_parameters(paei):
    prm = Parameters()
    p = paei[0]
    a = paei[1]
    e = paei[2]
    i = paei[3]

    if p >= PAEIStyle.PAEI_START:
        prm.set_priority = 1
        prm.max_count_tasks = 5
        prm.send_message_fast_realize = 1
    if a >= PAEIStyle.PAEI_START:
        prm.need_more_description = 1
    if e >= PAEIStyle.PAEI_START:
        prm.final_date_count = 2
        prm.need_report = 1
        prm.send_message_deadline = 1
        prm.need_think = 1
        prm.date_last_change_task = 1
    if i >= PAEIStyle.PAEI_START:
        prm.do_not_like_task = 1

    prm.save()
    return prm
                                                     random_state=42, shuffle=True)
del data_X, data_Y, data_Y_encoded

# Training Parameters
drop_rate = 0.1
w_regularizer = regularizers.l2(0.01)
batch_size = 12
epochs = 20
embedding_size = 100
width = X_train.shape[1]
height = X_train.shape[2]
depth = X_train.shape[3]
channels = 1  # Black and white
validation = False
val_size = 0.2

params_dict = {'w_regularizer': w_regularizer,
               'batch_size': batch_size,
               'drop_rate': drop_rate,
               'epochs': epochs,
               'image_shape': (width, height, depth, channels),
               'validation': validation,
               'val_size': val_size}
params = Parameters(params_dict)

# Create the model
extractor = FeatureExtractor3D(params, embedding_size)
extractor.model.summary()

# Train with data
extractor.train_tiplet_loss(X_train, Y_train, plot=True)
def ingest2db(self):
    init_db()

    # Load Admin2 shape from GADM
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f"{self.config['GADM']['GADM_PATH']}/gadm36_2.shp")
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    logging.info("Storing metadata...")
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_description=self.descriptions['management_practice'][self.model_config['management_practice']],
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
                    # 5 arc minutes (~10km)
                    point_resolution_meters=10000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config.items():
        if param_name == 'run_id':
            pass
        else:
            param = Parameters(run_id=self.run_id,
                               model=self.name,
                               parameter_name=param_name,
                               parameter_value=param_val,
                               parameter_type=self.descriptions['parameters'][param_name])
            db_session.add(param)
            db_session.commit()

    # Process CSV and normalize it
    logging.info("Processing points...")

    # get result file path
    if self.model_config["management_practice"] == "combined":
        # combined CSV
        path = f"{self.result_path}/out/eth_docker/test/pp.csv"
    else:
        # individual management practices
        m = self.model_config["management_practice"]
        path = f"{self.result_path}/out/eth_docker/test/{m}/pp_{m}.csv"

    df = pd.read_csv(path, index_col=False)
    df['latitude'] = df['LATITUDE']
    df['longitude'] = df['LONGITUDE']
    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
    df['year'] = df['HDAT'].apply(lambda x: int(str(x)[:4]))
    df['days'] = df['HDAT'].apply(lambda x: int(str(x)[4:]))
    df['datetime'] = df.apply(lambda x: datetime(x.year, 1, 1) + timedelta(x.days - 1), axis=1)
    df['run_id'] = self.run_id
    df['model'] = self.name
    df['Production'] = df['HWAH'] * df['HARVEST_AREA']

    # for combined runs only we need to convert the run name to an encoded
    # float so that it can go into the database
    if 'RUN_NAME' in df:
        df['management_practice'] = df['RUN_NAME'].apply(lambda x: self.descriptions['encoding'][x])

    gdf = gpd.GeoDataFrame(df)

    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    base_cols = ['run_id', 'model', 'latitude', 'longitude',
                 'datetime', 'admin1', 'admin2', 'state', 'country']
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    # Need to iterate over features to generate one GDF per feature,
    # then upload the GDF per feature to ensure that rows are added for each
    # feature
    for feature_name, feature_description in self.descriptions['features'].items():
        # specific handling for the "combined" file
        if feature_name == 'management_practice':
            if self.model_config["management_practice"] != "combined":
                # if this is not a combined file, skip this feature_name and
                # move on to the next one in the loop
                continue

        cols_to_select = base_cols + [feature_name]
        gdf_ = gdf[cols_to_select]  # generate new interim GDF
        gdf_['feature_name'] = feature_name
        gdf_['feature_description'] = feature_description
        gdf_['feature_value'] = gdf_[feature_name]
        gdf_ = gdf_[base_cols + feature_cols]

        # perform bulk insert of entire geopandas DF
        logging.info(f"Storing point data output for {feature_name}...")
        db_session.bulk_insert_mappings(Output, gdf_.to_dict(orient="records"))
        db_session.commit()
run_id, model_config = gen_run(year)

# Add metadata object to DB
meta = Metadata(run_id=run_id,
                model=model_config['name'],
                raw_output_link=f"https://world-modelers.s3.amazonaws.com/results/world_population_africa/{run_name}",
                run_label="World Population Africa",
                point_resolution_meters=1000)
db_session.add(meta)
db_session.commit()

# Add parameters to DB
for name, val in params.items():
    param = Parameters(run_id=run_id,
                       model=model_config['name'],
                       parameter_name=name,
                       parameter_value=val,
                       parameter_type='string')
    db_session.add(param)
    db_session.commit()

# Convert Raster to GeoPandas
InRaster = f"Africa_1km_Population/{run_name}"
feature_name = 'population per 1km'
gdf = raster2gpd(InRaster, feature_name)

# Subset the points by bounding box for Sudan and Ethiopia
sudan_gdf = gdf.cx[23.73:35.88, 2.92:13.155]
ethiopia_gdf = gdf.cx[35.88:48.22, 2.92:15.2]
del gdf

for gdf in [sudan_gdf, ethiopia_gdf]:
def ingest2db(year, df, filename):
    model_name = "flood_index_model"
    run_id = gen_run_id(year)
    init_db()

    # Load Admin2 shape from GADM
    print("Loading GADM shapes...")
    admin2 = gpd.read_file(f"{config['GADM']['GADM_PATH']}/gadm36_2.shp")
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    print("Storing metadata...")
    meta = Metadata(run_id=run_id,
                    model=model_name,
                    run_description=f"{model_name} run for {year}",
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/flood_index_model/{filename}.nc',
                    # 0.1 degrees (~10km)
                    point_resolution_meters=10000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    print("Storing parameters...")
    param = Parameters(run_id=run_id,
                       model=model_name,
                       parameter_name="year",
                       parameter_value=year,
                       parameter_type="integer")
    db_session.add(param)
    db_session.commit()

    # Process CSV and normalize it
    print("Processing points...")
    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
    df['run_id'] = run_id
    df['model'] = model_name

    gdf = gpd.GeoDataFrame(df)

    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    base_cols = ['run_id', 'model', 'latitude', 'longitude',
                 'datetime', 'admin1', 'admin2', 'state', 'country']
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    # Need to iterate over features to generate one GDF per feature,
    # then upload the GDF per feature to ensure that rows are added for each
    # feature
    for feature_name, feature_description in features.items():
        cols_to_select = base_cols + [feature_name]
        gdf_ = gdf[cols_to_select]  # generate new interim GDF
        gdf_['feature_name'] = feature_name
        gdf_['feature_description'] = feature_description
        gdf_['feature_value'] = gdf_[feature_name]
        gdf_ = gdf_[base_cols + feature_cols]

        # perform bulk insert of entire geopandas DF
        print(f"Storing point data output for {feature_name}...")
        db_session.bulk_insert_mappings(Output, gdf_.to_dict(orient="records"))
        db_session.commit()
def ingest2db(self):
    init_db()

    # Add metadata object to DB
    desc = f"{self.name} run for {self.scenario_type} scenario with start year {self.start_year} and end year {self.end_year}"
    if self.scenario_type == 'production_failure_scenario':
        desc += f". Shock severity was set to {self.shock_severity} and the shocked region was {self.shocked_region}."

    logging.info("Storing metadata...")
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_label=f"{self.name}: {self.scenario_type}",
                    run_description=desc,
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}')
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config.items():
        if param_name == 'run_id':
            pass
        else:
            param = Parameters(run_id=self.run_id,
                               model=self.name,
                               parameter_name=param_name,
                               parameter_value=param_val,
                               parameter_type=self.descriptions['parameters'][param_name])
            db_session.add(param)
            db_session.commit()

    # Process CSV and normalize it
    logging.info("Processing timeseries...")
    df = pd.read_csv(self.output)
    df = df.transpose().reset_index()
    df = df.rename(columns=dict(zip(list(df.columns), list(df.iloc[0]))))[1:]
    df = df.rename(columns={'Unnamed: 0': 'Date'})
    df['datetime'] = df.Date.apply(lambda x: datetime(year=int(x.split('.')[1]),
                                                      month=int(x.split('.')[0]),
                                                      day=1))
    del df['Date']
    df['run_id'] = self.run_id
    df['model'] = self.name

    base_cols = ['run_id', 'model', 'datetime']
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    for feature_name, feature_description in self.descriptions['features'].items():
        # some multi_twist outputs will not be present depending on the
        # scenario type, so first check whether the feature is in the output
        if feature_name in df:
            logging.info(f"Storing point data output for {feature_name}...")
            cols_to_select = base_cols + [feature_name]
            df_ = df[cols_to_select]  # generate new interim DF
            df_['feature_name'] = feature_name
            df_['feature_description'] = feature_description.split('.')[0]
            df_['feature_value'] = df_[feature_name]
            df_ = df_[base_cols + feature_cols]

            # perform bulk insert of entire DF
            db_session.bulk_insert_mappings(Output, df_.to_dict(orient="records"))
            db_session.commit()
def ingest2db(self):
    init_db()

    # Load Admin2 shape from GADM
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f'{self.gadm}/gadm36_2.shp')
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_description=self.run_description,
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
                    point_resolution_meters=1000)
    logging.info("Storing metadata...")
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config['config'].items():
        if param_name == 'run_id':
            pass
        else:
            if param_name == 'year':
                param_type = 'integer'
            elif param_name == 'month':
                param_type = 'integer'
            else:
                param_type = 'string'

            param = Parameters(run_id=self.run_id,
                               model=self.name,
                               parameter_name=param_name,
                               parameter_value=param_val,
                               parameter_type=param_type)
            db_session.add(param)
            db_session.commit()

    # Process tiff file into point data
    logging.info("Processing tiff...")
    InRaster = f"{self.install_path}/output/{self.key}"
    feature_name = self.feature_name
    feature_description = self.feature_description
    gdf = raster2gpd(InRaster, feature_name, band=self.band)

    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Set run fields: datetime, run_id, model
    gdf['datetime'] = self.start_time
    gdf['run_id'] = self.run_id
    gdf['model'] = self.name
    gdf['feature_description'] = feature_description
    del gdf['geometry']
    del gdf['index_right']

    # perform bulk insert of entire geopandas DF
    logging.info("Storing point data output...")
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()
def process_herbage(herbage, scen, scenarios, grange):
    """
    Primary function for processing grange
    """
    # subset for the correct scenario
    herbage = herbage[herbage['scenario'] == scen]
    herbage['geometry'] = herbage.apply(lambda x: Point(x.longitude, x.latitude), axis=1)

    # obtain scenario parameters
    params = scenarios[scenarios['scenario'] == scen].iloc[0].to_dict()
    params = format_params(params)

    run_id, model_config, run_obj = gen_run(model_name, params)

    # generate temp CSV and push it to S3
    herbage.to_csv("tmp_g.csv", index=False)
    time.sleep(1)
    try:
        s3_bucket.upload_file("tmp_g.csv",
                              run_obj['key'],
                              ExtraArgs={'ACL': 'public-read'})
    except Exception as e:
        print(e)
        print("Retrying file upload...")
        try:
            s3_bucket.upload_file("tmp_g.csv",
                                  run_obj['key'],
                                  ExtraArgs={'ACL': 'public-read'})
        except Exception:
            pass

    # Add metadata object to DB
    meta = Metadata(run_id=run_id,
                    model=model_name,
                    raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.csv",
                    run_label=herbage.description.iloc[0],
                    point_resolution_meters=25000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for param in grange['parameters']:
        # ensure that no null parameters are stored
        if not pd.isna(params[param['name']]):
            if param['metadata']['type'] == 'ChoiceParameter':
                p_type = 'string'
            elif param['name'] == 'fertilizer' or param['name'] == 'sowing_window_shift':
                p_type = 'int'
            else:
                p_type = 'float'
            p_value = params[param['name']]

            # use a separate name for the DB row to avoid shadowing the loop variable
            param_row = Parameters(run_id=run_id,
                                   model=model_name,
                                   parameter_name=param['name'],
                                   parameter_value=p_value,
                                   parameter_type=p_type)
            db_session.add(param_row)
            db_session.commit()

    gdf = gpd.GeoDataFrame(herbage)
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    if 'geometry' in gdf:
        del gdf['geometry']
        del gdf['index_right']

    return gdf, run_id