def get_df_solution(solution_file, var_list_mps, con_list_mps):
    '''
    Create dataframes of marginals and levels of variables and constraints of interest.
    Note that all strings are lowercased because GAMS is case insensitive.

    Args:
        solution_file (string): Full path to GAMS gdx solution file.
        var_list_mps (list of strings): List of lowercased variable names that are of interest.
        con_list_mps (list of strings): List of lowercased constraint names that are of interest.

    Returns:
        dict of two pandas dataframes, one for variables ('vars') and one for constraints ('cons').
        Columns in the 'vars' dataframe:
            var_name (string): Name of variable.
            var_set (string): Period-separated sets of the variable.
            var_level (float): Level of the variable in the solution.
            var_marginal (float): Marginal of the variable in the solution.
        Columns in the 'cons' dataframe:
            con_name (string): Constraint name.
            con_set (string): Period-separated sets of the constraint.
            con_level (float): Level of the constraint in the solution.
            con_marginal (float): Marginal of the constraint in the solution.
    '''
    start = datetime.now()
    dfs = gdxpds.to_dataframes(solution_file)
    print('solution read: ' + str(datetime.now() - start))
    start = datetime.now()
    dfs = {k.lower(): v for k, v in dfs.items()}
    df_vars = get_df_symbols(dfs, var_list_mps)
    df_vars = df_vars.rename(columns={'Level': 'var_level', 'Marginal': 'var_marginal',
                                      'sym_name': 'var_name', 'sym_set': 'var_set'})
    df_cons = get_df_symbols(dfs, con_list_mps)
    df_cons = df_cons.rename(columns={'Level': 'con_level', 'Marginal': 'con_marginal',
                                      'sym_name': 'con_name', 'sym_set': 'con_set'})
    print('solution reformatted: ' + str(datetime.now() - start))
    return {'vars': df_vars, 'cons': df_cons}
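# Not part of the original source: a minimal usage sketch for get_df_solution above, assuming
# gdxpds is installed, the helper get_df_symbols is defined in the same module, and
# 'solution.gdx' plus the symbol names 'gen', 'cap', and 'balance' are hypothetical placeholders.
from datetime import datetime

import gdxpds

solution = get_df_solution('solution.gdx',
                           var_list_mps=['gen', 'cap'],
                           con_list_mps=['balance'])
print(solution['vars'].head())
print(solution['cons'].head())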
def convert_gdx_to_csv(in_gdx, out_dir, gams_dir=None, wide=False, frmt=None):
    # check inputs
    if not os.path.exists(os.path.dirname(out_dir)):
        raise RuntimeError(
            "Parent directory of output directory '{}' does not exist.".format(out_dir))
    # convert to pandas.DataFrames
    dataframes = gdxpds.to_dataframes(in_gdx, gams_dir)
    logger.debug('Have dataframes')
    # write to files
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    for symbol_name, df in dataframes.items():
        csv_path = os.path.join(out_dir, symbol_name + ".csv")
        if os.path.exists(csv_path):
            logger.info("Overwriting '{}'".format(csv_path))
        if wide:
            idx = list(df.columns[:-1])
            logger.info('Setting index {}'.format(idx))
            df = df.set_index(idx).unstack()
            if isinstance(df, pd.DataFrame) and df.columns.nlevels > 1:
                df.columns = df.columns.droplevel(0)
            while df.index.nlevels > 1:
                df.index = df.index.droplevel(0)
        if frmt and symbol_name in frmt:
            logger.info("Format symbol {}".format(symbol_name))
            frmt_symbol = frmt[symbol_name]
            if 'columns' in frmt_symbol:
                columns = frmt_symbol['columns']
                for c in columns:
                    if c not in df:
                        df[c] = 0
                df = df[columns]
            if 'index_name' in frmt_symbol:
                df.index.name = frmt_symbol['index_name']
            if 'datetime' in frmt_symbol:
                df.insert(0, 'date', pd.to_datetime(df['datetime'], unit='s'))
            if 'sort' in frmt_symbol:
                df = df.sort_values(by=frmt_symbol['sort'])
        df.to_csv(csv_path)
def convert_gdx_to_csv(in_gdx, out_dir, gams_dir=None):
    # check inputs
    if not os.path.exists(os.path.dirname(out_dir)):
        raise RuntimeError(
            "Parent directory of output directory '{}' does not exist.".format(out_dir))
    # convert to pandas.DataFrames
    dataframes = gdxpds.to_dataframes(in_gdx, gams_dir)
    # write to files
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    for symbol_name, df in dataframes.items():
        csv_path = os.path.join(out_dir, symbol_name + ".csv")
        if os.path.exists(csv_path):
            logger.info("Overwriting '{}'".format(csv_path))
        df.to_csv(csv_path, na_rep='NaN', index=False)
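# Not part of the original source: a sketch of driving convert_gdx_to_csv above, assuming the
# module-level names it relies on (os, pandas, gdxpds, logger) exist and that 'results.gdx'
# and 'csv_output' are hypothetical paths. One CSV is written per symbol in the GDX file.
import logging
import os

import gdxpds
import pandas as pd

logger = logging.getLogger(__name__)

convert_gdx_to_csv('results.gdx', 'csv_output')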
    't09': '2023', 't10': '2024', 't11': '2025', 't12': '2026',
    't13': '2027', 't14': '2028', 't15': '2029', 't16': '2030'
}
costcalclist = []
withClist = []
withoutClist = []
for filename, scenario in zip(files, scenarios):
    _dict = gdxpds.to_dataframes(filename)
    # costcalcs = _df['RWcostcalcs']
    # costcalcs.columns = ['trun', 'item', 'c', 'value']
    withC = _dict['RWnet_carbonrecycle']
    withoutC = _dict['RWnetex']
    withC.columns = ['trun', 'c', 'value']
    withoutC.columns = ['trun', 'c', 'value']
    withC['scenario'] = scenario
    withoutC['scenario'] = scenario
    # costcalcs = costcalcs[costcalcs['item'] == 'Total System']
    withC = withC.replace(years)
    withClist.append(withC)
    withoutC = withoutC.replace(years)
def fit(self, X, y):
    class Boxes():
        def __init__(self, LE, x, vertices=0):
            self.LE = LE
            self.x = x
            self.vertices = vertices

    # ---------------------------------------------------------------------
    # This function calculates the coordinates of the vertices of the
    # hyperboxes. This is useful for predictions.
    def define_vertices(LE, x):
        number_of_boxes = np.unique(LE['i'])
        vertices = {}
        for i in number_of_boxes:
            lower_limits = list(x[x['i'] == i]['Level'] - (LE[LE['i'] == i]['Level'] / 2))
            upper_limits = list(x[x['i'] == i]['Level'] + (LE[LE['i'] == i]['Level'] / 2))
            vertices.update({i: ([lower_limits], [upper_limits])})
        return vertices
    # ---------------------------------------------------------------------

    # ---------------------------------------------------------------------
    # This function generates the .gdx file with all the input data
    def generate_gdx(dataset):
        # The number of predictors in the set
        number_predictors = dataset.shape[1] - 1
        # The names of the attributes
        attribute_names = dataset.columns[0:number_predictors]

        # --------------------Create the set of samples--------------------
        s_val = list(map(str, dataset.index))
        s_val = ["s" + x for x in s_val]
        # -----------------------------------------------------------------

        # --------------Create the appropriate format for gdx--------------
        s_val = s_val * number_predictors
        s_val = pd.DataFrame(s_val)
        values = dataset.iloc[:, 0:number_predictors]
        values = values.unstack()
        values = pd.DataFrame(list(values))
        all_attr = pd.DataFrame(np.repeat(attribute_names[0], dataset.shape[0]))
        for i in range(1, number_predictors):
            single_attr = pd.DataFrame(np.repeat(attribute_names[i], dataset.shape[0]))
            all_attr = pd.concat([all_attr, single_attr], axis=0)
        all_attr.index = s_val.index
        final_values = pd.concat([s_val, all_attr], axis=1)
        final_values = pd.concat([final_values, values], axis=1)
        del values
        # -----------------------------------------------------------------

        output = list(np.unique(dataset.iloc[:, number_predictors]))

        # ---Define the .gdx file and assign values to all the elements----
        gdx_file = "input.gdx"
        input_data = gd.gdx.GdxFile()
        # First define the set of the samples
        input_data.append(gd.gdx.GdxSymbol("s", gd.gdx.GamsDataType.Set,
                                           dims=1, description="set of samples in the set"))
        # Then define the set of the predictor variables
        input_data.append(gd.gdx.GdxSymbol("m", gd.gdx.GamsDataType.Set,
                                           dims=1, description="The input variables of the dataset"))
        # Then define the input of the dataset
        input_data.append(gd.gdx.GdxSymbol("A", gd.gdx.GamsDataType.Parameter,
                                           dims=2, description="The values of the samples"))
        input_data.append(gd.gdx.GdxSymbol("map", gd.gdx.GamsDataType.Set,
                                           dims=2, description="Mapping of samples"))
        input_data.append(gd.gdx.GdxSymbol("i", gd.gdx.GamsDataType.Set,
                                           dims=1, description="Hyper-boxes"))
        input_data[0].dataframe = s_val[0:dataset.shape[0]]
        input_data[1].dataframe = attribute_names
        input_data[2].dataframe = final_values
        dataset.index = s_val[0:dataset.shape[0]].index
        input_data[3].dataframe = pd.concat([s_val[0:dataset.shape[0]],
                                             dataset.iloc[:, number_predictors]], axis=1)
        input_data[4].dataframe = pd.DataFrame(output)
        input_data.write(gdx_file)
        # -----------------------------------------------------------------
        return ".gdx file has been generated"
    # ---------------------------------------------------------------------

    # By default, the algorithm scales the input variables in the range
    # [0,1]. This is done in order to aid the optimisation and the bigM
    # constraints.
    # So, at the end of the optimisation, the regression coefficients
    # and intercepts of each region are scaled back to reflect the
    # original data.
    # ---------------------------------------------------------------------
    # This method unscales the values of the length (LE) and central
    # coordinates (x) of the hyperboxes
    def unscale_values(vector):
        variables = np.unique(vector['m'])
        for i, j in enumerate(variables):
            b = {}
            to_replace = vector['Level'][vector['m'] == j]
            value = (vector['Level'][vector['m'] == j] *
                     (scaler.data_max_[i] - scaler.data_min_[i]) + scaler.data_min_[i])
            [b.update({i: j}) for i, j in zip(to_replace, value)]
            vector['Level'] = vector['Level'].replace(to_replace=b, value=None)
        return vector
    # ---------------------------------------------------------------------

    # ---------------------Main part of the fit method---------------------
    v_names = False
    if isinstance(X, pd.DataFrame):
        variable_names = X.columns
        v_names = True
    o_name = False
    if isinstance(y, pd.DataFrame):
        output_name = y.columns
        o_name = True
    X, y = check_X_y(X, y)
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(X)
    X = scaler.transform(X)
    X = pd.DataFrame(X)
    y = pd.DataFrame(y)
    if v_names is True:
        X.columns = variable_names
    else:
        # If there are no names, create a vector for the names: x0, x1, x2, ...
        X.columns = [("x" + str(i)) for i in range(len(X.columns))]
    if o_name is True:
        y.columns = output_name
    if v_names is True:
        self.names_ = variable_names
    else:
        self.names_ = False
    self.classes_ = unique_labels(y)
    X = pd.concat([X, y], axis=1)
    generate_gdx(X)
    # Call GAMS
    os.system("gams optimal_hyperbox_model.gms o=nul")
    results = gd.to_dataframes('results.gdx')
    LE = results['LE']
    x = results['x']
    x = unscale_values(x)
    hyperboxes = Boxes(LE, x)
    box_vertices = define_vertices(hyperboxes.LE, hyperboxes.x)
    hyperboxes.vertices = box_vertices
    os.remove("input.gdx")
    os.remove("results.gdx")
    os.remove("cplex.opt")
    self.is_fitted_ = True
    self.model_ = hyperboxes
    return self
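# Not from the original source: a sketch of how the fit method above might be called,
# assuming the enclosing class (named HyperboxEstimator here purely as a placeholder) is a
# scikit-learn-style estimator and that GAMS and optimal_hyperbox_model.gms are available.
import pandas as pd

X_train = pd.DataFrame({'feat_a': [0.1, 0.4, 0.9], 'feat_b': [1.0, 0.2, 0.5]})
y_train = pd.DataFrame({'label': [0, 1, 1]})

estimator = HyperboxEstimator()   # placeholder for the class that defines fit()
estimator.fit(X_train, y_train)   # writes input.gdx, runs GAMS, reads results.gdx
print(estimator.model_.vertices)  # hyperbox vertices keyed by box identifier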
def test_export_to_gdx(self):
    import gdxpds

    cero = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3], "D": [4], "E": [5], "F": [6]},
                                  orient='index', dtype=pd.np.float32)
    cero.sort_index(inplace=True)
    cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

    proc = FromCERO._Procedure({"file": "gdx_export.gdx",
                                "output_kwargs": {"id": "test_gdx"}})
    proc.exec_ops(cero)

    # Read gdx
    dfs = gdxpds.to_dataframes("gdx_export.gdx")

    self.assertEqual(len(dfs), 2)
    self.assertTrue("test_gdx" in dfs)

    df1 = dfs["test_gdx"]
    df1.columns = ["Col1", "Values"]
    df1.set_index("Col1", inplace=True)

    test_list = [1, 2, 3, 4, 5, 6]
    df1_vals = df1.values.tolist()
    self.assertTrue(all([np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)]))

    test_list = ["A", "B", "C", "D", "E", "F"]
    self.assertTrue(all([x == y for (x, y) in zip(test_list, df1.index.tolist())]))

    os.remove("gdx_export.gdx")

    # Test 2
    # Setup test dataframe
    df = pd.DataFrame(data=[[1, 2, 3], [6, 4, 5], [4, 5, 8], [9, 10, 12]],
                      dtype=pd.np.float32)
    df.columns = pd.DatetimeIndex(pd.to_datetime([2017, 2018, 2019], format="%Y"))
    df.index = pd.Index([("a_redundant_identifier", "solar"),
                         ("a_redundant_identifier", "wind"),
                         ("a_redundant_identifier", "oil"),
                         ("a_redundant_identifier", "gas")], tupleize_cols=False)
    df.sort_index(inplace=True)

    # Export dataframe
    proc = FromCERO._Procedure({"file": "gdx_file.gdx",
                                "output_kwargs": {"id": "fuel_export", "index_col": 1}})
    proc.exec_ops(df)

    # Read in created file
    dfs = gdxpds.to_dataframes("gdx_file.gdx")

    # Dissect the created file
    self.assertEqual(len(dfs), 3)  # One more dimension than previous test, given variability by year
    self.assertTrue("fuel_export" in dfs)

    df1 = dfs["fuel_export"]
    df1.columns = ["Col1", "Year", "Values"]
    df1.set_index(["Col1"], inplace=True)
    df1 = df1.pivot(columns="Year", values="Values")
    df1 = df1.astype(int)

    # Perform tests...
    test_list = [[9, 10, 12], [4, 5, 8], [1, 2, 3], [6, 4, 5]]
    df1_vals = df1.values.tolist()
    self.assertTrue(test_list == df1_vals)

    test_list = ["gas", "oil", "solar", "wind"]
    self.assertTrue(test_list == df1.index.tolist())

    os.remove("gdx_file.gdx")
def convert_file(self, path):
    dataframes = gdxpds.to_dataframes(path)
    for symbol_name, df in dataframes.items():
        print("Converting {}.".format(symbol_name))
        # str.strip('.gdx') removes individual characters rather than the extension,
        # so build the output path from the file name without its extension instead.
        out_path = os.path.splitext(path)[0] + '.xls'
        pd.DataFrame(df).to_excel(out_path)
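# Not from the original source: a sketch of an alternative to convert_file above that writes
# every GDX symbol to its own sheet of a single workbook instead of reusing one output path,
# assuming gdxpds, pandas, and an Excel writer engine are installed and 'example.gdx' is a
# hypothetical input file.
import os

import gdxpds
import pandas as pd

gdx_path = 'example.gdx'
xlsx_path = os.path.splitext(gdx_path)[0] + '.xlsx'
with pd.ExcelWriter(xlsx_path) as writer:
    for symbol_name, df in gdxpds.to_dataframes(gdx_path).items():
        # Excel limits sheet names to 31 characters
        df.to_excel(writer, sheet_name=symbol_name[:31], index=False)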
import gdxpds
import pandas as pd
import pickle

if __name__ == "__main__":
    # Loading the assets data
    tables = gdxpds.to_dataframes('tables.gdx')
    # Loading the regional marker
    regionalMarker = gdxpds.to_dataframes('price_ratio.gdx')
    print('Finished opening the GDX')

    # Loading the parameters from the dataframes and setting the header names
    # Loading sets
    asset = tables['asset']
    asset.columns = ['Asset', 'Value']
    country = tables['country']
    country.columns = ['c', 'Value']
    group = tables['group']
    group.columns = ['group', 'Value']
    assetType = tables['type']
    assetType.columns = ['type', 'Value']

    # Loading parameters
    approval = tables['approval']
    approval.columns = ['Asset', 'Value']
    start_y = tables['start']
    start_y.columns = ['Asset', 'Value']
    last_y = tables['last_year']
    last_y.columns = ['Asset', 'Value']
    jedi_gdxs.append(path + '/gdxfiles/JEDI.gdx')
else:
    subdirs = next(os.walk(path))[1]
    for subdir in subdirs:
        if os.path.isfile(path + '/' + subdir + '/gdxfiles/JEDI.gdx'):
            if os.path.isfile(path + '/' + subdir + '/gdxfiles/JEDI_out.gdx'):
                sys.exit(path + '/' + subdir +
                         '/gdxfiles/JEDI_out.gdx already exists. Please remove or rename it to continue.')
            jedi_gdxs.append(path + '/' + subdir + '/gdxfiles/JEDI.gdx')

for jedi_gdx in jedi_gdxs:
    print(jedi_gdx)
    dfs = gdxpds.to_dataframes(jedi_gdx)
    df_full = dfs['Jedi']
    df_full.rename(columns={'allyears': 'year', 'jedi_cat': 'cat'}, inplace=True)
    # convert text columns to lower case
    df_full['bigQ'] = df_full['bigQ'].str.lower()
    df_full['cat'] = df_full['cat'].str.lower()
    # join with tech mapping
    df_full = pd.merge(left=df_full, right=df_tech_map, how='inner',
def load_gdx_file(input_file, gams_dir):
    file_name = input_file + ".gdx"
    logging.info('Loading gdx with output results')
    output_gdx = gdxpds.to_dataframes(DEF_INPUT_PATH / file_name, gams_dir=gams_dir)
    return dw.DataWrapper(input_file, output_gdx)
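# Not part of the original source: a minimal sketch of calling load_gdx_file above, assuming
# DEF_INPUT_PATH is a pathlib.Path to the directory of GDX results, dw.DataWrapper is the
# project's own wrapper class, and '/opt/gams' is a hypothetical GAMS installation directory.
wrapper = load_gdx_file('scenario_base', gams_dir='/opt/gams')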
import pandas as pd
# import XlsxWriter
import gdxpds
from ctypes import c_bool
import logging
import gdxpds.gdx

"""
Python dicts of {symbol_name: pandas.DataFrame}, where each pandas.DataFrame contains
data for a single set, parameter, equation, or variable.
"""

# Use raw strings for the Windows paths so backslash sequences such as \U are not
# interpreted as escape characters.
# gdx_file = r"C:\Users\Adel\Documents\Test_wash\WASH_5yrs_OutputData.gdx"
# gdx_file = r"C:\Users\Adel\Documents\Test_wash\Systems-model-in-Wetlands-to-Allocate-water-and-Manage-Plant-Spread\Version1.2-WetlandUnitsAsTanks\GUI_v1.2\BRMBR_Input.gdx"

# read from this one
gdx_file = r"C:\Users\Adel\Documents\GitHub\WaMDaM_Wizard_1.04\src\controller\WASH\Extract_WASH_GDX_ToWaMDaM\WASH-Data.gdx"

dataframes = gdxpds.to_dataframes(gdx_file)

# with gdxpds.gdx.GdxFile(lazy_load=False) as f:
#     f.read(gdx_file)
#     for symbol in f:
#         symbol_name = symbol.name
#         df = symbol.dataframe
#         GamsDataType = symbol.data_type.name
#         print("Doing work with {},{}".format(symbol_name, GamsDataType))

# Create a Pandas Excel writer using XlsxWriter as the engine.
# 1.
# writer = pd.ExcelWriter('WAHS_input_GDX3_as_is.xlsx', engine='xlsxwriter')
# 2.
# writer = pd.ExcelWriter('SWAMPS_Input_GDX.xlsx', engine='xlsxwriter')
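# Not from the original source: a small sketch of inspecting what to_dataframes returned in
# the script above, printing each symbol name with the shape of its DataFrame before any
# export step is chosen.
for symbol_name, df in dataframes.items():
    print("{}: {} rows x {} columns".format(symbol_name, df.shape[0], df.shape[1]))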
        level_col = df_sym.columns.get_loc('Level')
        df_sym['sym_set'] = ''
        for s in range(level_col):
            df_sym['sym_set'] = df_sym['sym_set'] + df_sym.iloc[:, s]
            if s < level_col - 1:
                df_sym['sym_set'] = df_sym['sym_set'] + '.'
        # remove set columns
        df_sym = df_sym.iloc[:, level_col:]
        # remove lower, upper, scale
        df_sym = df_sym.drop(['Lower', 'Upper', 'Scale'], axis=1)
        df_syms.append(df_sym)
    df_syms = pd.concat(df_syms).reset_index(drop=True)
    return df_syms


dfs = gdxpds.to_dataframes(solution_file)

df_vars = get_df_symbols(dfs, df_mps['var_name'].unique())
df_vars = df_vars.rename(columns={'Level': 'var_level', 'Marginal': 'var_marginal',
                                  'sym_name': 'var_name', 'sym_set': 'var_set'})
df = pd.merge(left=df_mps, right=df_vars, how='left',
              on=['var_name', 'var_set'], sort=False)

df_cons = get_df_symbols(dfs, df_mps['con_name'].unique())
df_cons = df_cons.rename(