def get_df_solution(solution_file, var_list_mps, con_list_mps):
    '''
    Create dataframes of marginals and levels of variables and constraints of interest.
    Note that all strings are lowercased because GAMS is case insensitive.

    Args:
        solution_file (string): Full path to GAMS gdx solution file.
        var_list_mps (list of strings): List of lowercased variable names that are of interest.
        con_list_mps (list of strings): List of lowercased constraint names that are of interest.

    Returns:
        dict of two pandas dataframes, one for variables ('vars') and one for constraints ('cons').
        Columns in the 'vars' dataframe:
            var_name (string): Name of variable.
            var_set (string): Period-separated sets of the variable.
            var_level (float): Level of the variable in the solution.
            var_marginal (float): Marginal of the variable in the solution.
        Columns in the 'cons' dataframe:
            con_name (string): Constraint name.
            con_set (string): Period-separated sets of the constraint.
            con_level (float): Level of the constraint in the solution.
            con_marginal (float): Marginal of the constraint in the solution.
    '''
    start = datetime.now()
    dfs = gdxpds.to_dataframes(solution_file)
    print('solution read: ' + str(datetime.now() - start))
    start = datetime.now()
    dfs = {k.lower(): v for k, v in dfs.items()}
    df_vars = get_df_symbols(dfs, var_list_mps)
    df_vars = df_vars.rename(columns={'Level': 'var_level', 'Marginal': 'var_marginal',
                                      'sym_name': 'var_name', 'sym_set': 'var_set'})
    df_cons = get_df_symbols(dfs, con_list_mps)
    df_cons = df_cons.rename(columns={'Level': 'con_level', 'Marginal': 'con_marginal',
                                      'sym_name': 'con_name', 'sym_set': 'con_set'})
    print('solution reformatted: ' + str(datetime.now() - start))
    return {'vars': df_vars, 'cons': df_cons}
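# Not part of the original source: a minimal usage sketch for get_df_solution above, assuming
# gdxpds is installed, the helper get_df_symbols is defined in the same module, and
# 'solution.gdx' plus the symbol names 'gen', 'cap', and 'balance' are hypothetical placeholders.
from datetime import datetime

import gdxpds

solution = get_df_solution('solution.gdx',
                           var_list_mps=['gen', 'cap'],
                           con_list_mps=['balance'])
print(solution['vars'].head())
print(solution['cons'].head())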
def convert_gdx_to_csv(in_gdx, out_dir, gams_dir=None, wide=False, frmt=None):
    # check inputs
    if not os.path.exists(os.path.dirname(out_dir)):
        raise RuntimeError(
            "Parent directory of output directory '{}' does not exist.".format(out_dir))
    # convert to pandas.DataFrames
    dataframes = gdxpds.to_dataframes(in_gdx, gams_dir)
    logger.debug('Have dataframes')
    # write to files
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    for symbol_name, df in dataframes.items():
        csv_path = os.path.join(out_dir, symbol_name + ".csv")
        if os.path.exists(csv_path):
            logger.info("Overwriting '{}'".format(csv_path))
        if wide:
            idx = list(df.columns[:-1])
            logger.info('Setting index {}'.format(idx))
            df = df.set_index(idx).unstack()
            if isinstance(df, pd.DataFrame) and df.columns.nlevels > 1:
                df.columns = df.columns.droplevel(0)
            while df.index.nlevels > 1:
                df.index = df.index.droplevel(0)
        if frmt and symbol_name in frmt:
            logger.info("Format symbol {}".format(symbol_name))
            frmt_symbol = frmt[symbol_name]
            if 'columns' in frmt_symbol:
                columns = frmt_symbol['columns']
                for c in columns:
                    if c not in df:
                        df[c] = 0
                df = df[columns]
            if 'index_name' in frmt_symbol:
                df.index.name = frmt_symbol['index_name']
            if 'datetime' in frmt_symbol:
                df.insert(0, 'date', pd.to_datetime(df['datetime'], unit='s'))
            if 'sort' in frmt_symbol:
                df = df.sort_values(by=frmt_symbol['sort'])
        df.to_csv(csv_path)
def convert_gdx_to_csv(in_gdx, out_dir, gams_dir=None):
    # check inputs
    if not os.path.exists(os.path.dirname(out_dir)):
        raise RuntimeError(
            "Parent directory of output directory '{}' does not exist.".format(out_dir))
    # convert to pandas.DataFrames
    dataframes = gdxpds.to_dataframes(in_gdx, gams_dir)
    # write to files
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    for symbol_name, df in dataframes.items():
        csv_path = os.path.join(out_dir, symbol_name + ".csv")
        if os.path.exists(csv_path):
            logger.info("Overwriting '{}'".format(csv_path))
        df.to_csv(csv_path, na_rep='NaN', index=False)
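# Not part of the original source: a sketch of driving convert_gdx_to_csv above, assuming the
# module-level names it relies on (os, pandas, gdxpds, logger) exist and that 'results.gdx'
# and 'csv_output' are hypothetical paths. One CSV is written per symbol in the GDX file.
import logging
import os

import gdxpds
import pandas as pd

logger = logging.getLogger(__name__)

convert_gdx_to_csv('results.gdx', 'csv_output')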
    't09': '2023', 't10': '2024', 't11': '2025', 't12': '2026',
    't13': '2027', 't14': '2028', 't15': '2029', 't16': '2030'
}
costcalclist = []
withClist = []
withoutClist = []
for filename, scenario in zip(files, scenarios):
    _dict = gdxpds.to_dataframes(filename)
    # costcalcs = _df['RWcostcalcs']
    # costcalcs.columns = ['trun', 'item', 'c', 'value']
    withC = _dict['RWnet_carbonrecycle']
    withoutC = _dict['RWnetex']
    withC.columns = ['trun', 'c', 'value']
    withoutC.columns = ['trun', 'c', 'value']
    withC['scenario'] = scenario
    withoutC['scenario'] = scenario
    # costcalcs = costcalcs[costcalcs['item'] == 'Total System']
    withC = withC.replace(years)
    withClist.append(withC)
    withoutC = withoutC.replace(years)
def fit(self, X, y):
    class Boxes():
        def __init__(self, LE, x, vertices=0):
            self.LE = LE
            self.x = x
            self.vertices = vertices

    # ---------------------------------------------------------------------
    # This function calculates the coordinates of the vertices of the
    # hyperboxes. This is useful for predictions.
    def define_vertices(LE, x):
        number_of_boxes = np.unique(LE['i'])
        vertices = {}
        for i in number_of_boxes:
            lower_limits = list(x[x['i'] == i]['Level'] - (LE[LE['i'] == i]['Level'] / 2))
            upper_limits = list(x[x['i'] == i]['Level'] + (LE[LE['i'] == i]['Level'] / 2))
            vertices.update({i: ([lower_limits], [upper_limits])})
        return vertices
    # ---------------------------------------------------------------------

    # ---------------------------------------------------------------------
    # This function generates the .gdx file with all the input data
    def generate_gdx(dataset):
        # The number of predictors in the set
        number_predictors = dataset.shape[1] - 1
        # The names of the attributes
        attribute_names = dataset.columns[0:number_predictors]

        # --------------------Create the set of samples--------------------
        s_val = list(map(str, dataset.index))
        s_val = ["s" + x for x in s_val]
        # -----------------------------------------------------------------

        # --------------Create the appropriate format for gdx--------------
        s_val = s_val * number_predictors
        s_val = pd.DataFrame(s_val)
        values = dataset.iloc[:, 0:number_predictors]
        values = values.unstack()
        values = pd.DataFrame(list(values))
        all_attr = pd.DataFrame(np.repeat(attribute_names[0], dataset.shape[0]))
        for i in range(1, number_predictors):
            single_attr = pd.DataFrame(np.repeat(attribute_names[i], dataset.shape[0]))
            all_attr = pd.concat([all_attr, single_attr], axis=0)
        all_attr.index = s_val.index
        final_values = pd.concat([s_val, all_attr], axis=1)
        final_values = pd.concat([final_values, values], axis=1)
        del values
        # -----------------------------------------------------------------

        output = list(np.unique(dataset.iloc[:, number_predictors]))

        # ---Define the .gdx file and assign values to all the elements----
        gdx_file = "input.gdx"
        input_data = gd.gdx.GdxFile()
        # First define the set of the samples
        input_data.append(gd.gdx.GdxSymbol("s", gd.gdx.GamsDataType.Set,
                                           dims=1, description="set of samples in the set"))
        # Then define the set of the predictor variables
        input_data.append(gd.gdx.GdxSymbol("m", gd.gdx.GamsDataType.Set,
                                           dims=1, description="The input variables of the dataset"))
        # Then define the input of the dataset
        input_data.append(gd.gdx.GdxSymbol("A", gd.gdx.GamsDataType.Parameter,
                                           dims=2, description="The values of the samples"))
        input_data.append(gd.gdx.GdxSymbol("map", gd.gdx.GamsDataType.Set,
                                           dims=2, description="Mapping of samples"))
        input_data.append(gd.gdx.GdxSymbol("i", gd.gdx.GamsDataType.Set,
                                           dims=1, description="Hyper-boxes"))
        input_data[0].dataframe = s_val[0:dataset.shape[0]]
        input_data[1].dataframe = attribute_names
        input_data[2].dataframe = final_values
        dataset.index = s_val[0:dataset.shape[0]].index
        input_data[3].dataframe = pd.concat([s_val[0:dataset.shape[0]],
                                             dataset.iloc[:, number_predictors]], axis=1)
        input_data[4].dataframe = pd.DataFrame(output)
        input_data.write(gdx_file)
        # -----------------------------------------------------------------
        return ".gdx file has been generated"
    # ---------------------------------------------------------------------

    # By default, the algorithm scales the input variables in the range
    # [0,1]. This is done in order to aid the optimisation and the bigM
    # constraints.
    # So, at the end of the optimisation, the regression coefficients
    # and intercepts of each region are scaled back to reflect the
    # original data.
    # ---------------------------------------------------------------------
    # This method unscales the values of the length (LE) and central
    # coordinates (x) of the hyperboxes
    def unscale_values(vector):
        variables = np.unique(vector['m'])
        for i, j in enumerate(variables):
            b = {}
            to_replace = vector['Level'][vector['m'] == j]
            value = (vector['Level'][vector['m'] == j] *
                     (scaler.data_max_[i] - scaler.data_min_[i]) + scaler.data_min_[i])
            [b.update({i: j}) for i, j in zip(to_replace, value)]
            vector['Level'] = vector['Level'].replace(to_replace=b, value=None)
        return vector
    # ---------------------------------------------------------------------

    # ---------------------Main part of the fit method---------------------
    v_names = False
    if isinstance(X, pd.DataFrame):
        variable_names = X.columns
        v_names = True
    o_name = False
    if isinstance(y, pd.DataFrame):
        output_name = y.columns
        o_name = True
    X, y = check_X_y(X, y)
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(X)
    X = scaler.transform(X)
    X = pd.DataFrame(X)
    y = pd.DataFrame(y)
    if v_names is True:
        X.columns = variable_names
    else:
        # If there are no names, create a vector for the names: x0, x1, x2, ...
        X.columns = [("x" + str(i)) for i in range(len(X.columns))]
    if o_name is True:
        y.columns = output_name
    if v_names is True:
        self.names_ = variable_names
    else:
        self.names_ = False
    self.classes_ = unique_labels(y)
    X = pd.concat([X, y], axis=1)
    generate_gdx(X)
    # Call GAMS
    os.system("gams optimal_hyperbox_model.gms o=nul")
    results = gd.to_dataframes('results.gdx')
    LE = results['LE']
    x = results['x']
    x = unscale_values(x)
    hyperboxes = Boxes(LE, x)
    box_vertices = define_vertices(hyperboxes.LE, hyperboxes.x)
    hyperboxes.vertices = box_vertices
    os.remove("input.gdx")
    os.remove("results.gdx")
    os.remove("cplex.opt")
    self.is_fitted_ = True
    self.model_ = hyperboxes
    return self
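# Not from the original source: a sketch of how the fit method above might be called,
# assuming the enclosing class (named HyperboxEstimator here purely as a placeholder) is a
# scikit-learn-style estimator and that GAMS and optimal_hyperbox_model.gms are available.
import pandas as pd

X_train = pd.DataFrame({'feat_a': [0.1, 0.4, 0.9], 'feat_b': [1.0, 0.2, 0.5]})
y_train = pd.DataFrame({'label': [0, 1, 1]})

estimator = HyperboxEstimator()   # placeholder for the class that defines fit()
estimator.fit(X_train, y_train)   # writes input.gdx, runs GAMS, reads results.gdx
print(estimator.model_.vertices)  # hyperbox vertices keyed by box identifier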
def test_export_to_gdx(self):
    import gdxpds

    cero = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3], "D": [4], "E": [5], "F": [6]},
                                  orient='index', dtype=pd.np.float32)
    cero.sort_index(inplace=True)
    cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

    proc = FromCERO._Procedure({"file": "gdx_export.gdx",
                                "output_kwargs": {"id": "test_gdx"}})
    proc.exec_ops(cero)

    # Read gdx
    dfs = gdxpds.to_dataframes("gdx_export.gdx")

    self.assertEqual(len(dfs), 2)
    self.assertTrue("test_gdx" in dfs)

    df1 = dfs["test_gdx"]
    df1.columns = ["Col1", "Values"]
    df1.set_index("Col1", inplace=True)

    test_list = [1, 2, 3, 4, 5, 6]
    df1_vals = df1.values.tolist()
    self.assertTrue(all([np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)]))

    test_list = ["A", "B", "C", "D", "E", "F"]
    self.assertTrue(all([x == y for (x, y) in zip(test_list, df1.index.tolist())]))

    os.remove("gdx_export.gdx")

    # Test 2
    # Setup test dataframe
    df = pd.DataFrame(data=[[1, 2, 3], [6, 4, 5], [4, 5, 8], [9, 10, 12]],
                      dtype=pd.np.float32)
    df.columns = pd.DatetimeIndex(pd.to_datetime([2017, 2018, 2019], format="%Y"))
    df.index = pd.Index([("a_redundant_identifier", "solar"),
                         ("a_redundant_identifier", "wind"),
                         ("a_redundant_identifier", "oil"),
                         ("a_redundant_identifier", "gas")], tupleize_cols=False)
    df.sort_index(inplace=True)

    # Export dataframe
    proc = FromCERO._Procedure({"file": "gdx_file.gdx",
                                "output_kwargs": {"id": "fuel_export", "index_col": 1}})
    proc.exec_ops(df)

    # Read in created file
    dfs = gdxpds.to_dataframes("gdx_file.gdx")

    # Dissect the created file
    self.assertEqual(len(dfs), 3)  # One more dimension than previous test, given variability by year
    self.assertTrue("fuel_export" in dfs)

    df1 = dfs["fuel_export"]
    df1.columns = ["Col1", "Year", "Values"]
    df1.set_index(["Col1"], inplace=True)
    df1 = df1.pivot(columns="Year", values="Values")
    df1 = df1.astype(int)

    # Perform tests...
    test_list = [[9, 10, 12], [4, 5, 8], [1, 2, 3], [6, 4, 5]]
    df1_vals = df1.values.tolist()
    self.assertTrue(test_list == df1_vals)

    test_list = ["gas", "oil", "solar", "wind"]
    self.assertTrue(test_list == df1.index.tolist())

    os.remove("gdx_file.gdx")
def convert_file(self, path):
    dataframes = gdxpds.to_dataframes(path)
    for symbol_name, df in dataframes.items():
        print("Converting {}.".format(symbol_name))
        # str.strip('.gdx') removes individual characters rather than the extension,
        # so build the output path from the file name without its extension instead.
        out_path = os.path.splitext(path)[0] + '.xls'
        pd.DataFrame(df).to_excel(out_path)
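# Not from the original source: a sketch of an alternative to convert_file above that writes
# every GDX symbol to its own sheet of a single workbook instead of reusing one output path,
# assuming gdxpds, pandas, and an Excel writer engine are installed and 'example.gdx' is a
# hypothetical input file.
import os

import gdxpds
import pandas as pd

gdx_path = 'example.gdx'
xlsx_path = os.path.splitext(gdx_path)[0] + '.xlsx'
with pd.ExcelWriter(xlsx_path) as writer:
    for symbol_name, df in gdxpds.to_dataframes(gdx_path).items():
        # Excel limits sheet names to 31 characters
        df.to_excel(writer, sheet_name=symbol_name[:31], index=False)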
import gdxpds
import pandas as pd
import pickle

if __name__ == "__main__":
    # Loading the assets data
    tables = gdxpds.to_dataframes('tables.gdx')
    # Loading the regional marker
    regionalMarker = gdxpds.to_dataframes('price_ratio.gdx')
    print('Finished opening the GDX')

    # Loading the parameters from the dataframes and setting the header names
    # Loading sets
    asset = tables['asset']
    asset.columns = ['Asset', 'Value']
    country = tables['country']
    country.columns = ['c', 'Value']
    group = tables['group']
    group.columns = ['group', 'Value']
    assetType = tables['type']
    assetType.columns = ['type', 'Value']

    # Loading parameters
    approval = tables['approval']
    approval.columns = ['Asset', 'Value']
    start_y = tables['start']
    start_y.columns = ['Asset', 'Value']
    last_y = tables['last_year']
    last_y.columns = ['Asset', 'Value']
    jedi_gdxs.append(path + '/gdxfiles/JEDI.gdx')
else:
    subdirs = next(os.walk(path))[1]
    for subdir in subdirs:
        if os.path.isfile(path + '/' + subdir + '/gdxfiles/JEDI.gdx'):
            if os.path.isfile(path + '/' + subdir + '/gdxfiles/JEDI_out.gdx'):
                sys.exit(path + '/' + subdir +
                         '/gdxfiles/JEDI_out.gdx already exists. Please remove or rename it to continue.')
            jedi_gdxs.append(path + '/' + subdir + '/gdxfiles/JEDI.gdx')

for jedi_gdx in jedi_gdxs:
    print(jedi_gdx)
    dfs = gdxpds.to_dataframes(jedi_gdx)
    df_full = dfs['Jedi']
    df_full.rename(columns={'allyears': 'year', 'jedi_cat': 'cat'}, inplace=True)
    # convert text columns to lower case
    df_full['bigQ'] = df_full['bigQ'].str.lower()
    df_full['cat'] = df_full['cat'].str.lower()
    # join with tech mapping
    df_full = pd.merge(left=df_full, right=df_tech_map, how='inner',
def load_gdx_file(input_file, gams_dir):
    file_name = input_file + ".gdx"
    logging.info('Loading gdx with output results')
    output_gdx = gdxpds.to_dataframes(DEF_INPUT_PATH / file_name, gams_dir=gams_dir)
    return dw.DataWrapper(input_file, output_gdx)
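# Not part of the original source: a minimal sketch of calling load_gdx_file above, assuming
# DEF_INPUT_PATH is a pathlib.Path to the directory of GDX results, dw.DataWrapper is the
# project's own wrapper class, and '/opt/gams' is a hypothetical GAMS installation directory.
wrapper = load_gdx_file('scenario_base', gams_dir='/opt/gams')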
import pandas as pd
# import XlsxWriter
import gdxpds
from ctypes import c_bool
import logging
import gdxpds.gdx

"""
Python dicts of {symbol_name: pandas.DataFrame}, where each pandas.DataFrame contains
data for a single set, parameter, equation, or variable.
"""

# Use raw strings for the Windows paths so backslash sequences such as \U are not
# interpreted as escape characters.
# gdx_file = r"C:\Users\Adel\Documents\Test_wash\WASH_5yrs_OutputData.gdx"
# gdx_file = r"C:\Users\Adel\Documents\Test_wash\Systems-model-in-Wetlands-to-Allocate-water-and-Manage-Plant-Spread\Version1.2-WetlandUnitsAsTanks\GUI_v1.2\BRMBR_Input.gdx"

# read from this one
gdx_file = r"C:\Users\Adel\Documents\GitHub\WaMDaM_Wizard_1.04\src\controller\WASH\Extract_WASH_GDX_ToWaMDaM\WASH-Data.gdx"

dataframes = gdxpds.to_dataframes(gdx_file)

# with gdxpds.gdx.GdxFile(lazy_load=False) as f:
#     f.read(gdx_file)
#     for symbol in f:
#         symbol_name = symbol.name
#         df = symbol.dataframe
#         GamsDataType = symbol.data_type.name
#         print("Doing work with {},{}".format(symbol_name, GamsDataType))

# Create a Pandas Excel writer using XlsxWriter as the engine.
# 1.
# writer = pd.ExcelWriter('WAHS_input_GDX3_as_is.xlsx', engine='xlsxwriter')
# 2.
# writer = pd.ExcelWriter('SWAMPS_Input_GDX.xlsx', engine='xlsxwriter')
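# Not from the original source: a small sketch of inspecting what to_dataframes returned in
# the script above, printing each symbol name with the shape of its DataFrame before any
# export step is chosen.
for symbol_name, df in dataframes.items():
    print("{}: {} rows x {} columns".format(symbol_name, df.shape[0], df.shape[1]))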
        level_col = df_sym.columns.get_loc('Level')
        df_sym['sym_set'] = ''
        for s in range(level_col):
            df_sym['sym_set'] = df_sym['sym_set'] + df_sym.iloc[:, s]
            if s < level_col - 1:
                df_sym['sym_set'] = df_sym['sym_set'] + '.'
        # remove set columns
        df_sym = df_sym.iloc[:, level_col:]
        # remove lower, upper, scale
        df_sym = df_sym.drop(['Lower', 'Upper', 'Scale'], axis=1)
        df_syms.append(df_sym)
    df_syms = pd.concat(df_syms).reset_index(drop=True)
    return df_syms


dfs = gdxpds.to_dataframes(solution_file)

df_vars = get_df_symbols(dfs, df_mps['var_name'].unique())
df_vars = df_vars.rename(columns={'Level': 'var_level', 'Marginal': 'var_marginal',
                                  'sym_name': 'var_name', 'sym_set': 'var_set'})
df = pd.merge(left=df_mps, right=df_vars, how='left',
              on=['var_name', 'var_set'], sort=False)

df_cons = get_df_symbols(dfs, df_mps['con_name'].unique())
df_cons = df_cons.rename(