Example #1
    max_SMS_SoC=12 / 3,
    max_LION_SoC=54,
    # Period length in minutes:
    PERIODEN_DAUER=period_min,
    # Q-Table can only take discrete inputs and make discrete outputs
    ACTION_TYPE='discrete',
    OBS_TYPE='discrete',
    # Set number of discrete values:
    discrete_space=22,
    # Size of validation data:
    val_split=0.1)

# Setup agent:
agent = Q_Learner(
    env=env,
    memory_len=update_num,
    # Training parameter:
    gamma=0.85,
    epsilon=0.8,
    epsilon_min=0.1,
    epsilon_decay=0.999996,
    lr=0.5,
    tau=0.125)

# Train:
training(agent, epochs, update_num, num_warmup_steps)

# Test with dataset that includes val-data:
env.use_all_data()
testing(agent)
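Example #1 passes a numeric epsilon_decay (0.999996), while Examples #2 and #3 pass 'linear'. A numeric value like this is commonly applied as a per-step multiplicative decay clipped at epsilon_min; the snippet below is a generic illustration of that schedule under this assumption, not the library's actual implementation.

# Generic illustration (assumption): multiplicative epsilon decay, clipped at epsilon_min.
epsilon, epsilon_min, epsilon_decay = 0.8, 0.1, 0.999996

for step in range(1_000_000):
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

print(epsilon)  # after ~1e6 steps epsilon has decayed down to epsilon_min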
Example #2
def run_agent(name='', gamma=0.9, lr=0.1, tau=0.15, update_num=500,
              epsilon_decay='linear', input_list=['norm_total_power','normal','seq_max'],
              hidden_size=256, pre_trained_model=None, target_update_num=None):
    '''
    Trains and tests a DQN based on the passed parameters.
    '''
    
    # Naming the agent:
    now  = datetime.now()
    NAME = 'agent_DQN_'+name+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings
    df, power_dem_df, logger, period_min = dataset_and_logger(NAME)

    # Number of warm-up steps:
    num_warmup_steps = 100
    # Number of epochs:
    epochs           = 100


    # Setup reward_maker
    r_maker = reward_maker(
        LOGGER                  = logger,
        # Settings:
        COST_TYPE               = 'exact_costs',
        R_TYPE                  = 'savings_focus',
        R_HORIZON               = 'single_step',
        # Parameter to calculate costs:
        cost_per_kwh            = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen     = 6000,
        SMS_Anschaffungs_Preis  = 55000,  # 115000/3
        SMS_max_Nutzungsjahre   = 25,
        Leistungspreis          = 102,)


    # Setup common_env
    env = common_env(
        reward_maker   = r_maker,
        df             = df,
        power_dem_df   = power_dem_df,
        # Dataset inputs for the states:
        input_list     = input_list,
        # Battery stats:
        max_SMS_SoC        = 25,
        max_LION_SoC       = 54,
        LION_max_entladung = 50,
        SMS_max_entladung  = 100,
        SMS_entladerate    = 0.72,
        LION_entladerate   = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # DQN inputs can be continuous and outputs must be discrete:
        ACTION_TYPE    = 'discrete',
        OBS_TYPE       = 'contin',
        # Set number of discrete values:
        discrete_space = 22,
        # Size of validation data:
        val_split      = 0.1)

    # Setup Agent:
    agent = DQN(
        env            = env,
        memory_len     = update_num,
        # Training parameter:
        gamma          = gamma,
        epsilon        = 0.99,
        epsilon_min    = 0.1,
        epsilon_decay  = epsilon_decay,
        lr             = lr,
        tau            = tau,
        activation     = 'relu',
        loss           = 'mean_squared_error',
        hidden_size    = hidden_size,
        pre_trained_model=pre_trained_model,
        target_update_num=target_update_num)

    # Train:
    training(agent, epochs, update_num, num_warmup_steps)

    # Test with dataset that includes val-data:
    env.use_all_data()
    testing(agent)
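Example #2 only defines run_agent; a call such as the following would kick off a DQN training and test run. The hyperparameter values shown are illustrative assumptions, not taken from the original.

# Hypothetical usage sketch: parameter values below are illustrative assumptions.
if __name__ == '__main__':
    run_agent(
        name='dqn_test',
        gamma=0.9,
        lr=0.001,
        tau=0.15,
        update_num=500,
        epsilon_decay='linear',
        hidden_size=256)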
Example #3
def run_agent(name='', gamma=0.9, lr=0.1, update_num=100, load_table=None,
              epsilon_decay='linear', input_list=['norm_total_power','normal','seq_max']):
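    '''
    Trains and tests a Q-Table agent based on the passed parameters.
    '''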
    
    # Naming the agent:
    now    = datetime.now()
    NAME   = 'agent_Q-Table_'+name+'_t-stamp'+now.strftime("_%d-%m-%Y_%H-%M-%S")

    # Import dataset and logger based on the common settings
    df, power_dem_df, logger, period_min = dataset_and_logger(NAME)

    # Number of warm-up steps:
    num_warmup_steps = 100
    # Number of epochs:
    epochs           = 100


    # Setup reward_maker
    r_maker = reward_maker(
        LOGGER                  = logger,
        # Settings:
        COST_TYPE               = 'exact_costs',
        R_TYPE                  = 'savings_focus',
        R_HORIZON               = 'single_step',
        # Parameter to calculate costs:
        cost_per_kwh            = 0.2255,
        LION_Anschaffungs_Preis = 34100,
        LION_max_Ladezyklen     = 6000,
        SMS_Anschaffungs_Preis  = 55000,  # 115000/3
        SMS_max_Nutzungsjahre   = 25,
        Leistungspreis          = 102,)

    # Setup common_env
    env = common_env(
        reward_maker   = r_maker,
        df             = df,
        power_dem_df   = power_dem_df,
        # Dataset inputs for the states:
        input_list     = input_list,
        # Battery stats:
        max_SMS_SoC        = 25,
        max_LION_SoC       = 54,
        LION_max_entladung = 50,
        SMS_max_entladung  = 100,
        SMS_entladerate    = 0.72,
        LION_entladerate   = 0.00008,
        # Period length in minutes:
        PERIODEN_DAUER = period_min,
        # Q-Table can only take discrete inputs and make discrete outputs
        ACTION_TYPE    = 'discrete',
        OBS_TYPE       = 'discrete',
        # Set number of discrete values:
        discrete_space = 22,
        # Size of validation data:
        val_split      = 0.1)

    # Setup agent:
    agent = Q_Learner(
        env            = env,
        memory_len     = update_num,
        # Training parameter:
        gamma          = gamma,
        epsilon        = 0.99,
        epsilon_min    = 0.1,
        epsilon_decay  = epsilon_decay,
        lr             = lr,
        load_table     = load_table)

    # Train:
    training(agent, epochs, update_num, num_warmup_steps)

    # Test with dataset that includes val-data:
    env.use_all_data()
    testing(agent)
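Analogously, the Q-Table variant of run_agent could be invoked as sketched below; the load_table file name is a hypothetical placeholder for a previously saved table and is not part of the original example.

# Hypothetical usage sketch: file name and parameter values are illustrative assumptions.
if __name__ == '__main__':
    # Fresh training run:
    run_agent(name='q_table_test', gamma=0.9, lr=0.1, update_num=100)
    # Resuming from a previously saved table:
    run_agent(name='q_table_resume', load_table='saved_q_table.pickle')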