Example No. 1
def train(args, n_actors, batch_queue, prios_queue, param_queue):
    env = wrapper.make_atari(args.env)
    env = wrapper.wrap_atari_dqn(env, args)
    utils.set_global_seeds(args.seed, use_torch=True)

    model = DuelingDQN(env, args).to(args.device)
    # model.load_state_dict(torch.load('model_30h.pth'))
    tgt_model = DuelingDQN(env, args).to(args.device)
    tgt_model.load_state_dict(model.state_dict())

    writer = SummaryWriter(comment="-{}-learner".format(args.env))
    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    # optimizer = torch.optim.RMSprop(model.parameters(), args.lr, alpha=0.95, eps=1.5e-7, centered=True)

    check_connection(n_actors)

    param_queue.put(model.state_dict())
    learn_idx = 0
    ts = time.time()
    tb_dict = {
        k: []
        for k in ['loss', 'grad_norm', 'max_q', 'mean_q', 'min_q']
    }
    while True:
        *batch, idxes = batch_queue.get()
        loss, prios, q_values = utils.compute_loss(model, tgt_model, batch,
                                                   args.n_steps, args.gamma)
        grad_norm = utils.update_parameters(loss, model, optimizer,
                                            args.max_norm)
        prios_queue.put((idxes, prios))
        batch, idxes, prios = None, None, None
        learn_idx += 1

        tb_dict["loss"].append(float(loss))
        tb_dict["grad_norm"].append(float(grad_norm))
        tb_dict["max_q"].append(float(torch.max(q_values)))
        tb_dict["mean_q"].append(float(torch.mean(q_values)))
        tb_dict["min_q"].append(float(torch.min(q_values)))

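        # Soft (Polyak) target update: tgt <- (1 - tau) * tgt + tau * online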
        if args.soft_target_update:
            tau = args.tau
            for p_tgt, p in zip(tgt_model.parameters(), model.parameters()):
                p_tgt.data *= 1 - tau
                p_tgt.data += tau * p
        elif learn_idx % args.target_update_interval == 0:
            print("Updating Target Network..")
            tgt_model.load_state_dict(model.state_dict())
        if learn_idx % args.save_interval == 0:
            print("Saving Model..")
            torch.save(model.state_dict(), "model.pth")
        if learn_idx % args.publish_param_interval == 0:
            param_queue.put(model.state_dict())
        if learn_idx % args.tb_interval == 0:
            bps = args.tb_interval / (time.time() - ts)
            print("Step: {:8} / BPS: {:.2f}".format(learn_idx, bps))
            writer.add_scalar("learner/BPS", bps, learn_idx)
            for k, v in tb_dict.items():
                writer.add_scalar(f'learner/{k}', np.mean(v), learn_idx)
                v.clear()
            ts = time.time()
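The helpers utils.compute_loss and utils.update_parameters are not shown on this page. As a rough, hypothetical sketch (not the actual utils module), an update helper matching the call above would backpropagate, clip the global gradient norm, and step the optimizer:

import torch

def update_parameters(loss, model, optimizer, max_norm):
    # Hypothetical sketch of the update step assumed by the learner loop above.
    optimizer.zero_grad()
    loss.backward()
    # Clip the global gradient norm and return it for logging.
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    optimizer.step()
    return grad_norm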
Example No. 2
def maml_train(raw_phi_u, raw_phi_i, raw_phi_r, u_grad_list, i_grad_list,
               r_grad_list, global_lr):
    """
    Update the global model parameters
    :param raw_phi_u: global user parameter
    :param raw_phi_i: global item parameter
    :param raw_phi_r: global rating parameter
    :param u_grad_list: list of user gradients
    :param i_grad_list: list of item gradients
    :param r_grad_list: list of rating gradients
    :param global_lr: global learning rate
    """
    phi_u = update_parameters(raw_phi_u, u_grad_list, global_lr)
    phi_i = update_parameters(raw_phi_i, i_grad_list, global_lr)
    phi_r = update_parameters(raw_phi_r, r_grad_list, global_lr)
    return phi_u, phi_i, phi_r
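The update_parameters helper used here is not included in the example. A minimal sketch, assuming the parameters and gradients are NumPy arrays and that the global parameter is moved against the average of the collected per-task gradients:

import numpy as np

def update_parameters(raw_phi, grad_list, global_lr):
    # Hypothetical sketch: average the task gradients, then take one
    # gradient-descent step on the global (meta) parameter.
    avg_grad = np.mean(np.stack(grad_list, axis=0), axis=0)
    return raw_phi - global_lr * avg_grad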
Example No. 3
def initiate_sced_model(day_idx, data_path, start, valid_id, gen_df, genth_df, bus_df,
                        branch_df, ptdf_dict, wind_generator_names,
                        margcost_df, blockmargcost_df, FlexibleRampFactor,
                        load_scaling_factor, wind_scaling_factor,
                        blockmargcost_dict, blockoutputlimit_dict,
                        load_s_df, hourly_load_df, hourly_load_dict, input_mode):
    shift = 0
    slot = 1
    bus_slot_load_dict, slot_load_dict, hourly_load_dict, total_hourly_load_dict,\
    hourly_load_df, genforren_dict, horizon, RampUpRequirement_dict, RampDnRequirement_dict = \
        update_parameters(data_path, day_idx, start + shift, FlexibleRampFactor,
                          load_scaling_factor, wind_scaling_factor, input_mode,
                          'real-time')
    slot_load_sced = slot_load_dict[1 + shift, slot]

    bus_slot_load_sced = extract_dictionary_for_sced(bus_slot_load_dict, 1, 2,
                                                     0, slot, start, shift)

    genforren_sced = extract_dictionary_for_sced(genforren_dict, 1, 2, 0, slot,
                                                 start, shift)
    #print('sced genforren',genforren_sced)

    sced_model = build_sced_model(start, slot, valid_id, gen_df, genth_df, bus_df,\
                                      branch_df, ptdf_dict, wind_generator_names,\
                                      margcost_df, blockmargcost_df, FlexibleRampFactor,\
                                      blockmargcost_dict, blockoutputlimit_dict,\
                                      genforren_sced, load_s_df, hourly_load_df,\
                                      hourly_load_dict,total_hourly_load_dict,\
                                      slot_load_sced, bus_slot_load_sced)
    return sced_model
Example No. 4
def train(args, n_actors, batch_queue, prios_queue, param_queue):
    env = RunTagEnv(width=5,
                    height=5,
                    number_of_subordinates=1,
                    max_steps=1000)
    #env = wrapper.make_atari(args.env)
    #env = wrapper.wrap_atari_dqn(env, args)
    utils.set_global_seeds(args.seed, use_torch=True)

    model = DuelingDQN(env).to(args.device)
    tgt_model = DuelingDQN(env).to(args.device)
    tgt_model.load_state_dict(model.state_dict())

    writer = SummaryWriter(comment="-{}-learner".format(args.env))
    # optimizer = torch.optim.Adam(model.parameters(), args.lr)
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    args.lr,
                                    alpha=0.95,
                                    eps=1.5e-7,
                                    centered=True)

    check_connection(n_actors)

    param_queue.put(model.state_dict())
    learn_idx = 0
    ts = time.time()
    while True:
        *batch, idxes = batch_queue.get()
        loss, prios = utils.compute_loss(model, tgt_model, batch, args.n_steps,
                                         args.gamma)
        grad_norm = utils.update_parameters(loss, model, optimizer,
                                            args.max_norm)
        print('Updated parameters!')
        prios_queue.put((idxes, prios))
        batch, idxes, prios = None, None, None
        learn_idx += 1

        writer.add_scalar("learner/loss", loss, learn_idx)
        writer.add_scalar("learner/grad_norm", grad_norm, learn_idx)

        if learn_idx % args.target_update_interval == 0:
            print("Updating Target Network..")
            tgt_model.load_state_dict(model.state_dict())
        if learn_idx % args.save_interval == 0:
            print("Saving Model..")
            torch.save(model.state_dict(), "model.pth")
        if learn_idx % args.publish_param_interval == 0:
            param_queue.put(model.state_dict())
        if learn_idx % args.bps_interval == 0:
            bps = args.bps_interval / (time.time() - ts)
            print("Step: {:8} / BPS: {:.2f}".format(learn_idx, bps))
            writer.add_scalar("learner/BPS", bps, learn_idx)
            ts = time.time()
Example No. 5
    def train(self):
        utils.set_global_seeds(self.seed, use_torch=True)

        learn_idx = 0
        while True:
            beta = self.beta_by_frame(learn_idx)
            states, actions, rewards, next_states, dones, weights, idxes = self.buffer.sample(
                self.batch_size, beta)
            states = torch.FloatTensor(states).to(self.device)
            actions = torch.LongTensor(actions).to(self.device)
            rewards = torch.FloatTensor(rewards).to(self.device)
            next_states = torch.FloatTensor(next_states).to(self.device)
            dones = torch.FloatTensor(dones).to(self.device)
            weights = torch.FloatTensor(weights).to(self.device)
            batch = (states, actions, rewards, next_states, dones, weights)

            loss, prios = utils.compute_loss(self.model, self.tgt_model, batch,
                                             self.n_step, self.gamma)

            self.scheduler.step()
            grad_norm = utils.update_parameters(loss, self.model,
                                                self.optimizer, self.max_norm)

            self.buffer.update_priorities(idxes, prios)

            batch, idxes, prios = None, None, None
            learn_idx += 1

            self.writer.add_scalar("learner/loss", loss, learn_idx)
            self.writer.add_scalar("learner/grad_norm", grad_norm, learn_idx)

            if learn_idx % self.target_update_interval == 0:
                print("Updating Target Network..")
                self.tgt_model.load_state_dict(self.model.state_dict())
            if learn_idx % self.save_interval == 0:
                print("Saving Model..")
                torch.save(self.model.state_dict(),
                           "model{}.pth".format(learn_idx))
            if learn_idx % self.publish_param_interval == 0:
                self.batch_recorder.set_worker_weights(
                    copy.deepcopy(self.model))
            if learn_idx >= self.max_step:
                torch.save(self.model.state_dict(),
                           "model{}.pth".format(learn_idx))
                self.batch_recorder.cleanup()
                break
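beta_by_frame is not shown here; prioritized experience replay typically anneals the importance-sampling exponent linearly from a starting value toward 1. A sketch under that assumption (beta_start and beta_frames are hypothetical settings, not taken from this example):

def beta_by_frame(frame_idx, beta_start=0.4, beta_frames=100000):
    # Hypothetical sketch: linearly anneal beta from beta_start to 1.0
    # over the first beta_frames learner updates.
    return min(1.0, beta_start + frame_idx * (1.0 - beta_start) / beta_frames)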
Example No. 6
def optimize(X, Y, a_prev, parameters, learning_rate=0.01):
    """
    Execute one step of the optimization to train the model.

    Arguments:
    X -- list of integers, where each integer is a number that maps to a character in the vocabulary.
    Y -- list of integers, exactly the same as X but shifted one index to the left.
    a_prev -- previous hidden state.
    parameters -- python dictionary containing:
                        Wax -- Weight matrix multiplying the input, numpy array of shape (n_a, n_x)
                        Waa -- Weight matrix multiplying the hidden state, numpy array of shape (n_a, n_a)
                        Wya -- Weight matrix relating the hidden-state to the output, numpy array of shape (n_y, n_a)
                        b --  Bias, numpy array of shape (n_a, 1)
                        by -- Bias relating the hidden-state to the output, numpy array of shape (n_y, 1)
    learning_rate -- learning rate for the model.

    Returns:
    loss -- value of the loss function (cross-entropy)
    gradients -- python dictionary containing:
                        dWax -- Gradients of input-to-hidden weights, of shape (n_a, n_x)
                        dWaa -- Gradients of hidden-to-hidden weights, of shape (n_a, n_a)
                        dWya -- Gradients of hidden-to-output weights, of shape (n_y, n_a)
                        db -- Gradients of bias vector, of shape (n_a, 1)
                        dby -- Gradients of output bias vector, of shape (n_y, 1)
    a[len(X)-1] -- the last hidden state, of shape (n_a, 1)
    """

    ### START CODE HERE ###

    # Forward propagate through time (≈1 line)
    loss, cache = rnn_forward(X, Y, a_prev, parameters)

    # Backpropagate through time (≈1 line)
    gradients, a = rnn_backward(X, Y, parameters, cache)

    # Clip your gradients between -5 (min) and 5 (max) (≈1 line)
    gradients = clip(gradients, maxValue=5)

    # Update parameters (≈1 line)
    parameters = update_parameters(parameters, gradients, learning_rate)

    ### END CODE HERE ###

    return loss, gradients, a[len(X) - 1]
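The clip and update_parameters helpers called here are defined elsewhere in the assignment. A minimal sketch of both, assuming the parameter and gradient names listed in the docstring above:

import numpy as np

def clip(gradients, maxValue):
    # Clip every gradient in place to the range [-maxValue, maxValue].
    for key in ('dWax', 'dWaa', 'dWya', 'db', 'dby'):
        np.clip(gradients[key], -maxValue, maxValue, out=gradients[key])
    return gradients

def update_parameters(parameters, gradients, lr):
    # Plain gradient descent on each RNN parameter.
    parameters['Wax'] -= lr * gradients['dWax']
    parameters['Waa'] -= lr * gradients['dWaa']
    parameters['Wya'] -= lr * gradients['dWya']
    parameters['b'] -= lr * gradients['db']
    parameters['by'] -= lr * gradients['dby']
    return parameters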
Example No. 7
def run_sced(day_idx, data_path, previous_dispatch, wind_scaling_factor, input_mode,
             sced_model, ha_instance, valid_id, gen_df, genth_df, bus_df, branch_df,
             ptdf_dict, wind_generator_names, margcost_df, blockmargcost_df,
             FlexibleRampFactor, blockmargcost_dict, blockoutputlimit_dict,
             load_s_df, slot_load_dict, hourly_load_df, hourly_load_dict,
             total_hourly_load_dict, bus_slot_load_dict, genforren_dict,
             start, slot, shift=0):
    """
    sced_instance = run_sced(sced_model, ha_instance, valid_id, gen_df, genth_df, bus_df, branch_df, ptdf_dict,\
             wind_generator_names, margcost_df, blockmargcost_df, FlexibleRampFactor,\
             blockmargcost_dict, blockoutputlimit_dict, load_s_df,slot_load_dict,\
             hourly_load_df, hourly_load_dict,total_hourly_load_dict,\
             bus_slot_load_dict, genforren_dict, start, slot, shift=0)
    """
    load_scaling_factor = 1
    bus_slot_load_dict, slot_load_dict, hourly_load_dict, total_hourly_load_dict,\
    hourly_load_df, genforren_dict, horizon, RampUpRequirement_dict, RampDnRequirement_dict = \
        update_parameters(data_path, day_idx, start + shift, FlexibleRampFactor,
                          load_scaling_factor, wind_scaling_factor, input_mode,
                          'real-time')

    #slot_load_sced = slot_load_dict[1+shift,slot]

    #bus_slot_load_sced = extract_dictionary_for_sced(bus_slot_load_dict, 1,2, 0, slot, start, shift)

    #genforren_sced = extract_dictionary_for_sced(genforren_dict, 1, 2, 0, slot, start, shift)

    sced_instance = sced_model.create_instance()
    #sced_instance = remove_param_constraints(sced_instance)
    sced_instance = reset_sced_parameters(previous_dispatch,
                                          ha_instance,
                                          sced_instance,
                                          bus_slot_load_dict,
                                          slot_load_dict,
                                          genforren_dict,
                                          start,
                                          slot,
                                          shift=0)
    #print(genforren_dict)
    sced_results = lp_solver.solve(sced_instance)
    #sced_instance = remove_param_constraints(sced_instance)
    for g in sced_instance.ThermalGenerators:
        previous_dispatch[g] = sced_instance.PowerGenerated[g].value
    #print(previous_dispatch)

    return sced_instance, sced_results, previous_dispatch
Example No. 8
def da_input_data(wind_penetration, day_idx, data_path, FlexibleRampFactor,
                  load_scaling_factor, start=1, input_mode='static', mode='day-ahead'):
    """
    This function extracts and outputs all relevant input data for day-ahead market simulation
    Syntaxe: 
        load_scaling_factor,start,valid_id, FlexibleRampFactor, ReserveFactor,\
        RegulatingReserveFactor, gen_df, genth_df, bus_df, branch_df, ptdf_dict,\
        wind_generator_names, margcost_df, blockmargcost_df, blockmargcost_dict,\
        blockoutputlimit_dict, genforren_dict, load_s_df, hourly_load_df,\
        hourly_load_dict, total_hourly_load_dict, slot_load_dict, RampUpRequirement_dict,\
        RampDnRequirement_dict, bus_slot_load_dict, horizon =\
        da_input_data(day_idx, data_path, data_path, day_idx,start,\
        FlexibleRampFactor, load_scaling_factor, start=1, mode='day-ahead', input_mode='static') 
    """


    wind_scaling_factor, load_scaling_factor,start,valid_id, FlexibleRampFactor, ReserveFactor, RegulatingReserveFactor,\
                         gen_df, genth_df, bus_df, branch_df, ptdf_dict, \
                         wind_generator_names, margcost_df, blockmargcost_df,\
                         blockmargcost_dict, blockoutputlimit_dict, genforren_dict,\
                         load_s_df, hourly_load_df, hourly_load_dict, \
                         total_hourly_load_dict, slot_load_dict,\
                         RampUpRequirement_dict, RampDnRequirement_dict = \
                         get_model_input_data(start,day_idx, data_path, wind_penetration)

    bus_slot_load_dict, slot_load_dict, hourly_load_dict, total_hourly_load_dict,\
    hourly_load_df, genforren_dict, horizon, RampUpRequirement_dict, RampDnRequirement_dict = \
        update_parameters(data_path, day_idx, start, FlexibleRampFactor,
                          load_scaling_factor, wind_scaling_factor, input_mode,
                          'day-ahead')

    return wind_scaling_factor, load_scaling_factor,start,valid_id, FlexibleRampFactor, ReserveFactor, RegulatingReserveFactor,\
                         gen_df, genth_df, bus_df, branch_df, ptdf_dict, \
                         wind_generator_names, margcost_df, blockmargcost_df,\
                         blockmargcost_dict, blockoutputlimit_dict, genforren_dict,\
                         load_s_df, hourly_load_df, hourly_load_dict,\
                         total_hourly_load_dict, slot_load_dict,\
                         RampUpRequirement_dict, RampDnRequirement_dict, bus_slot_load_dict, horizon
Example No. 9
def train(args):
    dataset = omniglot(args.folder,
                       shots=args.num_shots,
                       ways=args.num_ways,
                       shuffle=True,
                       test_shots=15,
                       meta_train=True,
                       download=args.download)
    dataloader = BatchMetaDataLoader(dataset,
                                     batch_size=args.batch_size,
                                     shuffle=True,
                                     num_workers=args.num_workers)

    model = ConvolutionalNeuralNetwork(1,
                                       args.num_ways,
                                       hidden_size=args.hidden_size)
    model.to(device=args.device)
    model.train()
    meta_optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # Training loop
    with tqdm(dataloader, total=args.num_batches) as pbar:
        for batch_idx, batch in enumerate(pbar):
            model.zero_grad()

            train_inputs, train_targets = batch['train']
            train_inputs = train_inputs.to(device=args.device)
            train_targets = train_targets.to(device=args.device)

            test_inputs, test_targets = batch['test']
            test_inputs = test_inputs.to(device=args.device)
            test_targets = test_targets.to(device=args.device)

            outer_loss = torch.tensor(0., device=args.device)
            accuracy = torch.tensor(0., device=args.device)
            for task_idx, (train_input, train_target, test_input,
                           test_target) in enumerate(
                               zip(train_inputs, train_targets, test_inputs,
                                   test_targets)):
                train_logit = model(train_input)
                inner_loss = F.cross_entropy(train_logit, train_target)

                model.zero_grad()
                params = update_parameters(model,
                                           inner_loss,
                                           step_size=args.step_size,
                                           first_order=args.first_order)

                test_logit = model(test_input, params=params)
                outer_loss += F.cross_entropy(test_logit, test_target)

                with torch.no_grad():
                    accuracy += get_accuracy(test_logit, test_target)

            outer_loss.div_(args.batch_size)
            accuracy.div_(args.batch_size)

            outer_loss.backward()
            meta_optimizer.step()

            pbar.set_postfix(accuracy='{0:.4f}'.format(accuracy.item()))
            if batch_idx >= args.num_batches:
                break

    # Save model
    if args.output_folder is not None:
        filename = os.path.join(
            args.output_folder, 'maml_omniglot_'
            '{0}shot_{1}way.pt'.format(args.num_shots, args.num_ways))
        with open(filename, 'wb') as f:
            state_dict = model.state_dict()
            torch.save(state_dict, f)
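The inner-loop update_parameters(model, inner_loss, step_size, first_order) comes from the surrounding MAML code (torchmeta-style) and is not reproduced on this page. A minimal sketch of what such a helper does, assuming model is a torchmeta MetaModule exposing meta_named_parameters():

from collections import OrderedDict
import torch

def update_parameters(model, loss, step_size=0.5, first_order=False):
    # Hypothetical sketch of the MAML inner-loop step: one SGD update on a
    # copy of the meta-parameters. create_graph=True lets the outer loss
    # backpropagate through this update unless first_order is set.
    params = OrderedDict(model.meta_named_parameters())
    grads = torch.autograd.grad(loss, params.values(),
                                create_graph=not first_order)
    return OrderedDict((name, param - step_size * grad)
                       for (name, param), grad in zip(params.items(), grads))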
Example No. 10
def model(X, Y, learning_rate=0.3, num_iterations=30000, lambd=0, keep_prob=1):
    """
    Three-layer network with activations: LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID.
    First hidden layer: 20 units
    Second hidden layer: 3 units
    Output layer: 1 unit
    """

    grads = {}
    costs = []
    m = X.shape[1]
    layers_dims = [X.shape[0], 20, 3, 1]

    # Initialize the network parameters
    parameters = initialize_parameters(layers_dims)

    # Gradient descent loop
    for i in range(0, num_iterations):

        # Forward propagation:
        # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.
        # If keep_prob == 1, run the standard forward pass;
        # if keep_prob < 1, apply dropout during the forward pass.
        if keep_prob == 1:
            a3, cache = forward_propagation(X, parameters)
        elif keep_prob < 1:
            a3, cache = forward_propagation_with_dropout(X, parameters, keep_prob)

        # Compute the cost.
        # If lambd is non-zero, add L2 regularization to the cost.
        if lambd == 0:
            cost = compute_cost(a3, Y)
        else:
            cost = compute_cost_with_regularization(a3, Y, parameters, lambd)

        # Use at most one of the two: L2 regularization or dropout.
        assert (lambd == 0 or keep_prob == 1)

        if lambd == 0 and keep_prob == 1:
            grads = backward_propagation(X, Y, cache)
        elif lambd != 0:
            grads = backward_propagation_with_regularization(X, Y, cache, lambd)
        elif keep_prob < 1:
            grads = backward_propagation_with_dropout(X, Y, cache, keep_prob)

        # Update the parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print and record the cost every 10000 iterations
        if i % 10000 == 0:
            print("Iteration {}: cost = {}".format(i, cost))
            costs.append(cost)

    # Plot the cost curve
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('iterations')
    plt.title("Cost curve, learning rate = " + str(learning_rate))
    plt.show()

    return parameters
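The initialize_parameters / update_parameters helpers are not shown; assuming the usual W1, b1, ..., WL, bL dictionary convention for this kind of network, a minimal sketch of the update step:

def update_parameters(parameters, grads, learning_rate):
    # Hypothetical sketch: one gradient-descent step for an L-layer network
    # whose parameters dictionary holds W1, b1, ..., WL, bL.
    L = len(parameters) // 2
    for l in range(1, L + 1):
        parameters['W' + str(l)] -= learning_rate * grads['dW' + str(l)]
        parameters['b' + str(l)] -= learning_rate * grads['db' + str(l)]
    return parameters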
Example No. 11
def train(args):
    dataset = miniimagenet(args.folder,
                           shots=args.num_shots,
                           ways=args.num_ways,
                           shuffle=True,
                           test_shots=15,
                           meta_train=True,
                           download=args.download)
    dataloader = BatchMetaDataLoader(dataset,
                                     batch_size=args.batch_size,
                                     shuffle=True,
                                     num_workers=args.num_workers)

    model = ConvolutionalNeuralNetwork(3,
                                       84,
                                       args.num_ways,
                                       hidden_size=args.hidden_size)
    model.to(device=args.device)
    model.train()
    meta_optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # Training loop
    with tqdm(dataloader, total=args.num_batches) as pbar:
        for batch_idx, batch in enumerate(pbar):
            model.zero_grad()

            train_inputs, train_targets = batch['train']
            train_inputs = train_inputs.to(device=args.device)
            train_targets = train_targets.to(device=args.device)

            test_inputs, test_targets = batch['test']
            test_inputs = test_inputs.to(device=args.device)
            test_targets = test_targets.to(device=args.device)

            outer_loss = torch.tensor(0., device=args.device)
            accuracy = torch.tensor(0., device=args.device)
            for task_idx, (train_input, train_target, test_input,
                           test_target) in enumerate(
                               zip(train_inputs, train_targets, test_inputs,
                                   test_targets)):

                train_logit = model(train_input)

                inner_loss = F.cross_entropy(train_logit, train_target)
                # writer.add_scalar('Loss/inner_loss', np.random.random(), task_idx)
                grid = torchvision.utils.make_grid(train_input)
                writer.add_image('images', grid, 0)
                writer.add_graph(model, train_input)

                model.zero_grad()
                params = update_parameters(model,
                                           inner_loss,
                                           step_size=args.step_size,
                                           first_order=args.first_order)
                test_logit = model(test_input, params=params)
                outer_loss += F.cross_entropy(test_logit, test_target)
                # writer.add_scalar('Loss/outer_loss', np.random.random(), n_iter)
                for name, grads in model.meta_named_parameters():
                    writer.add_histogram(name, grads, batch_idx)
                with torch.no_grad():
                    accuracy += get_accuracy(test_logit, test_target)
                    writer.add_histogram('meta parameters', grads, batch_idx)

            outer_loss.div_(args.batch_size)
            accuracy.div_(args.batch_size)

            outer_loss.backward()
            meta_optimizer.step()
            pbar.set_postfix(accuracy='{0:.4f}'.format(accuracy.item()))
            writer.add_scalar('Accuracy/test', accuracy.item(), batch_idx)
            if batch_idx >= args.num_batches:
                break

    writer.close()

    # Save model
    if args.output_folder is not None:
        filename = os.path.join(
            args.output_folder, 'maml_miniimagenet_'
            '{0}shot_{1}way.pt'.format(args.num_shots, args.num_ways))
        with open(filename, 'wb') as f:
            state_dict = model.state_dict()
            torch.save(state_dict, f)
Example No. 12
def run_hour_ahead_sequence(day_idx, data_path, result_path, previous_dispatch, input_mode,
                            start, valid_id, FlexibleRampFactor, ReserveFactor,
                            RegulatingReserveFactor, load_scaling_factor, wind_scaling_factor,
                            gen_df, genth_df, bus_df, branch_df, ptdf_dict,
                            wind_generator_names, margcost_df, blockmargcost_df,
                            blockmargcost_dict, blockoutputlimit_dict, genforren_dict,
                            load_s_df, hourly_load_df, hourly_load_dict,
                            total_hourly_load_dict, slot_load_dict,
                            RampUpRequirement_dict, RampDnRequirement_dict, wrp_status):

    #start=1
    #slot =1
    print('Updating parameters for hour-ahead model ...')
    update_time_init = time.time()
    bus_slot_load_dict, slot_load_dict, hourly_load_dict, total_hourly_load_dict,\
    hourly_load_df, genforren_dict, horizon, RampUpRequirement_dict, RampDnRequirement_dict = \
        update_parameters(data_path, day_idx, start, FlexibleRampFactor,
                          load_scaling_factor, wind_scaling_factor, input_mode,
                          mode='hour-ahead')
    print('Update done! Time: ', time.time() - update_time_init)

    print('Building hour-ahead model ...')
    ha_model_time_init = time.time()
    model2 = build_scuc_model(start,valid_id, FlexibleRampFactor, ReserveFactor, RegulatingReserveFactor,\
                             gen_df, genth_df, bus_df, branch_df, ptdf_dict, wind_generator_names,\
                             margcost_df, blockmargcost_df, blockmargcost_dict, blockoutputlimit_dict,\
                              genforren_dict, load_s_df, hourly_load_df, hourly_load_dict, total_hourly_load_dict,\
                              slot_load_dict, RampUpRequirement_dict, RampDnRequirement_dict, wrp_status['ha'])
    print('Done building hour-ahead model! Time:',
          time.time() - ha_model_time_init)

    print('Initiating SCED model ...')
    sced_model_time_init = time.time()
    sced_model = initiate_sced_model(day_idx,data_path,start,valid_id, gen_df, genth_df, bus_df,\
                                      branch_df, ptdf_dict, wind_generator_names,\
                                      margcost_df, blockmargcost_df, FlexibleRampFactor,load_scaling_factor, wind_scaling_factor,\
                                      blockmargcost_dict, blockoutputlimit_dict,\
                                       load_s_df, hourly_load_df,hourly_load_dict,input_mode)
    print('Done initiating SCED model! Time: ',
          time.time() - sced_model_time_init)

    sced_instance = sced_model.create_instance()
    obj_dict = dict()
    ha_obj = dict()
    rt_obj = dict()
    Hourly_FixedCost = dict()
    Hourly_ProductionCost = dict()
    Hourly_RampingCost = dict()
    Demand = []
    SlotDemand = []
    WindPowerGenerated = []
    SlotWindPowerGenerated = []
    WindPowerForecasted = []
    TotalFlexRampRequired = []
    TotalFlexRampDnRequired = []
    WindTotalFlexRamp = []
    WindTotalFlexRampDn = []
    TotalFlexRampProvided = []
    WindTotalCurtailments = []
    WindCurtailments = []
    WindFlexRamp = []
    WindFlexRampDn = []
    rt_demand = dict()
    rt_load_curtailment = dict()
    FuelType = get_fuel_type_list(list(gen_df.index))
    Generation_by_fueltype = pd.DataFrame(0,
                                          index=range(1, 25),
                                          columns=FuelType)
    ha_previous_dispatch = dict()
    shadow_prices = dict()
    congestion_prices = dict()
    LMPs = dict()
    Dispatch = dict()

    for start in range(1, 23):
        print('********************************* Hour ', start,
              '**********************************')
        print('Updating input data for hour-ahead instance ...')
        ha_update_new_time_init = time.time()
        bus_slot_load_dict, slot_load_dict, hourly_load_dict, total_hourly_load_dict,\
        hourly_load_df, genforren_dict, horizon, RampUpRequirement_dict, RampDnRequirement_dict = \
            update_parameters(data_path, day_idx, start, FlexibleRampFactor,
                              load_scaling_factor, wind_scaling_factor, input_mode,
                              mode='hour-ahead')
        print('Done updating input data for hour-ahead instance! Time: ',
              time.time() - ha_update_new_time_init)

        print('Creating hour-ahead instance ...')
        ha_instance_time_init = time.time()
        ha_instance = model2.create_instance()
        print('Done creating hour-ahead instance! Time: ',
              time.time() - ha_instance_time_init)

        print('Resetting hour-ahead instance ...')
        ha_instance_reset_time_init = time.time()
        ha_instance = reset_instance(start, ha_previous_dispatch, ha_instance,\
                                     slot_load_dict, hourly_load_dict, total_hourly_load_dict,\
                                     hourly_load_df, genforren_dict,RampUpRequirement_dict,\
                                     RampDnRequirement_dict)
        print('Done with hour-ahead instance reset! Time: ',
              time.time() - ha_instance_reset_time_init)

        print('Solving hour-ahead instance ...')
        t0 = time.time()
        results = mip_solver.solve(ha_instance)
        t1 = time.time()
        total_time = t1 - t0
        print('Done solving hour-ahead instance! Time:', total_time)

        print('*** CONDITIONING THE HOUR-AHEAD RESULTS ***')
        misc_time_init = time.time()
        results.write(num=1)
        TotalRampCost = sum(ha_instance.RampingCost[t].value
                            for t in ha_instance.TimePeriods)
        print('Objective: ', ha_instance.TotalProductionCost.value)
        ha_obj[start]={'fixed_cost': sum(ha_instance.StartupCost[g, 1].value + ha_instance.ShutdownCost[g, 1].value\
                                        for g in ha_instance.ThermalGenerators),\
              'ramp_cost': ha_instance.RampingCost[1].value,\
              'prod_cost':sum(ha_instance.ProductionCost[g, 1].value\
                             for g in ha_instance.ThermalGenerators)}
        #print(value(ha_instance.TimeStart))
        print('')
        #TotWindGen = [sum(ha_instance.PowerGenerated[g,t].value\
        #                  for g in ha_instance.WindGenerators) for t in ha_instance.TimePeriods]
        #print('Total wind generation over this horizon: ',TotWindGen)
        #print('Total thermal generation: ', [sum(ha_instance.PowerGenerated[g,t].value\
        #                                         for g in ha_instance.ThermalGenerators) for t in ha_instance.TimePeriods])
        #print('')
        #print('Thermal generation costs: ', [ha_instance.ProductionCost[g,t].value\
        #                                     for g in ha_instance.ThermalGenerators for t in ha_instance.TimePeriods])
        #print('')
        #print('Ramping cost: ', [ha_instance.RampingCost[t].value for t in ha_instance.TimePeriods])
        print('Total ramp cost: ', TotalRampCost)
        obj_dict[
            start] = ha_instance.TotalFixedCost.value + ha_instance.TotalProductionCost.value + TotalRampCost
        Hourly_ProductionCost[start] = sum(ha_instance.ProductionCost[g, 1].value\
                             for g in ha_instance.ThermalGenerators)
        Hourly_RampingCost[start] = ha_instance.RampingCost[1].value
        Hourly_FixedCost[start] = sum(ha_instance.StartupCost[g, 1].value + ha_instance.ShutdownCost[g, 1].value\
                                        for g in ha_instance.ThermalGenerators)

        for g in ha_instance.ThermalGenerators:
            ha_previous_dispatch[g] = ha_instance.PowerGenerated[g, 1].value
        #print('PREV: ',ha_previous_dispatch)
        Demand.append(value(ha_instance.Demand[1]))
        SlotDemand.append([
            value(ha_instance.SlotDemand[1, s]) for s in ha_instance.TimeSlots
        ])

        WindPowerGenerated.append(
            sum(ha_instance.PowerGenerated[g, 1].value
                for g in ha_instance.WindGenerators))
        SlotWindPowerGenerated.append([sum(ha_instance.PowerGenerated[g,1].value\
                                           for g in ha_instance.WindGenerators) for s in ha_instance.TimeSlots])

        WindPowerForecasted.append([sum(ha_instance.PowerForecast[g,1,s].value\
                                        for g in ha_instance.WindGenerators) for s in ha_instance.TimeSlots])

        WindTotalFlexRamp.append([sum(ha_instance.FlexibleRampUpAvailable[g,1,s].value - ha_instance.WindRpCurtailment[g,1,s].value\
                                      for g in ha_instance.WindGenerators) for s in ha_instance.TimeSlots])

        WindTotalFlexRampDn.append([sum(ha_instance.FlexibleRampDnAvailable[g,1,s].value\
                                        for g in ha_instance.WindGenerators) for s in ha_instance.TimeSlots])


        WindFlexRamp.append([[ha_instance.FlexibleRampUpAvailable[g,1,s].value - ha_instance.WindRpCurtailment[g,1,s].value\
                              for s in ha_instance.TimeSlots] for g in ha_instance.WindGenerators ])

        WindFlexRampDn.append([[ha_instance.FlexibleRampDnAvailable[g,1,s].value\
                                for s in ha_instance.TimeSlots] for g in ha_instance.WindGenerators ])

        WindTotalCurtailments.append([sum(ha_instance.WindRpCurtailment[g,1,s].value\
                                          for g in ha_instance.WindGenerators) for s in ha_instance.TimeSlots])

        WindCurtailments.append([[ha_instance.WindRpCurtailment[g,1,s].value\
                                  for s in ha_instance.TimeSlots] for g in ha_instance.WindGenerators ])


        TotalFlexRampProvided.append([sum(ha_instance.FlexibleRampUpAvailable[g,1,s].value\
                                          for g in ha_instance.ThermalGenerators|ha_instance.WindGenerators) for s in ha_instance.TimeSlots])

        TotalFlexRampRequired.append([
            ha_instance.FlexibleRampUpRequirement[1, s].value
            for s in ha_instance.TimeSlots
        ])
        TotalFlexRampDnRequired.append([
            ha_instance.FlexibleRampDnRequirement[1, s].value
            for s in ha_instance.TimeSlots
        ])
        #print('actual bid for wind0:', ha_instance.PowerGenerated['wind0',1].value)
        for i in FuelType:
            iset = [x for x in ha_instance.AllGenerators if x.startswith(i)]
            t = start
            Generation_by_fueltype.loc[t, i] = sum(
                ha_instance.PowerGenerated[g, 1].value for g in iset)
        """New!!!"""
        print('*** End of Conditioning! Time: ',
              time.time() - misc_time_init, ' ***')

        print(
            '*** Starting Real-time Security Constrained Economic Dispatch ***'
        )
        overall_sced_time_init = time.time()
        for slot in range(1, 7):
            print('Running SCED for slot ', slot, ' ...')
            sced_slot_time_init = time.time()
            sced_instance,sced_results,previous_dispatch =\
            run_sced(day_idx,data_path,previous_dispatch,wind_scaling_factor, input_mode, sced_model,ha_instance, valid_id, gen_df, genth_df, bus_df,\
            branch_df, ptdf_dict, wind_generator_names, margcost_df, blockmargcost_df,\
            FlexibleRampFactor,blockmargcost_dict, blockoutputlimit_dict,\
            load_s_df,slot_load_dict, hourly_load_df, hourly_load_dict,\
            total_hourly_load_dict, bus_slot_load_dict, genforren_dict, start, slot, shift=0)

            LMPs, shadow_prices, congestion_prices =\
            compute_LMPs(sced_instance, ptdf_dict, shadow_prices, congestion_prices,\
                         LMPs, start,slot,shift=0)

            rt_demand[start,slot] = [start, slot, value(sced_instance.SlotDemand), sum(value(sced_instance.PowerForecast[g])\
                     for g in sced_instance.WindGenerators)]
            rt_load_curtailment[start, slot] = [start, slot, sum(sced_instance.BusCurtailment[b].value\
                                for b in sced_instance.LoadBuses)]

            rt_obj[start,slot]={'curtailment_cost': sced_instance.TotalCurtailmentCost.value,\
                  'prod_cost':sced_instance.TotalProductionCost.value}

            for g in sced_instance.AllGenerators:
                Dispatch[g, start, slot] = [
                    g, start, slot, sced_instance.PowerGenerated[g].value
                ]
            #print('passed bid',value(sced_instance.EnergyBid['wind0']))
            print('Done with SCED and LMP computation for slot ', slot,
                  '! Time: ',
                  time.time() - sced_slot_time_init)

        # Write out LMPs for this hour and clear the LMP dictionary
        write_out_lmps_hourly(wrp_status, day_idx, result_path, LMPs, start)
        LMPs = dict()

        if start == 22:
            for i in range(1, 3):
                print('********************************* Hour ', start + i,
                      ' **********************************')
                print('*** CONDITIONING THE HOUR-AHEAD RESULTS ***')
                misc_time_init = time.time()


                ha_obj[start + i] = {
                    'fixed_cost': sum(ha_instance.StartupCost[g, 1 + i].value
                                      + ha_instance.ShutdownCost[g, 1 + i].value
                                      for g in ha_instance.ThermalGenerators),
                    'ramp_cost': ha_instance.RampingCost[1 + i].value,
                    'prod_cost': sum(ha_instance.ProductionCost[g, 1 + i].value
                                     for g in ha_instance.ThermalGenerators)}

                Hourly_ProductionCost[start+i] = sum(ha_instance.ProductionCost[g, 1+i].value\
                                     for g in ha_instance.ThermalGenerators)
                Hourly_RampingCost[start +
                                   i] = ha_instance.RampingCost[1 + i].value
                Hourly_FixedCost[start + i] = sum(
                    ha_instance.StartupCost[g, 1 + i].value
                    + ha_instance.ShutdownCost[g, 1 + i].value
                    for g in ha_instance.ThermalGenerators)

                Demand.append(value(ha_instance.Demand[1 + i]))
                SlotDemand.append([value(ha_instance.SlotDemand[1+i,s])\
                                   for s in ha_instance.TimeSlots] )
                WindPowerGenerated.append(sum(ha_instance.PowerGenerated[g,1+i].value\
                                              for g in ha_instance.WindGenerators))
                SlotWindPowerGenerated.append([sum(ha_instance.PowerGenerated[g, 1 + i].value
                                                   for g in ha_instance.WindGenerators)
                                               for s in ha_instance.TimeSlots])

                WindPowerForecasted.append([sum(ha_instance.PowerForecast[g,1+i,s].value\
                                                for g in ha_instance.WindGenerators) for s in ha_instance.TimeSlots])
                WindTotalFlexRamp.append([sum(ha_instance.FlexibleRampUpAvailable[g,1+i,s].value - ha_instance.WindRpCurtailment[g,1+i,s].value\
                                              for g in ha_instance.WindGenerators) for s in ha_instance.TimeSlots])
                WindTotalFlexRampDn.append([sum(ha_instance.FlexibleRampDnAvailable[g,1+i,s].value\
                                                for g in ha_instance.WindGenerators) for s in ha_instance.TimeSlots])
                WindTotalCurtailments.append([sum(ha_instance.WindRpCurtailment[g,1+i,s].value\
                                                  for g in ha_instance.WindGenerators) for s in ha_instance.TimeSlots])

                WindCurtailments.append([[ha_instance.WindRpCurtailment[g,1+i,s].value\
                                          for s in ha_instance.TimeSlots] for g in ha_instance.WindGenerators ])
                WindFlexRamp.append([[ha_instance.FlexibleRampUpAvailable[g,1+i,s].value - ha_instance.WindRpCurtailment[g,1+i,s].value\
                                      for s in ha_instance.TimeSlots] for g in ha_instance.WindGenerators ])
                WindFlexRampDn.append([[ha_instance.FlexibleRampDnAvailable[g,1+i,s].value\
                                        for s in ha_instance.TimeSlots] for g in ha_instance.WindGenerators ])

                TotalFlexRampProvided.append([sum(ha_instance.FlexibleRampUpAvailable[g, 1 + i, s].value
                                                  for g in ha_instance.ThermalGenerators | ha_instance.WindGenerators)
                                              for s in ha_instance.TimeSlots])
                TotalFlexRampRequired.append([ha_instance.FlexibleRampUpRequirement[1+i,s].value\
                                              for s in ha_instance.TimeSlots])
                TotalFlexRampDnRequired.append([ha_instance.FlexibleRampDnRequirement[1+i,s].value\
                                                for s in ha_instance.TimeSlots])
                for j in FuelType:
                    iset = [
                        x for x in ha_instance.AllGenerators if x.startswith(j)
                    ]
                    Generation_by_fueltype.loc[start + i, j] = sum(
                        ha_instance.PowerGenerated[g, 1 + i].value
                        for g in iset)

                for g in ha_instance.ThermalGenerators:
                    ha_previous_dispatch[g] = ha_instance.PowerGenerated[
                        g, 1 + i].value
                print('*** End of Conditioning! Time: ',
                      time.time() - misc_time_init, ' ***')
                """New!!!"""
                for slot in range(1, 7):
                    print('Running SCED for slot ', slot, ' ...')
                    sced_slot_time_init = time.time()
                    sced_instance,sced_results,previous_dispatch =\
                    run_sced(day_idx,data_path,previous_dispatch,wind_scaling_factor, input_mode,sced_model,ha_instance, valid_id, gen_df, genth_df, bus_df, branch_df, ptdf_dict,\
                    wind_generator_names, margcost_df, blockmargcost_df, FlexibleRampFactor,\
                    blockmargcost_dict, blockoutputlimit_dict, load_s_df,slot_load_dict,\
                    hourly_load_df, hourly_load_dict,total_hourly_load_dict,\
                    bus_slot_load_dict, genforren_dict, start, slot, i)

                    LMPs, shadow_prices, congestion_prices =\
                    compute_LMPs(sced_instance, ptdf_dict, shadow_prices,\
                                 congestion_prices, LMPs, start,slot,i)

                    rt_demand[start+i,slot] = [start+i, slot, value(sced_instance.SlotDemand),\
                             sum(value(sced_instance.PowerForecast[g])\
                                 for g in sced_instance.WindGenerators)]

                    rt_load_curtailment[start+i, slot] = [start+i, slot, sum(sced_instance.BusCurtailment[b].value\
                                        for b in sced_instance.LoadBuses)]

                    rt_obj[start+i,slot]={'curtailment_cost': sced_instance.TotalCurtailmentCost.value,\
                          'prod_cost':sced_instance.TotalProductionCost.value}

                    for g in sced_instance.AllGenerators:
                        Dispatch[g, start + i, slot] = [
                            g, start + i, slot,
                            sced_instance.PowerGenerated[g].value
                        ]
                    print('Done with SCED and LMP computation for slot ', slot,
                          '! Time: ',
                          time.time() - sced_slot_time_init)

                write_out_lmps_hourly(wrp_status, day_idx, result_path, LMPs,
                                      start, i)
                LMPs = dict()

        print('SCED done for the hour! Time: ',
              time.time() - overall_sced_time_init)

    print('Additional conditioning ...')
    additional_misc_time_init = time.time()
    WindFlexRamp_arr = np.array(WindFlexRamp).swapaxes(1, 2)
    WindFlexRampDn_arr = np.array(WindFlexRampDn).swapaxes(1, 2)

    #WindFlexRamp_arr = WindFlexRamp.swapaxes(1,2)
    nt, ns, nw = np.shape(WindFlexRamp_arr)
    WindFlexRamp = np.array(
        [WindFlexRamp_arr[t, s, :] for t in range(nt) for s in range(ns)])
    WindFlexRampDn = np.array(
        [WindFlexRampDn_arr[t, s, :] for t in range(nt) for s in range(ns)])

    WindCurtailments_arr = np.array(WindCurtailments).swapaxes(1, 2)
    nt, ns, nw = np.shape(WindCurtailments_arr)
    WindCurtailments = np.array(
        [WindCurtailments_arr[t, s, :] for t in range(nt) for s in range(ns)])
    print('End of conditioning! Time: ',
          time.time() - additional_misc_time_init)

    return sced_instance, ha_instance, obj_dict, ha_obj, rt_obj, Demand, SlotDemand, WindPowerForecasted,\
           WindPowerGenerated,SlotWindPowerGenerated,WindTotalFlexRamp,WindTotalFlexRampDn,\
           TotalFlexRampRequired, TotalFlexRampDnRequired,TotalFlexRampProvided,\
           Generation_by_fueltype, WindFlexRamp, WindFlexRampDn,Hourly_ProductionCost,\
           Hourly_RampingCost, Hourly_FixedCost, WindTotalCurtailments, WindCurtailments,\
           LMPs, shadow_prices, congestion_prices, Dispatch, rt_demand, rt_load_curtailment
Example No. 13
def train(args, n_actors, batch_queue, prios_queue, param_queue):
    """
    thread to fill parameter queue
    """
    def _fill_param():
        while True:
            model_dict = {}
            state_dict = model.state_dict()
            for k, v in state_dict.items():
                model_dict[k] = v.cpu().numpy()
            param_queue.put(model_dict)

    env = wrapper.make_atari(args.env)
    env = wrapper.wrap_atari_dqn(env, args)
    utils.set_global_seeds(args.seed, use_torch=True)

    model = DuelingDQN(env).to(args.device)
    tgt_model = DuelingDQN(env).to(args.device)
    tgt_model.load_state_dict(model.state_dict())

    writer = SummaryWriter(comment="-{}-learner".format(args.env))
    # optimizer = torch.optim.Adam(model.parameters(), args.lr)
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    args.lr,
                                    alpha=0.95,
                                    eps=1.5e-7,
                                    centered=True)
    model_dict = {}
    state_dict = model.state_dict()
    for k, v in state_dict.items():
        model_dict[k] = v.cpu().numpy()
    param_queue.put(model_dict)
    threading.Thread(target=_fill_param).start()
    learn_idx = 0
    ts = time.time()
    while True:
        #if batch_queue.empty():
        #    print("batch queue size:{}".format(batch_queue.qsize()))
        *batch, idxes = batch_queue.get()
        loss, prios = utils.compute_loss(model, tgt_model, batch, args.n_steps,
                                         args.gamma)
        grad_norm = utils.update_parameters(loss, model, optimizer,
                                            args.max_norm)
        prios_queue.put((idxes, prios))
        batch, idxes, prios = None, None, None
        learn_idx += 1

        if learn_idx % args.tensorboard_update_interval == 0:
            writer.add_scalar("learner/loss", loss, learn_idx)
            writer.add_scalar("learner/grad_norm", grad_norm, learn_idx)

        if learn_idx % args.target_update_interval == 0:
            print("Updating Target Network..")
            tgt_model.load_state_dict(model.state_dict())
        if learn_idx % args.save_interval == 0:
            print("Saving Model..")
            torch.save(model.state_dict(), "model.pth")
        if learn_idx % args.publish_param_interval == 0:
            param_queue.get()
        if learn_idx % args.bps_interval == 0:
            bps = args.bps_interval / (time.time() - ts)
            print("Step: {:8} / BPS: {:.2f}".format(learn_idx, bps))
            writer.add_scalar("learner/BPS", bps, learn_idx)
            ts = time.time()
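For context, an actor on the other side of param_queue would turn the published NumPy arrays back into tensors before loading them into its local network. A minimal, hypothetical sketch of that consumer side (the actor code itself is not part of this example):

import torch

def pull_params(model, param_queue):
    # Hypothetical consumer: rebuild a state dict from the NumPy arrays
    # published by the learner and load it into the local model.
    model_dict = param_queue.get()
    state_dict = {k: torch.from_numpy(v) for k, v in model_dict.items()}
    model.load_state_dict(state_dict)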