def goal_seeking(goals_to_reach):
    """Steer the simulated robot until it has reached ``goals_to_reach`` goals.

    Every iteration scores each candidate action in ``[-5, 5]`` with the
    saved ``Action_Conditioned_FF`` model and keeps the actions whose score
    is below 0.25 (presumably a collision likelihood -- confirm against the
    model definition). The kept action closest to the one proposed by the
    ``Seek`` steering behavior is then applied for up to ``action_repeat``
    simulation steps. If no action is kept, the robot is turned around and
    the iteration starts over.

    Args:
        goals_to_reach: Number of goals to reach before returning.

    Returns:
        None.
    """
    sim_env = sim.SimulationEnvironment()
    action_repeat = 100
    # Alternative behavior: Wander(action_repeat)
    steering_behavior = Seek(sim_env.goal_body.position)

    # Load the trained model. map_location keeps this working when the
    # checkpoint was written from a CUDA device but no GPU is present here.
    model = Action_Conditioned_FF()
    model.load_state_dict(
        torch.load('saved/saved_model.pkl', map_location=torch.device('cpu')))
    model.eval()

    # Load normalization parameters.
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    with open("saved/scaler.pkl", "rb") as scaler_file:
        scaler = pickle.load(scaler_file)

    # Candidate actions never change, so build them once.
    action_space = np.arange(-5, 6)

    goals_reached = 0
    while goals_reached < goals_to_reach:

        seek_vector = sim_env.goal_body.position - sim_env.robot.body.position
        if la.norm(seek_vector) < 50:
            # Close enough to count as reached: relocate the goal and retarget.
            sim_env.move_goal()
            steering_behavior.update_goal(sim_env.goal_body.position)
            goals_reached += 1
            continue

        actions_available = []
        # Pure inference: no autograd graph is needed for the predictions.
        with torch.no_grad():
            for action in action_space:
                network_param = get_network_param(sim_env, action, scaler)
                prediction = model(network_param)
                print(prediction)
                if prediction.item() < 0.25:
                    actions_available.append(action)

        if not actions_available:
            sim_env.turn_robot_around()
            continue

        desired_action, _ = steering_behavior.get_action(
            sim_env.robot.body.position, sim_env.robot.body.angle)
        # min() returns the first minimal element, so ties resolve to the
        # earliest (lowest) available action.
        closest_action = min(
            actions_available,
            key=lambda candidate: abs(desired_action - candidate))

        steering_force = steering_behavior.get_steering_force(
            closest_action, sim_env.robot.body.angle)
        for _ in range(action_repeat):
            _, collision, _ = sim_env.step(steering_force)
            if collision:
                # Stop repeating this action as soon as the step reports a
                # collision.
                steering_behavior.reset_action()
                break
# Example #2
# 0
def train_model(no_epochs):
    """Train ``Action_Conditioned_FF`` with SGD, checkpointing every epoch.

    The model is evaluated once on the test loader before training, then for
    each epoch it is trained on the train loader with ``BCEWithLogitsLoss``,
    evaluated on the test loader, and its ``state_dict`` is written to
    ``saved/weights/weights_<test_loss>.pkl``. Because the file name carries
    the test loss rounded to three decimals, epochs with the same rounded
    loss overwrite each other.

    Args:
        no_epochs: Number of passes over the training loader.
    """
    import os

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    batch_size = 256
    data_loaders = Data_Loaders(batch_size)
    model = Action_Conditioned_FF()
    model.to(device)
    loss_function = nn.BCEWithLogitsLoss()
    losses = []

    # Baseline test loss before any training, measured the same way as the
    # per-epoch evaluation below: eval mode, no autograd tracking.
    model.eval()
    with torch.no_grad():
        min_loss = model.evaluate(model, data_loaders.test_loader,
                                  loss_function)
    losses.append(min_loss)

    learning_rate = 0.1
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # torch.save does not create missing parent directories.
    os.makedirs("saved/weights", exist_ok=True)

    for epoch_i in range(no_epochs):
        model.train()
        epoch_loss = 0
        # Each batch is a dict with sample['input'] and sample['label'].
        for sample in data_loaders.train_loader:
            inpt = sample['input'].to(device)
            labels = sample['label'].to(device)
            # Add a dimension at index 1 before the labels go to the loss.
            labels = labels.unsqueeze(1)
            optimizer.zero_grad()
            outputs = model(inpt)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f'| Epoch: {epoch_i+1}', end=" | ")
        print(f'Loss: {epoch_loss/len(data_loaders.train_loader):.4f} |')

        model.eval()
        with torch.no_grad():
            test_loss = model.evaluate(model, data_loaders.test_loader,
                                       loss_function)
        print(f'------- Test Loss: {test_loss:.4f} -------')
        losses.append(test_loss)

        checkpoint_path = f"saved/weights/weights_{test_loss:.3f}.pkl"
        # Legacy (non-zipfile) serialization is kept so existing consumers
        # of these checkpoints keep working.
        torch.save(model.state_dict(),
                   checkpoint_path,
                   _use_new_zipfile_serialization=False)