def create_actor_network(self, state_size, action_dim):
     log('[DDPG] Building the actor model')
     S = Input(shape=[state_size])
     # Initialize weights and biases uniformly in [-1/sqrt(fan_in), 1/sqrt(fan_in)] (DDPG paper convention)
     h0 = Dense(
         HIDDEN1_UNITS,
         activation='relu',
         kernel_initializer=RandomUniform(minval=-1.0 / np.sqrt(state_size),
                                          maxval=1.0 / np.sqrt(state_size)),
         bias_initializer=RandomUniform(minval=-1.0 / np.sqrt(state_size),
                                        maxval=1.0 /
                                        np.sqrt(state_size)))(S)
     h1 = Dense(HIDDEN2_UNITS,
                activation='relu',
                kernel_initializer=RandomUniform(
                    minval=-1.0 / np.sqrt(HIDDEN1_UNITS),
                    maxval=1.0 / np.sqrt(HIDDEN1_UNITS)),
                bias_initializer=RandomUniform(
                    minval=-1.0 / np.sqrt(HIDDEN1_UNITS),
                    maxval=1.0 / np.sqrt(HIDDEN1_UNITS)))(h0)
     Left_gain_factor = Dense(
         1,
         activation='sigmoid',  # to bound output between 0 and 1
         kernel_initializer=RandomUniform(minval=-0.003, maxval=0.003),
         bias_initializer=RandomUniform(minval=-0.003, maxval=0.003))(h1)
     Right_gain_factor = Dense(
         1,
         activation='sigmoid',  # to bound output between 0 and 1
         kernel_initializer=RandomUniform(minval=-0.003, maxval=0.003),
         bias_initializer=RandomUniform(minval=-0.003, maxval=0.003))(h1)
     # Concatenate the two gain factors into the action vector
     V = Concatenate()([Left_gain_factor, Right_gain_factor])
     model = Model(inputs=S, outputs=V)
     return model, model.trainable_weights, S
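
# Hedged sketch (an assumption, not the project's ActorNetwork class): in DDPG,
# the actor built above is trained by feeding the critic's action gradients
# (dQ/da) back through the policy. With the TF 1.x API used elsewhere in this
# code, a standalone version of that update could be built as follows
# (build_actor_train_op is a hypothetical helper name):
import tensorflow as tf

def build_actor_train_op(model, action_dim, learning_rate):
    """Return (placeholder, train_op). Feed the critic's dQ/da into the
    placeholder; minimizing the negated gradient performs ascent on Q."""
    action_gradient = tf.placeholder(tf.float32, [None, action_dim])
    # Chain rule: dQ/dtheta = (dQ/da) * (da/dtheta); the minus sign turns
    # Adam's minimization into gradient ascent on the Q-value
    params_grad = tf.gradients(model.output, model.trainable_weights,
                               -action_gradient)
    train_op = tf.train.AdamOptimizer(learning_rate).apply_gradients(
        list(zip(params_grad, model.trainable_weights)))
    return action_gradient, train_op
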
 def create_critic_network(self, state_size, action_dim):
     log('[DDPG] Building the critic model')
     S = Input(shape=[state_size])
     A = Input(shape=[action_dim], name='action2')
     w1 = Dense(
         HIDDEN1_UNITS,
         activation='relu',
         kernel_regularizer=regularizers.l2(0.01),
         kernel_initializer=RandomUniform(minval=-1.0 / np.sqrt(state_size),
                                          maxval=1.0 / np.sqrt(state_size)),
         bias_initializer=RandomUniform(minval=-1.0 / np.sqrt(state_size),
                                        maxval=1.0 /
                                        np.sqrt(state_size)))(S)
     a1 = Dense(
         HIDDEN2_UNITS,
         activation='relu',
         kernel_regularizer=regularizers.l2(0.01),
         kernel_initializer=RandomUniform(minval=-1.0 / np.sqrt(action_dim),
                                          maxval=1.0 / np.sqrt(action_dim)),
         bias_initializer=RandomUniform(minval=-1.0 / np.sqrt(action_dim),
                                        maxval=1.0 /
                                        np.sqrt(action_dim)))(A)
     h1 = Dense(HIDDEN2_UNITS,
                activation='relu',
                kernel_regularizer=regularizers.l2(0.01),
                kernel_initializer=RandomUniform(
                    minval=-1.0 / np.sqrt(HIDDEN1_UNITS),
                    maxval=1.0 / np.sqrt(HIDDEN1_UNITS)),
                bias_initializer=RandomUniform(
                    minval=-1.0 / np.sqrt(HIDDEN1_UNITS),
                    maxval=1.0 / np.sqrt(HIDDEN1_UNITS)))(w1)
     h2 = Add()([h1, a1])  # merge the state and action pathways
     h3 = Dense(HIDDEN2_UNITS,
                activation='relu',
                kernel_regularizer=regularizers.l2(0.01),
                kernel_initializer=RandomUniform(
                    minval=-1.0 / np.sqrt(HIDDEN2_UNITS),
                    maxval=1.0 / np.sqrt(HIDDEN2_UNITS)),
                bias_initializer=RandomUniform(
                    minval=-1.0 / np.sqrt(HIDDEN2_UNITS),
                    maxval=1.0 / np.sqrt(HIDDEN2_UNITS)))(h2)
     V = Dense(
         action_dim,
         activation='linear',  # linear output for the (unbounded) Q-value
         kernel_initializer=RandomUniform(minval=-0.003, maxval=0.003),
         bias_initializer=RandomUniform(minval=-0.003, maxval=0.003))(h3)
     model = Model(inputs=[S, A], outputs=V)
     adam = Adam(lr=self.LEARNING_RATE)
     model.compile(loss='mse', optimizer=adam)
     return model, A, S
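
# Hedged sketch (an assumption, not the project's own code): the ActorNetwork
# and CriticNetwork wrappers used further below call target_train() after each
# batch update. In standard DDPG this is a soft (Polyak) update of the target
# network, theta_target <- TAU*theta + (1 - TAU)*theta_target. A minimal
# standalone equivalent for two Keras models (soft_update_target is a
# hypothetical helper name):
def soft_update_target(model, target_model, tau):
    """Blend the online weights into the target network."""
    weights = model.get_weights()
    target_weights = target_model.get_weights()
    new_weights = [tau * w + (1.0 - tau) * wt
                   for w, wt in zip(weights, target_weights)]
    target_model.set_weights(new_weights)
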
def gait_eval(position_vector, description, serial, oscillator_nw, max_evals=max_evals, max_duration=max_duration):
    for i in range(max_evals):
        result = oscillator_nw(position_vector, max_time=max_duration)
        log('[EVAL] Description: {0}, Serial#: {1}, Run#: {2}, Result: << {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11} >>'
            .format(description,
                    serial,
                    i + 1,
                    result['fitness'],
                    result['fallen'],
                    result['up'],
                    result['x_distance'],
                    result['abs_y_deviation'],
                    result['avg_footstep_x'],
                    result['var_torso_alpha'],
                    result['var_torso_beta'],
                    result['var_torso_gamma']))
    log('#################################################')
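
# Hedged usage sketch (an assumption): gait_eval is typically invoked with a
# stored chromosome and one of the project's oscillator variants, e.g.
#   gait_eval(best_chromosome, description='static test', serial=1,
#             oscillator_nw=oscillator_nw)
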
def oscillator_nw(kf,
                  GAIN1,
                  GAIN2,
                  GAIN3,
                  GAIN4,
                  GAIN5,
                  GAIN6,
                  BIAS1,
                  BIAS2,
                  BIAS3,
                  BIAS4,
                  max_time=15.0,
                  fitness_option=1):

    # Try to connect to VREP
    vrep = None
    try_counter = 0
    try_max = 5
    while vrep is None:
        try:
            log('Trying to create robot handle (attempt: {0} of {1})'.format(
                try_counter, try_max))
            try_counter += 1
            vrep = VrepIO(vrep_host='127.0.0.1',
                          vrep_port=19997,
                          scene=None,
                          start=False)
        except Exception, e:
            log('Could not connect to VREP')
            log('Error: {0}'.format(str(e)))
            time.sleep(1.0)

        if try_counter > try_max:
            log('Unable to create robot handle after {0} tries'.format(
                try_max))
            exit(1)

    if vrep is not None:
        log('Successfully connected to VREP')

    # Start the simulation
    vrep.start_simulation()

    # Start the monitoring thread
    monitor_thread = RobotMonitorThread(portnum=19998,
                                        objname='torso_11_respondable',
                                        height_threshold=0.3)
    monitor_thread.start()
    log('Started monitoring thread')

    # Note the current position
    start_pos_x = monitor_thread.x
    start_pos_y = monitor_thread.y
    start_pos_z = monitor_thread.z
import os
import numpy as np

from matsuoka_walk import Logger, log
from matsuoka_walk.oscillator_3_test_yaw import oscillator_nw as oscillator_3_test_yaw

# Set the home directory
home_dir = os.path.expanduser('~')

# Set the logging variables
# This also creates a new log file
Logger(log_dir=os.path.join(home_dir, '.bio_walk/logs/'), log_flag=True)

LOWEST_POSSIBLE_GAIN = 0.4

log('[STATIC TEST] LOWEST_POSSIBLE_GAIN: {}'.format(LOWEST_POSSIBLE_GAIN))

wtmpc23_run3_best30 = [
    0.3178385532762875, 0.3777451259604342, 0.023411599863716586,
    0.013217696615302215, 0.4566963469455763, 0.20194162123716233,
    0.3309010463046798, -0.05187677829896087, 0.09633745660574622,
    -0.11559976203529859, 0.4814311312157089, 1.5364038978521224
]
asus_run1_bestall = [
    0.7461913734531209, 0.8422944031253159, 0.07043758116681641,
    0.14236621222553963, 0.48893497409925746, 0.5980055418720059,
    0.740811806645801, -0.11618361090424223, 0.492832184960149,
    -0.2949145038394889, 0.175450703085948, -0.3419733470484183
]

best_chromosome = asus_run1_bestall
def deviation_controller(train_indicator=0,
                         identifier=''):  # 1 means train, 0 means run (test only)

    # np.random.seed(1337)

    # The train_indicator is switched internally to test the model after every n runs
    # So a separate flag indicates if the entire run is a test run, in which case the train_indicator always stays 0
    only_test_run = False
    if train_indicator == 0:
        log('[DDPG TEST ] This is a test run')
        only_test_run = True
    else:
        log('[DDPG] This is a training run')

    done = False
    step = 0
    epsilon = 1

    # Tensorflow GPU optimization
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Create the actor and critic models and the replay buffer
    actor = ActorNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRA)
    critic = CriticNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRC)
    buff = ReplayBuffer(BUFFER_SIZE)  # Create replay buffer

    # Register the matsuoka environment
    ENV_NAME = 'matsuoka_env-v0'
    gym.undo_logger_setup()
    env = gym.make(ENV_NAME)
    env._max_episode_steps = max_steps
    np.random.seed(SEED_FOR_RANDOM)
    env.seed(SEED_FOR_RANDOM)

    # Load existing weights if this is a test run
    if only_test_run:
        log('[DDPG TEST ] Loading existing weights')
        try:
            actor.model.load_weights(
                os.path.join(model_dir, 'actormodel_' + identifier + '.h5'))
            critic.model.load_weights(
                os.path.join(model_dir, 'criticmodel_' + identifier + '.h5'))
            actor.target_model.load_weights(
                os.path.join(model_dir, 'actormodel_' + identifier + '.h5'))
            critic.target_model.load_weights(
                os.path.join(model_dir, 'criticmodel_' + identifier + '.h5'))
            log('[DDPG TEST ] Weights loaded successfully')
        except Exception:
            log('[DDPG TEST ] Could not load the weights')

    # This flag indicates if a test has just been done
    just_tested = False

    # Counter for episodes
    i = 1

    # While max number of episodes is not over
    episode_count = train_episode_count if train_indicator == 1 else test_episode_count
    log('[DDPG ' + ('' if train_indicator else 'TEST') +
        '] Number of max episodes: {}'.format(episode_count))

    while i <= episode_count:

        # Test the policy after every n episodes
        # So after episode 20 completes, i will be 21 and the if will evaluate to True
        # If train_indicator is initially set to 0, then execute the else block only
        # This logic of switching the train_indicator is only needed during a training run
        if not only_test_run:
            if not just_tested and (i - 1) > 0 and (
                (i - 1) % TEST_AFTER_N_EPISODES == 0):
                train_indicator = 0
                # We are testing for the last episode
                i -= 1
                just_tested = True
                log('[DDPG TEST] Testing network after episode {}'.format(i))
            else:
                train_indicator = 1
                just_tested = False

        log('[DDPG ' + ('' if train_indicator else 'TEST') + '] Episode : ' +
            str(i) + ' Replay Buffer ' + str(buff.count()))

        ob = env.reset()

        s_t = ob

        total_reward = 0.

        for j in range(max_steps):

            loss = 0
            epsilon -= 1.0 / EXPLORE
            a_t = np.zeros([1, action_dim])
            noise_t = np.zeros([1, action_dim])

            a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0]))

            # Include noise only during training
            noise_t[0][0] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][0], 0.0, 0.15, 0.2)
            noise_t[0][1] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][1], 0.0, 0.15, 0.2)

            a_t[0][0] = a_t_original[0][0] + noise_t[0][0]
            a_t[0][1] = a_t_original[0][1] + noise_t[0][1]

            # Step the environment and fetch the observation, reward and terminal_flag
            ob, r_t, done, info = env.step(a_t[0])

            # Set the new state
            s_t1 = ob

            # Add to replay buffer
            buff.add(s_t, a_t[0], r_t, s_t1, done)

            # Do the batch update
            batch = buff.getBatch(BATCH_SIZE)
            states = np.asarray([e[0] for e in batch])
            actions = np.asarray([e[1] for e in batch])
            rewards = np.asarray([e[2] for e in batch])
            new_states = np.asarray([e[3] for e in batch])
            dones = np.asarray([e[4] for e in batch])
            # Placeholder with the same shape as the actions; overwritten below
            y_t = np.asarray([e[1] for e in batch])

            target_q_values = critic.target_model.predict(
                [new_states,
                 actor.target_model.predict(new_states)])

            for k in range(len(batch)):
                if dones[k]:
                    y_t[k] = rewards[k]
                else:
                    y_t[k] = rewards[k] + GAMMA * target_q_values[k]

            if train_indicator:
                log('[DDPG] Updating the models')
                loss += critic.model.train_on_batch([states, actions], y_t)
                a_for_grad = actor.model.predict(states)
                grads = critic.gradients(states, a_for_grad)
                actor.train(states, grads)
                actor.target_train()
                critic.target_train()

            total_reward += r_t
            s_t = s_t1

            log('[DDPG ' + ('' if train_indicator else 'TEST') +
                '] Episode: {0} Step: {1} Action: {2} Reward: {3} Loss: {4}'.
                format(i, step, a_t, r_t, loss))

            step += 1
            if done:
                break

            # Save the model after every n episodes
            if i > 0 and np.mod(i, TEST_AFTER_N_EPISODES) == 0:
                if (train_indicator):
                    log('[DDPG] Saving the model')
                    actor.model.save_weights(os.path.join(
                        model_dir,
                        'actormodel_' + identifier + '_{}'.format(i) + '.h5'),
                                             overwrite=True)
                    with open(
                            os.path.join(
                                model_dir, 'actormodel_' + identifier +
                                '_{}'.format(i) + '.json'), "w") as outfile:
                        json.dump(actor.model.to_json(), outfile)

                    critic.model.save_weights(os.path.join(
                        model_dir,
                        'criticmodel_' + identifier + '_{}'.format(i) + '.h5'),
                                              overwrite=True)
                    with open(
                            os.path.join(
                                model_dir, 'criticmodel_' + identifier +
                                '_{}'.format(i) + '.json'), "w") as outfile:
                        json.dump(critic.model.to_json(), outfile)

        # Reinitialize step count after an episode is done
        step = 0

        log('[DDPG ' + ('' if train_indicator else 'TEST') +
            '] TOTAL REWARD @ ' + str(i) + '-th Episode  : Reward ' +
            str(total_reward))
        log('')

        # Increment the episode count
        i += 1

    env.close()
    log('[DDPG] Finish')
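
# Hedged sketch (an assumption, not the project's OU class): OU.function(x, mu,
# theta, sigma) used in deviation_controller() above is the Ornstein-Uhlenbeck
# exploration noise commonly paired with DDPG. A minimal equivalent (ou_noise
# is a hypothetical helper name):
import numpy as np

def ou_noise(x, mu, theta, sigma):
    """One step of Ornstein-Uhlenbeck noise: theta*(mu - x) + sigma*N(0, 1)."""
    return theta * (mu - x) + sigma * np.random.randn(1)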


if __name__ == "__main__":
    from matsuoka_walk.matsuoka_env import MatsuokaEnv
    from gym.envs.registration import register

    register(
        id='matsuoka_env-v0',
        entry_point='matsuoka_walk:matsuoka_env.MatsuokaEnv',
        max_episode_steps=40,
    )

    # Set the logging variables
    # This also creates a new log file
    Logger(log_dir=os.path.join(home_dir, '.bio_walk/logs/'), log_flag=True)

    # Identifier used for saving model weights
    identifier = Logger.datetime_str
    log('[DDPG MAIN] Model weight identifier is {}'.format(identifier))

    # Start the DDPG algorithm
    # Set train_indicator=1 for training and train_indicator=0 for testing
    # For testing, set identifier to that of the desired weights to be loaded
    # identifier = '20171027_144930'
    deviation_controller(train_indicator=1, identifier=identifier)
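
# Hedged sketch (an assumption, not the project's ReplayBuffer class): a
# minimal buffer compatible with the add()/getBatch()/count() calls used in
# deviation_controller() above (SimpleReplayBuffer is a hypothetical name):
import random
from collections import deque

class SimpleReplayBuffer(object):

    def __init__(self, buffer_size):
        self.buffer = deque(maxlen=buffer_size)

    def add(self, state, action, reward, new_state, done):
        self.buffer.append((state, action, reward, new_state, done))

    def getBatch(self, batch_size):
        # Sample at most batch_size experiences uniformly at random
        return random.sample(list(self.buffer),
                             min(len(self.buffer), batch_size))

    def count(self):
        return len(self.buffer)
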
def main():
    #random.seed(64)

    # Create an initial population of `POP_SIZE` individuals (where each individual is a list of floats)
    pop = toolbox.population(n=POP_SIZE)

    # CXPB  is the probability with which two individuals are crossed
    # MUTPB is the probability for mutating an individual
    CXPB, MUTPB = 0.8, 0.1

    log('[GA] Starting genetic algorithm')

    # Evaluate the entire population and store the fitness of each individual
    log('[GA] Finding the fitness of individuals in the initial generation')
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        print ind, fit
        ind.fitness.values = (fit, )

    # Extracting all the fitnesses
    fits = [ind.fitness.values[0] for ind in pop]

    # Variable keeping track of the number of generations
    g = 0

    best_ind_ever = None
    best_fitness_ever = 0.0

    # Begin the evolution
    while max(fits) < 100 and g < MAX_GEN:

        # A new generation
        g = g + 1
        log('[GA] Running generation {0}'.format(g))

        # Select the next generation individuals
        log('[GA] Selecting the next generation')
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))

        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            # cross two individuals with probability CXPB
            if random.random() < CXPB:
                toolbox.mate(child1, child2)

                # fitness values of the children
                # must be recalculated later
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            # mutate an individual with probability MUTPB
            if random.random() < MUTPB:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Since the content of some of our offspring changed during the last step, we now need to
        # re-evaluate their fitnesses. To save time and resources, we only evaluate those offspring
        # whose fitnesses were marked invalid.
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = (fit, )

        log('[GA] Evaluated {0} individuals (invalid fitness)'.format(
            len(invalid_ind)))

        # The population is entirely replaced by the offspring
        pop[:] = offspring

        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]

        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x * x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5

        log('[GA] Results for generation {0}'.format(g))
        log('[GA] Min %s' % min(fits))
        log('[GA] Max %s' % max(fits))
        log('[GA] Avg %s' % mean)
        log('[GA] Std %s' % std)

        best_ind_g = tools.selBest(pop, 1)[0]

        # Store the best individual over all generations
        if best_ind_g.fitness.values[0] > best_fitness_ever:
            best_fitness_ever = best_ind_g.fitness.values[0]
            best_ind_ever = best_ind_g

        log('[GA] Best individual for generation {0}: {1}, {2}'.format(
            g, best_ind_g, best_ind_g.fitness.values[0]))

        log('[GA] ############################# End of generation {0} #############################'
            .format(g))

    log('[GA] ===================== End of evolution =====================')

    best_ind = tools.selBest(pop, 1)[0]
    log('[GA] Best individual in the population: %s, %s' %
        (best_ind, best_ind.fitness.values[0]))
    log('[GA] Best individual ever: %s, %s' %
        (best_ind_ever, best_fitness_ever))
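
# Hedged sketch (an assumption, not the original registration): main() above
# relies on a DEAP toolbox configured elsewhere in the original module. A
# minimal setup consistent with the calls used above (population, evaluate,
# mate, mutate, select) could look like this; the attribute bounds, gene count
# and operator parameters are placeholders:
import random
from deap import base, creator, tools

creator.create('FitnessMax', base.Fitness, weights=(1.0,))
creator.create('Individual', list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register('attr_float', random.uniform, 0.01, 1.0)  # placeholder bounds
toolbox.register('individual', tools.initRepeat, creator.Individual,
                 toolbox.attr_float, n=11)  # e.g. kf, GAIN1-6, BIAS1-4
toolbox.register('population', tools.initRepeat, list, toolbox.individual)
toolbox.register('mate', tools.cxBlend, alpha=0.5)
toolbox.register('mutate', tools.mutGaussian, mu=0.0, sigma=0.1, indpb=0.1)
toolbox.register('select', tools.selTournament, tournsize=3)
# toolbox.register('evaluate', <fitness function that runs oscillator_nw>)
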
import os

from deap import base
from deap import creator
from deap import tools

from matsuoka_walk import oscillator_nw, Logger, log

# Set the home directory
home_dir = os.path.expanduser('~')

# Set the logging variables
# This also creates a new log file
Logger(log_dir=os.path.join(home_dir, '.bio_walk/logs/'), log_flag=True)

# Create the position bounds of the individual
log('[GA] Creating position bounds')
FLT_MIN_KF, FLT_MAX_KF = 0.2, 0.5
FLT_MIN_GAIN1, FLT_MAX_GAIN1 = 0.01, 1.0
FLT_MIN_GAIN2, FLT_MAX_GAIN2 = 0.01, 1.0
FLT_MIN_GAIN3, FLT_MAX_GAIN3 = 0.01, 1.0
FLT_MIN_GAIN4, FLT_MAX_GAIN4 = 0.01, 1.0
FLT_MIN_GAIN5, FLT_MAX_GAIN5 = 0.01, 1.0
FLT_MIN_GAIN6, FLT_MAX_GAIN6 = 0.01, 1.0
FLT_MIN_BIAS1, FLT_MAX_BIAS1 = -0.6, 0.0
FLT_MIN_BIAS2, FLT_MAX_BIAS2 = 0.0, 0.5
FLT_MIN_BIAS3, FLT_MAX_BIAS3 = -0.5, 0.0
FLT_MIN_BIAS4, FLT_MAX_BIAS4 = 0.0, 1.0

log('[GA] Logging position bounds')
log('[GA] FLT_MIN_KF={0}, FLT_MAX_KF={1}'.format(FLT_MIN_KF, FLT_MAX_KF))
log('[GA] FLT_MIN_GAIN1={0}, FLT_MAX_GAIN1={1}'.format(FLT_MIN_GAIN1,
                                                       FLT_MAX_GAIN1))
import os

from deap import base
from deap import creator
from deap import tools

from matsuoka_walk import Logger, log
from matsuoka_walk.oscillator_4 import oscillator_nw

# Set the home directory
home_dir = os.path.expanduser('~')

# Set the logging variables
# This also creates a new log file
Logger(log_dir=os.path.join(home_dir, '.bio_walk/logs/'), log_flag=True)

log('[GA] Running ga_4')

# Create the position bounds of the individual
log('[GA] Creating position bounds')
FLT_MIN_KF, FLT_MAX_KF = 0.2, 1.0
FLT_MIN_GAIN1, FLT_MAX_GAIN1 = 0.01, 1.0
FLT_MIN_GAIN2, FLT_MAX_GAIN2 = 0.01, 1.0
FLT_MIN_GAIN3, FLT_MAX_GAIN3 = 0.01, 1.0
FLT_MIN_GAIN4, FLT_MAX_GAIN4 = 0.01, 1.0
FLT_MIN_GAIN5, FLT_MAX_GAIN5 = 0.01, 1.0
FLT_MIN_GAIN6, FLT_MAX_GAIN6 = 0.01, 1.0
FLT_MIN_BIAS1, FLT_MAX_BIAS1 = -0.6, 0.0
FLT_MIN_BIAS2, FLT_MAX_BIAS2 = 0.0, 0.5
FLT_MIN_BIAS3, FLT_MAX_BIAS3 = -0.5, 0.0
FLT_MIN_BIAS4, FLT_MAX_BIAS4 = 0.0, 1.0
FLT_MIN_K_HIP_Y, FLT_MAX_K_HIP_Y = -2.5, 2.5