Example #1
def __init__(self):
    self.parser = self._create_parser()
    self.args = self.parser.parse_args()
    self.white_list = str2list(self.args.white_list)
    self.black_list = str2list(self.args.black_list)
    self.static_list = str2list(self.args.static_list)
    self.mappings = str2map(self.args.mappings)
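The str2list and str2map helpers themselves are not shown in any of these examples. Below is a minimal sketch of what they might look like, assuming comma-separated tokens (with 'None' and integer tokens converted) and comma-separated key:value pairs; the behavior in the original project may differ.

def str2list(s):
    #Assumed behavior: 'None,84,84,4' -> [None, 84, 84, 4]
    if s is None:
        return None
    out = []
    for tok in s.split(','):
        tok = tok.strip()
        if tok == 'None':
            out.append(None)
        elif tok.lstrip('-').isdigit():
            out.append(int(tok))
        else:
            out.append(tok)
    return out

def str2map(s):
    #Assumed behavior: 'a:1,b:2' -> {'a': '1', 'b': '2'}
    if not s:
        return {}
    return dict(pair.split(':', 1) for pair in s.split(','))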
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env-interface", type=str, default='gym')
    parser.add_argument("--environment",
                        type=str,
                        default='BreakoutDeterministic-v4')
    parser.add_argument("--action-size", type=int, default=4)
    parser.add_argument("--input-shape", type=str, default='None,84,84,4')
    parser.add_argument("--state-len-max", type=int, default=4)
    parser.add_argument("--target-update-freq", type=int, default=10000)

    parser.add_argument("--ep-greedy-speed", type=str, default='slow')
    parser.add_argument("--epsilon-max", type=float, default=1.)
    parser.add_argument("--epsilon-min", type=float, default=.01)
    parser.add_argument("--epsilon-decay-slow", type=int, default=1000000)

    parser.add_argument("--epsilon-decay-fast", type=float, default=.001)

    parser.add_argument("--learning-rate", type=float, default=.95)
    parser.add_argument("--replay-start-size", type=int, default=50000)
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument("--replay-mem-size", type=int, default=1000000)
    parser.add_argument("--epochs", type=int, default=30000)

    parser.add_argument("--pixel-feature", type=int, default=1)
    parser.add_argument("--padding", type=int, default=0)

    parser.add_argument("--model", type=str, default='nature')

    args = parser.parse_args()

    args.input_shape = str2list(args.input_shape)
    assert args.model in ['nature', 'gated']
    assert args.ep_greedy_speed in ['fast', 'slow']
    assert args.env_interface in [
        'gym', 'ale', 'custom_cart', 'custom_cartpole', 'ple'
    ]
    if args.env_interface in ['gym', 'ale']:
        env = env_interface(args.env_interface, args.environment)
    elif args.env_interface in ['custom_cart', 'custom_cartpole', 'ple']:
        env = env_interface(args.env_interface, args.environment,
                            bool(args.pixel_feature), bool(args.padding))
        args.input_shape = [None] + list(env.obs_space_shape) + [1]
    args.input_shape[-1] = args.state_len_max
    args.action_size = env.action_size
    assert args.state_len_max == args.input_shape[-1]
    print(args)

    #Episode state buffers and step counter
    state_old = []
    state = []
    steps = 0

    #Epsilon-greedy exploration schedule
    if args.ep_greedy_speed == 'slow':
        epsilon = args.epsilon_max
        epsilon_rate = 0.
        if args.epsilon_decay_slow != 0:
            epsilon_rate = ((args.epsilon_max - args.epsilon_min) /
                            float(args.epsilon_decay_slow))
    elif args.ep_greedy_speed == 'fast':
        epsilon = args.epsilon_max

    #Initialize replay memory
    memory = Memory(args.replay_mem_size, args.input_shape[1:])

    #Initialize neural net
    qnet, tnet, update_ops = init_network(args.input_shape, args.action_size,
                                          args.model)

    #import time
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(update_ops)
        for epoch in range(args.epochs):
            frame = env.reset()
            total_rewards = 0.
            total_losses = 0.
            state_old = []
            state = [frame] * args.state_len_max
            done = False

            #start = time.time()
            while not done:
                if np.random.rand() < epsilon:
                    action = np.random.randint(args.action_size)
                else:
                    image_in = np.stack(state, axis=-1)[np.newaxis, ...]
                    action = qnet.get_action(sess, image_in)

                frame, reward, done, _ = env.step(action)
                total_rewards += reward
                state_old = state[:]
                state.append(frame)
                if len(state) > args.state_len_max:
                    state = state[1:]

                #Add to memory
                memory.add([
                    np.stack(state_old, axis=-1)[np.newaxis, ...], action,
                    min(1., max(-1., reward)),
                    np.stack(state, axis=-1)[np.newaxis, ...], done
                ])

                #Reduce epsilon
                if args.ep_greedy_speed == 'slow':
                    epsilon = max(args.epsilon_min, epsilon - epsilon_rate)
                elif args.ep_greedy_speed == 'fast':
                    epsilon = args.epsilon_min + (
                        args.epsilon_max - args.epsilon_min) * np.exp(
                            -args.epsilon_decay_fast * float(steps))

                if steps > args.replay_start_size:
                    #Training step
                    batch = np.array(memory.sample(args.batch_size))

                    states = np.concatenate(batch[:, 0], axis=0)
                    actions = batch[:, 1]
                    rewards = batch[:, 2]
                    states1 = np.concatenate(batch[:, 3], axis=0)
                    dones = batch[:, 4]

                    l = qnet.train(sess, states, actions, rewards, states1,
                                   dones, args.learning_rate, tnet)
                    total_losses += l

                #Increase the frame steps counter
                steps += 1
                #Check if target network is to be updated
                if steps % args.target_update_freq == 0:
                    print "Updating target..."
                    sess.run(update_ops)

                if done:
                    print("epoch:", epoch, "total rewards", total_rewards, "total losses", total_losses, qnet.string)
                    #print 'time:', time.time() - start
                    break
    env.close()
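The episode loop above keeps a sliding window of the last state_len_max frames and stacks them on the channel axis before feeding them to the network. A minimal standalone sketch of that pattern follows; the names are illustrative, not from the source.

import numpy as np

def push_frame(state, frame, state_len_max=4):
    #Append the newest frame and drop the oldest once the window is full.
    state.append(frame)
    if len(state) > state_len_max:
        state = state[1:]
    return state

#Example: four 84x84 grayscale frames become a single network input
#of shape (1, 84, 84, 4) after stacking and adding a batch dimension.
state = [np.zeros((84, 84))] * 4
image_in = np.stack(state, axis=-1)[np.newaxis, ...]
print(image_in.shape)  # (1, 84, 84, 4)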
Example #3
def main():
    #Arguments for the q-learner
    parser = argparse.ArgumentParser()
    parser.add_argument("--env-interface", type=str, default='gym')
    parser.add_argument("--environment",
                        type=str,
                        default='BreakoutDeterministic-v4')
    parser.add_argument("--action-size", type=int, default=4)
    parser.add_argument("--input-shape", type=str, default='None,84,84,4')
    parser.add_argument("--state-len-max", type=int, default=4)
    parser.add_argument("--target-update-freq", type=int, default=10000)
    parser.add_argument("--epsilon-max", type=float, default=1.)
    parser.add_argument("--epsilon-min", type=float, default=.01)
    parser.add_argument("--epsilon-decay", type=int, default=1000000)
    parser.add_argument("--learning-rate", type=float, default=.95)
    parser.add_argument("--replay-start-size", type=int, default=50000)
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument("--replay-mem-size", type=int, default=1000000)
    parser.add_argument("--epochs", type=int, default=30000)

    #Arguments for the feature extractor
    parser.add_argument("--train-fe-shape", type=str, default='None,12,12,4')
    parser.add_argument("--stop-gradient", type=int, default=0)
    parser.add_argument("--train-fe-iterations", type=int, default=1000)
    parser.add_argument("--train-fe-batch-size", type=int, default=100)
    parser.add_argument("--train-fe-lamb", type=float, default=0.)
    parser.add_argument("--train-fe-numfactors", type=int, default=200)
    parser.add_argument("--train-fe-nummap", type=int, default=100)
    parser.add_argument("--train-fe-learning-rate", type=float, default=.001)
    parser.add_argument("--train-fe-w", type=int, default=12)
    parser.add_argument("--train-fe-s", type=int, default=1)

    parser.add_argument("--use-conv-after-fe", type=int, default=0)

    parser.add_argument("--ep-greedy-speed", type=str, default='slow')
    #Arguments for the environment interface
    parser.add_argument("--pixel-features", type=int, default=1)
    parser.add_argument("--padding", type=int, default=0)
    args = parser.parse_args()

    #Parse arguments wrt other arguments
    args.input_shape = str2list(args.input_shape)
    args.train_fe_shape = str2list(args.train_fe_shape)
    assert args.env_interface in [
        'gym', 'ale', 'custom_cart', 'custom_cartpole'
    ]
    assert args.ep_greedy_speed in ['fast', 'slow']
    env = env_interface(args.env_interface,
                        args.environment,
                        pixel_feature=bool(args.pixel_features),
                        padding=bool(args.padding),
                        render=True)
    args.action_size = env.action_size
    if args.env_interface in ['custom_cart', 'custom_cartpole']:
        args.input_shape = [None] + list(
            env.obs_space_shape) + [args.state_len_max]
    args.train_fe_shape[-1] = args.state_len_max
    print(args)

    #Episode state buffers and step counter
    state_old = []
    state = []
    steps = 0

    #Epsilon-greedy exploration schedule
    epsilon_lambda = .001
    epsilon = args.epsilon_max
    epsilon_rate = 0.
    if args.epsilon_decay != 0:
        epsilon_rate = ((args.epsilon_max - args.epsilon_min) /
                        float(args.epsilon_decay))

    #Initialize replay memory
    print(args.input_shape)
    memory = Memory(args.replay_mem_size, args.input_shape[1:])

    #Initialize neural net
    from gated_qlearning import gated_qlearning
    qnet = gated_qlearning(shape=args.train_fe_shape,
                           nummap=args.train_fe_nummap,
                           numfactors=args.train_fe_numfactors,
                           learning_rate=args.train_fe_learning_rate,
                           frame_shape=args.input_shape,
                           a_size=args.action_size,
                           stop_gradient=bool(args.stop_gradient),
                           lamb=args.train_fe_lamb,
                           w=args.train_fe_w,
                           s=args.train_fe_s,
                           use_conv_after_fe=bool(args.use_conv_after_fe))
    qnet_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

    tnet = gated_qlearning(shape=args.train_fe_shape,
                           nummap=args.train_fe_nummap,
                           numfactors=args.train_fe_numfactors,
                           learning_rate=args.train_fe_learning_rate,
                           frame_shape=args.input_shape,
                           a_size=args.action_size,
                           stop_gradient=bool(args.stop_gradient),
                           lamb=args.train_fe_lamb,
                           w=args.train_fe_w,
                           s=args.train_fe_s,
                           use_conv_after_fe=bool(args.use_conv_after_fe))
    tnet_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES)[len(qnet_vars):]

    update_ops = update_target_graph_vars(qnet_vars, tnet_vars)
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        sess.run(update_ops)
        for epoch in range(args.epochs):
            frame = env.reset()
            total_rewards = 0.
            total_losses = 0.
            state_old = []
            state = [frame] * args.state_len_max
            done = False

            while not done:
                if np.random.rand() < epsilon:
                    action = np.random.randint(args.action_size)
                else:
                    image_in = np.stack(state, axis=-1)[np.newaxis, ...]
                    action = qnet.get_action(sess, image_in)

                frame, reward, done, _ = env.step(action)
                total_rewards += reward
                state_old = state[:]
                state.append(frame)
                if len(state) > args.state_len_max:
                    state = state[1:]

                #Add to memory
                memory.add([np.stack(state_old, axis=-1)[np.newaxis, ...],
                            action,
                            min(1., max(-1., reward)),
                            np.stack(state, axis=-1)[np.newaxis, ...],
                            done])

                #Reduce epsilon
                if args.ep_greedy_speed == 'slow':
                    epsilon = max(args.epsilon_min, epsilon - epsilon_rate)
                elif args.ep_greedy_speed == 'fast':
                    epsilon = args.epsilon_min + (
                        args.epsilon_max - args.epsilon_min) * np.exp(
                            -epsilon_lambda * float(steps))

                #Train the reconstruction loss
                if args.train_fe_iterations > 0:
                    args.train_fe_iterations -= qnet.train_feature_extractor(
                        sess, memory, args.train_fe_batch_size, 10)
                    print(args.train_fe_iterations)

                if steps > args.replay_start_size and args.train_fe_iterations <= 0:
                    #Training step
                    batch = np.array(memory.sample(args.batch_size))

                    states = np.concatenate(batch[:, 0], axis=0)
                    actions = batch[:, 1]
                    rewards = batch[:, 2]
                    states1 = np.concatenate(batch[:, 3], axis=0)
                    dones = batch[:, 4]

                    Q1 = qnet.get_Q1(sess, states1, tnet)
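                    #Bellman target computed below: r + (1 - done) * gamma * max_a' Q_target(s', a').
                    #Note that args.learning_rate (default .95) appears to play the role of the
                    #discount factor here rather than an optimizer step size.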

                    targetQ = rewards + (1. - dones) * args.learning_rate * np.amax(
                        Q1, axis=1, keepdims=False)

                    l, _, _ = qnet.train(sess, states, actions,
                                         targetQ[..., np.newaxis])
                    total_losses += l

                #Increase the frame steps counter
                steps += 1
                #Check if target network is to be updated
                if steps % args.target_update_freq == 0:
                    print "Updating target..."
                    sess.run(update_ops)

                if done:
                    print("epoch", epoch, "total rewards", total_rewards, "total losses", total_losses)
                    break

    env.close()
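Both examples share the same two exploration schedules: a 'slow' linear decay of epsilon by a fixed per-step rate and a 'fast' exponential decay toward epsilon_min. A minimal sketch of the two schedules as pure functions of the step counter, using the parser defaults above:

import numpy as np

def epsilon_slow(step, eps_max=1., eps_min=.01, decay_steps=1000000):
    #Linear decay: drop by a fixed rate each step, clamped at eps_min.
    rate = (eps_max - eps_min) / float(decay_steps)
    return max(eps_min, eps_max - rate * step)

def epsilon_fast(step, eps_max=1., eps_min=.01, lamb=.001):
    #Exponential decay toward eps_min.
    return eps_min + (eps_max - eps_min) * np.exp(-lamb * step)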
Example #4
def load_dataset(dataset_path="", features=[], num_top_users=None, min_tweets=0, random_sample_size=0, rows_to_read=None, user_col="user_id", str2list_cols=[]):
    """Returns the csv twitter dataset, number of outputs same as features with order maintained.
    
    Args:
        dataset_path (str) : 
            Path to the dataset csv file.
        features (list) : 
            List of feature/columns names to return, if empty, returns all columns.
        num_top_users (int) : 
            Number of top users to return.
        min_tweets (int) : 
            Criteria to filter users, with tweets>=min_tweets.
        random_sample_size (int): 
            Random samples to get from the dataset, must be less than the total dataset size.
        user_col (string) : 
            User Identification Column Name. MUST BE SPECIFIED.
        str2list_cols (list) : 
            Column names with list values read as string, converted back to lists using str2list.
    
    Returns:
        (list) : csv rows as dictionaries.
    """
    INFO.LOAD_PARAMS_USED = f" #rows {rows_to_read} num_top_users {num_top_users} min_tweets {min_tweets}"
    print("\n"+INFO.LOAD_PARAMS_USED+"\n")
    
    if not dataset_path:
        raise ValueError("Arguement dataset_path not defined !")

    dataset = []
    with open(dataset_path, encoding="utf8") as csv_file:  
        csv_file = DictReader(csv_file)

        for i,row in enumerate(tqdm(csv_file, desc="reading rows", leave=LEAVE_BAR),1):
            if features:
                out = tuple(row[feat] for feat in features)
                dataset.append(out)
            else:
                dataset.append(row)

            if i == rows_to_read:
                break
    
    # Select random samples from the list
    if random_sample_size:
        try:
            dataset = sample(dataset, random_sample_size)
        except ValueError:
            raise ValueError(f"random_sample_size larger than dataset size {len(dataset)} or negative!")
    
    # Filtering Top users with tweets>=min_tweets
    index_of_user_col = features.index(user_col)
    users_list = [row[index_of_user_col] for row in dataset]

    # filtering users
    users_to_keep = filter_users(users_list, num_top_users, min_tweets)

    # filtering rest of data, based on users_to_keep
    str2list_indices = [features.index(col) for col in str2list_cols]
    filtered_dataset = [tuple([x if i not in str2list_indices else str2list(x) for i, x in enumerate(row)])
                        for row in tqdm(dataset, desc="filtering data", leave=LEAVE_BAR)
                        if row[index_of_user_col] in users_to_keep]

    return zip(*filtered_dataset)
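A hedged usage sketch of load_dataset; the file path and column names below are illustrative assumptions, not taken from the source. Because the function returns zip(*filtered_dataset), it unpacks into one sequence per requested feature.

user_ids, texts, hashtags = load_dataset(
    dataset_path="tweets.csv",                #hypothetical file
    features=["user_id", "text", "hashtags"],
    min_tweets=10,
    str2list_cols=["hashtags"],               #stored in the csv as stringified lists
)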