def __init__(self):
    self.parser = self._create_parser()
    self.args = self.parser.parse_args()
    self.white_list = str2list(self.args.white_list)
    self.black_list = str2list(self.args.black_list)
    self.static_list = str2list(self.args.static_list)
    self.mappings = str2map(self.args.mappings)
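# A minimal sketch of what the str2list and str2map helpers used above might look
# like. The real implementations live elsewhere in this repo, so the exact
# delimiters (',' for list items, ':' for map pairs) are assumptions; note that
# load_dataset further below appears to apply str2list to stringified list
# columns, which may need a different parser (e.g. ast.literal_eval).
def str2list(s):
    """Parse a comma-separated string such as 'None,84,84,4' into a list,
    mapping 'None' to None and numeric tokens to ints (assumed format)."""
    if not s:
        return []
    out = []
    for tok in s.split(','):
        tok = tok.strip()
        if tok == 'None':
            out.append(None)
        elif tok.lstrip('-').isdigit():
            out.append(int(tok))
        else:
            out.append(tok)
    return out


def str2map(s):
    """Parse a string such as 'a:1,b:2' into {'a': '1', 'b': '2'} (assumed format)."""
    if not s:
        return {}
    return dict(pair.split(':', 1) for pair in s.split(','))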
import argparse

import numpy as np
import tensorflow as tf

# env_interface, Memory, init_network and str2list are defined elsewhere in this repo.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env-interface", type=str, default='gym')
    parser.add_argument("--environment", type=str, default='BreakoutDeterministic-v4')
    parser.add_argument("--action-size", type=int, default=4)
    parser.add_argument("--input-shape", type=str, default='None,84,84,4')
    parser.add_argument("--state-len-max", type=int, default=4)
    parser.add_argument("--target-update-freq", type=int, default=10000)
    parser.add_argument("--ep-greedy-speed", type=str, default='slow')
    parser.add_argument("--epsilon-max", type=float, default=1.)
    parser.add_argument("--epsilon-min", type=float, default=.01)
    parser.add_argument("--epsilon-decay-slow", type=int, default=1000000)
    parser.add_argument("--epsilon-decay-fast", type=float, default=.001)
    # Note: despite the name, this value is used as the discount factor in the Q-target.
    parser.add_argument("--learning-rate", type=float, default=.95)
    parser.add_argument("--replay-start-size", type=int, default=50000)
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument("--replay-mem-size", type=int, default=1000000)
    parser.add_argument("--epochs", type=int, default=30000)
    parser.add_argument("--pixel-feature", type=int, default=1)
    parser.add_argument("--padding", type=int, default=0)
    parser.add_argument("--model", type=str, default='nature')
    args = parser.parse_args()

    # Post-process arguments that depend on other arguments
    args.input_shape = str2list(args.input_shape)
    assert args.model in ['nature', 'gated']
    assert args.ep_greedy_speed in ['fast', 'slow']
    assert args.env_interface in ['gym', 'ale', 'custom_cart', 'custom_cartpole', 'ple']
    if args.env_interface in ['gym', 'ale']:
        env = env_interface(args.env_interface, args.environment)
    elif args.env_interface in ['custom_cart', 'custom_cartpole', 'ple']:
        env = env_interface(args.env_interface, args.environment,
                            bool(args.pixel_feature), bool(args.padding))
        args.input_shape = [None] + list(env.obs_space_shape) + [1]
    args.input_shape[-1] = args.state_len_max
    args.action_size = env.action_size
    assert args.state_len_max == args.input_shape[-1]
    print(args)

    # Episode state buffers and step counter
    state_old = []
    state = []
    steps = 0

    # Epsilon-greedy schedule
    if args.ep_greedy_speed == 'slow':
        epsilon = args.epsilon_max
        epsilon_rate = 0.
        if args.epsilon_decay_slow != 0:
            epsilon_rate = ((args.epsilon_max - args.epsilon_min) /
                            float(args.epsilon_decay_slow))
    elif args.ep_greedy_speed == 'fast':
        epsilon = args.epsilon_max

    # Initialize replay memory
    memory = Memory(args.replay_mem_size, args.input_shape[1:])

    # Initialize the online (qnet) and target (tnet) networks
    qnet, tnet, update_ops = init_network(args.input_shape, args.action_size, args.model)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(update_ops)
        for epoch in range(args.epochs):
            frame = env.reset()
            total_rewards = 0.
            total_losses = 0.
            state_old = []
            state = [frame] * args.state_len_max
            done = False
            while not done:
                # Epsilon-greedy action selection
                if np.random.rand() < epsilon:
                    action = np.random.randint(args.action_size)
                else:
                    image_in = np.stack(state, axis=-1)[np.newaxis, ...]
                    action = qnet.get_action(sess, image_in)
                frame, reward, done, _ = env.step(action)
                total_rewards += reward
                state_old = state[:]
                state.append(frame)
                if len(state) > args.state_len_max:
                    state = state[1:]

                # Add the transition to replay memory (rewards clipped to [-1, 1])
                memory.add([np.stack(state_old, axis=-1)[np.newaxis, ...],
                            action,
                            min(1., max(-1., reward)),
                            np.stack(state, axis=-1)[np.newaxis, ...],
                            done])

                # Anneal epsilon
                if args.ep_greedy_speed == 'slow':
                    epsilon = max(args.epsilon_min, epsilon - epsilon_rate)
                elif args.ep_greedy_speed == 'fast':
                    epsilon = args.epsilon_min + (
                        args.epsilon_max - args.epsilon_min) * np.exp(
                            -args.epsilon_decay_fast * float(steps))

                if steps > args.replay_start_size:
                    # Training step on a sampled minibatch
                    batch = np.array(memory.sample(args.batch_size))
                    states = np.concatenate(batch[:, 0], axis=0)
                    actions = batch[:, 1]
                    rewards = batch[:, 2]
                    states1 = np.concatenate(batch[:, 3], axis=0)
                    dones = batch[:, 4]
                    l = qnet.train(sess, states, actions, rewards, states1,
                                   dones, args.learning_rate, tnet)
                    total_losses += l

                # Increment the frame step counter
                steps += 1

                # Periodically sync the target network
                if steps % args.target_update_freq == 0:
                    print("Updating target...")
                    sess.run(update_ops)

                if done:
                    print("epoch:", epoch, "total rewards", total_rewards,
                          "total losses", total_losses, qnet.string)
                    break
    env.close()
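# Both training loops in this file assume a Memory class defined elsewhere in the
# repo, constructed as Memory(capacity, obs_shape) and used via add() and sample().
# The stand-in below is a hypothetical sketch matching that usage, not the repo's
# actual implementation.
import random
from collections import deque


class Memory(object):
    """Fixed-capacity replay buffer of [state, action, reward, next_state, done] records."""

    def __init__(self, capacity, obs_shape=None):
        # obs_shape is accepted for interface compatibility; a real implementation
        # might preallocate arrays of this shape instead of storing whole records.
        self.buffer = deque(maxlen=capacity)

    def add(self, transition):
        # transition = [state, action, clipped_reward, next_state, done];
        # the oldest record is evicted automatically once capacity is reached.
        self.buffer.append(transition)

    def sample(self, batch_size):
        # Uniform random minibatch; callers wrap the result in np.array() and
        # slice columns, so a list of records is the expected return type.
        return random.sample(self.buffer, batch_size)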
def main():
    # Arguments for the q-learner
    parser = argparse.ArgumentParser()
    parser.add_argument("--env-interface", type=str, default='gym')
    parser.add_argument("--environment", type=str, default='BreakoutDeterministic-v4')
    parser.add_argument("--action-size", type=int, default=4)
    parser.add_argument("--input-shape", type=str, default='None,84,84,4')
    parser.add_argument("--state-len-max", type=int, default=4)
    parser.add_argument("--target-update-freq", type=int, default=10000)
    parser.add_argument("--epsilon-max", type=float, default=1.)
    parser.add_argument("--epsilon-min", type=float, default=.01)
    parser.add_argument("--epsilon-decay", type=int, default=1000000)
    # Note: despite the name, this value is used as the discount factor in the Q-target.
    parser.add_argument("--learning-rate", type=float, default=.95)
    parser.add_argument("--replay-start-size", type=int, default=50000)
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument("--replay-mem-size", type=int, default=1000000)
    parser.add_argument("--epochs", type=int, default=30000)
    # Arguments for the feature extractor
    parser.add_argument("--train-fe-shape", type=str, default='None,12,12,4')
    parser.add_argument("--stop-gradient", type=int, default=0)
    parser.add_argument("--train-fe-iterations", type=int, default=1000)
    parser.add_argument("--train-fe-batch-size", type=int, default=100)
    parser.add_argument("--train-fe-lamb", type=float, default=0.)
    parser.add_argument("--train-fe-numfactors", type=int, default=200)
    parser.add_argument("--train-fe-nummap", type=int, default=100)
    parser.add_argument("--train-fe-learning-rate", type=float, default=.001)
    parser.add_argument("--train-fe-w", type=int, default=12)
    parser.add_argument("--train-fe-s", type=int, default=1)
    parser.add_argument("--use-conv-after-fe", type=int, default=0)
    parser.add_argument("--ep-greedy-speed", type=str, default='slow')
    # Arguments for the environment interface
    parser.add_argument("--pixel-features", type=int, default=1)
    parser.add_argument("--padding", type=int, default=0)
    args = parser.parse_args()

    # Post-process arguments that depend on other arguments
    args.input_shape = str2list(args.input_shape)
    args.train_fe_shape = str2list(args.train_fe_shape)
    assert args.env_interface in ['gym', 'ale', 'custom_cart', 'custom_cartpole']
    assert args.ep_greedy_speed in ['fast', 'slow']
    env = env_interface(args.env_interface,
                        args.environment,
                        pixel_feature=bool(args.pixel_features),
                        padding=bool(args.padding),
                        render=True)
    args.action_size = env.action_size
    if args.env_interface in ['custom_cart', 'custom_cartpole']:
        args.input_shape = [None] + list(env.obs_space_shape) + [args.state_len_max]
    args.train_fe_shape[-1] = args.state_len_max
    print(args)

    # Episode state buffers and step counter
    state_old = []
    state = []
    steps = 0

    # Epsilon-greedy schedule
    epsilon_lambda = .001
    epsilon = args.epsilon_max
    epsilon_rate = 0.
    if args.epsilon_decay != 0:
        epsilon_rate = ((args.epsilon_max - args.epsilon_min) /
                        float(args.epsilon_decay))

    # Initialize replay memory
    print(args.input_shape)
    memory = Memory(args.replay_mem_size, args.input_shape[1:])

    # Initialize the online (qnet) and target (tnet) networks
    from gated_qlearning import gated_qlearning
    qnet = gated_qlearning(shape=args.train_fe_shape,
                           nummap=args.train_fe_nummap,
                           numfactors=args.train_fe_numfactors,
                           learning_rate=args.train_fe_learning_rate,
                           frame_shape=args.input_shape,
                           a_size=args.action_size,
                           stop_gradient=bool(args.stop_gradient),
                           lamb=args.train_fe_lamb,
                           w=args.train_fe_w,
                           s=args.train_fe_s,
                           use_conv_after_fe=bool(args.use_conv_after_fe))
    qnet_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    tnet = gated_qlearning(shape=args.train_fe_shape,
                           nummap=args.train_fe_nummap,
                           numfactors=args.train_fe_numfactors,
                           learning_rate=args.train_fe_learning_rate,
                           frame_shape=args.input_shape,
                           a_size=args.action_size,
                           stop_gradient=bool(args.stop_gradient),
                           lamb=args.train_fe_lamb,
                           w=args.train_fe_w,
                           s=args.train_fe_s,
                           use_conv_after_fe=bool(args.use_conv_after_fe))
    tnet_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)[len(qnet_vars):]
    update_ops = update_target_graph_vars(qnet_vars, tnet_vars)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        sess.run(update_ops)
        for epoch in range(args.epochs):
            frame = env.reset()
            total_rewards = 0.
            total_losses = 0.
            state_old = []
            state = [frame] * args.state_len_max
            done = False
            while not done:
                # Epsilon-greedy action selection
                if np.random.rand() < epsilon:
                    action = np.random.randint(args.action_size)
                else:
                    image_in = np.stack(state, axis=-1)[np.newaxis, ...]
                    action = qnet.get_action(sess, image_in)
                frame, reward, done, _ = env.step(action)
                total_rewards += reward
                state_old = state[:]
                state.append(frame)
                if len(state) > args.state_len_max:
                    state = state[1:]

                # Add the transition to replay memory (rewards clipped to [-1, 1])
                memory.add([np.stack(state_old, axis=-1)[np.newaxis, ...],
                            action,
                            min(1., max(-1., reward)),
                            np.stack(state, axis=-1)[np.newaxis, ...],
                            done])

                # Anneal epsilon
                if args.ep_greedy_speed == 'slow':
                    epsilon = max(args.epsilon_min, epsilon - epsilon_rate)
                elif args.ep_greedy_speed == 'fast':
                    epsilon = args.epsilon_min + (
                        args.epsilon_max - args.epsilon_min) * np.exp(
                            -epsilon_lambda * float(steps))

                # Train the feature extractor on the reconstruction loss first
                if args.train_fe_iterations > 0:
                    args.train_fe_iterations -= qnet.train_feature_extractor(
                        sess, memory, args.train_fe_batch_size, 10)
                    print(args.train_fe_iterations)

                if steps > args.replay_start_size and args.train_fe_iterations <= 0:
                    # Q-learning training step on a sampled minibatch
                    batch = np.array(memory.sample(args.batch_size))
                    states = np.concatenate(batch[:, 0], axis=0)
                    actions = batch[:, 1]
                    rewards = batch[:, 2]
                    states1 = np.concatenate(batch[:, 3], axis=0)
                    dones = batch[:, 4]
                    Q1 = qnet.get_Q1(sess, states1, tnet)
                    # args.learning_rate acts as the discount factor here
                    targetQ = rewards + (1. - dones) * args.learning_rate * np.amax(
                        Q1, keepdims=False, axis=1)
                    l, _, _ = qnet.train(sess, states, actions, targetQ[..., np.newaxis])
                    total_losses += l

                # Increment the frame step counter
                steps += 1

                # Periodically sync the target network
                if steps % args.target_update_freq == 0:
                    print("Updating target...")
                    sess.run(update_ops)

                if done:
                    print("epoch", epoch, "total rewards", total_rewards,
                          "total losses", total_losses)
                    break
    env.close()
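# update_target_graph_vars is defined elsewhere in this repo. Given how it is used
# above (run once after initialization, then every target-update-freq steps), a
# plausible minimal version just builds TF1 assign ops that copy each online-network
# variable into its target-network counterpart; this is an assumed sketch, not the
# repo's actual code.
def update_target_graph_vars(qnet_vars, tnet_vars):
    """Return ops that overwrite target-network variables with online-network values."""
    return [t_var.assign(q_var) for q_var, t_var in zip(qnet_vars, tnet_vars)]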
from csv import DictReader
from random import sample

from tqdm import tqdm

# INFO, LEAVE_BAR, filter_users and str2list are defined elsewhere in this repo.


def load_dataset(dataset_path="",
                 features=[],
                 num_top_users=None,
                 min_tweets=0,
                 random_sample_size=0,
                 rows_to_read=None,
                 user_col="user_id",
                 str2list_cols=[]):
    """Reads the csv twitter dataset and returns one output per requested feature,
    with order maintained.

    Args:
        dataset_path (str)       : Path to the dataset csv file.
        features (list)          : Feature/column names to return; if empty, all
                                   columns are returned.
        num_top_users (int)      : Number of top users to keep.
        min_tweets (int)         : Keep only users with tweets >= min_tweets.
        random_sample_size (int) : Number of random samples to draw from the
                                   dataset; must be less than the dataset size.
        rows_to_read (int)       : Maximum number of csv rows to read; None reads
                                   all rows.
        user_col (str)           : User identification column name. MUST BE SPECIFIED.
        str2list_cols (list)     : Columns whose list values were read as strings;
                                   converted back to lists using str2list.

    Returns:
        (zip) : The filtered dataset transposed into columns, one per feature.
    """
    INFO.LOAD_PARAMS_USED = (f" #rows {rows_to_read} num_top_users {num_top_users}"
                             f" min_tweets {min_tweets}")
    print("\n" + INFO.LOAD_PARAMS_USED + "\n")

    if not dataset_path:
        raise ValueError("Argument dataset_path not defined!")

    dataset = []
    with open(dataset_path, encoding="utf8") as csv_file:
        csv_file = DictReader(csv_file)
        for i, row in enumerate(tqdm(csv_file, desc="reading rows", leave=LEAVE_BAR), 1):
            if features:
                dataset.append(tuple(row[feat] for feat in features))
            else:
                dataset.append(row)
            if i == rows_to_read:
                break

    # Select random samples from the list
    if random_sample_size:
        try:
            dataset = sample(dataset, random_sample_size)
        except ValueError:
            raise ValueError(f"random_sample_size larger than dataset size:"
                             f" {len(dataset)} or negative!")

    # Filter top users with tweets >= min_tweets
    index_of_user_col = features.index(user_col)
    users_list = [row[index_of_user_col] for row in dataset]
    users_to_keep = filter_users(users_list, num_top_users, min_tweets)

    # Filter the rest of the data based on users_to_keep, converting
    # stringified list columns back to real lists along the way
    str2list_indices = [features.index(col) for col in str2list_cols]
    filtered_dataset = [
        tuple(x if i not in str2list_indices else str2list(x)
              for i, x in enumerate(row))
        for row in tqdm(dataset, desc="filtering data", leave=LEAVE_BAR)
        if row[index_of_user_col] in users_to_keep
    ]
    return zip(*filtered_dataset)
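# A hedged usage example for load_dataset: the csv file name and column names below
# are hypothetical, and the INFO, LEAVE_BAR, filter_users and str2list globals must
# already be importable for the call to work.
if __name__ == "__main__":
    user_ids, tweet_texts = load_dataset(dataset_path="tweets.csv",           # hypothetical file
                                         features=["user_id", "tweet_text"],  # hypothetical columns
                                         min_tweets=5,
                                         rows_to_read=10000)
    # zip(*rows) transposes the filtered rows, so each unpacked name is one column
    print(len(user_ids), "tweets kept after filtering")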