def getHRFromLog(log):
    data = Buffer()
    print("Opening file: '" + str(log) + "'")
    with open(log, 'rt') as f:
        reader = csv.reader(f, delimiter=',', skipinitialspace=True)
        for row in reader:
            try:
                item = int(row[2])  # heart-rate value is the third column
                data.add(item)
            except ValueError as e:
                print(e)
    return data
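# Usage sketch (added for illustration, not part of the original source): feed a log
# file through getHRFromLog and report how many samples were parsed. The path is the
# one used by testhr() further below and is only an example; Buffer is assumed to
# expose a .data list as it does elsewhere in this code base.
def _example_load_hr(log="Logs/P1/05_03-09_00_09.csv"):
    hr = getHRFromLog(log)
    print("Parsed {} heart-rate samples from {}".format(len(hr.data), log))
    return hr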
class baseline:
    def __init__(self, port, hz=100, duration=60):
        self.hz = hz
        self.duration = duration
        # buffers
        self.timestamps = Buffer(hz * duration)
        self.baseHR = Buffer(hz * duration)
        self.baseGSR = Buffer(hz * duration)
        # Serial settings
        self.serial = SerialReader(port=port)

    def gatherBaseline(self):
        if self.serial.ser is not None:
            isGathering = True
            print('Gathering Baseline physiological signals')
            while isGathering:
                currentData = self.serial.current_data()
                if currentData != 'error' and not self.timestamps.isfull():
                    timestamp, gsr, hr = currentData.split(',', 3)
                    self.baseHR.add(float(hr))
                    self.baseGSR.add(float(gsr))
                    self.timestamps.add(timestamp)
                elif self.timestamps.isfull():
                    # Stop once the baseline buffers are full
                    isGathering = False
        else:
            print("Unable to contact arduino, Baseline could not be calculated")
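# Usage sketch (added for illustration, not from the original source): construct the
# baseline recorder on a serial port and collect hz * duration samples. 'COM5' is the
# port name used elsewhere in this code base and is only an example here.
def _example_gather_baseline():
    b = baseline('COM5', hz=100, duration=60)
    b.gatherBaseline()
    return b.baseHR, b.baseGSR, b.timestamps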
def learn(Q,
          operator,
          data,
          demand,
          min_env_flow,
          actions_report_file="",
          max_iter=5000,
          buffer_size=10000,
          batch_size=50,
          alpha=0.001,
          train_freq=1,
          eval_freq=50,
          eps_start=1.0,
          eps_end=0.02,
          exploration_fraction=0.2,
          random_episodes=0,
          eval_states=None,
          eval_episodes=1,
          mean_episodes=50,
          preprocess=lambda x: x,
          seed=None,
          render=False,
          verbose=True):

    leap_year_demand = np.insert(demand, 60, demand[59])

    if seed is not None:
        np.random.seed(seed)

    # MDP creation
    lake = Lakecomo(None, None, min_env_flow, None, None, seed=seed)
    years = data.year.unique()
    description = str(int(years[0])) + "-" + str(int(years[-1]))
    sampled_year = np.random.choice(years)
    inflow = list(data.loc[data['year'] == sampled_year, 'in'])
    if sampled_year % 4 == 0:
        # All leap years between 1946 and 2011 satisfy this condition,
        # even though it is not the complete leap-year rule
        mdp = LakeEnv(inflow, leap_year_demand, lake)
    else:
        mdp = LakeEnv(inflow, demand, lake)

    # Randomly initialize the weights in case an MLP is used
    if isinstance(Q, MLPQFunction):
        Q.init_weights()
        if isinstance(operator, DQNOperator):
            operator._q_target._w = Q._w

    # Initialize policies
    schedule = np.linspace(eps_start, eps_end, int(exploration_fraction * max_iter))
    pi = ScheduledGibbs(Q, np.arange(mdp.N_DISCRETE_ACTIONS), schedule)
    pi_u = Gibbs(Q, np.arange(mdp.N_DISCRETE_ACTIONS), tau=0)
    pi_g = Gibbs(Q, np.arange(mdp.N_DISCRETE_ACTIONS), tau=np.inf)

    # Add random episodes if needed
    init_samples = utils.generate_episodes(
        mdp, pi_u, n_episodes=random_episodes,
        preprocess=preprocess) if random_episodes > 0 else None
    if random_episodes > 0:
        t, s, a, r, s_prime, absorbing, sa = utils.split_data(
            init_samples, mdp.observation_space.shape[0], mdp.action_dim)
        init_samples = np.concatenate(
            (t[:, np.newaxis], preprocess(s), a, r[:, np.newaxis],
             preprocess(s_prime), absorbing[:, np.newaxis]),
            axis=1)

    # Figure out the effective state-dimension after preprocessing is applied
    eff_state_dim = preprocess(np.zeros(mdp.observation_space.shape[0])).size

    # Create replay buffer
    buffer = Buffer(buffer_size, eff_state_dim)
    n_init_samples = buffer.add_all(init_samples) if random_episodes > 0 else 0

    # Results
    iterations = []
    episodes = []
    n_samples = []
    evaluation_rewards = []
    learning_rewards = []
    episode_rewards = [0.0]
    episode_t = []
    l_2 = []
    l_inf = []

    # Adam initial params
    m_t = 0
    v_t = 0
    t = 0

    # Init env
    s = mdp.reset()
    h = 0

    start_time = time.time()

    if actions_report_file:
        actions_executed = []
        columns = list(range(mdp.N_DISCRETE_ACTIONS))
        actions_report_df = pd.DataFrame(columns=columns)
        actions_report_df.to_csv(actions_report_file, index=False)

    done_counter = 0

    # Learning
    for i in range(max_iter):

        # Take epsilon-greedy action wrt current Q-function
        s_prep = preprocess(s)
        a = pi.sample_action(s_prep)

        if actions_report_file:
            actions_executed.append(a)

        # Step
        s_prime, r, done, _ = mdp.step(a)

        # Build the new sample and add it to the dataset
        buffer.add_sample(h, s_prep, a, r, preprocess(s_prime), done)

        # Take a step of gradient if needed
        if i % train_freq == 0:
            # Estimate gradient
            g = operator.gradient_be(Q, buffer.sample_batch(batch_size))
            # Take a gradient step
            Q._w, t, m_t, v_t = utils.adam(Q._w, g, t, m_t, v_t, alpha=alpha)

        # Add reward to last episode
        episode_rewards[-1] += r * mdp.gamma**h

        s = s_prime
        h += 1
        if done or h >= mdp.horizon:

            if actions_report_file:
                actions_counts = np.bincount(actions_executed)
                actions_freqs = list(actions_counts / sum(actions_counts))
                new_row = dict(zip(columns, actions_freqs))
                # DataFrame.append was removed in recent pandas; pd.concat is equivalent here
                actions_report_df = pd.concat(
                    [actions_report_df, pd.DataFrame([new_row])],
                    ignore_index=True)
                actions_report_df.to_csv(actions_report_file, index=False)

                actions_executed = []

            episode_rewards.append(0.0)

            sampled_year = np.random.choice(years)
            inflow = list(data.loc[data['year'] == sampled_year, 'in'])
            if sampled_year % 4 == 0:
                mdp = LakeEnv(inflow, leap_year_demand, lake)
            else:
                mdp = LakeEnv(inflow, demand, lake)

            s = mdp.reset()
            h = 0
            episode_t.append(i)

            done_counter += 1

        # Evaluate model
        if done_counter == eval_freq:

            # Evaluate greedy policy
            scores = []
            for _ in range(eval_episodes):
                sampled_year = np.random.choice(years)
                inflow = list(data.loc[data['year'] == sampled_year, 'in'])
                if sampled_year % 4 == 0:
                    mdp = LakeEnv(inflow, leap_year_demand, lake)
                else:
                    mdp = LakeEnv(inflow, demand, lake)
                scores.append(_single_year_eval(mdp, pi_g))
            rew = np.mean(scores)

            learning_rew = np.mean(
                episode_rewards[-mean_episodes - 1:-1]) if len(episode_rewards) > 1 else 0.0
            br = operator.bellman_residual(Q, buffer.sample_batch(batch_size))**2
            l_2_err = np.average(br)
            l_inf_err = np.max(br)

            # Append results
            iterations.append(i)
            episodes.append(len(episode_rewards) - 1)
            n_samples.append(n_init_samples + i + 1)
            evaluation_rewards.append(rew)
            learning_rewards.append(learning_rew)
            l_2.append(l_2_err)
            l_inf.append(l_inf_err)

            sampled_year = np.random.choice(years)
            inflow = list(data.loc[data['year'] == sampled_year, 'in'])
            if sampled_year % 4 == 0:
                mdp = LakeEnv(inflow, leap_year_demand, lake)
            else:
                mdp = LakeEnv(inflow, demand, lake)
            s = mdp.reset()

            end_time = time.time()
            elapsed_time = end_time - start_time
            start_time = end_time

            if verbose:
                print(
                    "Iter {} Episodes {} Rew(G) {} Rew(L) {} L2 {} L_inf {} time {:.1f} s"
                    .format(i, episodes[-1], rew, learning_rew, l_2_err,
                            l_inf_err, elapsed_time))

            done_counter = 0

        if (i * 100 / max_iter) % 10 == 0:
            print("years:", description, "- Progress:",
                  str(int(i * 100 / max_iter)) + "%")

    run_info = [
        iterations, episodes, n_samples, learning_rewards,
        evaluation_rewards, l_2, l_inf,
        episode_rewards[:len(episode_t)], episode_t
    ]

    weights = np.array(Q._w)

    last_rewards = 5
    print("years:", description, "- Last evaluation rewards:",
          np.around(evaluation_rewards[-last_rewards:], decimals=3))

    return [[], weights, run_info]
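# Illustration (added, not from the original source): leap_year_demand in learn() is
# built with np.insert(demand, 60, demand[59]), i.e. the 365-entry daily demand series
# gains a 366th entry for leap years by repeating the value at index 59 at position 60
# (the extra leap-day slot). A minimal self-contained check with a dummy series:
def _example_leap_year_demand():
    demand = np.arange(365, dtype=float)                  # hypothetical daily demand series
    leap_year_demand = np.insert(demand, 60, demand[59])  # duplicate index 59 into slot 60
    assert len(leap_year_demand) == 366
    assert leap_year_demand[60] == demand[59]
    return leap_year_demand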
def learn(
        mdp,
        Q,
        operator,
        max_iter=5000,
        buffer_size=10000,
        batch_size=50,
        alpha_adam=0.001,
        alpha_sgd=0.1,
        lambda_=0.001,
        n_weights=10,
        train_freq=1,
        eval_freq=50,
        random_episodes=0,
        eval_states=None,
        eval_episodes=1,
        mean_episodes=50,
        preprocess=lambda x: x,
        cholesky_clip=0.0001,
        bandwidth=0.00001,
        post_components=1,
        max_iter_ukl=60,
        eps=0.001,
        eta=1e-6,
        time_coherent=False,
        source_file=None,
        seed=None,
        render=False,
        verbose=True,
        ukl_tight_freq=1,
        sources=None,
        # Lambda function to calculate the weights
        weights_calculator=None):

    if seed is not None:
        np.random.seed(seed)

    # Randomly initialize the weights in case an MLP is used
    if isinstance(Q, MLPQFunction):
        Q.init_weights()

    # Reset global variables
    global prior_eigen
    prior_eigen = None
    global cholesky_mask
    cholesky_mask = None
    global prior_normal
    prior_normal = None
    global posterior_normal
    posterior_normal = None

    # Initialize policies
    pi_g = EpsilonGreedy(Q, np.arange(mdp.action_space.n), epsilon=0)

    # Get number of features
    K = Q._w.size
    C = post_components

    # Load weights and construct prior distribution
    weights = utils.load_object(source_file) if sources is None else sources
    timesteps = len(weights)
    ws = []
    # Take only 1 sample per timestep
    for i in range(timesteps):
        samples = weights[i]
        np.random.shuffle(samples)
        ws.append(samples[0][1])  # 0: first sample (random), 1: weights
    ws = np.array(ws)

    # The Gaussian mixture weights are uniform if not provided.
    c_bar = np.ones(
        timesteps) / timesteps if weights_calculator is None else weights_calculator(ws)

    # Take only gaussians with non-zero weights
    ws = ws[c_bar > 0]
    timesteps = len(ws)
    c_bar = c_bar[c_bar > 0]

    mu_bar = ws
    Sigma_bar = np.tile(np.eye(K) * bandwidth, (timesteps, 1, 1))
    Sigma_bar_inv = np.tile((1 / bandwidth * np.eye(K))[np.newaxis],
                            (timesteps, 1, 1))

    # We initialize the parameters of the posterior to the best approximation of the posterior family to the prior
    c = np.ones(C) / C
    psi = c[:, np.newaxis] * c_bar[np.newaxis]
    phi = np.array(psi)

    mu = np.array([100 * np.random.randn(K) for _ in range(C)])
    Sigma = np.array([np.eye(K) for _ in range(C)])

    phi, psi = tight_ukl(c,
                         mu,
                         Sigma,
                         c_bar,
                         mu_bar,
                         Sigma_bar,
                         phi,
                         psi,
                         max_iter=max_iter_ukl,
                         eps=eps)
    params, phi, psi = init_posterior(c,
                                      mu,
                                      Sigma,
                                      c_bar,
                                      mu_bar,
                                      Sigma_bar,
                                      phi,
                                      psi,
                                      C,
                                      K,
                                      cholesky_clip,
                                      max_iter_ukl,
                                      max_iter=max_iter_ukl * 10,
                                      precision=Sigma_bar_inv,
                                      eta=eta,
                                      eps=eps,
                                      verbose=verbose)

    # Add random episodes if needed
    init_samples = list()
    if random_episodes > 0:
        w, _ = sample_gmm(random_episodes, c_bar, mu_bar, np.sqrt(Sigma_bar))
        for i in range(random_episodes):
            Q._w = w[i]
            init_samples.append(
                utils.generate_episodes(mdp,
                                        pi_g,
                                        n_episodes=1,
                                        preprocess=preprocess))
        init_samples = np.concatenate(init_samples)
        t, s, a, r, s_prime, absorbing, sa = utils.split_data(
            init_samples, mdp.state_dim, mdp.action_dim)
        init_samples = np.concatenate(
            (t[:, np.newaxis], preprocess(s), a, r[:, np.newaxis],
             preprocess(s_prime), absorbing[:, np.newaxis]),
            axis=1)

    # Figure out the effective state-dimension after preprocessing is applied
    eff_state_dim = preprocess(np.zeros(mdp.state_dim)).size

    # Create replay buffer
    buffer = Buffer(buffer_size, eff_state_dim)
    n_init_samples = buffer.add_all(init_samples) if random_episodes > 0 else 0

    # Results
    iterations = []
    episodes = []
    n_samples = []
    evaluation_rewards = []
    learning_rewards = []
    episode_rewards = [0.0]
    l_2 = []
    l_inf = []
    fvals = []
    episode_t = []

    # Create masks for ADAM and SGD
    adam_mask = pack(np.zeros(C), np.ones((C, K)) * alpha_adam,
                     np.zeros((C, K, K)))  # ADAM learns only \mu
    sgd_mask = pack(np.zeros(C), np.zeros((C, K)),
                    np.ones((C, K, K)) * alpha_sgd)  # SGD learns only L

    # Adam initial params
    m_t = 0
    v_t = 0
    t = 0

    # Init env
    s = mdp.reset()
    h = 0
    Q._w = sample_posterior(params, C, K)

    start_time = time.time()

    # Learning
    for i in range(max_iter):

        # If we do not use time coherent exploration, resample parameters
        Q._w = sample_posterior(params, C, K) if not time_coherent else Q._w
        # Take greedy action wrt current Q-function
        s_prep = preprocess(s)
        a = np.argmax(Q.value_actions(s_prep))
        # Step
        s_prime, r, done, _ = mdp.step(a)
        # Build the new sample and add it to the dataset
        buffer.add_sample(h, s_prep, a, r, preprocess(s_prime), done)

        # Take a step of gradient if needed
        if i % train_freq == 0:
            # Estimate gradient
            g = gradient(buffer.sample_batch(batch_size),
                         params,
                         Q,
                         c_bar,
                         mu_bar,
                         Sigma_bar,
                         operator,
                         i + 1,
                         phi,
                         psi,
                         n_weights,
                         lambda_,
                         max_iter_ukl,
                         C,
                         K,
                         precision=Sigma_bar_inv,
                         t_step=i,
                         ukl_tight_freq=ukl_tight_freq)

            # Take a gradient step for \mu
            params, t, m_t, v_t = utils.adam(params,
                                             g,
                                             t,
                                             m_t,
                                             v_t,
                                             alpha=adam_mask)
            # Take a gradient step for L
            params = utils.sgd(params, g, alpha=sgd_mask)
            # Clip parameters
            params = clip(params, cholesky_clip, C, K)

        # Add reward to last episode
        episode_rewards[-1] += r * mdp.gamma**h

        s = s_prime
        h += 1
        if done or h >= mdp.horizon:
            episode_rewards.append(0.0)
            s = mdp.reset()
            h = 0
            Q._w = sample_posterior(params, C, K)
            episode_t.append(i)

        # Evaluate model
        if i % eval_freq == 0:

            # Save current weights
            current_w = np.array(Q._w)

            # Evaluate MAP Q-function
            c, mu, _ = unpack(params, C, K)
            rew = 0
            for j in range(C):
                Q._w = mu[j]
                rew += utils.evaluate_policy(mdp,
                                             pi_g,
                                             render=render,
                                             initial_states=eval_states,
                                             n_episodes=eval_episodes,
                                             preprocess=preprocess)[0]
            rew /= C

            learning_rew = np.mean(
                episode_rewards[-mean_episodes - 1:-1]) if len(episode_rewards) > 1 else 0.0
            br = operator.bellman_residual(Q, buffer.sample_batch(batch_size))**2
            l_2_err = np.average(br)
            l_inf_err = np.max(br)
            fval = objective(buffer.sample_batch(batch_size),
                             params,
                             Q,
                             c_bar,
                             mu_bar,
                             Sigma_bar,
                             operator,
                             i + 1,
                             phi,
                             psi,
                             n_weights,
                             lambda_,
                             C,
                             K,
                             precision=Sigma_bar_inv)

            # Append results
            iterations.append(i)
            episodes.append(len(episode_rewards) - 1)
            n_samples.append(n_init_samples + i + 1)
            evaluation_rewards.append(rew)
            learning_rewards.append(learning_rew)
            l_2.append(l_2_err)
            l_inf.append(l_inf_err)
            fvals.append(fval)

            # Make sure we restart from s
            mdp.reset(s)

            # Restore weights
            Q._w = current_w

            end_time = time.time()
            elapsed_time = end_time - start_time
            start_time = end_time

            if verbose:
                print(
                    "Iter {} Episodes {} Rew(G) {} Rew(L) {} Fval {} L2 {} L_inf {} time {:.1f} s"
                    .format(i, episodes[-1], rew, learning_rew, fval, l_2_err,
                            l_inf_err, elapsed_time))

        if (i * 100 / max_iter) % 10 == 0:
            print("Seed: " + str(seed) + " - Progress: " +
                  str(int(i * 100 / max_iter)) + "%")

    run_info = [
        iterations, episodes, n_samples, learning_rewards,
        evaluation_rewards, l_2, l_inf, fvals,
        episode_rewards[:len(episode_t)], episode_t
    ]

    weights = np.array(mu)

    print("Task over: ", mdp.get_info(), " - Last learning rewards: ",
          np.around(run_info[3][-5:], decimals=3))

    return [mdp.get_info(), weights, run_info]
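# Sketch (added for illustration; sample_gmm's real implementation lives elsewhere in
# this repository, so this is an assumed equivalent, not its actual code): drawing n
# weight vectors from a Gaussian mixture defined by mixture weights c_bar, means
# mu_bar, and per-component standard-deviation matrices, which is how the random
# warm-up episodes above obtain their Q-function weights.
def _example_sample_gmm(n, c_bar, mu_bar, std_bar):
    K = mu_bar.shape[1]
    comps = np.random.choice(len(c_bar), size=n, p=c_bar)  # pick a component per draw
    draws = np.array([mu_bar[k] + std_bar[k] @ np.random.randn(K) for k in comps])
    return draws, comps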
def learn(mdp,
          Q,
          operator,
          max_iter=5000,
          buffer_size=10000,
          batch_size=50,
          alpha=0.001,
          train_freq=1,
          eval_freq=50,
          eps_start=1.0,
          eps_end=0.02,
          exploration_fraction=0.2,
          random_episodes=0,
          eval_states=None,
          eval_episodes=1,
          mean_episodes=50,
          preprocess=lambda x: x,
          seed=None,
          render=False,
          verbose=True):

    if seed is not None:
        np.random.seed(seed)

    # Randomly initialize the weights in case an MLP is used
    if isinstance(Q, MLPQFunction):
        # Q.init_weights()
        if isinstance(operator, DQNOperator):
            operator._q_target._w = Q._w

    # Initialize policies
    schedule = np.linspace(eps_start, eps_end, int(exploration_fraction * max_iter))
    pi = ScheduledEpsilonGreedy(Q, np.arange(mdp.action_space.n), schedule)
    pi_u = EpsilonGreedy(Q, np.arange(mdp.action_space.n), epsilon=1)
    pi_g = EpsilonGreedy(Q, np.arange(mdp.action_space.n), epsilon=0)

    # Add random episodes if needed
    init_samples = utils.generate_episodes(
        mdp, pi_u, n_episodes=random_episodes,
        preprocess=preprocess) if random_episodes > 0 else None
    if random_episodes > 0:
        t, s, a, r, s_prime, absorbing, sa = utils.split_data(
            init_samples, mdp.state_dim, mdp.action_dim)
        init_samples = np.concatenate(
            (t[:, np.newaxis], preprocess(s), a, r[:, np.newaxis],
             preprocess(s_prime), absorbing[:, np.newaxis]),
            axis=1)

    # Figure out the effective state-dimension after preprocessing is applied
    eff_state_dim = preprocess(np.zeros(mdp.state_dim)).size

    # Create replay buffer
    buffer = Buffer(buffer_size, eff_state_dim)
    n_init_samples = buffer.add_all(init_samples) if random_episodes > 0 else 0

    # Results
    iterations = []
    episodes = []
    n_samples = []
    evaluation_rewards = []
    learning_rewards = []
    episode_rewards = [0.0]
    episode_t = []
    l_2 = []
    l_inf = []

    # Adam initial params
    m_t = 0
    v_t = 0
    t = 0

    # Init env
    s = mdp.reset()
    h = 0

    start_time = time.time()

    # Learning
    for i in range(max_iter):

        # Take epsilon-greedy action wrt current Q-function
        s_prep = preprocess(s)
        a = pi.sample_action(s_prep)
        # Step
        s_prime, r, done, _ = mdp.step(a)
        # Build the new sample and add it to the dataset
        buffer.add_sample(h, s_prep, a, r, preprocess(s_prime), done)

        # Take a step of gradient if needed
        if i % train_freq == 0:
            # Estimate gradient
            g = operator.gradient_be(Q, buffer.sample_batch(batch_size))
            # Take a gradient step
            Q._w, t, m_t, v_t = utils.adam(Q._w, g, t, m_t, v_t, alpha=alpha)

        # Add reward to last episode
        episode_rewards[-1] += r * mdp.gamma**h

        s = s_prime
        h += 1
        if done or h >= mdp.horizon:
            episode_rewards.append(0.0)
            s = mdp.reset()
            h = 0
            episode_t.append(i)

        # Evaluate model
        if i % eval_freq == 0:

            # Evaluate greedy policy
            rew = utils.evaluate_policy(mdp,
                                        pi_g,
                                        render=render,
                                        initial_states=eval_states,
                                        n_episodes=eval_episodes,
                                        preprocess=preprocess)[0]
            learning_rew = np.mean(
                episode_rewards[-mean_episodes - 1:-1]) if len(episode_rewards) > 1 else 0.0
            br = operator.bellman_residual(Q, buffer.sample_batch(batch_size))**2
            l_2_err = np.average(br)
            l_inf_err = np.max(br)

            # Append results
            iterations.append(i)
            episodes.append(len(episode_rewards) - 1)
            n_samples.append(n_init_samples + i + 1)
            evaluation_rewards.append(rew)
            learning_rewards.append(learning_rew)
            l_2.append(l_2_err)
            l_inf.append(l_inf_err)

            # Make sure we restart from s
            mdp.reset(s)

            end_time = time.time()
            elapsed_time = end_time - start_time
            start_time = end_time

            if verbose:
                print(
                    "Iter {} Episodes {} Rew(G) {} Rew(L) {} L2 {} L_inf {} time {:.1f} s"
                    .format(i, episodes[-1], rew, learning_rew, l_2_err,
                            l_inf_err, elapsed_time))

        # if np.mean(episode_rewards[-mean_episodes - 1:-1]) > -80:
        #     render = True

    run_info = [
        iterations, episodes, n_samples, learning_rewards,
        evaluation_rewards, l_2, l_inf,
        episode_rewards[:len(episode_t)], episode_t
    ]

    weights = np.array(Q._w)

    return [mdp.get_info(), weights, run_info]
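# Sketch (added for illustration; utils.adam is defined elsewhere in this repository):
# a plain Adam update of the form used above, returning the new weights together with
# the updated step counter and moment estimates. The hyper-parameter defaults are the
# usual Adam choices and are assumptions, not values taken from utils.adam itself.
def _example_adam_step(w, g, t, m_t, v_t, alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    t += 1
    m_t = beta1 * m_t + (1 - beta1) * g             # biased first-moment estimate
    v_t = beta2 * v_t + (1 - beta2) * g**2          # biased second-moment estimate
    m_hat = m_t / (1 - beta1**t)                    # bias correction
    v_hat = v_t / (1 - beta2**t)
    w = w - alpha * m_hat / (np.sqrt(v_hat) + eps)  # descent step on the Bellman error
    return w, t, m_t, v_t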
args = parser.parse_args()

if args.seed is not None:
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.device_id is not None and torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

# create the environment
env = create_env(args.env, args.seed)

# create the agent
agent, agent_args = create_agent(env, args.device_id)

# create the data buffer
buffer = Buffer(batch_size=args.batch_size, seq_len=args.train_seq_len)

# create the optimizer
optimizer = Optimizer(agent,
                      optimizer=args.optimizer,
                      lr=args.lr,
                      norm_grad=args.grad_norm,
                      weight_decay=args.weight_decay,
                      value_tau=args.value_tau,
                      policy_tau=args.policy_tau,
                      value_update=args.value_update,
                      policy_update=args.policy_update)

# create the logger / plotter
plotter = Plotter(args, agent_args, agent)
def learn(mdp,
          Q,
          operator,
          max_iter=5000,
          buffer_size=10000,
          batch_size=50,
          alpha_adam=0.001,
          alpha_sgd=0.1,
          lambda_=0.001,
          n_weights=10,
          train_freq=1,
          eval_freq=50,
          random_episodes=0,
          eval_states=None,
          eval_episodes=1,
          mean_episodes=50,
          preprocess=lambda x: x,
          sigma_reg=0.0001,
          cholesky_clip=0.0001,
          time_coherent=False,
          n_source=10,
          source_file=None,
          seed=None,
          render=False,
          verbose=True,
          sources=None):

    if seed is not None:
        np.random.seed(seed)

    # Randomly initialize the weights in case an MLP is used
    if isinstance(Q, MLPQFunction):
        Q.init_weights()

    global prior_eigen_torch
    prior_eigen_torch = None

    # Initialize policies
    pi_g = EpsilonGreedy(Q, np.arange(mdp.action_space.n), epsilon=0)

    # Get number of features
    K = Q._w.size

    # Load weights and construct prior distribution
    weights = utils.load_object(source_file) if sources is None else sources
    ws = np.array([w[1] for w in weights])
    np.random.shuffle(ws)
    # Take only the first n_source weights
    ws = ws[:n_source, :]
    mu_bar = np.mean(ws, axis=0)
    Sigma_bar = np.cov(ws.T)
    # We use higher regularization for the prior to prevent the ELBO from diverging
    Sigma_bar_inv = np.linalg.inv(Sigma_bar + np.eye(K) * sigma_reg)
    # We initialize the parameters at the prior with smaller regularization (just to make sure Sigma_bar is pd)
    params = clip(
        pack(mu_bar,
             np.linalg.cholesky(Sigma_bar + np.eye(K) * cholesky_clip**2)),
        cholesky_clip, K)

    # Add random episodes if needed
    if random_episodes > 0:
        init_samples = list()
        for i in range(random_episodes):
            Q._w = sample_posterior(params, K)
            init_samples.append(
                utils.generate_episodes(mdp,
                                        pi_g,
                                        n_episodes=1,
                                        preprocess=preprocess))
        init_samples = np.concatenate(init_samples)
        t, s, a, r, s_prime, absorbing, sa = utils.split_data(
            init_samples, mdp.state_dim, mdp.action_dim)
        init_samples = np.concatenate(
            (t[:, np.newaxis], preprocess(s), a, r[:, np.newaxis],
             preprocess(s_prime), absorbing[:, np.newaxis]),
            axis=1)

    # Figure out the effective state-dimension after preprocessing is applied
    eff_state_dim = preprocess(np.zeros(mdp.state_dim)).size

    # Create replay buffer
    buffer = Buffer(buffer_size, eff_state_dim)
    n_init_samples = buffer.add_all(init_samples) if random_episodes > 0 else 0

    # Results
    iterations = []
    episodes = []
    n_samples = []
    evaluation_rewards = []
    learning_rewards = []
    episode_rewards = [0.0]
    episode_t = []
    l_2 = []
    l_inf = []
    fvals = []

    # Create masks for ADAM and SGD
    adam_mask = pack(np.ones(K) * alpha_adam,
                     np.zeros((K, K)))  # ADAM learns only \mu
    sgd_mask = pack(np.zeros(K),
                    np.ones((K, K)) * alpha_sgd)  # SGD learns only L

    # Adam initial params
    m_t = 0
    v_t = 0
    t = 0

    # RMSprop for Variance
    v_t_var = 0.

    # Init env
    s = mdp.reset()
    h = 0
    Q._w = sample_posterior(params, K)

    start_time = time.time()

    # Learning
    for i in range(max_iter):

        # If we do not use time coherent exploration, resample parameters
        Q._w = sample_posterior(params, K) if not time_coherent else Q._w
        # Take greedy action wrt current Q-function
        s_prep = preprocess(s)
        a = np.argmax(Q.value_actions(s_prep))
        # Step
        s_prime, r, done, _ = mdp.step(a)
        # Build the new sample and add it to the dataset
        buffer.add_sample(h, s_prep, a, r, preprocess(s_prime), done)

        # Take a step of gradient if needed
        if i % train_freq == 0:
            # Estimate gradient
            g = gradient(buffer.sample_batch(batch_size), params, Q, mu_bar,
                         Sigma_bar_inv, operator, i + 1, lambda_, n_weights)

            # Take a gradient step for \mu
            params, t, m_t, v_t = utils.adam(params,
                                             g,
                                             t,
                                             m_t,
                                             v_t,
                                             alpha=adam_mask)
            # Take a gradient step for L
            params = utils.sgd(params, g, alpha=sgd_mask)
            # params, v_t_var = utils.rmsprop(params, g, v_t_var, alpha=sgd_mask)
            # Clip parameters
            params = clip(params, cholesky_clip, K)

        # Add reward to last episode
        episode_rewards[-1] += r * mdp.gamma**h

        s = s_prime
        h += 1
        if done or h >= mdp.horizon:
            episode_rewards.append(0.0)
            s = mdp.reset()
            h = 0
            Q._w = sample_posterior(params, K)
            episode_t.append(i)

        # Evaluate model
        if i % eval_freq == 0:

            # Save current weights
            current_w = np.array(Q._w)

            # Evaluate MAP Q-function
            mu, _ = unpack(params, K)
            Q._w = mu
            rew = utils.evaluate_policy(mdp,
                                        pi_g,
                                        render=render,
                                        initial_states=eval_states,
                                        n_episodes=eval_episodes,
                                        preprocess=preprocess)[0]
            learning_rew = np.mean(
                episode_rewards[-mean_episodes - 1:-1]) if len(episode_rewards) > 1 else 0.0
            br = operator.bellman_residual(Q, buffer.sample_batch(batch_size))**2
            l_2_err = np.average(br)
            l_inf_err = np.max(br)
            fval = objective(buffer.sample_batch(batch_size), params, Q,
                             mu_bar, Sigma_bar_inv, operator, i + 1, lambda_,
                             n_weights)

            # Append results
            iterations.append(i)
            episodes.append(len(episode_rewards) - 1)
            n_samples.append(n_init_samples + i + 1)
            evaluation_rewards.append(rew)
            learning_rewards.append(learning_rew)
            l_2.append(l_2_err)
            l_inf.append(l_inf_err)
            fvals.append(fval)

            # Make sure we restart from s
            mdp.reset(s)

            # Restore weights
            Q._w = current_w

            end_time = time.time()
            elapsed_time = end_time - start_time
            start_time = end_time

            if verbose:
                print(
                    "Iter {} Episodes {} Rew(G) {} Rew(L) {} Fval {} L2 {} L_inf {} time {:.1f} s"
                    .format(i, episodes[-1], rew, learning_rew, fval, l_2_err,
                            l_inf_err, elapsed_time))

    run_info = [
        iterations, episodes, n_samples, learning_rewards,
        evaluation_rewards, l_2, l_inf, fvals,
        episode_rewards[:len(episode_t)], episode_t
    ]

    weights = np.array(mu)

    return [mdp.get_info(), weights, run_info]
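# Sketch (added for illustration; sample_posterior and unpack are defined elsewhere in
# this repository): the single-Gaussian variational posterior used above is
# parameterised by a mean vector mu and a lower-triangular Cholesky factor L, so one
# standard way to draw the K Q-function weights is mu + L @ eps with eps ~ N(0, I).
# This is an assumed equivalent of sample_posterior, not its actual implementation.
def _example_sample_gaussian_posterior(mu, L):
    K = mu.size
    return mu + L @ np.random.randn(K)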
def testhr():
    with open("Logs/P1/05_03-09_00_09.csv") as f:
        data = np.array(f.read().splitlines())

    c = hrProcesser()

    hrbuffer = Buffer(size=100)
    gsrbuffer = Buffer(size=100)
    tsbuffer = Buffer(size=100)

    # SETUP LIVE PLOTTER
    plt.ion()
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)

    for i in range(1, len(data)):
        # split log data
        log_data = data[i].split(',', 3)

        # fill buffers
        tsbuffer.add(log_data[0])
        gsrbuffer.add(float(log_data[1]))
        hrbuffer.add(float(log_data[2]))

        if len(hrbuffer.data) == hrbuffer.size:
            # Do calculations
            hrdat = np.asarray(qm.moving_average(hrbuffer.data, window=10))
            tsdat = np.asarray(tsbuffer.data)
            l = qm.ampd(hrdat, limit=0.5)  # Peak detection
            hrs = c.HR(hrdat, tsdat)

            # plot data
            disppeaks = []
            for dp in l:
                disppeaks.append(hrdat[int(dp)])

            # Update plot
            ax.clear()
            ax.set_title("HeartRate: {} ({} Samples)".format(hrs, len(disppeaks)))
            plt.plot(hrbuffer.data)
            plt.plot(hrdat)
            plt.plot(gsrbuffer.data)
            plt.scatter(l, disppeaks, c='r')
            plt.show()
            plt.pause(0.01)
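# Sketch (added for illustration; qm.moving_average lives elsewhere in this code base,
# so this is an assumed equivalent, not its actual implementation): smoothing the
# heart-rate buffer with a window-sized box filter before peak detection.
def _example_moving_average(signal, window=10):
    kernel = np.ones(window) / window
    return np.convolve(np.asarray(signal, dtype=float), kernel, mode='valid')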
with open(path + file) as f:
    print("opening file: ", file)
    logData = np.array(f.read().splitlines())

data_out = open("logWithHr.csv", "w")

hrData = []
hrsData = []
tsData = []
gsrData = []
indexes = []
markers = []
offset = 150

HRp = hrProcesser()

buffersize = 100
hrbuffer = Buffer(size=buffersize)
gsrbuffer = Buffer(size=buffersize)
tsbuffer = Buffer(size=buffersize)

for i in range(1, len(logData)):
    # split log data
    log_data = logData[i].split(',')
    if len(log_data) >= 3:
        # fill buffers
        tsbuffer.add(log_data[0])
        gsrbuffer.add(float(log_data[1]))
        hrbuffer.add(float(log_data[2]))
        # TIMESTAMP,HR,HRV,GSR

        if len(hrbuffer.data) == hrbuffer.size:
            # Do calculations