def insert_records():
    # (1 - imports)
    # (2 - generate database schema)

    # 3 - create a new session (from the session factory)
    session = Session()

    # 4 - create movies
    bourne_identity = Movie("The Bourne Identity", date(2002, 10, 11))
    furious_7 = Movie("Furious 7", date(2015, 4, 2))
    pain_and_gain = Movie("Pain & Gain", date(2013, 8, 23))

    # 5 - create actors
    matt_damon = Actor("Matt Damon", date(1970, 10, 8))
    dwayne_johnson = Actor("Dwayne Johnson", date(1972, 5, 2))
    mark_wahlberg = Actor("Mark Wahlberg", date(1971, 6, 5))

    # 6 - add actors to movies
    bourne_identity.actors = [matt_damon]
    furious_7.actors = [dwayne_johnson]
    pain_and_gain.actors = [dwayne_johnson, mark_wahlberg]

    # 7 - add contact details to actors (the last parameter defines the actor
    #     that the ContactDetails instance is associated with)
    matt_contact = ContactDetails("415 555 2671", "Burbank, CA", matt_damon)
    dwayne_contact = ContactDetails("423 555 5623", "Glendale, CA", dwayne_johnson)
    dwayne_contact_2 = ContactDetails("421 444 2323", "West Hollywood, CA", dwayne_johnson)
    mark_contact = ContactDetails("421 333 9428", "Glendale, CA", mark_wahlberg)

    # 8 - create stuntmen
    matt_stuntman = Stuntman("John Doe", True, matt_damon)
    dwayne_stuntman = Stuntman("John Roe", True, dwayne_johnson)
    mark_stuntman = Stuntman("Richard Roe", True, mark_wahlberg)

    # 9 - persist data (save the movies, contact details, and stuntmen)
    #     (note that actors don't need to be explicitly saved, because SQLAlchemy,
    #     by default, uses the 'save-update' cascade strategy)
    session.add(bourne_identity)
    session.add(furious_7)
    session.add(pain_and_gain)

    session.add(matt_contact)
    session.add(dwayne_contact)
    session.add(dwayne_contact_2)
    session.add(mark_contact)

    session.add(matt_stuntman)
    session.add(dwayne_stuntman)
    session.add(mark_stuntman)

    # 10 - commit and close session
    session.commit()
    session.close()
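# A minimal read-back sketch, not part of the original snippet: it reuses the same
# Session factory and date import and simply queries the movies persisted by
# insert_records(). The `title` and `release_date` attribute names on Movie are
# assumptions about the model and may differ in the real schema.
def query_movies():
    session = Session()
    try:
        # movies released after 1 January 2010, most recent first
        recent = (session.query(Movie)
                  .filter(Movie.release_date > date(2010, 1, 1))
                  .order_by(Movie.release_date.desc())
                  .all())
        for movie in recent:
            print(movie.title, movie.release_date)
    finally:
        session.close()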
def fetchAllActorinfo(self):
    # fetch every row from the actor table and map each record to an Actor object
    query = "SELECT * FROM actor"
    output = self.connection.run(query, False)
    actors = []
    for record in output:
        id = record[0]
        name = record[1]
        gender = record[2]
        actorObj = Actor(id, name, gender)
        actors.append(actorObj)
    return actors
def print_models():
    matt_damon = Actor("Matt Damon", date(1970, 10, 8))
    print(matt_damon)

    matt_contact = ContactDetails("415 555 2671", "Burbank, CA", matt_damon)
    print(matt_contact)

    bourne_identity = Movie("The Bourne Identity", date(2002, 10, 11))
    print(bourne_identity)

    matt_stuntman = Stuntman("John Doe", True, matt_damon)
    print(matt_stuntman)
def replay_train(critic: Critic, critic_copy: Critic, actor: Actor, actor_copy: Actor, train_batch):
    state_stack = np.empty(input_size).reshape(1, input_size)
    action_stack = np.empty(output_size).reshape(1, output_size)
    sampled_action_stack = np.empty(output_size).reshape(1, output_size)
    y_stack = np.empty(output_size).reshape(1, output_size)

    for state, action, reward, next_state, done in train_batch:
        a = np.empty(output_size).reshape(1, output_size)
        s_a = np.empty(output_size).reshape(1, output_size)
        y = np.empty(output_size).reshape(1, output_size)

        sampled_action_copy = actor_copy.action(next_state)
        sampled_action = actor.action(state)
        sampled_q_value = critic_copy.q_value(next_state, sampled_action_copy)

        state = np.reshape(state, newshape=(1, input_size))

        if done:
            y[0, output_size - 1] = reward
        else:
            y[0, output_size - 1] = reward + dis * sampled_q_value[0][0]

        a[0, output_size - 1] = action
        s_a[0, output_size - 1] = sampled_action

        state_stack = np.vstack([state_stack, state])
        action_stack = np.vstack([action_stack, a])
        sampled_action_stack = np.vstack([sampled_action_stack, s_a])
        y_stack = np.vstack([y_stack, y])

    # drop the placeholder first rows created by np.empty above
    state_stack = np.delete(state_stack, 0, 0)
    action_stack = np.delete(action_stack, 0, 0)
    sampled_action_stack = np.delete(sampled_action_stack, 0, 0)
    y_stack = np.delete(y_stack, 0, 0)

    loss, _ = critic.update(state_stack, action_stack, y_stack)
    gradient = critic.get_gradient(state_stack, sampled_action_stack)
    actor.update(state_stack, gradient)

    return loss
def __init__(self, state_size, batch_size, is_eval=False):
    self.state_size = state_size
    self.action_size = 3
    self.buffer_size = 1000000
    self.batch_size = batch_size
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
    self.inventory = []
    self.is_eval = is_eval
    self.gamma = 0.99
    self.tau = 0.001

    self.actor_local = Actor(self.state_size, self.action_size)
    self.actor_target = Actor(self.state_size, self.action_size)
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())
def fetchActorByMovieinfoId(self, movieinfoId):
    # join movieinfo_actor with actor to get every actor linked to the given movie info id
    query = """SELECT id, name, gender
               FROM `movieinfo_actor` AS m
               LEFT JOIN `actor` AS a ON m.actor_id = a.id
               WHERE m.movie_info_id = %s"""
    output = self.connection.run(query, False, [movieinfoId])
    actors = []
    for record in output:
        id = record[0]
        name = record[1]
        gender = record[2]
        actorObj = Actor(id, name, gender)
        actors.append(actorObj)
    return actors
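# Hypothetical usage sketch for the DAO method above, not part of the original
# snippet: the ActorDao class name, the `connection` object, and the id/name/gender
# attributes on Actor are all assumptions introduced here for illustration.
dao = ActorDao(connection)
for actor in dao.fetchActorByMovieinfoId(42):
    print(actor.id, actor.name, actor.gender)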
def bot_play(actor: Actor):
    # See our trained network in action
    s = env.reset()
    reward_sum = 0
    while True:
        env.render()
        a = actor.action(s)
        s, reward, done, _ = env.step(a)
        reward_sum += reward
        if done:
            print("Total score: {}".format(reward_sum))
            break
class Agent:
    def __init__(self, state_size, batch_size, is_eval=False):
        self.state_size = state_size
        self.action_size = 3
        self.buffer_size = 1000000
        self.batch_size = batch_size
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
        self.inventory = []
        self.is_eval = is_eval
        self.gamma = 0.99
        self.tau = 0.001

        self.actor_local = Actor(self.state_size, self.action_size)
        self.actor_target = Actor(self.state_size, self.action_size)
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    def act(self, state):
        options = self.actor_local.model.predict(state)
        self.last_state = state
        if not self.is_eval:
            return choice(range(3), p=options[0])
        return np.argmax(options[0])

    def step(self, action, reward, next_state, done):
        self.memory.add(self.last_state, action, reward, next_state, done)
        if len(self.memory) > self.batch_size:
            experiences = self.memory.sample(self.batch_size)
            self.learn(experiences)
        self.last_state = next_state

    def learn(self, experiences):
        states = np.vstack([e.state for e in experiences if e is not None]).astype(np.float32).reshape(-1, self.state_size)
        actions = np.vstack([e.action for e in experiences if e is not None]).astype(np.float32).reshape(-1, self.action_size)
        rewards = np.array([e.reward for e in experiences if e is not None]).astype(np.float32).reshape(-1, 1)
        dones = np.array([e.done for e in experiences if e is not None]).astype(np.float32).reshape(-1, 1)
        next_states = np.vstack([e.next_state for e in experiences if e is not None]).astype(np.float32).reshape(-1, self.state_size)

        actions_next = self.actor_target.model.predict_on_batch(next_states)
        Q_targets_next = self.critic_target.model.predict_on_batch([next_states, actions_next])
        Q_targets = rewards + self.gamma * Q_targets_next * (1 - dones)

        self.critic_local.model.train_on_batch(x=[states, actions], y=Q_targets)

        action_gradients = np.reshape(
            self.critic_local.get_action_gradients([states, actions, 0]),
            (-1, self.action_size))
        self.actor_local.train_fn([states, action_gradients, 1])

        self.soft_update(self.critic_local.model, self.critic_target.model)
        self.soft_update(self.actor_local.model, self.actor_target.model)

    def soft_update(self, local_model, target_model):
        local_weights = np.array(local_model.get_weights())
        target_weights = np.array(target_model.get_weights())
        assert len(local_weights) == len(target_weights)
        new_weights = self.tau * local_weights + (1 - self.tau) * target_weights
        target_model.set_weights(new_weights)
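# A minimal driving-loop sketch, not part of the original class: it shows how the
# Agent above might be stepped through a price series. `prices`, get_state() (assumed
# to return a (1, window_size) array), the window_size value, and the 0/1/2 action
# meanings (hold/buy/sell) are all assumptions introduced here for illustration only.
window_size = 10
agent = Agent(state_size=window_size, batch_size=32)
for t in range(len(prices) - 1):
    state = get_state(prices, t, window_size)        # hypothetical feature builder
    action = agent.act(state)                        # sample an action from the local actor
    next_state = get_state(prices, t + 1, window_size)
    reward = 0                                       # reward shaping intentionally omitted
    done = (t == len(prices) - 2)
    agent.step(action, reward, next_state, done)     # store experience; learn once enough samples exist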
def __init__(self, init_std, final_std, action_dim, state_dim, alpha,
             batch_size=128, gamma=.99, lr=1e-4):
    """Builds up the graph and all necessary operations for the model.

    Parameters
    ----------
    init_std : float
        Initial standard deviation for the exploration noise.
    final_std : float
        Final standard deviation for the exploration noise.
    action_dim : int
        Dimensionality of the actions.
    state_dim : int
        Dimensionality of the states.
    alpha : float
        Parameter for the updates of the average networks, i.e.
        avg_net = net * alpha + avg_net * (1 - alpha)
    batch_size : int, optional
        Batch size for training (the default is 128)
    gamma : float, optional
        Discount factor (the default is .99)
    lr : float, optional
        Learning rate for the optimizers (the default is 1e-4)
    """
    Exploration.__init__(self, init_std, final_std, 1000)
    ReplayBuffer.__init__(self, state_dim, action_dim)
    self.batch_size = batch_size
    self.gamma = gamma
    self.sess = tf.Session()

    self._actor = Actor(state_dim, action_dim)
    self._avg_actor = Actor(state_dim, action_dim, scope="avg_actor")
    self.update_avg_actor = self.__avg_params_update(
        self._actor.trainable_vars, self._avg_actor.trainable_vars)

    self._critic = Critic(state_dim, action_dim)
    self._avg_critic = Critic(state_dim, action_dim, scope="avg_critic")
    self.update_avg_critic = self.__avg_params_update(
        self._critic.trainable_vars, self._avg_critic.trainable_vars)

    with tf.name_scope("training-placeholders"):
        self.td_target = tf.placeholder(dtype=tf.float32, shape=[None, 1],
                                        name="td-target")

    with tf.name_scope("loss-functions"):
        critic_loss = tf.reduce_mean(
            tf.squared_difference(self._critic.q_value, self.td_target))

    with tf.name_scope("actor-grads"):
        self.action_grads = tf.placeholder(dtype=tf.float32,
                                           shape=[None, action_dim],
                                           name="action-grads")
        actor_grads = tf.gradients(ys=self._actor.action,
                                   xs=self._actor.trainable_vars,
                                   grad_ys=-self.action_grads)

    with tf.name_scope("optimizers"):
        self._critic_trainer = tf.train.AdamOptimizer(learning_rate=5 * lr)
        self._actor_trainer = tf.train.AdamOptimizer(learning_rate=lr)

    with tf.name_scope("update-ops"):
        self.update_critic = self._critic_trainer.minimize(
            critic_loss, var_list=self._critic.trainable_vars)
        self.update_actor = self._actor_trainer.apply_gradients(
            grads_and_vars=zip(actor_grads, self._actor.trainable_vars))
def _init_actor(self):
    self.actor = Actor(self.game, Rectangle(self.scr_1))
    self.actor.route.updated.path.connect(self.map_view.slot_path)
    self.actor.route.updated.zones.connect(self.map_view.slot_zones)
class Robot(QMainWindow):
    def __init__(self, app, window_name, process_name):
        super(Robot, self).__init__()
        self.app = app
        self.active = False
        self.scr_1 = self.app.screens()[0].availableGeometry()
        self.scr_2 = self.app.screens()[1].availableGeometry()
        self._init_keyboard_hook()
        self._init_map_view()
        self._init_game(window_name, process_name)
        self._init_actor()
        # initialise game state before starting workers
        self.read_game_state()
        self._init_timers()
        self.show()

    def _init_keyboard_hook(self):
        self.keyboard_hook = KeyboardHook()
        self.keyboard_hook.set_callback(self.toggle_activity, (19, ))  # pause button

    def _init_map_view(self):
        self.map_view = MapView(self)
        self.setCentralWidget(self.map_view)

    def _init_game(self, window_name, process_name):
        self.game = Game(window_name, process_name)
        self.game.window.center(Rectangle(self.scr_1))
        self.game.updated.map.connect(self.slot_map)

    def _init_actor(self):
        self.actor = Actor(self.game, Rectangle(self.scr_1))
        self.actor.route.updated.path.connect(self.map_view.slot_path)
        self.actor.route.updated.zones.connect(self.map_view.slot_zones)

    def _init_timers(self):
        self.timer_keyboard = QTimer()
        self.timer_keyboard.timeout.connect(self.keyboard_hook.flush)
        self.timer_keyboard.start(100)

        self.timer_game = QTimer()
        self.timer_game.timeout.connect(self.read_game_state)
        self.timer_game.start(100)

        self.timer_actor = QTimer()
        self.timer_actor.timeout.connect(self.act)
        self.timer_actor.start(500)

    def closeEvent(self, event):
        self.keyboard_hook.close()
        super(Robot, self).closeEvent(event)

    def resizeEvent(self, event):
        self.setGeometry(
            QStyle.alignedRect(
                Qt.LeftToRight,
                Qt.AlignCenter,
                self.map_view.size(),
                self.scr_2,
            ))

    def slot_map(self, map):
        self.map_view.slot_map(map)
        self.adjustSize()

    def toggle_activity(self):
        if self.active:
            log.debug('Bot activity has been paused')
        else:
            log.debug('Bot activity has been resumed')
        self.active = not self.active

    def read_game_state(self):
        self.game.read()
        self.map_view.update()

    def act(self):
        if self.active:
            self.actor.act()
def Main():
    max_episodes = 50000
    replay_buffer = deque()

    with tf.name_scope("network"):
        actor = Actor(n_state=input_size, n_action=output_size, n_layers=1, n_units=400, scope="actor")
        actor_copy = Actor(n_state=input_size, n_action=output_size, n_layers=1, n_units=400, scope="a_copy")
        critic = Critic(n_state=input_size, n_action=output_size, n_layers=1, n_units=400, scope="critic")
        critic_copy = Critic(n_state=input_size, n_action=output_size, n_layers=1, n_units=400, scope="c_copy")

    with tf.name_scope("train"):
        actor_copy_ops = get_copy_var_ops(actor_copy.get_variables(), actor.get_variables())
        # get_copy_var_ops(dest_scope_name="actor_copy", src_scope_name="actor")
        critic_copy_ops = get_copy_var_ops(critic_copy.get_variables(), critic.get_variables())
        # get_copy_var_ops(dest_scope_name="critic_copy", src_scope_name="critic")
        actor_soft_copy_ops = get_copy_var_ops(actor_copy.get_variables(), actor.get_variables(), "soft")
        # get_copy_var_ops(dest_scope_name="actor_copy", src_scope_name="actor", op_name="soft")
        critic_soft_copy_ops = get_copy_var_ops(critic_copy.get_variables(), critic.get_variables(), "soft")
        # get_copy_var_ops(dest_scope_name="critic_copy", src_scope_name="critic", op_name="soft")

    with tf.name_scope("miscellaneous"):
        init = tf.global_variables_initializer()
        noise_generator = Uhlenbeck(action_dimension=output_size, mu=0.6)
        saver = tf.train.Saver()

    with tf.Session() as sess:
        # initialize variables
        sess.run(init)
        # copy the variables
        sess.run([actor_copy_ops, critic_copy_ops])
        # set the current session to models
        actor.set_session(sess)
        actor_copy.set_session(sess)
        critic.set_session(sess)
        critic_copy.set_session(sess)

        # iterate through the episodes
        for episode in range(max_episodes):
            done = False
            step_count = 0
            state = env.reset()
            noise_generator.reset()
            loss = 0.0

            while not done:
                env.render()
                action = actor.action(state) + noise_generator.noise()
                next_state, reward, done, _ = env.step(action)

                replay_buffer.append((state, action, reward, next_state, done))
                if len(replay_buffer) > REPLAY_MEMORY:
                    replay_buffer.popleft()

                state = next_state
                step_count += 1

                if step_count % 100 == 1:
                    print("Step {}, chose action {}, reward {}".format(step_count, action, reward))

                if len(replay_buffer) < 64:
                    continue

                mini_batch = random.sample(replay_buffer, 64)
                loss = replay_train(critic, critic_copy, actor, actor_copy, mini_batch)
                sess.run([actor_soft_copy_ops, critic_soft_copy_ops])

                if done:
                    print("Loss : {}".format(loss))

            if episode % 10 == 1:
                print("Episode: {} steps: {}".format(episode, step_count))
                print("Loss : {}".format(loss))
                save_path = saver.save(sess, "./model.ckpt")
from datetime import date

from base import Session, engine, Base
from model.actor import Actor
from model.contact_details import ContactDetails
from model.movie import Movie
from model.stuntman import Stuntman

Base.metadata.create_all(engine)

session = Session()

bourne_identity = Movie("The Bourne Identity", date(2002, 10, 11))
furious_7 = Movie("Furious 7", date(2015, 4, 2))
pain_and_gain = Movie("Pain & Gain", date(2013, 8, 23))

matt_damon = Actor("Matt Damon", date(1970, 10, 8))
dwayne_johnson = Actor("Dwayne Johnson", date(1972, 5, 2))
mark_wahlberg = Actor("Mark Wahlberg", date(1971, 6, 5))

bourne_identity.actors = [matt_damon]
furious_7.actors = [dwayne_johnson]
pain_and_gain.actors = [dwayne_johnson, mark_wahlberg]

matt_contact = ContactDetails("415 555 2671", "Burbank, CA", matt_damon)
dwayne_contact = ContactDetails("423 555 5623", "Glendale, CA", dwayne_johnson)
dwayne_contact_2 = ContactDetails("421 444 2323", "West Hollywood, CA", dwayne_johnson)
mark_contact = ContactDetails("421 333 9428", "Glendale, CA", mark_wahlberg)

matt_stuntman = Stuntman("John Doe", True, matt_damon)
dwayne_stuntman = Stuntman("John Roe", True, dwayne_johnson)
from datetime import date  # needed for the release and birth dates below

from base import Base, engine, Session
from model.actor import Actor
from model.contact_details import ContactDetails
from model.movie import Movie
from model.stuntman import Stuntman

# Initialize DB
# Base.metadata.create_all(engine)

session = Session()

movie1 = Movie("The Dark Knight Rises", date(2012, 7, 20))
actor1 = Actor("Christian Bale", date(1974, 1, 30))
movie1.actors = [actor1]
contact1 = ContactDetails("433 555 4353", "East Hollywood, CA", actor1)
stuntman1 = Stuntman("", False, actor1)

session.add(movie1)
session.add(contact1)
session.add(stuntman1)

session.commit()
session.close()