def __init__(self, view):
    Gtk.EventBox.__init__(self)

    self._view = view
    self.set_visible_window(False)

    hbox = Gtk.Box.new(Gtk.Orientation.HORIZONTAL, 3)
    hbox.show()
    hbox.set_border_width(3)

    # context for the view
    self._entry = Gtk.Entry()
    self._entry.set_has_frame(False)
    self._entry.set_name('gedit-commander-entry')
    self._entry.show()

    css = Gtk.CssProvider()
    css.load_from_data("""
@binding-set terminal-like-bindings {
    unbind "<Control>A";

    bind "<Control>W" { "delete-from-cursor" (word-ends, -1) };
    bind "<Control>A" { "move-cursor" (buffer-ends, -1, 0) };
    bind "<Control>U" { "delete-from-cursor" (display-line-ends, -1) };
    bind "<Control>K" { "delete-from-cursor" (display-line-ends, 1) };
    bind "<Control>E" { "move-cursor" (buffer-ends, 1, 0) };
    bind "Escape" { "delete-from-cursor" (display-lines, 1) };
}

GtkEntry#gedit-commander-entry {
    gtk-key-bindings: terminal-like-bindings;

    /* Override background to anything. This is weird, but doing this we can
       then in code use widget.override_background to set the color
       dynamically to the same color as the gedit view */
    background: transparent;
    border-width: 0;
    box-shadow: 0 0 transparent;
}
""")

    # FIXME: remove hardcopy of 600 (GTK_STYLE_PROVIDER_PRIORITY_APPLICATION)
    # https://bugzilla.gnome.org/show_bug.cgi?id=646860
    self._entry.get_style_context().add_provider(css, 600)

    self._prompt_label = Gtk.Label(label='<b>>>></b>', use_markup=True)
    self._prompt_label.show()

    self._entry.connect('focus-out-event', self.on_entry_focus_out)
    self._entry.connect('key-press-event', self.on_entry_key_press)

    self._history = History(
        os.path.join(GLib.get_user_config_dir(), 'gedit/commander/history'))
    self._prompt = None
    self._accel_group = None

    hbox.pack_start(self._prompt_label, False, False, 0)
    hbox.pack_start(self._entry, True, True, 0)

    self.copy_style_from_view()
    self.view_style_updated_id = self._view.connect(
        'style-updated', self.on_view_style_updated)

    self.add(hbox)
    self.attach()
    self._entry.grab_focus()

    self._wait_timeout = 0
    self._info_window = None

    self.connect('destroy', self.on_destroy)
    self.connect_after('size-allocate', self.on_size_allocate)
    self.view_draw_id = self._view.connect_after('draw', self.on_draw)

    self._history_prefix = None
    self._suspended = None
    self._handlers = [
        [0, Gdk.KEY_Up, self.on_history_move, -1],
        [0, Gdk.KEY_Down, self.on_history_move, 1],
        [None, Gdk.KEY_Return, self.on_execute, None],
        [None, Gdk.KEY_KP_Enter, self.on_execute, None],
        [0, Gdk.KEY_Tab, self.on_complete, None],
        [0, Gdk.KEY_ISO_Left_Tab, self.on_complete, None]
    ]

    self._re_complete = re.compile(
        '("((?:\\\\"|[^"])*)"?|\'((?:\\\\\'|[^\'])*)\'?|[^\s]+)')
    self._command_state = commands.Commands.State()
def main(): """ Main procedure. """ # Hyper-parameters. gamma = 0.95 epsilon = 1.0 epsilon_min = 0.1 epsilon_step = 0.01 batch_size = 64 actions = [1, 2, 3, 7, 8] n_action = 5 n_history = 50000 n_episode = 5000 n_observation = 4 # Initalize instances. env = gym.make('Enduro-v0', frameskip=5) online = Model(n_action=n_action).cuda() target = Model(n_action=n_action).cuda() trainer = Trainer(online, target, gamma=gamma) history = History('s', 'a', 'r', 's*', 't', maxlen=n_history) for episode in range(n_episode): # Initialize the environment. observation, observations = env.reset(), deque(maxlen=n_observation) for _ in range(n_observation): state = preprocess(observation, observations) # Iterate until the episode is done. total, done = 0, False while not done: # Choose between exploration vs exploitation. if np.random.rand() <= epsilon: action = np.random.randint(n_action) else: action = online.predict(state) # Interact with the environment. observation, reward, done, _ = env.step(actions[action]) consequence = preprocess(observation, observations) total += reward # Stack the experience tuple. history.append(state, action, reward, consequence, done) # Preserve the next state as a current state. state = consequence # Skip learning phase if it doesn't have enough history. if len(history) < n_history: continue # Checkpoint. trainer.save(f'checkpoint/{episode:04d}-{int(total):03d}.pt') # Epsilon schedule. epsilon = max(epsilon - epsilon_step, epsilon_min) # Mini-batch training. for replay in history.replay(batch_size): trainer.train(*replay) # Update target network. if episode % 5 == 0: trainer.update()
def main():
    args = build_parser().parse_args()

    config = build_default_config()
    config.merge_from_file(args.config_path)
    config.experiment_path = args.experiment_path
    config.render = not args.no_render
    config.freeze()
    del args

    writer = SummaryWriter(config.experiment_path)

    seed_torch(config.seed)
    env = VecEnv([lambda: build_env(config) for _ in range(config.workers)])
    if config.render:
        env = wrappers.TensorboardBatchMonitor(env, writer, config.log_interval)
    env = wrappers.torch.Torch(env, device=DEVICE)
    env.seed(config.seed)

    policy_model = ModelDQN(config.model, env.observation_space, env.action_space).to(DEVICE)
    target_model = ModelDQN(config.model, env.observation_space, env.action_space).to(DEVICE)
    target_model.load_state_dict(policy_model.state_dict())
    optimizer = build_optimizer(config.opt, policy_model.parameters())
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.episodes)

    metrics = {
        "loss": Mean(),
        "lr": Last(),
        "eps": FPS(),
        "ep/length": Mean(),
        "ep/reward": Mean(),
    }

    # ==================================================================================================================
    # training loop

    policy_model.train()
    target_model.eval()
    episode = 0
    s = env.reset()

    e_base = 0.95
    e_step = np.exp(np.log(0.05 / e_base) / config.episodes)

    bar = tqdm(total=config.episodes, desc="training")
    history = History()
    while episode < config.episodes:
        with torch.no_grad():
            for _ in range(config.horizon):
                av = policy_model(s)
                a = sample_action(av, e_base * e_step ** episode)
                s_prime, r, d, meta = env.step(a)
                history.append(
                    state=s.cpu(),
                    action=a.cpu(),
                    reward=r.cpu(),
                    done=d.cpu(),
                    state_prime=s_prime.cpu(),
                )
                # history.append(state=s, action=a, reward=r, done=d, state_prime=s_prime)
                s = s_prime

                (indices,) = torch.where(d)
                for i in indices:
                    metrics["eps"].update(1)
                    metrics["ep/length"].update(meta[i]["episode"]["l"])
                    metrics["ep/reward"].update(meta[i]["episode"]["r"])
                    episode += 1
                    scheduler.step()
                    bar.update(1)

                    if episode % 10 == 0:
                        target_model.load_state_dict(policy_model.state_dict())

                    if episode % config.log_interval == 0 and episode > 0:
                        for k in metrics:
                            writer.add_scalar(
                                k, metrics[k].compute_and_reset(), global_step=episode
                            )
                        writer.add_scalar("e", e_base * e_step ** episode, global_step=episode)
                        # NB: the histograms below use the rollout, returns,
                        # and action values computed at the end of the
                        # previous outer iteration; they are undefined until
                        # the first training step has run.
                        writer.add_histogram(
                            "rollout/action", rollout.actions, global_step=episode
                        )
                        writer.add_histogram(
                            "rollout/reward", rollout.rewards, global_step=episode
                        )
                        writer.add_histogram("rollout/return", returns, global_step=episode)
                        writer.add_histogram(
                            "rollout/action_value", action_values, global_step=episode
                        )

        rollout = history.full_rollout()

        action_values = policy_model(rollout.states)
        action_values = action_values * one_hot(rollout.actions, action_values.size(-1))
        action_values = action_values.sum(-1)

        with torch.no_grad():
            action_values_prime = target_model(rollout.states_prime)
            action_values_prime, _ = action_values_prime.detach().max(-1)

        returns = one_step_discounted_return(
            rollout.rewards, action_values_prime, rollout.dones, gamma=config.gamma
        )

        # critic
        errors = returns - action_values
        critic_loss = errors ** 2
        loss = (critic_loss * 0.5).mean(1)

        metrics["loss"].update(loss.data.cpu().numpy())
        metrics["lr"].update(np.squeeze(scheduler.get_lr()))

        # training
        optimizer.zero_grad()
        loss.mean().backward()
        nn.utils.clip_grad_norm_(policy_model.parameters(), 0.5)
        optimizer.step()

    bar.close()
    env.close()
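# A plausible reading (an assumption, not this project's definition) of
# one_step_discounted_return as used above: the standard one-step
# Q-learning target y = r + gamma * (1 - done) * max_a' Q_target(s', a'),
# computed elementwise over the batch.
import torch

def one_step_discounted_return(rewards, value_prime, dones, gamma):
    # dones masks out the bootstrap term at episode boundaries.
    return rewards + gamma * (1.0 - dones.float()) * value_prime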
app.register_blueprint(
    Books(url_prefix="/",
          dir_books=os.path.join(JLAT_DIR, "files", "uploads", "book")))

from note import Notes
app.register_blueprint(Notes(url_prefix="/"))

from review import Reviews
app.register_blueprint(Reviews(url_prefix="/"))

from up import Ups
app.register_blueprint(
    Ups(url_prefix='/',
        dir_uploads=os.path.join(JLAT_DIR, "files", "uploads", "import")))

from synth import Synth
app.register_blueprint(Synth(url_prefix='/'))

from history import History
app.register_blueprint(
    History(url_prefix='/',
            archives=os.path.join(JLAT_DIR, "files", "archives")))

from register import Register
app.register_blueprint(Register(url_prefix='/'))


@app.route("/", methods=["GET", "POST"])
@login_required
def home():
    return render_template('index.html')


if __name__ == "__main__":
    db.init_app(app)
    with app.app_context():
        db.create_all()
    with app.app_context():
def __init__(self):
    self.config = Config()
    self.net_tool = NetTool()
    self.history = History()
def __init__(self, show_training=False):
    self.history = History()
    self.show_training = show_training
    self.grasp = np.array((0, 0, 0.067))  # acquire autonomously
def sim(config):
    # TODO: Create agents here
    agents = init_agents(config)
    # Uncomment to print agents.
    # for a in agents:
    #     logging.info(a)
    n = len(agents)
    by_id = dict((a.id, a) for a in agents)
    agent_ids = [a.id for a in agents]

    if (config.mechanism.lower() == 'gsp' or
            config.mechanism.lower() == 'switch'):
        mechanism = GSP
    elif config.mechanism.lower() == 'vcg':
        mechanism = VCG
    else:
        raise ValueError("mechanism must be one of 'gsp', 'vcg', or 'switch'")

    reserve = config.reserve

    # Dictionaries: round # -> per_slot_list_of_whatever
    slot_occupants = {}
    slot_clicks = {}
    per_click_payments = {}
    slot_payments = {}
    values = {}
    bids = {}

    history = History(bids, slot_occupants, slot_clicks,
                      per_click_payments, slot_payments, n)

    def total_spent(agent_id, end):
        """
        Compute total amount spent by agent_id through (not including)
        round end.
        """
        s = 0
        for t in range(end):
            slot = agent_slot(slot_occupants, agent_id, t)
            if slot != -1:
                s += slot_payments[t][slot]
        return s

    def run_round(top_slot_clicks, t):
        """
        top_slot_clicks is the expected number of clicks in the top slot;
        t is the round number.
        """
        if t == 0:
            bids[t] = [(a.id, a.initial_bid(reserve)) for a in agents]
        else:
            # Bids from agents with no money get reduced to zero.
            have_money = lambda a: total_spent(a.id, t) < config.budget
            still_have_money = filter(have_money, agents)  # unused
            current_bids = []
            for a in agents:
                b = a.bid(t, history, reserve)
                if total_spent(a.id, t) < config.budget:
                    current_bids.append((a.id, b))
                else:
                    # Out of money: make bid zero.
                    current_bids.append((a.id, 0))
            bids[t] = current_bids

        ## Ignore bids below the reserve price.
        ## (Ported to Python 3; the original used a tuple-unpacking lambda.)
        active_bidders = len([(i, b) for (i, b) in bids[t] if b >= reserve])

        #####################################
        ## 1a. Define no. of slots
        ## (TODO: check what the # of available slots should be)
        # num_slots = max(1, active_bidders - 1)
        num_slots = max(1, n - 1)

        ## 1b. Calculate clicks/slot
        slot_clicks[t] = [
            iround(top_slot_clicks * pow(config.dropoff, i))
            for i in range(num_slots)
        ]

        ## 2. Run mechanism and allocate slots
        (slot_occupants[t], per_click_payments[t]) = mechanism.compute(
            slot_clicks[t], reserve, bids[t])

        ## 3. Define payments
        slot_payments[t] = [clicks * price for (clicks, price) in
                            zip(slot_clicks[t], per_click_payments[t])]

        ## 4. Save utility (misnamed as values)
        ## The original referenced an undefined name `zeros`; a list of n
        ## zeros is what the bookkeeping needs.
        values[t] = dict(zip(agent_ids, [0] * n))

        def agent_value(agent_id, clicks, payment):
            if agent_id is not None:
                values[t][agent_id] = by_id[agent_id].value * clicks - payment

        ## A plain loop replaces the original side-effecting map(), which
        ## is lazy in Python 3 and would never run.
        for (agent_id, clicks, payment) in zip(slot_occupants[t],
                                               slot_clicks[t],
                                               slot_payments[t]):
            agent_value(agent_id, clicks, payment)

        ## Debugging. Set to True to see what's happening.
        log_console = True
        if log_console:
            logging.info("\t=== Round %d ===" % t)
            logging.info("\tnum_slots: %d" % num_slots)
            logging.info("\tbids: %s" % bids[t])
            logging.info("\tslot occupants: %s" % slot_occupants[t])
            logging.info("\tslot_clicks: %s" % slot_clicks[t])
            logging.info("\tper_click_payments: %s" % per_click_payments[t])
            logging.info("\tslot_payments: %s" % slot_payments[t])
            logging.info("\tUtility: %s" % values[t])
            logging.info("\ttotals spent: %s" %
                         [total_spent(a.id, t + 1) for a in agents])

    for t in range(0, config.num_rounds):
        # Over 48 rounds, go from 80 to 20 and back to 80. Mean 50.
        # Makes sense when there are 48 rounds, to simulate a day.
        top_slot_clicks = iround(30 * math.cos(math.pi * t / 24) + 50)

        if t == config.num_rounds // 2 and config.mechanism == 'switch':
            mechanism = VCG

        ## 0. Run one round.
        run_round(top_slot_clicks, t)
        for a in agents:
            history.set_agent_spent(a.id, total_spent(a.id, t))

    for a in agents:
        history.set_agent_spent(a.id, total_spent(a.id, config.num_rounds))

    return history
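# A toy check of the payment bookkeeping in run_round above: per-slot
# payments are the elementwise product of per-slot clicks and per-click
# prices. The numbers here are made up.
slot_clicks_example = [40, 20, 10]
per_click_example = [55, 30, 0]  # e.g., the last slot cleared at zero
assert [c * p for c, p in zip(slot_clicks_example, per_click_example)] == \
    [2200, 600, 0]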
def doOperation(self, operation: Operation):
    operation.execute()
    self.addToHistory(
        History("Do operation: " + operation.__class__.__name__, self.getId()))
def retrieve_history(self):
    if not self.has_history():
        raise HistoryNotFound(self)
    history_db = self.retrieve_object(self.manifest.history_database, 'H')
    return History(history_db)
def generate_history():
    data_generation = DataGeneration()
    history = History(data_generation.generate_transactions())
    history.interleave_transaction_schedule()
    return jsonify(history.serialize()), 200
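# A minimal sketch of exercising the view above with Flask's test client.
# The '/history' URL and the `app` fixture are assumptions; the snippet
# shows only the view function, not how it is routed.
def test_generate_history(app):
    with app.test_client() as client:
        resp = client.get('/history')
        assert resp.status_code == 200
        assert resp.is_json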
def close_product(self):
    self._history.append(History("Account closed", self.getId()))
    return True
def __init__(self):
    self.name = None
    self.email = None
    self.created_at = str(timestring.Date('today'))
    self.history = History(self)
def create(self):
    table = 'screensaver' if is_screensaver_mode() else 'photoframe'
    return History(table)
tr = Tracer('tr')
tr.open('/tmp/', '', '.dat')
tr.start()
robot.after.addSignal('tr.triger')

# tr.add(dyn.name + '.ffposition', 'ff')
tr.add(taskRF.featureDes.name + '.position', 'refr')
tr.add(taskLF.featureDes.name + '.position', 'refl')
tr.add('dyn.rf', 'r')
tr.add('dyn.lf', 'l')
tr.add('featureComDes.errorIN', 'comref')
tr.add('dyn.com', 'com')
tr.add(taskWaist.gain.name + '.gain', 'gainwaist')

history = History(dyn, 1)

# --- RUN -----------------------------------------------------------------

featurePosture.selec.value = toFlags(range(6, 36))

sot.clear()
for task in [taskWaist, taskRF, taskLF]:
    task.feature.position.recompute(0)
    task.feature.keep()
    task.feature.selec.value = '111111'
    sot.push(task.task.name)

taskWaist.ref = matrixToTuple(eye(4))
taskWaist.feature.selec.value = '111011'
taskWaist.gain.setByPoint(18, 0.1, 0.005, 0.8)
Jason Mahr
"""

from constants import *
from cube import Cube
from fitness import *
from history import History
from validate import is_even, is_solved


## For history.py

h = History()
moves = [1, 5, 1, 9, 9, 8, 8, 12, 14, 15, 17, 12, 2, 3, 6, 5, 7, 7, 9, 10]
for move in moves:
    h.add(move)
assert h.get() == ["R'", 'B2', 'F2', 'U', "L'", 'R', 'F', "R'", "B'"]


## For cube.py

c = Cube()
for move in moves:
    c.move(move)
assert c.get_cube() == [[1, 1, 5, 4, 3, 1, 4, 4], [3, 3, 0, 4, 2, 3, 4, 0],
                        [3, 5, 0, 2, 1, 2, 4, 2], [5, 5, 2, 3, 0, 3, 1, 0],
                        [4, 2, 2, 5, 5, 0, 1, 4], [0, 1, 3, 1, 5, 0, 2, 5]]
def run_sim_once(self):
    """Return a history."""
    conf = self.config

    # Keep track of the current round. Needs to be in scope for helpers.
    round = 0

    def check_pred(pred, msg, Exc, lst):
        """Check if any element of lst matches the predicate. If it does,
        raise an exception of type Exc, including the msg and the
        offending element."""
        m = list(map(pred, lst))
        if True in m:
            i = m.index(True)
            raise Exc(msg + " Bad element: %s" % lst[i])

    def check_uploads(peer, uploads):
        """Raise an IllegalUpload exception if there is a problem."""
        def check(pred, msg):
            check_pred(pred, msg, IllegalUpload, uploads)

        not_upload = lambda o: not isinstance(o, Upload)
        check(not_upload, "List of Uploads contains non-Upload object.")

        self_upload = lambda upload: upload.to_id == peer.id
        check(self_upload, "Can't upload to yourself.")

        not_from_self = lambda upload: upload.from_id != peer.id
        check(not_from_self, "Upload.from != peer id.")

        check(lambda u: u.bw < 0, "Upload bandwidth must be non-negative!")

        limit = self.up_bw(peer.id)
        print(sum([u.bw for u in uploads]), " ", limit)
        if sum([u.bw for u in uploads]) > limit:
            raise IllegalUpload("Can't upload more than limit of %d. %s" %
                                (limit, uploads))
        # If we got here, looks ok.

    def check_requests(peer, requests, peer_pieces, available):
        """Raise an IllegalRequest exception if there is a problem."""
        def check(pred, msg):
            check_pred(pred, msg, IllegalRequest, requests)

        check(lambda o: not isinstance(o, Request),
              "List of Requests contains non-Request object.")

        bad_piece_id = lambda r: (r.piece_id < 0 or
                                  r.piece_id >= self.config.num_pieces)
        check(bad_piece_id, "Request asks for non-existent piece!")

        bad_peer_id = lambda r: r.peer_id not in self.peer_ids
        check(bad_peer_id, "Request mentions non-existent peer!")

        bad_requester_id = lambda r: r.requester_id != peer.id
        check(bad_requester_id, "Request has wrong peer id!")

        bad_start_block = lambda r: (
            r.start < 0 or
            r.start >= self.config.blocks_per_piece or
            r.start > peer_pieces[peer.id][r.piece_id])
        # Must request the _next_ necessary block
        check(bad_start_block, "Request has bad start block!")

        def piece_peer_does_not_have(r):
            other_peer = self.peers_by_id[r.peer_id]
            return r.piece_id not in available[other_peer.id]
        check(piece_peer_does_not_have, "Asking for piece peer does not have!")
        # If we got here, looks ok

    def available_pieces(peer_id, peer_pieces):
        """Return a list of piece ids that this peer has available."""
        return [
            i for i in range(conf.num_pieces)
            if peer_pieces[peer_id][i] == conf.blocks_per_piece
        ]

    def peer_done(peer_pieces, peer_id):
        # TODO: remove linear pass
        for blocks_so_far in peer_pieces[peer_id]:
            if blocks_so_far < conf.blocks_per_piece:
                return False
        return True

    def all_done(peer_pieces):
        result = True
        # Check all peers to update done status
        for peer_id in peer_pieces:
            if peer_done(peer_pieces, peer_id):
                history.peer_is_done(round, peer_id)
            else:
                result = False
        return result

    def create_peers():
        """Each agent class must be already loaded, and have a constructor
        that takes the config, id, pieces, and up and down bandwidth, in
        that order."""
        def load(class_name, params):
            agent_class = conf.agent_classes[class_name]
            return agent_class(*params)

        counts = dict()

        def index(name):
            if name in counts:
                a = counts[name]
                counts[name] += 1
            else:
                a = 0
                counts[name] = 1
            return a

        n = len(conf.agent_class_names)
        ids = ["%s%d" % (n, index(n)) for n in conf.agent_class_names]

        is_seed = lambda id: id.startswith("Seed")

        def get_pieces(id):
            if id.startswith("Seed"):
                return [conf.blocks_per_piece] * conf.num_pieces
            else:
                return [0] * conf.num_pieces

        peer_pieces = dict()  # id -> list (blocks / piece)
        peer_pieces = dict((id, get_pieces(id)) for id in ids)
        pieces = [get_pieces(id) for id in ids]

        r = itertools.repeat

        # Re-initialize upload bandwidths at the beginning of each
        # new simulation
        up_bws = [self.up_bw(id, reinit=True) for id in ids]
        params = list(zip(r(conf), ids, pieces, up_bws))

        peers = list(map(load, conf.agent_class_names, params))
        # logging.debug("Peers: \n" + "\n".join(str(p) for p in peers))
        return peers, peer_pieces

    def get_peer_requests(p, peer_info, peer_history, peer_pieces, available):
        def remove_me(info):
            # TODO: Do we need this linear pass?
            return [peer for peer in peer_info if peer.id != p.id]

        pieces = copy.copy(peer_pieces[p.id])
        # Made a copy of pieces and the peer info this peer needs to make its
        # decision, so that it can't change the simulation's copies.
        p.update_pieces(pieces)
        rs = p.requests(remove_me(peer_info), peer_history)
        check_requests(p, rs, peer_pieces, available)
        return rs

    def get_peer_uploads(all_requests, p, peer_info, peer_history):
        def remove_me(info):
            # TODO: remove this pass? Use a set?
            return [peer for peer in peer_info if peer.id != p.id]

        def requests_to(id):
            f = lambda r: r.peer_id == id
            ans = []
            for rs in list(all_requests.values()):
                ans.extend(list(filter(f, rs)))
            return ans

        requests = requests_to(p.id)

        us = p.uploads(requests, remove_me(peer_info), peer_history)
        check_uploads(p, us)
        return us

    def upload_rate(uploads, uploader_id, requester_id):
        """Return the uploading rate from uploader to requester in blocks
        per time period, or 0 if not uploading."""
        for u in uploads[uploader_id]:
            if u.to_id == requester_id:
                return u.bw
        return 0

    def update_peer_pieces(peer_pieces, requests, uploads, available):
        """Process the uploads: figure out how many blocks of all the
        requested pieces the requesters ended up with. Make sure
        requesting the same thing from lots of peers doesn't stack.
        Update the sets of available pieces as needed."""
        downloads = dict()  # peer_id -> [downloads]
        new_pp = copy.deepcopy(peer_pieces)
        for requester_id in requests:
            downloads[requester_id] = list()

        for requester_id in requests:
            # Keep track of how many blocks of each piece this requester
            # got.  piece -> (blocks, from_who)
            new_blocks_per_piece = dict()

            def update_count(piece_id, blocks, peer_id):
                if piece_id in new_blocks_per_piece:
                    old = new_blocks_per_piece[piece_id][0]
                    if blocks > old:
                        new_blocks_per_piece[piece_id] = (blocks, peer_id)
                else:
                    new_blocks_per_piece[piece_id] = (blocks, peer_id)

            # Group the requests by peer that is being asked
            get_peer_id = lambda r: r.peer_id
            rs = sorted(requests[requester_id], key=get_peer_id)
            for peer_id, rs_for_peer in itertools.groupby(rs, get_peer_id):
                bw = upload_rate(uploads, peer_id, requester_id)
                if bw == 0:
                    continue
                # This bandwidth gets applied in order to each piece requested
                for r in rs_for_peer:
                    needed_blocks = conf.blocks_per_piece - r.start
                    alloced_bw = min(bw, needed_blocks)
                    update_count(r.piece_id, alloced_bw, peer_id)
                    bw -= alloced_bw
                    if bw == 0:
                        break

            for piece_id in new_blocks_per_piece:
                (blocks, peer_id) = new_blocks_per_piece[piece_id]
                new_pp[requester_id][piece_id] += blocks
                if new_pp[requester_id][piece_id] == conf.blocks_per_piece:
                    available[requester_id].add(piece_id)
                d = Download(peer_id, requester_id, piece_id, blocks)
                downloads[requester_id].append(d)

        return (new_pp, downloads)

    def completed_pieces(peer_id, available):
        return len(available[peer_id])

    def log_peer_info(peer_pieces, available):
        for p_id in self.peer_ids:
            pieces = peer_pieces[p_id]
            logging.debug("pieces for %s: %s" % (str(p_id), str(pieces)))
        log = ", ".join("%s:%s" % (p_id, completed_pieces(p_id, available))
                        for p_id in self.peer_ids)
        logging.info("Pieces completed: " + log)

    logging.debug("Starting simulation with config: %s" % str(conf))

    peers, peer_pieces = create_peers()
    self.peer_ids = [p.id for p in peers]
    self.peers_by_id = dict((p.id, p) for p in peers)

    upload_rates = dict((id, self.up_bw(id)) for id in self.peer_ids)
    history = History(self.peer_ids, upload_rates)

    # dict : pid -> set(finished / available pieces)
    available = dict((pid, set(available_pieces(pid, peer_pieces)))
                     for pid in self.peer_ids)

    # Begin the event loop
    while True:
        logging.info("======= Round %d ========" % round)

        peer_info = [PeerInfo(p.id, available[p.id]) for p in peers]
        requests = dict()  # peer_id -> list of Requests
        uploads = dict()   # peer_id -> list of Uploads

        h = dict()
        for p in peers:
            h[p.id] = history.peer_history(p.id)
            requests[p.id] = get_peer_requests(p, peer_info, h[p.id],
                                               peer_pieces, available)

        for p in peers:
            uploads[p.id] = get_peer_uploads(requests, p, peer_info, h[p.id])

        (peer_pieces, downloads) = update_peer_pieces(
            peer_pieces, requests, uploads, available)
        history.update(downloads, uploads)

        logging.debug(history.pretty_for_round(round))
        log_peer_info(peer_pieces, available)

        if all_done(peer_pieces):
            logging.info("All done!")
            break

        round += 1
        if round > conf.max_round:
            logging.info("Out of time. Stopping.")
            break

    logging.info("Game history:\n%s" % history.pretty())

    logging.info("======== STATS ========")
    logging.info("Uploaded blocks:\n%s" %
                 Stats.uploaded_blocks_str(self.peer_ids, history))
    logging.info("Completion rounds:\n%s" %
                 Stats.completion_rounds_str(self.peer_ids, history))
    logging.info("All done round: %s" %
                 Stats.all_done_round(self.peer_ids, history))

    return history
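# A toy illustration of the "doesn't stack" rule in update_peer_pieces
# above: a requester that gets 3 blocks of a piece from one peer and 5
# from another ends up with max(3, 5) = 5 new blocks, not 8.
new_blocks_per_piece = {}
for blocks, peer_id in [(3, 'Seed0'), (5, 'Seed1')]:
    if ('piece7' not in new_blocks_per_piece
            or blocks > new_blocks_per_piece['piece7'][0]):
        new_blocks_per_piece['piece7'] = (blocks, peer_id)
assert new_blocks_per_piece['piece7'] == (5, 'Seed1')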
                  loss0=loss0)
ckpter_auc = CheckPoint(model=model, optimizer=optimizer_model,
                        path=path_ckpt, prefix=run_name,
                        interval=1, save_num=n_save_epoch,
                        loss0=auc_last)
ckpter_auc_lr = CheckPoint(model=logisticReg, optimizer=optimizer_model,
                           path=path_ckpt, prefix=run_name + '_lr',
                           interval=1, save_num=n_save_epoch,
                           loss0=auc_last)

train_hist = History(name='train_hist' + run_name)
validation_hist = History(name='validation_hist' + run_name)

if start:
    # --------- Training logs before start training -----------------
    # model.eval()
    # logisticReg.eval()
    with torch.no_grad():
        tot_loss, tot_acc = 0, 0
        n_batches = len(train_loader)
        Ptp01, Ptp05, Ptp1, AUC = (np.zeros(n_batches // n_batch_verif),
                                   np.zeros(n_batches // n_batch_verif),
                                   np.zeros(n_batches // n_batch_verif),
                                   np.zeros(n_batches // n_batch_verif))
        vs, vf, tg = [], [], []
        idx = -1
def __init__(self, view):
    gtk.EventBox.__init__(self)
    self._view = view

    hbox = gtk.HBox(False, 3)
    hbox.show()
    hbox.set_border_width(3)

    self._entry = gtk.Entry()
    self._entry.modify_font(self._view.style.font_desc)
    self._entry.set_has_frame(False)
    self._entry.set_name('command-bar')
    self._entry.modify_text(gtk.STATE_NORMAL,
                            self._view.style.text[gtk.STATE_NORMAL])
    self._entry.set_app_paintable(True)

    self._entry.connect('realize', self.on_realize)
    self._entry.connect('expose-event', self.on_entry_expose)

    self._entry.show()

    self._prompt_label = gtk.Label('<b>>>></b>')
    self._prompt_label.set_use_markup(True)
    self._prompt_label.modify_font(self._view.style.font_desc)
    self._prompt_label.show()
    self._prompt_label.modify_fg(gtk.STATE_NORMAL,
                                 self._view.style.text[gtk.STATE_NORMAL])

    self.modify_bg(gtk.STATE_NORMAL, self.background_gdk())
    self._entry.modify_base(gtk.STATE_NORMAL, self.background_gdk())

    self._entry.connect('focus-out-event', self.on_entry_focus_out)
    self._entry.connect('key-press-event', self.on_entry_key_press)

    self.connect_after('size-allocate', self.on_size_allocate)
    self.connect_after('expose-event', self.on_expose)
    self.connect_after('realize', self.on_realize)

    self._history = History(
        os.path.expanduser('~/.config/pluma/commander/history'))
    self._prompt = None
    self._accel_group = None

    hbox.pack_start(self._prompt_label, False, False, 0)
    hbox.pack_start(self._entry, True, True, 0)

    self.add(hbox)
    self.attach()
    self._entry.grab_focus()

    self._wait_timeout = 0
    self._info_window = None

    self.connect('destroy', self.on_destroy)

    self._history_prefix = None
    self._suspended = None
    self._handlers = [
        [0, gtk.keysyms.Up, self.on_history_move, -1],
        [0, gtk.keysyms.Down, self.on_history_move, 1],
        [None, gtk.keysyms.Return, self.on_execute, None],
        [None, gtk.keysyms.KP_Enter, self.on_execute, None],
        [0, gtk.keysyms.Tab, self.on_complete, None],
        [0, gtk.keysyms.ISO_Left_Tab, self.on_complete, None]
    ]

    self._re_complete = re.compile(
        '("((?:\\\\"|[^"])*)"?|\'((?:\\\\\'|[^\'])*)\'?|[^\s]+)')
    self._command_state = commands.Commands.State()
def __init__(self, config):
    # init replay memory
    self.session = tf.Session()
    self.config = config
    # init parameters
    self.timeStep = 0
    self.stateInput = tf.placeholder(tf.int32, [None, self.config.seq_length])
    self.data = {}
    self.history = [History(), History(), History()]
    self.BATCH_SIZE = 256

    # set config.final_vocab_size manually
    embed = tf.Variable(
        tf.random_uniform(
            [self.config.final_vocab_size, self.config.embed_dim], -1.0, 1.0),
        name="embed")
    word_embeds = tf.nn.embedding_lookup(embed, self.stateInput)

    self.initializer = tf.truncated_normal_initializer(stddev=0.02)
    self.cell = tf.nn.rnn_cell.LSTMCell(self.config.rnn_size,
                                        initializer=self.initializer,
                                        state_is_tuple=True)
    initial_state = self.cell.zero_state(self.BATCH_SIZE, tf.float32)

    outputs, _ = tf.nn.rnn(
        self.cell,
        [tf.reshape(embed_t, [-1, self.config.embed_dim])
         for embed_t in tf.split(1, self.config.seq_length, word_embeds)],
        dtype=tf.float32,
        initial_state=initial_state,
        scope="LSTMN")

    self.output_embed = tf.transpose(tf.pack(outputs), [1, 0, 2])
    self.mean_pool = tf.reduce_mean(self.output_embed, 1)

    linear_output = tf.nn.relu(
        tf.nn.rnn_cell._linear(self.mean_pool,
                               int(self.output_embed.get_shape()[2]),
                               1.0, 0.01, scope="linearN"))
    linear_output_21 = tf.nn.relu(
        tf.nn.rnn_cell._linear(linear_output,
                               int(self.output_embed.get_shape()[2]),
                               1.0, 0.01, scope="linearN21"))
    linear_output_22 = tf.nn.relu(
        tf.nn.rnn_cell._linear(linear_output,
                               int(self.output_embed.get_shape()[2]),
                               1.0, 0.01, scope="linearN22"))
    linear_output_23 = tf.nn.relu(
        tf.nn.rnn_cell._linear(linear_output,
                               int(self.output_embed.get_shape()[2]),
                               1.0, 0.01, scope="linearN23"))

    # We calculate the Q values for the student network.
    self.action_value_1 = tf.nn.rnn_cell._linear(
        linear_output_21, self.config.num_actions, 1.0, 0.01, scope="actionN1")
    self.object_value_1 = tf.nn.rnn_cell._linear(
        linear_output_21, self.config.num_objects, 1.0, 0.01, scope="objectN1")
    self.action_value_2 = tf.nn.rnn_cell._linear(
        linear_output_22, self.config.num_actions, 1.0, 0.01, scope="actionN2")
    self.object_value_2 = tf.nn.rnn_cell._linear(
        linear_output_22, self.config.num_objects, 1.0, 0.01, scope="objectN2")
    self.action_value_3 = tf.nn.rnn_cell._linear(
        linear_output_23, self.config.num_actions, 1.0, 0.01, scope="actionN3")
    self.object_value_3 = tf.nn.rnn_cell._linear(
        linear_output_23, self.config.num_objects, 1.0, 0.01, scope="objectN3")

    # Here we will input the teacher's Q values.
    self.target_action_value = tf.placeholder(
        tf.float32, [None, self.config.num_actions])
    self.target_object_value = tf.placeholder(
        tf.float32, [None, self.config.num_objects])

    # Here we calculate the probabilities for the teacher network.
    self.target_action_prob = tf.nn.softmax(
        tf.truediv(self.target_action_value, self.config.temperature))
    self.target_object_prob = tf.nn.softmax(
        tf.truediv(self.target_object_value, self.config.temperature))

    # Here we calculate the probabilities for the student network.
    self.pred_action_prob_1 = tf.nn.softmax(self.action_value_1)
    self.pred_object_prob_1 = tf.nn.softmax(self.object_value_1)
    self.pred_action_prob_2 = tf.nn.softmax(self.action_value_2)
    self.pred_object_prob_2 = tf.nn.softmax(self.object_value_2)
    self.pred_action_prob_3 = tf.nn.softmax(self.action_value_3)
    self.pred_object_prob_3 = tf.nn.softmax(self.object_value_3)

    entropy_action = -tf.reduce_sum(
        self.target_action_prob * tf.log(self.target_action_prob),
        reduction_indices=[1])
    entropy_object = -tf.reduce_sum(
        self.target_object_prob * tf.log(self.target_object_prob),
        reduction_indices=[1])

    cross_entropy_action_1 = -tf.reduce_sum(
        self.target_action_prob * tf.log(self.pred_action_prob_1),
        reduction_indices=[1])
    cross_entropy_object_1 = -tf.reduce_sum(
        self.target_object_prob * tf.log(self.pred_object_prob_1),
        reduction_indices=[1])
    cross_entropy_action_2 = -tf.reduce_sum(
        self.target_action_prob * tf.log(self.pred_action_prob_2),
        reduction_indices=[1])
    cross_entropy_object_2 = -tf.reduce_sum(
        self.target_object_prob * tf.log(self.pred_object_prob_2),
        reduction_indices=[1])
    cross_entropy_action_3 = -tf.reduce_sum(
        self.target_action_prob * tf.log(self.pred_action_prob_3),
        reduction_indices=[1])
    cross_entropy_object_3 = -tf.reduce_sum(
        self.target_object_prob * tf.log(self.pred_object_prob_3),
        reduction_indices=[1])

    self.kl_divergence_1 = tf.reduce_mean(
        0.5 * (cross_entropy_action_1 - entropy_action +
               cross_entropy_object_1 - entropy_object))
    self.kl_divergence_2 = tf.reduce_mean(
        0.5 * (cross_entropy_action_2 - entropy_action +
               cross_entropy_object_2 - entropy_object))
    self.kl_divergence_3 = tf.reduce_mean(
        0.5 * (cross_entropy_action_3 - entropy_action +
               cross_entropy_object_3 - entropy_object))

    self.optim_1 = tf.train.AdamOptimizer(
        learning_rate=self.config.LEARNING_RATE).minimize(self.kl_divergence_1)
    self.optim_2 = tf.train.AdamOptimizer(
        learning_rate=self.config.LEARNING_RATE).minimize(self.kl_divergence_2)
    self.optim_3 = tf.train.AdamOptimizer(
        learning_rate=self.config.LEARNING_RATE).minimize(self.kl_divergence_3)

    self.summary_placeholders = {}
    self.summary_ops = {}
    tags = ['average_reward', 'average_numrewards', 'number_of_episodes',
            'quest1_average_reward_cnt']
    scalar_summary_tags = []
    for i in range(1, 4):
        scalar_summary_tags.append([tag + str(i) for tag in tags])
    for i in range(3):
        for tag in scalar_summary_tags[i]:
            self.summary_placeholders[tag] = tf.placeholder(
                'float32', None, name=tag.replace(' ', '_'))
            self.summary_ops[tag] = tf.scalar_summary(
                'evaluation_data/' + tag, self.summary_placeholders[tag])

    self.saver = tf.train.Saver()
    self.train_writer = tf.train.SummaryWriter(
        self.config.summaries_dir + '/train/' + str(self.config.game_num),
        self.session.graph)

    if not (self.config.LOAD_WEIGHTS and self.load_weights()):
        self.session.run(tf.initialize_all_variables())
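# Note on the loss above: it uses the identity KL(p || q) = H(p, q) - H(p),
# i.e. the cross-entropy of the student distribution q under the teacher's
# temperature-softened distribution p, minus the teacher's entropy; the 0.5
# averages the action-head and object-head divergences.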
def test_md_history():
    yield _md, History(':memory:')
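# Note: this is a nose-style test generator; the runner collects the yielded
# (callable, argument) pair and invokes _md(History(':memory:')) as a test
# case.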
import utils

# hyperparameters
num_epochs = 10000
batch_size = 128
lr = 1e-4
beta = 4
save_iter = 200
shape = (28, 28)
n_obs = shape[0] * shape[1]

# create DAE and ß-VAE and their training history
dae = DAE(n_obs, num_epochs, batch_size, 1e-3, save_iter, shape)
beta_vae = BetaVAE(n_obs, num_epochs, batch_size, 1e-4, beta, save_iter, shape)
history = History()

# fill autoencoder training history with examples
print('Filling history...', end='', flush=True)
transformation = transforms.Compose([
    transforms.ColorJitter(),
    transforms.ToTensor()
])
dataset = MNIST('data', transform=transformation, download=True)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
for data in dataloader:
    img, _ = data
    img = img.view(img.size(0), -1).numpy().tolist()
def __init__(self, config):
    self.dic = pickle.load(
        open("embedTeacher" + str(config.game_num) + ".p", "rb"))
    # init replay memory
    conf = tf.ConfigProto()
    conf.gpu_options.allow_growth = True
    self.session = tf.Session(config=conf)
    self.config = config
    self.memory = self.load_replay_memory(config)
    self.history = History()
    # init parameters
    self.timeStep = 0
    self.epsilon = config.INITIAL_EPSILON

    # self.stateInput = tf.placeholder(tf.int32, [None, self.config.seq_length, self.config.embed_dim])
    # self.stateInputT = tf.placeholder(tf.int32, [None, self.config.seq_length, self.config.embed_dim])
    self.stateInput = tf.placeholder(
        tf.float32, [None, self.config.seq_length, self.config.embed_dim])
    self.stateInputT = tf.placeholder(
        tf.float32, [None, self.config.seq_length, self.config.embed_dim])
    # self.stateInput = tf.placeholder(tf.int32, [self.config.seq_length, self.config.BATCH_SIZE, self.config.embed_dim])
    # self.stateInputT = tf.placeholder(tf.int32, [self.config.seq_length, self.config.BATCH_SIZE, self.config.embed_dim])

    self.word_embeds = self.stateInput
    self.word_embedsT = self.stateInputT

    self.initializer = tf.truncated_normal_initializer(stddev=0.02)
    # self.initializer = tf.random_uniform_initializer(minval=-1.0, maxval=1.0, seed=None, dtype=tf.float32)
    # self.initializer = tf.contrib.layers.xavier_initializer()

    self.cell = tf.nn.rnn_cell.LSTMCell(
        self.config.rnn_size, initializer=self.initializer,
        state_is_tuple=True)
    self.cellT = tf.nn.rnn_cell.LSTMCell(
        self.config.rnn_size, initializer=self.initializer,
        state_is_tuple=True)

    initial_state = self.cell.zero_state(self.config.BATCH_SIZE, tf.float32)
    initial_stateT = self.cellT.zero_state(self.config.BATCH_SIZE, tf.float32)

    # early_stop = tf.constant(self.config.seq_length, dtype=tf.int32)

    outputs, _ = tf.nn.rnn(
        self.cell,
        [tf.reshape(embed_t, [-1, self.config.embed_dim])
         for embed_t in tf.split(1, self.config.seq_length, self.word_embeds)],
        dtype=tf.float32, initial_state=initial_state, scope="LSTMN")
    outputsT, _ = tf.nn.rnn(
        self.cellT,
        [tf.reshape(embed_tT, [-1, self.config.embed_dim])
         for embed_tT in tf.split(1, self.config.seq_length, self.word_embedsT)],
        dtype=tf.float32, initial_state=initial_stateT, scope="LSTMT")
    # outputs, _ = tf.nn.rnn(self.cell, self.word_embeds, dtype=tf.float32, initial_state=initial_state, scope="LSTMN")
    # outputsT, _ = tf.nn.rnn(self.cellT, self.word_embedsT, dtype=tf.float32, initial_state=initial_stateT, scope="LSTMT")

    self.output_embed = tf.transpose(tf.pack(outputs), [1, 0, 2])
    self.output_embedT = tf.transpose(tf.pack(outputsT), [1, 0, 2])

    mean_pool = tf.reduce_mean(self.output_embed, 1)
    mean_poolT = tf.reduce_mean(self.output_embedT, 1)

    linear_output = tf.nn.relu(
        tf.nn.rnn_cell._linear(mean_pool,
                               int(self.output_embed.get_shape()[2]),
                               1, 0.01, scope="linearN"))
    linear_outputT = tf.nn.relu(
        tf.nn.rnn_cell._linear(mean_poolT,
                               int(self.output_embedT.get_shape()[2]),
                               1, 0.01, scope="linearT"))

    self.action_value = tf.nn.rnn_cell._linear(
        linear_output, self.config.num_actions, 1, 0.01, scope="actionN")
    self.action_valueT = tf.nn.rnn_cell._linear(
        linear_outputT, self.config.num_actions, 1, 0.01, scope="actionT")
    self.object_value = tf.nn.rnn_cell._linear(
        linear_output, self.config.num_objects, 1, 0.01, scope="objectN")
    self.object_valueT = tf.nn.rnn_cell._linear(
        linear_outputT, self.config.num_objects, 1, 0.01, scope="objectT")

    self.target_action_value = tf.placeholder(tf.float32, [None])
    self.target_object_value = tf.placeholder(tf.float32, [None])

    self.action_indicator = tf.placeholder(
        tf.float32, [None, self.config.num_actions])
    self.object_indicator = tf.placeholder(
        tf.float32, [None, self.config.num_objects])

    self.pred_action_value = tf.reduce_sum(
        tf.mul(self.action_indicator, self.action_value), 1)
    self.pred_object_value = tf.reduce_sum(
        tf.mul(self.object_indicator, self.object_value), 1)

    self.target_qpred = tf.truediv(
        tf.add(self.target_action_value, self.target_object_value), 2.0)
    # self.qpred = tf.truediv(tf.add(self.pred_action_value, self.pred_object_value), 2.0)

    summary_list = []
    with tf.name_scope('delta'):
        # self.delta_a = self.target_action_value - self.pred_action_value
        # self.delta_o = self.target_object_value - self.pred_object_value
        self.delta_a = self.target_qpred - self.pred_action_value
        self.delta_o = self.target_qpred - self.pred_object_value
        self.variable_summaries(self.delta_a, 'delta_a', summary_list)
        self.variable_summaries(self.delta_o, 'delta_o', summary_list)
        # self.delta = self.target_qpred - self.qpred
        # self.variable_summaries(self.delta, 'delta', summary_list)

    if self.config.clipDelta:
        with tf.name_scope('clippeddelta'):
            # self.delta = tf.clip_by_value(self.delta, self.config.minDelta, self.config.maxDelta, name='clipped_delta')
            self.quadratic_part_a = tf.minimum(abs(self.delta_a),
                                               config.maxDelta)
            self.linear_part_a = abs(self.delta_a) - self.quadratic_part_a
            self.quadratic_part_o = tf.minimum(abs(self.delta_o),
                                               config.maxDelta)
            self.linear_part_o = abs(self.delta_o) - self.quadratic_part_o

            self.quadratic_part = tf.concat(0, [self.quadratic_part_a,
                                                self.quadratic_part_o])
            self.linear_part = tf.concat(0, [self.linear_part_a,
                                             self.linear_part_o])
            # self.quadratic_part = tf.minimum(abs(self.delta), config.maxDelta)
            # self.linear_part = abs(self.delta) - self.quadratic_part
            # self.variable_summaries(self.delta, 'clippeddelta', summary_list)
            # self.variable_summaries(self.linear_part_a, 'linear_part_a', summary_list)
            # self.variable_summaries(self.quadratic_part_a, 'quadratic_part_a', summary_list)
            # self.variable_summaries(self.linear_part_o, 'linear_part_o', summary_list)
            # self.variable_summaries(self.quadratic_part_o, 'quadratic_part_o', summary_list)
            self.variable_summaries(self.linear_part, 'linear_part',
                                    summary_list)
            self.variable_summaries(self.quadratic_part, 'quadratic_part',
                                    summary_list)

    with tf.name_scope('loss'):
        # self.loss = 0.5 * tf.reduce_mean(tf.square(self.delta), name='loss')
        # self.loss_a = tf.reduce_mean(0.5 * tf.square(self.quadratic_part_a) + config.clipDelta * self.linear_part_a, name='loss_a')
        # self.variable_summaries(self.loss_a, 'loss_a', summary_list)
        # self.loss_o = tf.reduce_mean(0.5 * tf.square(self.quadratic_part_o) + config.clipDelta * self.linear_part_o, name='loss_o')
        # self.variable_summaries(self.loss_o, 'loss_o', summary_list)
        self.loss = tf.reduce_mean(
            0.5 * tf.square(self.quadratic_part) +
            config.clipDelta * self.linear_part, name='loss')
        self.variable_summaries(self.loss, 'loss', summary_list)

    self.W = ["LSTMN", "linearN", "actionN", "objectN"]
    self.target_W = ["LSTMT", "linearT", "actionT", "objectT"]

    # for i in range(len(self.W)):
    #     vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.W[i])
    #     varsT = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.target_W[i])
    #     with tf.name_scope('activationsN'):
    #         summary_list.extend(map(lambda x: tf.histogram_summary('activations/' + str(x.name), x), vars))
    #     with tf.name_scope('activationsT'):
    #         summary_list.extend(map(lambda x: tf.histogram_summary('activations/' + str(x.name), x), varsT))

    self.summary_placeholders = {}
    self.summary_ops = {}
    if self.config.TUTORIAL_WORLD:
        scalar_summary_tags = ['average.q_a', 'average.q_o', 'average.q',
                               'average_reward', 'average_numrewards',
                               'number_of_episodes',
                               'quest1_average_reward_cnt',
                               'quest2_average_reward_cnt',
                               'quest3_average_reward_cnt']
    else:
        scalar_summary_tags = ['average.q_a', 'average.q_o', 'average.q',
                               'average_reward', 'average_numrewards',
                               'number_of_episodes',
                               'quest1_average_reward_cnt']
    for tag in scalar_summary_tags:
        self.summary_placeholders[tag] = tf.placeholder(
            'float32', None, name=tag.replace(' ', '_'))
        self.summary_ops[tag] = tf.scalar_summary(
            'evaluation_data/' + tag, self.summary_placeholders[tag])

    # Clipping gradients
    # self.optim_ = tf.train.RMSPropOptimizer(learning_rate=self.config.LEARNING_RATE)
    # tvars = tf.trainable_variables()
    # def ClipIfNotNone(grad, var):
    #     if grad is None:
    #         return (grad, var)
    #     return (tf.clip_by_norm(grad, 10), var)
    # grads = [ClipIfNotNone(i, var) for i, var in self.optim_.compute_gradients(self.loss, tvars)]
    # self.optim = self.optim_.apply_gradients(grads)
    # self.optim = tf.train.RMSPropOptimizer(learning_rate=self.config.LEARNING_RATE).minimize(self.loss_a + self.loss_o)
    # self.optim = tf.train.RMSPropOptimizer(learning_rate=self.config.LEARNING_RATE).minimize(self.loss)
    # self.optim = tf.train.AdagradOptimizer(learning_rate=self.config.LEARNING_RATE).minimize(self.loss)
    # self.optim_a = tf.train.AdagradOptimizer(learning_rate=self.config.LEARNING_RATE).minimize(self.loss_a)
    # self.optim_o = tf.train.AdagradOptimizer(learning_rate=self.config.LEARNING_RATE).minimize(self.loss_o)
    # self.optim1 = tf.train.AdamOptimizer(learning_rate=self.config.LEARNING_RATE).minimize(self.loss_a)
    # self.optim2 = tf.train.AdamOptimizer(learning_rate=self.config.LEARNING_RATE).minimize(self.loss_o)
    self.optim = tf.train.AdamOptimizer(
        learning_rate=self.config.LEARNING_RATE).minimize(self.loss)

    self.saver = tf.train.Saver()
    if not (self.config.LOAD_WEIGHTS and self.load_weights()):
        self.session.run(tf.initialize_all_variables())

    # self.merged = tf.merge_all_summaries()
    self.merged = tf.merge_summary(summary_list)
    self.train_writer = tf.train.SummaryWriter(
        self.config.summaries_dir + '/train/' + str(self.config.game_num),
        self.session.graph)
    self.copyTargetQNetworkOperation()
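# Note on the clipped loss above: the quadratic/linear split is the Huber
# loss in disguise. For |delta| <= maxDelta the term is 0.5 * delta^2;
# beyond that it grows linearly, so large TD errors cannot blow up the
# gradient.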
from torch.utils.tensorboard import SummaryWriter
from albumentations import *
import cv2
import json
from pycocotools.cocoeval import COCOeval

try:
    from apex import amp
    APEX = True
except ModuleNotFoundError:
    APEX = False

if __name__ == '__main__':
    opt = opts().parse()
    logger = TrainingManager(opt.save_dir)
    history = History(opt.save_dir, opt.resume)
    writer = SummaryWriter()
    torch.backends.cudnn.benchmark = True
    print(opt)

    transforms = {
        "train": Compose(
            [
                ShiftScaleRotate(rotate_limit=90, scale_limit=(-0.35, 0.3),
                                 border_mode=cv2.BORDER_CONSTANT),
                PadIfNeeded(min_height=512, min_width=512,
                            border_mode=cv2.BORDER_CONSTANT,
                            always_apply=True),
                RandomCrop(512, 512, always_apply=True),
import sys; print('%s %s' % (sys.executable or sys.platform, sys.version))
'{0:,}'.format(2**100)
'{0:,}'.format(2**100)
'{0:,}'.format(2**100)
'{0:,}'.format(2**100)
'{0:,}'.format(2**100)
'{0:,}'.format(2**100)
print('hello)
print('hello')
import sys; print('%s %s' % (sys.executable or sys.platform, sys.version))
from history import History
x = History()
x.a = 1
import sys; print('%s %s' % (sys.executable or sys.platform, sys.version))
from history import History
x = History()
x.history_dict
import sys; print('%s %s' % (sys.executable or sys.platform, sys.version))
from bag import Bag
b2 = Bag(['b', 'd', 'a', 'b', 'd', 'c', 'd'])
sorted(b2)
[i for i in b2]
b2.bag
iter
iter(b2)
Bag.__iter__(b2)
import sys; print('%s %s' % (sys.executable or sys.platform, sys.version))
from bag import Bag
b2 = Bag(['a', 'b', 'b', 'c', 'd', 'd', 'd'])
iter(b2)
sorted(b)
def weighted_dual_averages_method(oracle, prox, primal_dual_oracle, t_start,
                                  max_iter=1000, eps=1e-5, eps_abs=None,
                                  stop_crit='dual_gap_rel',
                                  verbose_step=100, verbose=False,
                                  save_history=False):
    if stop_crit == 'dual_gap_rel':
        def crit():
            return duality_gap <= eps * duality_gap_init
    elif stop_crit == 'dual_gap':
        def crit():
            return duality_gap <= eps_abs
    elif stop_crit == 'max_iter':
        def crit():
            return it_counter == max_iter
    elif callable(stop_crit):
        crit = stop_crit
    else:
        raise ValueError("stop_crit should be callable or one of the "
                         "following names: 'dual_gap', 'dual_gap_rel', "
                         "'max_iter'")

    A = 0.0
    t = np.copy(t_start)

    grad_sum = np.zeros(len(t_start))
    beta_seq = 1.0
    rho_wda = np.sqrt(2) * np.linalg.norm(t_start)

    flows_weighted = primal_dual_oracle.get_flows(t_start)
    t_weighted = np.copy(t_start)
    primal, dual, duality_gap_init, state_msg = primal_dual_oracle(
        flows_weighted, t_weighted)
    if save_history:
        history = History('iter', 'primal_func', 'dual_func', 'dual_gap')
        history.update(0, primal, dual, duality_gap_init)
    if verbose:
        print(state_msg)

    success = False
    for it_counter in range(1, max_iter + 1):
        grad_t = oracle.grad(t)
        flows = primal_dual_oracle.get_flows(t)  # grad() is called here

        alpha = 1 / np.linalg.norm(grad_t)
        A += alpha
        grad_sum += alpha * grad_t

        beta_seq = 1 if it_counter == 1 else beta_seq + 1.0 / beta_seq
        beta = beta_seq / rho_wda
        t = prox(grad_sum / A, t_start, beta / A)

        t_weighted = (t_weighted * (A - alpha) + t * alpha) / A
        flows_weighted = (flows_weighted * (A - alpha) + flows * alpha) / A

        primal, dual, duality_gap, state_msg = primal_dual_oracle(
            flows_weighted, t_weighted)
        if save_history:
            history.update(it_counter, primal, dual, duality_gap)
        if verbose and (it_counter % verbose_step == 0):
            print('\nIterations number: {:d}'.format(it_counter))
            print(state_msg, flush=True)
        if crit():
            success = True
            break

    result = {'times': t_weighted,
              'flows': flows_weighted,
              'iter_num': it_counter,
              'res_msg': 'success' if success else 'iterations number exceeded'}
    if save_history:
        result['history'] = history.dict
    if verbose:
        print('\nResult: ' + result['res_msg'])
        print('Total iters: ' + str(it_counter))
        print(state_msg)
        print('Oracle elapsed time: {:.0f} sec'.format(oracle.time))
    return result
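# A compact restatement of the update above (a reading of the code, not a
# citation): with step sizes a_k = 1 / ||g_k|| and A_k = sum_{i<=k} a_i,
#   t_k     = prox( (1/A_k) * sum_{i<=k} a_i * g_i,  t_0,  beta_k / A_k )
#   t_bar_k = (1/A_k) * sum_{i<=k} a_i * t_i
# and the flows are averaged with the same weights, so the duality gap is
# evaluated at the weighted averages rather than at the last iterate.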
    t.optim.Adam,
    nn.MSELoss(reduction="sum"),
    actor_learning_rate=1e-5,
    critic_learning_rate=1e-4,
)

episode, step, reward_fulfilled = 0, 0, 0
smoothed_total_reward = 0

while episode < max_episodes:
    episode += 1
    total_reward = 0
    terminal = False
    step = 0

    state = convert(env.reset())
    history = History(history_depth, (1, 128))

    tmp_observations = []
    while not terminal:
        step += 1
        with t.no_grad():
            history.append(state)
            # agent model inference
            action = ppo.act({"mem": history.get()})[0]
            state, reward, terminal, _ = env.step(action.item())
            state = convert(state)
            total_reward += reward

            old_history = history.get()
            new_history = history.append(state).get()
            tmp_observations.append({
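# A minimal sketch (inferred from its usage above, not the library's code)
# of the fixed-depth observation History: append() pushes a state and
# returns self so calls can be chained, and get() returns the stacked
# window, oldest state first.
import torch as t

class HistorySketch:
    def __init__(self, depth, shape):
        # Start from zero tensors so get() is well-defined before `depth`
        # real states have been seen.
        self.buffer = [t.zeros(shape) for _ in range(depth)]

    def append(self, state):
        self.buffer = self.buffer[1:] + [state]
        return self

    def get(self):
        # With (1, 128) entries and depth d, this is a (d, 128) window.
        return t.cat(self.buffer, dim=0)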
def main(**kwargs):
    config = C(
        horizon=32,
        discount=0.99,
        num_episodes=100000,
        num_workers=8,
        e_greedy_eps=0.9,
    )
    for k in kwargs:
        config[k] = kwargs[k]

    writer = SummaryWriter(config.experiment_path)

    env = VecEnv([build_env for _ in range(config.num_workers)])
    env = wrappers.TensorboardBatchMonitor(env, writer, log_interval=100,
                                           fps_mul=0.5)
    env = wrappers.Torch(env)

    model = Agent(env.observation_space, env.action_space)
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    1e-4 * config.num_workers)

    episode = 0
    pbar = tqdm(total=config.num_episodes)

    obs = env.reset()
    state = model.zero_state(config.num_workers)

    while episode < config.num_episodes:
        history = History()
        state = tuple(x.detach() for x in state)

        for i in range(config.horizon):
            transition = history.append_transition()

            action_value, state_prime = model(obs, state)
            action = select_action(action_value, eps=config.e_greedy_eps)
            transition.record(
                action_value_i=select_action_value(action_value, action))

            obs_prime, reward, done, info = env.step(action)
            transition.record(reward=reward, done=done)

            state_prime = model.reset_state(state_prime, done)
            obs, state = obs_prime, state_prime

            for i in info:
                if "episode" not in i:
                    continue
                episode += 1
                writer.add_scalar("episode/return", i["episode"]["r"],
                                  global_step=episode)
                writer.add_scalar("episode/length", i["episode"]["l"],
                                  global_step=episode)
                pbar.update()

        rollout = history.build()

        action_value_prime, _ = model(obs_prime, state_prime)
        action_prime = select_action(action_value_prime,
                                     eps=config.e_greedy_eps)

        return_ = n_step_bootstrapped_return(
            reward_t=rollout.reward,
            value_prime=select_action_value(action_value_prime,
                                            action_prime).detach(),
            done_t=rollout.done,
            discount=config.discount,
        )

        td_error = rollout.action_value_i - return_
        loss = td_error.pow(2)

        optimizer.zero_grad()
        loss.mean().backward()
        optimizer.step()

        writer.add_scalar("rollout/action_value_i",
                          rollout.action_value_i.mean(), global_step=episode)
        writer.add_scalar("rollout/td_error", td_error.mean(),
                          global_step=episode)
        writer.add_scalar("rollout/loss", loss.mean(), global_step=episode)

    env.close()
    writer.close()
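# A tiny worked sketch of the n-step bootstrapped return used above,
# assuming the conventional backward recursion
#   G_t = r_t + discount * (1 - done_t) * G_{t+1},
# seeded with the bootstrap value for the state after the horizon. This is
# an assumption about n_step_bootstrapped_return, not its actual code.
def n_step_return_sketch(rewards, value_prime, dones, discount):
    g, out = value_prime, []
    for r, d in zip(reversed(rewards), reversed(dones)):
        g = r + discount * (1.0 - d) * g
        out.append(g)
    return out[::-1]

returns = n_step_return_sketch([1.0, 0.0, 1.0], 0.5, [0, 0, 0], 0.9)
assert all(abs(a - b) < 1e-9 for a, b in zip(returns, [2.1745, 1.305, 1.45]))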
def testName(self):
    history = History("./tmp/test.db")
    builder = BuilderTpp(history)
    builder.build()
def main():
    parser = argparse.ArgumentParser(description='SM Room Timer')
    parser.add_argument('-f', '--file', dest='filename', default=None)
    parser.add_argument('--rooms', dest='rooms_filename', default='rooms.json')
    parser.add_argument('--doors', dest='doors_filename', default='doors.json')
    parser.add_argument('--debug', dest='debug', action='store_true')
    parser.add_argument('--debug-log', dest='debug_log_filename')
    parser.add_argument('--verbose', dest='verbose', action='store_true')
    parser.add_argument('--usb2snes', action='store_true')
    parser.add_argument('--route', action='store_true')
    parser.add_argument('--rebuild', action='store_true')
    # parser.add_argument('--segment', action='append', required=True)
    args = parser.parse_args()

    rooms = Rooms.read(args.rooms_filename)
    doors = Doors.read(args.doors_filename, rooms)
    route = Route() if args.route else DummyRoute()

    if args.filename and need_rebuild(args.filename):
        if not args.rebuild:
            print("File needs to be rebuilt before it can be used; "
                  "run rebuild_history.py or pass --rebuild to this script.")
            sys.exit(1)
        backup_and_rebuild(rooms, doors, args.filename)

    if args.debug_log_filename:
        debug_log = open(args.debug_log_filename, 'a')
        verbose = True
    elif args.debug:
        debug_log = sys.stdout
        verbose = True
    else:
        debug_log = None
        verbose = args.verbose

    frontend = SegmentTimerTerminalFrontend(verbose=verbose,
                                            debug_log=debug_log)

    if args.filename is not None and os.path.exists(args.filename):
        history = read_transition_log(args.filename, rooms, doors)
    else:
        history = History()

    for tid in history:
        route.record(tid)
        if route.complete:
            break
    print('Route is %s' % ('complete' if route.complete else 'incomplete'))

    transition_log = (FileTransitionLog(args.filename)
                      if args.filename is not None else NullTransitionLog())

    tracker = SegmentTimeTracker(history, transition_log, route,
                                 on_new_room_time=frontend.new_room_time)

    state_reader = ThreadedStateReader(rooms, doors,
                                       usb2snes=args.usb2snes,
                                       logger=frontend)
    state_reader.start()

    try:
        timer = SegmentTimer(frontend, state_reader,
                             on_transitioned=tracker.transitioned,
                             on_state_change=frontend.state_changed,
                             on_reset=tracker.room_reset)

        while state_reader.is_alive():
            timer.poll()
    finally:
        state_reader.stop()
tr.add('dyn.lf', 'lf')
tr.add('dyn.rf', 'rf')
tr.start()
robot.after.addSignal('tr.triger')
robot.after.addSignal(contactLF.task.name + '.error')
robot.after.addSignal('dyn.rf')
robot.after.addSignal('dyn.lf')
robot.after.addSignal('dyn.com')
robot.after.addSignal('sot.forcesNormal')
robot.after.addSignal('dyn.waist')
robot.after.addSignal('taskLim.normalizedPosition')
tr.add('taskLim.normalizedPosition', 'qn')

history = History(dyn, 1, zmp.zmp)

# -----------------------------------------------------------------------------
# --- RUN ---------------------------------------------------------------------
# -----------------------------------------------------------------------------

RADIUS = (0.35, -0.2)
# RADIUS = (0.4, -0.42)  # WARNING: this version induces a collision of the hips.

q0 = robot.state.value
sot.clear()

contact(contactLF)
contact(contactRF)