def cb(outputs):
    """Sample an action from the predicted distribution and reply to the client.

    ``outputs`` is a future whose ``result()`` yields ``(distrib, value)``.
    The sampled action is stored in the client's memory with the value
    estimate. The reply is the action alone, or ``[action, feature]`` when
    ``FEATURE`` is set (the feature comes from ``self.offline_predictor``).
    """
    distrib, value = outputs.result()
    assert np.all(np.isfinite(distrib)), distrib
    action = np.random.choice(len(distrib), p=distrib)

    self.clients[ident].memory.append(
        TransitionExperience(state, action, None, value=value))

    if FEATURE:
        feature = self.offline_predictor([[state]])[0][0]
        payload = [action, feature]
    else:
        payload = action
    self.send_queue.put([ident, dumps(payload)])
def cb(outputs):
    """Forward the predicted policy to the client as its next action.

    ``outputs`` is a future resolving to ``(policy, value)``. The policy is
    sent unmodified together with the value estimate; the transition stored
    in ``client.memory`` keeps ``action=None`` and ``reward=None``.
    """
    try:
        policy, value = outputs.result()
    except CancelledError:
        logger.info("Client {} cancelled.".format(ident))
        return
    assert np.all(np.isfinite(policy)), policy

    # NOTE: a block of hand-written exploration priors used to be applied to
    # the action here and is currently disabled. It clipped the action,
    # biased action[1] towards positive values during the first epochs and
    # randomly perturbed action[0]. The original rationale: whether good
    # rewards are found early determines convergence speed, so behave like a
    # novice driver -- steer conservatively, keep some throttle, never brake.
    experience = TransitionExperience(
        state, action=None, reward=None, value=value)
    client.memory.append(experience)
    self.send_queue.put([ident, dumps((policy, value))])
def cb(outputs):
    """Pick one of two predicted policies at random and sample an action.

    ``outputs`` is a future resolving to
    ``(distrib1, distrib2, value1, value2)``. A uniform draw below 0.5
    selects the first distribution, otherwise the second. The selected head
    gets update weight 1.0 and the other 0.0. Both values and both
    probabilities of the sampled action are stored with the transition.
    """
    try:
        distrib1, distrib2, value1, value2 = outputs.result()
    except CancelledError:
        logger.info("Client {} cancelled.".format(ident))
        return
    assert np.all(np.isfinite(distrib1)), distrib1
    assert np.all(np.isfinite(distrib2)), distrib2

    # One uniform draw first, then one categorical draw (same RNG order).
    use_first = np.random.rand() < 0.5
    chosen = distrib1 if use_first else distrib2
    action = np.random.choice(len(chosen), p=chosen)
    updateweight1, updateweight2 = (1.0, 0.0) if use_first else (0.0, 1.0)

    experience = TransitionExperience(
        state, action, reward=None,
        value1=value1, value2=value2,
        updateweight1=updateweight1, updateweight2=updateweight2,
        prob1=distrib1[action], prob2=distrib2[action])
    self.clients[ident].memory.append(experience)
    self.send_queue.put([ident, dumps(action)])
def perf_from_log(log_fn):
    """
    Extract performance numbers from a training log, with an on-disk cache.

    Args:
        log_fn : a stdout file xxx/stdout/triali/stdout.txt

    Returns:
        tuple: ``(min_ve, multi_add * 2. * 1e-9, min_ve_epoch)`` taken from
        the cache or computed from the log, or ``(2.0, -1.0, -1)`` when
        neither a usable cache entry nor the log file exists.

    The cache file lives in ``cache_dir`` and is named after the log's
    directory with ``/`` replaced by ``__``. A cached value is returned only
    when it is truthy and ``FORCE_LOAD`` is false.
    """
    dn = os.path.dirname(log_fn)
    cache_fn = os.path.join(cache_dir, dn.replace('/', '__'))

    if os.path.exists(cache_fn):
        with open(cache_fn, 'rb') as fin:
            ss = fin.read()
        try:
            ret = loads(ss)
        except Exception:
            # A corrupt or truncated cache entry must not abort the lookup
            # (it used to leave ``ret`` unbound); recompute from the log.
            ret = None
        if ret and not FORCE_LOAD:
            return ret

    if os.path.exists(log_fn):
        min_ve, min_ve_epoch = val_err_from_log(log_fn)
        multi_add, _ = multi_add_from_log(log_fn)
        ret = (min_ve, multi_add * 2. * 1e-9, min_ve_epoch)
        with open(cache_fn, 'wb') as fout:
            fout.write(dumps(ret))
        return ret

    # No log available: sentinel values.
    return 2.0, -1.0, -1
def run(self):
    """Simulator process main loop: exchange states and actions over ZMQ.

    Builds the player, connects a PUSH socket to ``self.c2s`` and a DEALER
    socket to ``self.s2c`` (both tagged with ``self.identity``), then loops
    forever: send ``(identity, state, reward, isOver)``, receive an action,
    step the player. Never returns.
    """
    enable_death_signal()
    player = self._build_player()
    ctx = zmq.Context()

    outgoing = ctx.socket(zmq.PUSH)
    outgoing.setsockopt(zmq.IDENTITY, self.identity)
    outgoing.set_hwm(2)
    outgoing.connect(self.c2s)

    incoming = ctx.socket(zmq.DEALER)
    incoming.setsockopt(zmq.IDENTITY, self.identity)
    incoming.connect(self.s2c)

    state, reward, isOver = player.reset(), 0, False
    while True:
        # The message describes the outcome of the previous action: the state
        # reached plus the reward/isOver obtained. When the episode ended,
        # ``state`` is already the first state of the next episode, so this
        # tuple differs from what goes into the memory buffer.
        message = dumps((self.identity, state, reward, isOver))
        outgoing.send(message, copy=False)
        action = loads(incoming.recv(copy=False))
        state, reward, isOver, _ = player.step(action)
        if isOver:
            state = player.reset()
def cb(outputs):
    """Forward the predicted policy to the client as its next action.

    ``outputs`` is a future resolving to ``(policy, value)``. The policy is
    sent unmodified together with the value estimate; the transition stored
    in ``client.memory`` keeps ``action=None`` and ``reward=None``.
    """
    try:
        policy, value = outputs.result()
    except CancelledError:
        logger.info("Client {} cancelled.".format(ident))
        return
    assert np.all(np.isfinite(policy)), policy

    # NOTE: a block of hand-written exploration priors used to be applied to
    # the action here and is currently disabled. It clipped the action,
    # biased action[1] towards positive values during the first epochs and
    # randomly perturbed action[0]. The original rationale: whether good
    # rewards are found early determines convergence speed, so behave like a
    # novice driver -- steer conservatively, keep some throttle, never brake.
    experience = TransitionExperience(
        state, action=None, reward=None, value=value)
    client.memory.append(experience)
    self.send_queue.put([ident, dumps((policy, value))])
def run(self):
    """Simulator process main loop: exchange states and actions over ZMQ.

    Builds the player, connects a PUSH socket to ``self.c2s`` and a DEALER
    socket to ``self.s2c`` (both tagged with ``self.identity``), then loops
    forever: send ``(identity, state, reward, isOver)``, receive an action,
    step the player. Never returns.
    """
    player = self._build_player()
    ctx = zmq.Context()

    outgoing = ctx.socket(zmq.PUSH)
    outgoing.setsockopt(zmq.IDENTITY, self.identity)
    outgoing.set_hwm(2)
    outgoing.connect(self.c2s)

    incoming = ctx.socket(zmq.DEALER)
    incoming.setsockopt(zmq.IDENTITY, self.identity)
    incoming.connect(self.s2c)

    state, reward, isOver = player.reset(), 0, False
    while True:
        # The message describes the outcome of the previous action: the state
        # reached plus the reward/isOver obtained. When the episode ended,
        # ``state`` is already the first state of the next episode, so this
        # tuple differs from what goes into the memory buffer.
        message = dumps((self.identity, state, reward, isOver))
        outgoing.send(message, copy=False)
        reply = incoming.recv(copy=False)
        action = loads(reply.bytes)
        state, reward, isOver, _ = player.step(action)
        if isOver:
            state = player.reset()
def _process_msg(self, client, role_id, prob_state, all_state, last_cards_onehot, first_st, mask, minor_type, mode, reward, isOver): """ Process a message sent from some client. """ # in the first message, only state is valid, # reward&isOver should be discarde # print('received msg') if isOver and first_st: # should clear client's memory and put to queue assert reward != 0 for i in range(3): j = -1 while client.memory[i][j].reward == 0: # notice that C++ returns the reward for farmer, transform to the reward in each agent's perspective client.memory[i][ j].reward = reward if i != 1 else -reward if client.memory[i][j].first_st: break j -= 1 self._parse_memory(0, client) # feed state and return action rand_a = np.random.rand(mask.shape[0]) rand_a = (rand_a + 1e-6) * mask self.send_queue.put([client.ident, dumps(np.argmax(rand_a))]) client.memory[role_id - 1].append( TransitionExperience(prob_state, all_state, np.argmax(rand_a), reward=0, first_st=first_st, mode=mode))
def mark_stopped(log_dir, is_interrupted=False, msg_func=None):
    """Write the stop-marker file for ``log_dir``.

    The marker path comes from ``stop_mark_fn(log_dir, is_interrupted)``.
    Its content is ``msg_func()`` when a callable is given, otherwise the
    serialized string ``'meow'``.
    """
    final_path = stop_mark_fn(log_dir, is_interrupted)
    scratch_path = final_path + '.tmp'
    with open(scratch_path, 'wb') as fout:
        if msg_func is None:
            payload = dumps('meow')
        else:
            payload = msg_func()
        fout.write(payload)
    # Write to a temp name and rename, so that a watcher never finds a
    # half-written marker file.
    os.rename(scratch_path, final_path)
def request_click(bbox):
    """Ask the simulator manager to click the centre of ``bbox``; return its reply.

    ``bbox`` is indexed as ``[x0, y0, x1, y1]``. The centre is shifted by
    the window origin (``self.window_rect[0:2]``) plus fixed offsets of
    6 and 46 pixels (presumably window border / title bar -- TODO confirm).
    """
    click_x = (bbox[0] + bbox[2]) // 2 + self.window_rect[0] + 6
    click_y = (bbox[1] + bbox[3]) // 2 + self.window_rect[1] + 46
    request = [self.name, SimulatorManager.MSG_TYPE.CLICK, [click_x, click_y]]
    sim2mgr_socket.send(dumps(request))
    reply = mgr2sim_socket.recv(copy=False)
    return loads(reply.bytes)
def mark_failed(log_dir):
    """Write a stop-marker file for ``log_dir`` carrying ``'failed_meow'``.

    Uses the non-interrupted marker path from ``stop_mark_fn``.
    """
    final_path = stop_mark_fn(log_dir, is_interrupted=False)
    scratch_path = final_path + '.tmp'
    with open(scratch_path, 'wb') as fout:
        fout.write(dumps('failed_meow'))
    # Write to a temp name and rename, so that a watcher never finds a
    # half-written marker file.
    os.rename(scratch_path, final_path)
def spawn(self, job_type, entry_func, stop_func, msg_func, sleep_time):
    """Create, register and start a new ``WhileSleepWorker``.

    Args:
        job_type: queue id under which the new worker id is enqueued in
            ``self.pools``.
        entry_func, stop_func, sleep_time: forwarded to ``WhileSleepWorker``.
        msg_func: callable returning a list; the worker's message is that
            list with this worker's id appended, serialized with ``dumps``.
    """
    qid = job_type
    self.worker_id += 1
    # Capture the id by value. Reading ``self.worker_id`` lazily inside the
    # message function would report the id of whichever worker was spawned
    # last if the function is ever called after another ``spawn``.
    worker_id = self.worker_id

    def msg_func2():
        return dumps(msg_func() + [worker_id])

    proc = WhileSleepWorker(self.pipename, self.hwm, entry_func, stop_func,
                            msg_func2, sleep_time)
    self.pools.enqueue(qid, worker_id)
    self.worker_id_to_proc[worker_id] = proc
    start_proc_mask_signal([proc])
def cb(outputs):
    """Send the first element of the prediction result back to ``sim_name``.

    The reply is a two-frame multipart message: the UTF-8 encoded simulator
    name followed by the serialized ``output[0]``.
    """
    try:
        output = outputs.result()
    except CancelledError:
        logger.info("{} cancelled.".format(sim_name))
        return
    first = output[0]
    print('coordinator sending', sim_name.encode('utf-8'), first.shape)
    frames = [sim_name.encode('utf-8'), dumps(first)]
    self.coord2sim_socket.send_multipart(frames)
def cb(outputs):
    """Sample an action from the predicted distribution and reply to the client.

    ``outputs`` is a future resolving to ``(distrib, value)``. The sampled
    action, the value estimate and the probability of the sampled action
    are stored in ``client.memory``; the action is queued for sending.
    """
    try:
        distrib, value = outputs.result()
    except CancelledError:
        logger.info("Client {} cancelled.".format(client.ident))
        return
    assert np.all(np.isfinite(distrib)), distrib

    sampled = np.random.choice(len(distrib), p=distrib)
    experience = TransitionExperience(
        state, sampled, reward=None, value=value, prob=distrib[sampled])
    client.memory.append(experience)
    self.send_queue.put([client.ident, dumps(sampled)])
def compute_mean_std(db, fname):
    """Compute running mean/std of the features in an LMDB and save them.

    Every item of the first component of each datapoint is fed to an
    ``OnlineMoments`` accumulator; ``[mean, std]`` is serialized to ``fname``.
    """
    ds = LMDBSerializer.load(db, shuffle=False)
    ds.reset_state()
    moments = OnlineMoments()
    for datapoint in get_tqdm(ds):
        # datapoint[0] is the feature sequence (len x dim)
        for frame in datapoint[0]:
            moments.feed(frame)
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as fout:
        fout.write(serialize.dumps([moments.mean, moments.std]))
def cb(outputs):
    """Sample an action from the predicted distribution and reply to the client.

    ``outputs`` is a future resolving to ``(distrib, value)``. The sampled
    action, the value estimate and the probability of the sampled action
    are stored in ``client.memory``; the action is queued for sending.
    """
    try:
        distrib, value = outputs.result()
    except CancelledError:
        logger.info("Client {} cancelled.".format(client.ident))
        return
    assert np.all(np.isfinite(distrib)), distrib

    sampled = np.random.choice(len(distrib), p=distrib)
    experience = TransitionExperience(
        state, sampled, reward=None, value=value, prob=distrib[sampled])
    client.memory.append(experience)
    self.send_queue.put([client.ident, dumps(sampled)])
def compute_mean_std(db, fname):
    """Compute running mean/std of the features in an LMDB and save them.

    Every item of the first component of each datapoint is fed to an
    ``OnlineMoments`` accumulator; ``[mean, std]`` is serialized to ``fname``.
    """
    ds = LMDBSerializer.load(db, shuffle=False)
    ds.reset_state()
    moments = OnlineMoments()
    for datapoint in get_tqdm(ds):
        # datapoint[0] is the feature sequence (len x dim)
        for frame in datapoint[0]:
            moments.feed(frame)
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as fout:
        fout.write(serialize.dumps([moments.mean, moments.std]))
def run(self):
    """Worker entry point: push every datapoint from ``get_data`` over ZMQ.

    Builds the player and a PUSH socket (identity ``self.identity``, HWM 5)
    connected to ``self.pipe_c2s``, calls ``self._prepare()``, then sends
    each serialized datapoint without copying.
    """
    self.player = self._build_player()
    self.ctx = zmq.Context()

    self.c2s_socket = self.ctx.socket(zmq.PUSH)
    push = self.c2s_socket
    push.setsockopt(zmq.IDENTITY, self.identity)
    push.set_hwm(5)
    push.connect(self.pipe_c2s)

    self._prepare()
    for datapoint in self.get_data():
        self.c2s_socket.send(dumps(datapoint), copy=False)
def run(self):
    """Worker entry point: push every datapoint from ``get_data`` over ZMQ.

    Builds the player and a PUSH socket (identity ``self.identity``, HWM 5)
    connected to ``self.pipe_c2s``, calls ``self._prepare()``, then sends
    each serialized datapoint without copying.
    """
    self.player = self._build_player()
    self.ctx = zmq.Context()

    self.c2s_socket = self.ctx.socket(zmq.PUSH)
    push = self.c2s_socket
    push.setsockopt(zmq.IDENTITY, self.identity)
    push.set_hwm(5)
    push.connect(self.pipe_c2s)

    self._prepare()
    for datapoint in self.get_data():
        self.c2s_socket.send(dumps(datapoint), copy=False)
def compute_mean_std(db, fname):
    """Compute running mean/std of the features in an LMDB and save them.

    Every item of the first component of each datapoint is fed to an
    ``OnlineMoments`` accumulator while a progress bar advances once per
    datapoint; ``[mean, std]`` is serialized to ``fname``.
    """
    ds = LMDBDataPoint(db, shuffle=False)
    ds.reset_state()
    moments = OnlineMoments()
    with get_tqdm(total=ds.size()) as progress:
        for datapoint in ds.get_data():
            # datapoint[0] is the feature sequence (len x dim)
            for frame in datapoint[0]:
                moments.feed(frame)
            progress.update()
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as fout:
        fout.write(serialize.dumps([moments.mean, moments.std]))
def compute_mean_std(db, fname):
    """Compute running mean/std of the features in an LMDB and save them.

    Every item of the first component of each datapoint is fed to an
    ``OnlineMoments`` accumulator while a progress bar advances once per
    datapoint; ``[mean, std]`` is serialized to ``fname``.
    """
    ds = LMDBDataPoint(db, shuffle=False)
    ds.reset_state()
    moments = OnlineMoments()
    with get_tqdm(total=ds.size()) as progress:
        for datapoint in ds.get_data():
            # datapoint[0] is the feature sequence (len x dim)
            for frame in datapoint[0]:
                moments.feed(frame)
            progress.update()
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as fout:
        fout.write(serialize.dumps([moments.mean, moments.std]))
def compute_mean_std(ds, fname):
    """
    Compute mean and std of the features in a dataset and save them.

    Every item of the first component of each datapoint is fed to a
    ``stats.OnlineMoments`` accumulator; ``[mean, std]`` is serialized to
    ``fname``.

    Usage: compute_mean_std(ds, 'mean_std.txt')
    """
    moments = stats.OnlineMoments()
    for datapoint in get_tqdm(ds):
        # datapoint[0] is the feature sequence (len x dim)
        for frame in datapoint[0]:
            moments.feed(frame)
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as fout:
        fout.write(serialize.dumps([moments.mean, moments.std]))
def f():
    """Handle one request from ``self.queue`` on behalf of a simulator.

    A message is ``[sim_name, msg_type, payload]``. LOCK/UNLOCK give one
    simulator exclusive use of the manager: while a lock is held, requests
    from other simulators are put back on the queue. SCREEN replies with a
    screen grab and CLICK performs a click at ``payload[0], payload[1]``;
    every reply is a multipart message addressed by the UTF-8 simulator name.
    """
    msg = self.queue.get()
    sim_name = msg[0]
    # Grant the lock only when nobody holds it.
    if msg[1] == SimulatorManager.MSG_TYPE.LOCK and self.locked_sim is None:
        self.locked_sim = sim_name
        self.mgr2sim_socket.send_multipart(
            [sim_name.encode('utf-8'), dumps('lock')])
        time.sleep(0.2)
        return
    if self.locked_sim is not None:
        if sim_name != self.locked_sim:
            # Someone else holds the lock: requeue and retry later.
            time.sleep(0.2)
            self.queue.put(msg)
            return
        elif msg[1] == SimulatorManager.MSG_TYPE.UNLOCK:
            self.locked_sim = None
            self.mgr2sim_socket.send_multipart(
                [sim_name.encode('utf-8'), dumps('unlock')])
            time.sleep(0.2)
            return
    # presumably brings this simulator's window to the foreground -- TODO confirm
    self.cxt_switch(sim_name)
    # time.sleep(0.2)
    # print(msg[1])
    if msg[1] == SimulatorManager.MSG_TYPE.SCREEN:
        screen = grab_screen()
        self.mgr2sim_socket.send_multipart(
            [sim_name.encode('utf-8'), dumps(screen)])
    elif msg[1] == SimulatorManager.MSG_TYPE.CLICK:
        # print('need to click')
        click(msg[2][0], msg[2][1])
        self.mgr2sim_socket.send_multipart(
            [sim_name.encode('utf-8'), dumps('click')])
def cb(outputs):
    """Record the predicted action and send it back to the client.

    ``outputs`` is a future resolving to ``(action, prob, value)``; the
    predictor already picked the action, so nothing is sampled here.
    """
    try:
        action, prob, value = outputs.result()
    except CancelledError:
        logger.info("Client {} cancelled.".format(client.ident))
        return
    assert np.all(np.isfinite(prob)), prob

    experience = TransitionExperience(
        state, action, reward=None, value=value, prob=prob)
    client.memory.append(experience)
    self.send_queue.put([client.ident, dumps(action)])
def cb(outputs):
    """Sample a masked action from the head selected by the predictor.

    The last element of the result is ``mode``; it indexes into the
    preceding elements to pick the distribution. The distribution is
    smoothed by 1e-6, multiplied by ``mask`` and renormalised for sampling.
    """
    try:
        output = outputs.result()
    except CancelledError:
        logger.info("Client {} cancelled.".format(client.ident))
        return

    mode = output[-1]
    heads = output[:-1]
    distrib = (heads[mode] + 1e-6) * mask
    assert np.all(np.isfinite(distrib)), distrib
    normalized = distrib / distrib.sum()
    action = np.random.choice(len(distrib), p=normalized)

    # NOTE(review): the stored prob is taken from the un-normalised masked
    # distribution, not from the one actually sampled -- confirm intended.
    experience = TransitionExperience(
        prob_state, all_state, action, reward=0,
        minor_type=minor_type, first_st=first_st,
        last_cards_onehot=last_cards_onehot, mode=mode,
        prob=distrib[action])
    client.memory[role_id - 1].append(experience)
    self.send_queue.put([client.ident, dumps(action)])
def run(self):
    """Simulator process main loop: exchange states and actions over ZMQ.

    Connects a PUSH socket to ``self.c2s`` and a DEALER socket to
    ``self.s2c`` (both tagged with ``self.identity``), then loops forever:
    send ``(identity, state, reward, isOver)``, receive an action, apply it
    with ``player.action`` and read the new ``player.current_state()``.
    """
    player = self._build_player()
    ctx = zmq.Context()

    outgoing = ctx.socket(zmq.PUSH)
    outgoing.setsockopt(zmq.IDENTITY, self.identity)
    outgoing.set_hwm(2)
    outgoing.connect(self.c2s)

    incoming = ctx.socket(zmq.DEALER)
    incoming.setsockopt(zmq.IDENTITY, self.identity)
    # incoming.set_hwm(5)
    incoming.connect(self.s2c)

    state, reward, isOver = player.current_state(), 0, False
    while True:
        message = dumps((self.identity, state, reward, isOver))
        outgoing.send(message, copy=False)
        reply = incoming.recv(copy=False)
        reward, isOver = player.action(loads(reply.bytes))
        state = player.current_state()
def run(self):
    """Simulator process main loop: exchange states and actions over ZMQ.

    Connects a PUSH socket to ``self.c2s`` and a DEALER socket to
    ``self.s2c`` (both tagged with ``self.identity``), then loops forever:
    send ``(identity, state, reward, isOver)``, receive an action, apply it
    with ``player.action`` and read the new ``player.current_state()``.
    """
    player = self._build_player()
    ctx = zmq.Context()

    outgoing = ctx.socket(zmq.PUSH)
    outgoing.setsockopt(zmq.IDENTITY, self.identity)
    outgoing.set_hwm(2)
    outgoing.connect(self.c2s)

    incoming = ctx.socket(zmq.DEALER)
    incoming.setsockopt(zmq.IDENTITY, self.identity)
    # incoming.set_hwm(5)
    incoming.connect(self.s2c)

    state, reward, isOver = player.current_state(), 0, False
    while True:
        message = dumps((self.identity, state, reward, isOver))
        outgoing.send(message, copy=False)
        reply = incoming.recv(copy=False)
        reward, isOver = player.action(loads(reply.bytes))
        state = player.current_state()
def run(self):
    """Simulator process main loop: exchange states and actions over ZMQ.

    Connects a PUSH socket (HWM 10) to ``self.c2s`` and a DEALER socket to
    ``self.s2c``, both tagged with ``self.identity``, then loops forever:
    send ``(identity, state, reward, is_over)``, receive an action, step the
    player. Never returns.
    """
    player = self._build_player()
    context = zmq.Context()

    c2s_socket = context.socket(zmq.PUSH)
    c2s_socket.setsockopt(zmq.IDENTITY, self.identity)
    c2s_socket.set_hwm(10)
    c2s_socket.connect(self.c2s)

    s2c_socket = context.socket(zmq.DEALER)
    s2c_socket.setsockopt(zmq.IDENTITY, self.identity)
    s2c_socket.connect(self.s2c)

    st = player.reset()
    r, is_over = 0, False
    while True:
        c2s_socket.send(dumps((self.identity, st, r, is_over)), copy=False)
        # action = player.action_space.sample()
        action = loads(s2c_socket.recv(copy=False).bytes)
        st, r, is_over, _ = player.step(action)
        # print(st.shape)
        if is_over:
            # Keep the state returned by reset(): the next message must carry
            # the first state of the new episode, not the stale terminal
            # state, or the returned action is applied to an unseen state.
            st = player.reset()
def _eval(self):
    """Run COCO evaluation, dump the results to JSON and log the scores.

    In ``'replicated'`` mode each (dataflow, predictor) pair is evaluated
    in a thread pool sharing one progress bar. Otherwise the local results
    are serialized to a uint8 array and run through the
    ``string_lens``/``concat_results`` tensors; only rank 0 decodes the
    per-worker chunks and continues. Results go to
    ``outputs<global_step>.json`` in the logger directory.
    """
    if cfg.TRAINER == 'replicated':
        total = sum(df.size() for df in self.dataflows)
        with ThreadPoolExecutor(max_workers=self.num_predictor,
                                thread_name_prefix='EvalWorker') as executor, \
                tqdm.tqdm(total=total) as pbar:
            futures = [
                executor.submit(eval_coco, dataflow, pred, pbar)
                for dataflow, pred in zip(self.dataflows, self.predictors)
            ]
            all_results = list(itertools.chain.from_iterable(
                fut.result() for fut in futures))
    else:
        local_results = eval_coco(self.dataflow, self.predictor)
        results_as_arr = np.frombuffer(dumps(local_results), dtype=np.uint8)
        sizes, concat_arrs = tf.get_default_session().run(
            [self.string_lens, self.concat_results],
            feed_dict={self.local_result_tensor: results_as_arr})
        if hvd.rank() > 0:
            return
        # The buffer is a back-to-back concatenation of serialized chunks;
        # ``sizes`` gives each chunk's byte length.
        all_results = []
        offset = 0
        for size in sizes:
            chunk = concat_arrs[offset:offset + size]
            all_results.extend(loads(chunk.tobytes()))
            offset += size

    output_file = os.path.join(
        logger.get_logger_dir(), 'outputs{}.json'.format(self.global_step))
    with open(output_file, 'w') as f:
        json.dump(all_results, f)
    try:
        scores = print_evaluation_scores(output_file)
        for name, score in scores.items():
            self.trainer.monitors.put_scalar(name, score)
    except Exception:
        logger.exception("Exception in COCO evaluation.")
def request_screen():
    """Send a SCREEN request to the simulator manager; return its decoded reply."""
    request = [self.name, SimulatorManager.MSG_TYPE.SCREEN, []]
    sim2mgr_socket.send(dumps(request))
    reply = mgr2sim_socket.recv(copy=False)
    return loads(reply.bytes)
def final():
    """Run the live stereo pose pipeline and stream 3D joints over ZMQ.

    Each iteration grabs one frame from each of the two cameras, runs the
    CPM network on 368x368 resized copies, matches the 14 joints between
    the two views with the C++ patch matcher, averages the last two
    matches, triangulates each joint and pushes the resulting 14x3 array
    to ``tcp://<args.host>:8888``. Never returns.

    NOTE: written for Python 2 (print statements).
    """
    camera = libcpm.Camera()
    camera.setup()

    # cpp matcher:
    pmatcher = libcpm.PatchMatch()
    pmatcher.init(camera, 20)

    # python matcher (disabled):
    #bgs0, bgs1 = [], []
    #for k in range(20):
        #m1 = camera.get_for_py(0)
        #m1 = np.array(m1, copy=True)
        #m2 = camera.get_for_py(1)
        #m2 = np.array(m2, copy=True)
        #bgs0.append(m1)
        #bgs1.append(m2)
    #matcher = Matcher(BackgroundSegmentor(bgs0), BackgroundSegmentor(bgs1))

    runner = get_parallel_runner('../data/cpm.npy')
    viewer = libcpm.StereoCameraViewer(camera)
    viewer.start()
    # d1, d0 are returned by the calibration loader but not used below.
    C1, C0, d1, d0 = load_camera_from_calibr(
        '../calibr-1211/camchain-homeyihuaDesktopCPM3D_kalibrfinal3.yaml')
    # Keeps only the two most recent matches for temporal averaging.
    queue = deque(maxlen=2)

    ctx = zmq.Context()
    sok = ctx.socket(zmq.PUSH)
    global args
    sok.connect('tcp://{}:8888'.format(args.host))

    def cpp_matcher(m1, m2, o1, o2):
        # Wrap the network outputs for the C++ side and reshape the match.
        o1 = libcpm.Mat(o1)
        o2 = libcpm.Mat(o2)
        out = pmatcher.match_with_hm(m1, m2, o1, o2)
        return np.asarray(out).reshape(14, 4)  #14 x 2image x (x,y)

    pts3ds = []
    cnt = 0
    while True:
        cnt += 1
        print 'begin---', time.time()
        m1 = camera.get_for_py(0)
        m1r = np.array(m1, copy=False)
        m2 = camera.get_for_py(1)
        m2r = np.array(m2, copy=False)

        m1s = cv2.resize(m1r, (368, 368))
        m2s = cv2.resize(m2r, (368, 368))
        print 'after resize---', time.time()

        o1, o2 = runner(m1s, m2s)
        print 'after cpm---', time.time()

        #pts14x4 = matcher.match(m1r, m2r, o1, o2)
        pts14x4 = cpp_matcher(m1, m2, o1, o2)

        #to_save = (m1s, m2s, o1, o2, pts14x4)
        #fout = open('full-recording/{:04d}.dat'.format(cnt), 'wb')
        #fout.write(dumps(to_save))
        #fout.close()

        print 'after match---', time.time()
        queue.append(pts14x4)
        # Average the buffered matches to smooth the 2D joint positions.
        p2d = np.mean(queue, axis=0)
        p3ds = np.zeros((14, 3))
        for c in range(14):
            # Columns 0-1 are the point in one image, columns 2-3 in the other.
            p3d = triangulate(C0, C1, p2d[c, :2], p2d[c, 2:])
            p3ds[c, :] = p3d
        sok.send(dumps(p3ds))
        print p3ds
        print 'after send---', time.time()
        print '-----------------'
args.do_validation = False args.compute_hallu_stats = False test_ret = eval_child(model_cls, args, args.log_dir, args.model_dir, collect_hallu_stats=False) te = test_ret[0] else: te = ve # form stopping message for main. json_ret = dict() json_ret['ve'] = ve json_ret['te'] = te json_ret['fp'] = fp json_ret['l_stats'] = l_stats json_ret['l_op_indices'] = l_op_indices json_ret['l_op_omega'] = l_op_omega ret_str = dumps(json_ret) msg_func = lambda : ret_str mark_stopped(args.log_dir, msg_func=msg_func) # Go to parse_remote_stop_file for how this msg is parsed. except Exception as e: mi = os.path.basename(os.path.normpath(args.model_dir)) logger.info("mi={} failed: {}".format(mi, e)) # TODO differentiate OOM and Unknown: # tensorflow.python.framework.errors_impl.UnknownError # tensorflow.python.framework.errors_impl.ResourceExhaustedError mark_failed(args.log_dir) traceback.print_exc() raise # pack the info for the stop file, see parse_remote_stop_file forr unpacking
def run(self):
    """Simulator process main loop for the card game with sub-step actions.

    Connects a PUSH socket (HWM 10) to ``self.c2s`` and a DEALER socket to
    ``self.s2c``, both tagged with ``self.identity``. Role 2 is driven by
    the remote predictor: one move is built from several sub-steps of a
    ``SubState``, each being one request/response round trip. All other
    roles use ``player.step_auto()``. Never returns.
    """
    player = self._build_player()
    context = zmq.Context()
    c2s_socket = context.socket(zmq.PUSH)
    c2s_socket.setsockopt(zmq.IDENTITY, self.identity)
    c2s_socket.set_hwm(10)
    c2s_socket.connect(self.c2s)

    s2c_socket = context.socket(zmq.DEALER)
    s2c_socket.setsockopt(zmq.IDENTITY, self.identity)
    s2c_socket.connect(self.s2c)

    player.reset()
    # Every episode is started from the same fixed set of cards.
    init_cards = np.arange(21)
    # init_cards = np.append(init_cards[::4], init_cards[1::4])
    player.prepare_manual(init_cards)
    r, is_over = 0, False
    while True:
        all_state, role_id, curr_handcards_value, last_cards_value, last_category = \
            player.get_state_all_cards(), player.get_role_ID(), player.get_curr_handcards(), player.get_last_outcards(), player.get_last_outcategory_idx()
        # after taking the last action, get to this state and get this reward/isOver.
        # If isOver, get to the next-episode state immediately.
        # This tuple is not the same as the one put into the memory buffer
        # No cards were played last: this role leads and may play anything.
        is_active = (last_cards_value.size == 0)
        # Replace each of the three 60-wide slices of the raw state by the
        # action mask computed from it, then flatten.
        all_state = np.stack([
            get_mask(
                Card.onehot2char(all_state[i * 60:(i + 1) * 60]), action_space,
                None if is_active else to_char(last_cards_value)).astype(
                    np.float32) for i in range(3)
        ]).reshape(-1)
        last_state = get_mask(to_char(last_cards_value), action_space, None).astype(np.float32)

        if role_id == 2:
            st = SubState(
                ACT_TYPE.PASSIVE if last_cards_value.size > 0 else ACT_TYPE.ACTIVE,
                all_state, to_char(curr_handcards_value), last_cards_value, last_category)
            if last_cards_value.size > 0:
                assert last_category > 0
            # Only the first sub-step message of a move is flagged first_st.
            first_st = True
            while not st.finished:
                c2s_socket.send(dumps(
                    (self.identity, role_id, st.state, st.all_state, last_state, first_st, st.get_mask(), st.minor_type, st.mode, r, is_over)),
                    copy=False)
                first_st = False
                action = loads(s2c_socket.recv(copy=False).bytes)
                # logger.info('received action {}'.format(action))
                # print(action)
                st.step(action)

            # print(st.intention)
            assert st.card_type != -1
            r, is_over, category_idx = player.step_manual(st.intention)
        else:
            _, r, _ = player.step_auto()
            # A non-zero reward is what signals the end of the game here.
            is_over = (r != 0)
        if is_over:
            # print('{} over with reward {}'.format(self.identity, r))
            # logger.info('{} over with reward {}'.format(self.identity, r))
            # sys.stdout.flush()
            player.reset()
            player.prepare_manual(init_cards)
def run(self):
    """Simulator process main loop for the card game with a recurrent policy.

    Connects a PUSH socket (HWM 10) to ``self.c2s`` and a DEALER socket to
    ``self.s2c``, both tagged with ``self.identity``. Roles listed in
    ``ROLE_IDS_TO_TRAIN`` are driven by the remote predictor, which returns
    an action index together with the next LSTM state. All other roles use
    ``player.step_auto()``. Never returns.
    """
    player = self._build_player()
    context = zmq.Context()
    c2s_socket = context.socket(zmq.PUSH)
    c2s_socket.setsockopt(zmq.IDENTITY, self.identity)
    c2s_socket.set_hwm(10)
    c2s_socket.connect(self.c2s)

    s2c_socket = context.socket(zmq.DEALER)
    s2c_socket.setsockopt(zmq.IDENTITY, self.identity)
    s2c_socket.connect(self.s2c)

    player.reset()
    # init_cards = np.arange(52)
    # init_cards = np.append(init_cards[::4], init_cards[1::4])
    # player.prepare_manual(init_cards)
    player.prepare()
    r, is_over = 0, False
    # Recurrent state is zeroed at the start of every episode and round-trips
    # through the predictor on each step.
    lstm_state = np.zeros([1024 * 2])
    while True:
        role_id = player.get_role_ID()
        if role_id in ROLE_IDS_TO_TRAIN:
            prob_state, all_state, curr_handcards_value, last_cards_value, last_category = \
                player.get_state_prob(), player.get_state_all_cards(), player.get_curr_handcards(), player.get_last_outcards(), player.get_last_outcategory_idx()
            # Prepend the one-hot encoding of the current hand.
            prob_state = np.concatenate(
                [Card.val2onehot60(curr_handcards_value), prob_state])
            # after taking the last action, get to this state and get this reward/isOver.
            # If isOver, get to the next-episode state immediately.
            # This tuple is not the same as the one put into the memory buffer
            # No cards were played last: this role leads the round.
            is_active = False if last_cards_value.size > 0 else True
            mask = get_mask(
                to_char(curr_handcards_value), action_space,
                None if is_active else to_char(last_cards_value))
            if is_active:
                # presumably action 0 is "pass", not allowed when leading -- TODO confirm
                mask[0] = 0
            last_two_cards = player.get_last_two_cards()
            last_two_cards_onehot = np.concatenate([
                Card.val2onehot60(last_two_cards[0]),
                Card.val2onehot60(last_two_cards[1])
            ])
            c2s_socket.send(dumps(
                (self.identity, role_id, prob_state, all_state, last_two_cards_onehot, mask, 0 if is_active else 1, lstm_state, r, is_over)),
                copy=False)
            action_idx, lstm_state = loads(
                s2c_socket.recv(copy=False).bytes)
            r, is_over, _ = player.step_manual(
                to_value(action_space[action_idx]))
        else:
            _, r, _ = player.step_auto()
            # A non-zero reward is what signals the end of the game here.
            is_over = (r != 0)
        if is_over:
            # print('{} over with reward {}'.format(self.identity, r))
            # logger.info('{} over with reward {}'.format(self.identity, r))
            # sys.stdout.flush()
            player.reset()
            player.prepare()
            lstm_state = np.zeros([1024 * 2])
def run(self):
    """Main loop of a screen-driven simulator that plays through a game GUI.

    Sets up ZMQ sockets to the coordinator, to one experience sink per entry
    of ``self.sim2exps`` and to the simulator manager (which owns the screen
    and mouse), all tagged with ``self.name``. It then loops forever: grab
    the screen, detect the available buttons, and depending on
    ``self.state`` either answer the bidding prompt or play a hand chosen
    by ``self.predictor``, sending transitions to the experience sink of
    the current lord position. Does nothing while ``self.toggle.value`` is 0.
    """
    logger.info('simulator main loop')
    context = zmq.Context()

    sim2coord_socket = context.socket(zmq.PUSH)
    sim2coord_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
    sim2coord_socket.set_hwm(2)
    sim2coord_socket.connect(self.sim2coord)

    coord2sim_socket = context.socket(zmq.DEALER)
    coord2sim_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
    coord2sim_socket.set_hwm(2)
    coord2sim_socket.connect(self.coord2sim)

    sim2exp_sockets = []
    for sim2exp in self.sim2exps:
        sim2exp_socket = context.socket(zmq.PUSH)
        sim2exp_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
        sim2exp_socket.set_hwm(2)
        sim2exp_socket.connect(sim2exp)
        sim2exp_sockets.append(sim2exp_socket)

    sim2mgr_socket = context.socket(zmq.PUSH)
    sim2mgr_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
    sim2mgr_socket.set_hwm(2)
    sim2mgr_socket.connect(self.sim2mgr)

    mgr2sim_socket = context.socket(zmq.DEALER)
    mgr2sim_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
    mgr2sim_socket.set_hwm(2)
    mgr2sim_socket.connect(self.mgr2sim)

    # while True:
    #     time.sleep(0.3)
    #     print(self.name)
    #     sim2exp_sockets[1].send(dumps([self.name, 'haha']))

    # print('main loop')
    # while True:
    #     time.sleep(0.3)
    #     msg = loads(coord2sim_socket.recv(copy=False).bytes)
    #     print(msg)
    #     sim2coord_socket.send(dumps([self.name, self.agent_names[0], np.arange(10)]))

    # Each request_* helper sends one message to the manager and blocks for
    # its reply.
    def request_screen():
        sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.SCREEN, []]))
        return loads(mgr2sim_socket.recv(copy=False).bytes)

    def request_click(bbox):
        # Click the bbox centre, shifted by the window origin plus fixed
        # offsets (presumably border / title bar -- TODO confirm).
        sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.CLICK, [(bbox[0] + bbox[2]) // 2 + self.window_rect[0] + 6, (bbox[1] + bbox[3]) // 2 + self.window_rect[1] + 46]]))
        return loads(mgr2sim_socket.recv(copy=False).bytes)

    def request_lock():
        sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.LOCK, []]))
        return loads(mgr2sim_socket.recv(copy=False).bytes)

    def request_unlock():
        sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.UNLOCK, []]))
        return loads(mgr2sim_socket.recv(copy=False).bytes)

    def spin_lock_on_button():
        # Poll the screen until at least one button is detected, or until
        # the simulator is toggled off (then the result may be empty).
        act = dict()
        while not act:
            self.current_screen = request_screen()
            cv2.imwrite('debug.png', self.current_screen)
            act = get_current_button_action(self.current_screen)
            if self.toggle.value == 0:
                break
        return act

    # NOTE(review): discard() is defined but not called anywhere in this method.
    def discard(act, bboxes, idxs):
        def diff(idxs, cards):
            # Indices whose detected state disagrees with the wanted set:
            # detected and wanted, or not detected and not wanted.
            res = []
            for i in range(len(cards)):
                if cards[i] is not None:
                    if i in idxs:
                        res.append(i)
                else:
                    if i not in idxs:
                        res.append(i)
            return res

        differences = diff(idxs, get_cards_bboxes(request_screen(), self.templates, bboxes=bboxes)[0])
        print(differences)
        request_lock()
        # Keep clicking until the screen matches the wanted selection.
        while len(differences) > 0:
            for d in differences:
                request_click(bboxes[d])
            # request_click(bboxes[differences[0]])
            # time.sleep(0.3)
            differences = diff(idxs, get_cards_bboxes(request_screen(), self.templates, bboxes=bboxes)[0])
            print(differences)
        if 'chupai' in act:
            request_click(act['chupai'])
        elif 'alone_chupai' in act:
            request_click(act['alone_chupai'])
        elif 'ming_chupai' in act:
            request_click(act['ming_chupai'])
        request_unlock()

    game_cnt = 0
    while True:
        import psutil
        # print('memory usage is: ', psutil.virtual_memory())
        if self.toggle.value == 0:
            time.sleep(0.2)
            continue
        print('new round')
        self.current_screen = request_screen()
        act = spin_lock_on_button()
        if not act:
            continue
        print(act)
        if 'start' in act:
            request_click(act['start'])
            continue
        if self.state == Simulator.State.CALLING:
            # state has changed
            if 'reverse' in act:
                self.state = Simulator.State.PLAYING
                self.current_lord_pos = who_is_lord(self.current_screen)
                # Retry until the lord position is detected (or toggled off).
                while self.current_lord_pos < 0:
                    self.current_screen = request_screen()
                    self.current_lord_pos = who_is_lord(self.current_screen)
                    print('current lord pos ', self.current_lord_pos)
                    if self.toggle.value == 0:
                        break
                continue
            if 'continuous defeat' in act:
                request_click(act['continuous defeat'])
                continue
            print('calling', act)
            handcards, _ = get_cards_bboxes(self.current_screen, self.templates, 0)
            cards_value, _ = CEnv.get_cards_value(Card.char2color(handcards))
            print('cards value: ', cards_value)
            # assert 'jiaodizhu' in act
            # Click 'bujiao' for a hand value below 10, 'jiaodizhu' otherwise.
            request_click(act['bujiao']) if cards_value < 10 else request_click(act['jiaodizhu'])
        elif self.state == Simulator.State.PLAYING:
            if 'defeat' in act or 'victory' in act:
                request_click(act['defeat'] if 'defeat' in act else act['victory'])
                if self.cached_msg is None:
                    print('other player wins in one step!!!')
                    continue
                win = is_win(self.current_screen)
                state, action, fine_mask = self.cached_msg
                # Flush the cached transition as terminal with reward +1 / -1.
                if win:
                    sim2exp_sockets[self.current_lord_pos].send(dumps([[state, state], action, 1, True, False, [fine_mask, fine_mask]]))
                    self.win_rates[self.agent_names[self.current_lord_pos]].feed(1.)
                else:
                    sim2exp_sockets[self.current_lord_pos].send(dumps([[state, state], action, -1, True, False, [fine_mask, fine_mask]]))
                    self.win_rates[self.agent_names[self.current_lord_pos]].feed(0.)
                game_cnt += 1
                # Every 100 finished games, log and reset the win rates.
                if game_cnt % 100 == 0:
                    for agent in self.agent_names:
                        if self.win_rates[agent].count > 0:
                            logger.info('[last-100]{} win rate: {}'.format(agent, self.win_rates[agent].average))
                            self.win_rates[agent].reset()
                self.reset_episode()
                continue
            # test if we have cached msg not sent
            print('playing', act)
            left_cards, _ = get_cards_bboxes(self.current_screen, self.mini_templates, 1)
            right_cards, _ = get_cards_bboxes(self.current_screen, self.mini_templates, 2)
            # An unrecognised card on either side: click 'buchu' and retry.
            if None in left_cards or None in right_cards:
                request_click(act['buchu'])
                time.sleep(1.)
                continue
            assert None not in left_cards
            assert None not in right_cards
            self.history[1].extend(right_cards)
            self.history[2].extend(left_cards)
            # last_cards = left_cards
            # if not left_cards:
            #     last_cards = right_cards
            # print('last cards', last_cards)
            # Indicator vector over 60 slots with slots 53-55 and 57-59 cleared.
            total_cards = np.ones([60])
            total_cards[53:56] = 0
            total_cards[57:60] = 0
            handcards, bboxes = get_cards_bboxes(self.current_screen, self.templates, 0)
            # NOTE(review): handcards is filtered here but bboxes is not, so
            # the index pairing used for clicking below assumes no None
            # entries were dropped -- confirm.
            handcards = [card for card in handcards if card is not None]
            remain_cards = total_cards - Card.char2onehot60(handcards + self.history[0] + self.history[1] + self.history[2])
            print('current handcards: ', handcards)
            # left_cnt, right_cnt = get_opponent_cnts(self.current_screen, self.tiny_templates)
            # print('left cnt: ', left_cnt, 'right cnt: ', right_cnt)
            # Opponent hand sizes: 17 minus what they have played, plus 3 for
            # whichever opponent is the lord.
            left_cnt = 17 - len(self.history[2])
            right_cnt = 17 - len(self.history[1])
            if self.current_lord_pos == 1:
                left_cnt += 3
            if self.current_lord_pos == 2:
                right_cnt += 3
            # assert left_cnt > 0 and right_cnt > 0
            # to be the same as C++ side, right comes before left
            # Split the unseen cards between the opponents by hand size.
            right_prob_state = remain_cards * (right_cnt / (left_cnt + right_cnt))
            left_prob_state = remain_cards * (left_cnt / (left_cnt + right_cnt))
            prob_state = np.concatenate([right_prob_state, left_prob_state])
            # assert prob_state.size == 120
            # assert np.all(prob_state < 1.) and np.all(prob_state >= 0.)
            # print(prob_state)
            intention, buffer_comb, buffer_fine = self.predictor.predict(handcards, [left_cards, right_cards], prob_state, self, sim2coord_socket, coord2sim_socket)
            # The previous move's transition can only be completed now that
            # its successor state is known.
            if self.cached_msg is not None:
                state, action, fine_mask = self.cached_msg
                sim2exp_sockets[self.current_lord_pos].send(
                    dumps([[state, buffer_comb[0]], action, 0, False, False, [fine_mask, buffer_comb[2]]]))

            sim2exp_sockets[self.current_lord_pos].send(
                dumps([[buffer_comb[0], buffer_fine[0]], buffer_comb[1], 0, False, True, [buffer_comb[2], buffer_fine[2]]]))
            self.cached_msg = buffer_fine
            self.history[0].extend(intention)
            print('intention is: ', intention)
            intention.sort(key=lambda k: Card.cards_to_value[k])
            if len(intention) == 0:
                request_click(act['buchu'])
            else:
                # Two-pointer walk pairing each intended card with the next
                # equal card in hand; presumably handcards is in the same
                # value order as the sorted intention -- TODO confirm.
                i = 0
                j = 0
                to_click = []
                to_click_idxs = []
                while j < len(intention):
                    if handcards[i] == intention[j]:
                        to_click_idxs.append(i)
                        to_click.append(bboxes[i])
                        i += 1
                        j += 1
                    else:
                        i += 1
                for bbox in to_click:
                    request_click(bbox)
                    time.sleep(0.5)
                # Fixed screen position clicked after selecting the cards
                # (presumably the play button -- TODO confirm).
                request_click([1310, 760, 1310, 760])
                time.sleep(1.)
def save(df, paths, N, write_frequency=1000):
    """
    Serialize a DataFlow into N LMDB files, assigning datapoints round-robin.

    Datapoint ``idx`` is written to ``paths[idx % N]`` under the key
    ``'{:08}'.format(idx)``. Each file also gets a ``__keys__`` entry
    holding the serialized list of the keys stored in it.

    Args:
        df (DataFlow): the DataFlow to serialize.
        paths (list[str]): output paths, one per slice. Each must be an lmdb
            file that does not exist yet.
        N (int): number of slices; ``paths`` is expected to hold N entries.
        write_frequency (int): the frequency to write back data to disk.
            A smaller value reduces memory usage.
    """
    assert isinstance(df, DataFlow), type(df)
    # It's OK to use super large map_size on Linux, but not on other platforms
    # See: https://github.com/NVIDIA/DIGITS/issues/206
    map_size = 1099511627776 * 2 if platform.system() == 'Linux' else 128 * 10**6
    dbs = []
    txns = []
    all_slice_keys = [[] for _ in range(N)]
    size = _reset_df_and_get_size(df)
    slice_sizes = [0] * N
    for path in paths:
        assert not os.path.isfile(path), "LMDB file {} exists!".format(path)
        db = lmdb.open(path, subdir=False,
                       map_size=map_size, readonly=False,
                       meminit=False, map_async=True)    # need sync() at the end
        dbs.append(db)
        # LMDB transaction is not exception-safe!
        # although it has a context manager interface
        txns.append(db.begin(write=True))

    # put data into lmdb, and doubling the size if full.
    # Ref: https://github.com/NVIDIA/DIGITS/pull/209/files
    # NOTE(review): aborting the full transaction also discards every put made
    # since the last commit; only the current key is retried. Confirm whether
    # those earlier datapoints need to be replayed.
    def put_or_grow(db, txn, key, value):
        try:
            txn.put(key, value)
            return txn
        except lmdb.MapFullError:
            pass
        txn.abort()
        curr_size = db.info()['map_size']
        new_size = curr_size * 2
        print("Doubling LMDB map_size to {:.2f}GB".format(new_size / 10**9))
        db.set_mapsize(new_size)
        txn = db.begin(write=True)
        txn = put_or_grow(db, txn, key, value)
        return txn

    with tqdm.tqdm(total=size) as pbar:
        idx = -1
        for idx, dp in enumerate(df):
            slice_idx = idx % N
            db = dbs[slice_idx]
            key = u'{:08}'.format(idx).encode('ascii')
            txn = put_or_grow(db, txns[slice_idx], key, dumps(dp))
            # put_or_grow may hand back a NEW transaction after growing the
            # map; remember it, otherwise the next datapoint of this slice
            # would be written through the aborted one.
            txns[slice_idx] = txn
            slice_sizes[slice_idx] += 1
            all_slice_keys[slice_idx].append(key)
            pbar.set_postfix(s=str(slice_sizes))
            pbar.update()
            # Commit once per ``write_frequency`` datapoints of this slice.
            if slice_sizes[slice_idx] % write_frequency == 0:
                txn.commit()
                txns[slice_idx] = db.begin(write=True)

    print("Finished reading %d data points" % (idx + 1))
    for i in range(N):
        db = dbs[i]
        # write last batch
        txns[i].commit()
        slice_keys = all_slice_keys[i]
        keys_blob = dumps(slice_keys)
        # Commit whichever transaction put_or_grow returns: a ``with`` block
        # would only finalize the original one, losing the key index if the
        # map had to grow.
        keys_txn = put_or_grow(db, db.begin(write=True), b'__keys__', keys_blob)
        keys_txn.commit()
        print("Flushing '%s' (%d keys) ..." % ((paths[i]), len(slice_keys)))
        db.sync()
    for db in dbs:
        db.close()
def request_unlock():
    """Send an UNLOCK request to the simulator manager; return its decoded reply."""
    request = [self.name, SimulatorManager.MSG_TYPE.UNLOCK, []]
    sim2mgr_socket.send(dumps(request))
    reply = mgr2sim_socket.recv(copy=False)
    return loads(reply.bytes)