def gen_Ball(info):
    """Simulate one zero-action ball rollout and store it with `store_data`.

    Args:
        info: dict providing 'seed', 'data_dir', 'data_names', 'time_step',
            'dt', 'n_ball' and 'file_name'.

    The rollout (attributes, states, actions, relation attributes) is written
    to ``<data_dir>/<file_name>.h5``. Nothing is returned.
    """
    seed, data_dir, data_names = info['seed'], info['data_dir'], info['data_names']
    time_step, dt, n_ball = info['time_step'], info['dt'], info['n_ball']
    file_name = info['file_name']

    # Create the directory directly instead of shelling out to `mkdir -p`:
    # portable, and safe for paths containing spaces or shell metacharacters.
    os.makedirs(data_dir, exist_ok=True)

    np.random.seed(seed)

    attr_dim = 1
    state_dim = 4
    action_dim = 2

    engine = BallEngine(dt, state_dim, action_dim=action_dim)
    engine.init(n_ball)

    n_obj = engine.num_obj
    attrs_all = np.zeros((time_step, n_obj, attr_dim))
    states_all = np.zeros((time_step, n_obj, state_dim))
    actions_all = np.zeros((time_step, n_obj, action_dim))
    rel_attrs_all = np.zeros((time_step, engine.param_dim, 2))

    # The action stays zero for the whole rollout.
    act = np.zeros((n_obj, action_dim))
    vel_dim = state_dim // 2

    for j in range(time_step):
        state = engine.get_state()
        pos = state[:, :vel_dim]
        vel = state[:, vel_dim:]

        # From the second step on, velocity is the finite difference of the
        # stored positions rather than the engine's own velocity.
        if j > 0:
            vel = (pos - states_all[j - 1, :, :vel_dim]) / dt

        attrs = np.zeros((n_obj, attr_dim))
        attrs[:] = engine.radius

        attrs_all[j] = attrs
        states_all[j, :, :vel_dim] = pos
        states_all[j, :, vel_dim:] = vel
        rel_attrs_all[j] = engine.param

        # apply zero action
        engine.step(act)
        actions_all[j] = act.copy()

    datas = [attrs_all, states_all, actions_all, rel_attrs_all]
    store_data(data_names, datas, os.path.join(data_dir, '%s.h5' % file_name))
def main():
    """Load ``Dane.csv`` from this script's directory and run the pipeline.

    The loaded data is passed through `u.specify_list_of_probabilities` and
    `u.count_final_probability`, and the result is handed to
    `u.give_recommendations`.
    """
    print('Provide data about your employee:\n')

    script_dir = os.path.dirname(os.path.realpath(__file__))
    csv_path = script_dir + '/Dane.csv'
    data_frame, data_list, column_name = u.store_data(csv_path)

    probabilities = u.specify_list_of_probabilities(
        data_frame, data_list, column_name)
    probability_list, probability_dictionary = probabilities

    probability_cleared = u.count_final_probability(probability_list)
    u.give_recommendations(probability_cleared, probability_dictionary)
def test_params(data_dir, nrows, low, high, len):
    """Draw random frequencies, time `get_rowmap` on them and store the arrays.

    Args:
        data_dir: directory the three ``.dat`` files are written into.
        nrows: number of frequency samples to draw.
        low, high, len: bounds and point count of the `np.linspace` grid the
            samples are drawn from (`len` shadows the builtin; the name is
            kept for callers).
    """
    grid = np.linspace(low, high, len)
    frequency = np.random.choice(grid, nrows)
    ufrequency = np.unique(frequency)

    for file_name, values in (('frequency.dat', frequency),
                              ('ufrequency.dat', ufrequency)):
        store_data(os.path.join(data_dir, file_name), values)

    # Only the get_rowmap call (plus array conversion) is timed.
    started = time.time()
    vmap = np.array(get_rowmap(frequency, ufrequency))
    elapsed = time.time() - started
    print('Original Time: {:.2f}s'.format(elapsed))

    store_data(os.path.join(data_dir, 'vmap.dat'), vmap)
def gen_Cloth(info):
    """Generate cloth rollouts with PyFleX and return running statistics.

    Args:
        info: dict providing 'env', 'env_idx', 'thread_idx', 'data_dir',
            'data_names', 'n_rollout', 'time_step', 'dt', 'args', 'phase',
            'vis_width' and 'vis_height'.

    Returns:
        ``[state_stat, action_stat]`` accumulated over this worker's rollouts
        with `combine_stat`.

    For each rollout one ``fig_<j>.jpg`` per time step is written into
    ``<data_dir>/<rollout_idx>/`` and the arrays are stored to
    ``<data_dir>/<rollout_idx>.h5``.
    """
    # 'env', 'env_idx' and 'phase' are read but not used below; the lookups
    # are kept so a malformed `info` still fails early with KeyError.
    env, env_idx = info['env'], info['env_idx']
    thread_idx, data_dir, data_names = info['thread_idx'], info['data_dir'], info['data_names']
    n_rollout, time_step = info['n_rollout'], info['time_step']
    dt, args, phase = info['dt'], info['args'], info['phase']
    vis_width, vis_height = info['vis_width'], info['vis_height']

    state_dim = args.state_dim
    action_dim = args.action_dim
    dt = 1. / 60.  # NOTE: overrides info['dt'], as in the original code

    # Seed from wall-clock time and worker index so workers differ.
    np.random.seed(round(time.time() * 1000 + thread_idx) % 2 ** 32)

    stats = [init_stat(state_dim), init_stat(action_dim)]

    engine = ClothEngine(dt, state_dim, action_dim)

    import pyflex
    pyflex.init()

    for i in range(n_rollout):
        rollout_idx = thread_idx * n_rollout + i
        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        os.makedirs(rollout_dir, exist_ok=True)

        engine.init(pyflex)
        scene_params = engine.scene_params

        action = np.zeros(4)
        states_all = np.zeros((time_step, engine.n_particles, state_dim))
        actions_all = np.zeros((time_step, 1, action_dim))

        # drop the cloth down
        engine.set_action(action)
        engine.step()

        for j in range(time_step):
            # sample a new action every 5 steps; otherwise zero component 2
            if j % 5 == 0:
                ctrl_pts = rand_int(0, 8)
                act_lim = 0.05
                dx = rand_float(-act_lim, act_lim)
                dz = rand_float(-act_lim, act_lim)
                dy = 0.05
                action = np.array([ctrl_pts, dx, dy, dz])
            else:
                action[2] = 0.

            # store the rollout information
            state = engine.get_state()
            states_all[j] = state

            tga_path = os.path.join(rollout_dir, '%d.tga' % j)
            pyflex.render(capture=True, path=tga_path)

            # Close the image handle before deleting the temporary file.
            with Image.open(tga_path) as tga_file:
                # crop columns 60:780, keep 3 channels, reverse channel order
                tga = np.array(tga_file)[:, 60:780, :3][:, :, ::-1]
            tga = cv2.resize(tga, (vis_width, vis_height), interpolation=cv2.INTER_AREA)
            os.remove(tga_path)

            jpg_path = os.path.join(rollout_dir, 'fig_%d.jpg' % j)
            cv2.imwrite(jpg_path, tga)

            actions_all[j, 0] = action.copy()

            engine.set_action(action)
            engine.step()

        datas = [states_all, actions_all, scene_params]
        store_data(data_names, datas, rollout_dir + '.h5')

        datas = [d.astype(np.float64) for d in datas]

        # Fold this rollout's mean/std/count into the running statistics.
        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    pyflex.clean()

    return stats
def gen_data(self):
    """Generate rollouts in parallel and build or load dataset statistics.

    One `info` dict is built per worker and mapped over a process pool with
    `gen_Ball` or `gen_Cloth`. In the 'train' phase the per-worker stats are
    merged with `combine_stat` and stored at `self.stat_path`; in any other
    phase the stats are loaded from that path.

    Raises:
        AssertionError: if `n_rollout` is not divisible by the worker count,
            or if `self.args.env` is neither 'Ball' nor 'Cloth'.
    """
    # if the data hasn't been generated, generate the data
    n_rollout, time_step, dt = self.n_rollout, self.args.time_step, self.args.dt
    assert n_rollout % self.args.num_workers == 0

    print("Generating data ... n_rollout=%d, time_step=%d" % (n_rollout, time_step))

    infos = []
    for i in range(self.args.num_workers):
        info = {'thread_idx': i,
                'data_dir': self.data_dir,
                'data_names': self.data_names,
                'n_rollout': n_rollout // self.args.num_workers,
                'time_step': time_step,
                'dt': dt,
                'video': self.args.video,
                'image': self.args.image,
                'draw_edge': self.args.draw_edge,
                'phase': self.phase,
                'args': self.args,
                'vis_height': self.args.height_raw,
                'vis_width': self.args.width_raw,
                'save_type': self.args.h5}

        if self.args.env in ['Ball']:
            info['env'] = 'Ball'
            info['n_ball'] = self.args.n_ball
        elif self.args.env in ['Cloth']:
            info['env'] = 'Cloth'
            info['env_idx'] = 15

        infos.append(info)

    cores = self.args.num_workers
    env = self.args.env

    # Pick the worker before creating the pool so an unknown env does not
    # leave worker processes behind.
    if env in ['Ball']:
        worker = gen_Ball
    elif env in ['Cloth']:
        worker = gen_Cloth
    else:
        raise AssertionError("Unknown env")

    # The context manager tears the pool down once map() has returned.
    with mp.Pool(processes=cores) as pool:
        data = pool.map(worker, infos)

    print("Training data generated, warpping up stats ...")

    if self.phase == 'train':
        if env in ['Ball']:
            self.stat = [init_stat(self.args.attr_dim),
                         init_stat(self.args.state_dim),
                         init_stat(self.args.action_dim)]
        elif env in ['Cloth']:
            self.stat = [init_stat(self.args.state_dim),
                         init_stat(self.args.action_dim)]

        if self.args.h5 != 0:
            # With h5 enabled each worker result is indexed as
            # (stats, graph, trajectories).
            data_ = [core[0] for core in data]
            graph = [core[1] for core in data]
            trajectories = [core[2] for core in data]
        else:
            data_ = data

        for worker_stats in data_:
            for j in range(len(self.stat)):
                self.stat[j] = combine_stat(self.stat[j], worker_stats[j])

        if self.args.h5 != 0:
            store_trajectories(trajectories, self.args.dataf)
            store_graph(graph, self.args.dataf)

        # NOTE(review): the collapsed source does not show whether this call
        # was nested under the h5 branch above; it is assumed unconditional.
        store_data(self.data_names[:len(self.stat)], self.stat, self.stat_path)
    else:
        print("Loading stat from %s ..." % self.stat_path)
        self.stat = load_data(self.data_names, self.stat_path)
# if record_exist(date): if os.path.isfile(os.path.join(DUMP_DIRECTORY, f'{date}.json')): logger.debug(f'"{str(date)}.json" exist, continue') continue entries = dom.find('div.themeform').find('p') total, new = parse_confirmed_total(entries) persons = [] entries = dom.find('div.themeform').find('li') checker = new for entry in entries: person = parse_infected_info(entry) if person: persons.append(person) checker -= 1 if checker == 0: break store_data( os.path.join(DUMP_DIRECTORY, f'{date}.json'), { 'day': date, 'total': total, 'diff': checker, 'new': new, 'persons': persons }) except IndexError: logger.debug('An index error has been found')
def evaluate(roll_idx, video=True, image=True):
    """Run the keypoint model on one rollout and save visualizations.

    Args:
        roll_idx: index of the rollout directory under `data_dir`.
        video: if true, write ``<args.evalf>/<roll_idx>.avi``.
        image: if true, write ``fig_<i>.png`` frames into
            ``<args.evalf>/<roll_idx>/``.

    Relies on module-level `args`, `data_dir`, `data_store_dir`, `model_kp`
    and image helpers. When `args.store_result == 1` the predicted keypoints
    are stored to ``<data_store_dir>/<roll_idx>.h5``.
    """
    eval_path = os.path.join(args.evalf, str(roll_idx))

    n_split = 4
    split = 4

    if image:
        os.makedirs(eval_path, exist_ok=True)
        print('Save images to %s' % eval_path)

    if video:
        video_path = eval_path + '.avi'
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        print('Save video as %s' % video_path)
        frame_rate = 25 if args.env in ['Ball'] else 60
        out = cv2.VideoWriter(video_path, fourcc, frame_rate, (
            400 * n_split + split * (n_split - 1), 400))

    # load images
    imgs = []
    suffix = '.png' if args.env in ['Ball'] else '.jpg'
    for i in range(args.eval_st_idx, args.eval_ed_idx):
        img_path = os.path.join(data_dir, str(roll_idx), 'fig_%d%s' % (i, suffix))
        img = loader(img_path)
        img = resize_and_crop('valid', img, args.scale_size, args.crop_size)
        img = trans_to_tensor(img).unsqueeze(0).cuda()
        imgs.append(img)

    imgs = torch.cat(imgs, 0)

    # model prediction
    for i in range(args.eval_ed_idx - args.eval_st_idx):

        if args.stage == 'kp':
            img = imgs[i:i + 1]

            # The first frame is the source image for every reconstruction.
            if i == 0:
                src = img.clone()

            with torch.set_grad_enabled(False):
                # reconstruct the target image using the source image
                img_pred, _, _ = model_kp(src, img)

                # predict the position of the keypoints
                keypoint = model_kp.predict_keypoint(img)

                # transform the keypoints to the heatmap
                heatmap = model_kp.keypoint_to_heatmap(keypoint, inv_std=args.inv_std)

            if args.store_result == 1:
                timesteps = args.eval_ed_idx - args.eval_st_idx
                if i == 0:
                    store_kp_result = np.zeros((timesteps, args.n_kp, 2))

                store_kp_result[i] = to_np(keypoint[0])

                if i == timesteps - 1:
                    store_data(['keypoints'], [store_kp_result],
                               os.path.join(data_store_dir, '%d.h5' % roll_idx))

        if args.store_demo == 1:
            # convert the prediction to a 400x400 image in 0..255,
            # channels reversed
            img_pred = to_np(torch.clamp(img_pred, -1., 1.))[0].transpose(1, 2, 0)[:, :, ::-1]
            img_pred = (img_pred * 0.5 + 0.5) * 255.
            img_pred = cv2.resize(img_pred, (400, 400))

            # shift keypoints by the lower limits and scale to pixels
            lim = args.lim
            keypoint = to_np(keypoint)[0] - [lim[0], lim[2]]
            keypoint *= 400 / 2.
            # `np.int` was removed in NumPy 1.24; builtin int is what it aliased.
            keypoint = np.round(keypoint).astype(int)

            heatmap = to_np(heatmap)[0].transpose((1, 2, 0))
            heatmap = np.sum(heatmap, 2)

            heatmap = np.clip(heatmap * 255., 0., 255.)
            heatmap = cv2.resize(heatmap, (400, 400), interpolation=cv2.INTER_NEAREST)
            heatmap = np.expand_dims(heatmap, -1)

            # generate the visualization
            img_path = os.path.join(data_dir, str(roll_idx),
                                    'fig_%d%s' % (i + args.eval_st_idx, suffix))
            img = cv2.imread(img_path)
            # `np.float` was removed in NumPy 1.24; builtin float is what it aliased.
            img = cv2.resize(img, (400, 400)).astype(float)
            img_overlay = img.copy()
            kp_map = np.zeros((img.shape[0], img.shape[1], 3))

            c = [(255, 105, 65), (0, 69, 255), (50, 205, 50), (0, 165, 255), (238, 130, 238),
                 (128, 128, 128), (30, 105, 210), (147, 20, 255), (205, 90, 106), (0, 215, 255)]

            # Marker radius depends on the environment; other envs draw nothing.
            if args.env in ['Ball']:
                radius = 12
            elif args.env in ['Cloth']:
                radius = 8
            else:
                radius = None

            if radius is not None:
                for j in range(keypoint.shape[0]):
                    # Plain Python ints: some OpenCV builds reject numpy
                    # scalars as point coordinates.
                    center = (int(keypoint[j, 0]), int(keypoint[j, 1]))
                    for canvas in (kp_map, img_overlay):
                        cv2.circle(canvas, center, radius, c[j], -1)
                        cv2.circle(canvas, center, radius, (255, 255, 255), 1)

            # Four 400-px panels separated by 4-px gaps.
            merge = np.zeros((img.shape[0], img.shape[1] * n_split + split * (n_split - 1), 3))

            if args.stage == 'kp':
                merge[:, :img.shape[1]] = img
                merge[:, img.shape[1] + 4: img.shape[1] * 2 + 4] = img_overlay
                merge[:, img.shape[1] * 2 + 8: img.shape[1] * 3 + 8] = heatmap
                merge[:, img.shape[1] * 3 + 12: img.shape[1] * 4 + 12] = img_pred

            merge = merge.astype(np.uint8)

            if image:
                cv2.imwrite(os.path.join(eval_path, 'fig_%d.png' % i), merge)

            if video:
                out.write(merge)

    if video:
        out.release()
def gen_data(self):
    """Generate rollouts in parallel and build or load dataset statistics.

    One `info` dict is built per worker and mapped over a process pool with
    `gen_Rope`, `gen_Soft` or `gen_Swim`. In the 'train' phase the per-worker
    stats are merged with `combine_stat` and stored only when
    `self.args.gen_stat` is set; in any other phase they are loaded from
    `self.stat_path`.

    Raises:
        AssertionError: if `n_rollout` is not divisible by the worker count,
            or if `self.args.env` is not one of 'Rope', 'Soft', 'Swim'.
    """
    # if the data hasn't been generated, generate the data
    n_rollout, time_step, dt = self.n_rollout, self.args.time_step, self.args.dt
    assert n_rollout % self.args.num_workers == 0

    print("Generating data ... n_rollout=%d, time_step=%d" % (n_rollout, time_step))

    infos = []
    for i in range(self.args.num_workers):
        info = {
            'thread_idx': i,
            'data_dir': self.data_dir,
            'data_names': self.data_names,
            'n_rollout': n_rollout // self.args.num_workers,
            'time_step': time_step,
            'dt': dt,
            'video': False,
            'phase': self.phase,
            'args': self.args
        }
        infos.append(info)

    cores = self.args.num_workers
    env = self.args.env

    # Pick the worker before creating the pool so an unknown env does not
    # leave worker processes behind.
    if env == 'Rope':
        worker = gen_Rope
    elif env == 'Soft':
        worker = gen_Soft
    elif env == 'Swim':
        worker = gen_Swim
    else:
        raise AssertionError("Unknown env")

    # The context manager tears the pool down once map() has returned.
    with mp.Pool(processes=cores) as pool:
        data = pool.map(worker, infos)

    print("Training data generated, warpping up stats ...")

    if self.phase == 'train':
        # states [x, y, angle, xdot, ydot, angledot], action [x, xdot]
        if env in ['Rope', 'Soft', 'Swim']:
            self.stat = [
                init_stat(self.args.attr_dim),
                init_stat(self.args.state_dim),
                init_stat(self.args.action_dim)
            ]

        for worker_stats in data:
            for j in range(len(self.stat)):
                self.stat[j] = combine_stat(self.stat[j], worker_stats[j])

        if self.args.gen_stat:
            print("Storing stat to %s" % self.stat_path)
            store_data(self.data_names, self.stat, self.stat_path)
        else:
            print("stat will be discarded")
    else:
        print("Loading stat from %s ..." % self.stat_path)

        if env in ['Rope', 'Soft', 'Swim']:
            self.stat = load_data(self.data_names, self.stat_path)
def gen_Cradle(info):
    """Generate cradle rollouts and return running attribute/state statistics.

    Args:
        info: dict providing 'thread_idx', 'data_dir', 'data_names',
            'n_particle', 'n_rollout', 'time_step', 'dt' and 'args'.

    Returns:
        ``[attr_stat, state_stat]`` accumulated over this worker's rollouts
        with `combine_stat`.

    Raises:
        AssertionError: if `args.attr_dim != 2` or `args.state_dim != 4`.

    Every time step is stored to ``<data_dir>/<rollout_idx>/<j>.h5``.
    """
    thread_idx, data_dir, data_names = info['thread_idx'], info['data_dir'], info['data_names']
    n_particle, n_rollout, time_step = info['n_particle'], info['n_rollout'], info['time_step']
    dt, args = info['dt'], info['args']

    # Seed from wall-clock time and worker index so workers differ.
    np.random.seed(round(time.time() * 1000 + thread_idx) % 2**32)

    attr_dim = args.attr_dim    # ball, anchor
    state_dim = args.state_dim  # x, y, xdot, ydot

    assert attr_dim == 2
    assert state_dim == 4

    stats = [init_stat(attr_dim), init_stat(state_dim)]

    engine = CradleEngine(dt)

    n_objects = n_particle * 2  # add the same number of anchor points
    attrs = np.zeros((n_rollout, time_step, n_objects, attr_dim))
    states = np.zeros((n_rollout, time_step, n_objects, state_dim))

    bar = ProgressBar()
    for i in bar(range(n_rollout)):
        rollout_idx = thread_idx * n_rollout + i
        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        # Direct directory creation instead of shelling out to `mkdir -p`.
        os.makedirs(rollout_dir, exist_ok=True)

        theta = rand_float(0, 90)
        engine.reset_scene(n_particle, theta)

        for j in range(time_step):
            states[i, j] = engine.get_state()
            # From the second step on, velocity is the finite difference
            # of the stored positions.
            if j > 0:
                states[i, j, :, 2:] = (states[i, j, :, :2] - states[i, j - 1, :, :2]) / dt

            attrs[i, j, :n_particle, 0] = 1  # balls
            attrs[i, j, n_particle:, 1] = 1  # anchors

            data = [attrs[i, j], states[i, j]]
            store_data(data_names, data, os.path.join(rollout_dir, str(j) + '.h5'))

            engine.step()

        datas = [attrs[i].astype(np.float64), states[i].astype(np.float64)]

        # Fold this rollout's mean/std/count into the running statistics.
        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    return stats
def recommendTo(self, to, model_path, topK):
    """Email paper recommendations to a user and record them as "sent".

    Args:
        to: recipient email address; resolved to a user id with
            `get_user_id_by_email`.
        model_path: path handed to `SimilarityCalculator`.
        topK: currently unused; the number of requested results is the
            hard-coded 20 below.

    Returns:
        None. Returns early, without sending anything, when the user has no
        keywords or no field papers.
    """
    user_id = get_user_id_by_email(to)
    keywords_data = get_data('rec_user_keywords', 'keywords', user_id, 'user_id = %s')
    if not keywords_data:
        return

    sc = SimilarityCalculator(model_path)
    index, num = get_index_and_num(user_id)

    # Candidate papers from `index` on that have no preference row for
    # this user yet.
    data = get_data('rec_arxiv_paper', ['id', 'arxiv', 'title', 'abstract'], [index, user_id],
                    "rec_arxiv_paper.id >= %s AND \
                    (SELECT COUNT(1) FROM rec_user_arxiv_preference WHERE \
                    user_id = %s AND \
                    rec_arxiv_paper.id = rec_user_arxiv_preference.paper_id) = 0",
                    limit=num + 100)
    paper_data = pd.DataFrame(data, columns=['id', 'arxiv', 'title', 'abstract'])

    keywords = [' ' + kw[0] for kw in keywords_data]
    # NOTE: an earlier keyword-based split of `paper_data` into
    # "contains keyword" / "does not" halves is disabled; all candidates
    # are ranked together below.

    # `results` stays None if ranking fails, so the bookkeeping at the end
    # can be skipped instead of raising NameError.
    results = None
    try:
        template = EmailTemplate()
        data = get_data(['rec_user_field_paper', 'rec_field_paper'], ['title', 'abstract'], user_id,
                        'rec_user_field_paper.user_id = %s AND \
                        rec_user_field_paper.field_paper_id = rec_field_paper.id',
                        option='all')
        if data:
            field_data = pd.DataFrame(data, columns=['title', 'abstract'])
            results = sc.get_top_k(paper_data, field_data, 20, None)
            template.fill_paper(user_id, results, keywords)
        else:
            return
    except ValueError as e:
        print(e)
        template = 'Unknown Message.'

    # Best-effort delivery: up to 5 attempts, 10 s apart.
    error_times = 0
    while error_times < 5:
        try:
            self._send_email(to, template)
            break
        except Exception as e:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt
            # and SystemExit.
            print(e)
            error_times += 1
            time.sleep(10)

    if results is None:
        # Ranking failed before any result existed; nothing to record.
        return

    # Copy the column slice so the new columns are not assigned on a view.
    results = results[['id']].copy()
    results['user_id'] = user_id
    results['islike'] = -1
    store_data('rec_user_arxiv_preference', ['paper_id', 'user_id', 'islike'],
               results.values.tolist())
def gen_Box(info):
    """Generate box rollouts and return running state/action statistics.

    Args:
        info: dict providing 'thread_idx', 'data_dir', 'data_names',
            'n_rollout', 'n_particle', 'time_step', 'dt' and 'args'.

    Returns:
        ``[state_stat, action_stat]`` accumulated over this worker's rollouts
        with `combine_stat`.

    Raises:
        AssertionError: if `args.state_dim != 6` or `args.action_dim != 2`.

    Every time step (state, action, visibility) is stored to
    ``<data_dir>/<rollout_idx>/<j>.h5``.
    """
    thread_idx, data_dir, data_names = info['thread_idx'], info['data_dir'], info['data_names']
    n_rollout, n_particle, time_step = info['n_rollout'], info['n_particle'], info['time_step']
    dt, args = info['dt'], info['args']

    # Seed from wall-clock time and worker index so workers differ.
    np.random.seed(round(time.time() * 1000 + thread_idx) % 2**32)

    state_dim = args.state_dim    # x, y, angle, xdot, ydot, angledot
    action_dim = args.action_dim  # x, xdot
    assert state_dim == 6
    assert action_dim == 2

    stats = [init_stat(state_dim), init_stat(action_dim)]

    engine = BoxEngine(dt, state_dim, action_dim)

    states = np.zeros((n_rollout, time_step, n_particle, state_dim))
    actions = np.zeros((n_rollout, time_step, 1, action_dim))
    viss = np.zeros((n_rollout, time_step, n_particle))

    bar = ProgressBar()
    for i in bar(range(n_rollout)):
        rollout_idx = thread_idx * n_rollout + i
        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        # Direct directory creation instead of shelling out to `mkdir -p`.
        os.makedirs(rollout_dir, exist_ok=True)

        engine.reset_scene(n_particle)

        for j in range(time_step):
            engine.set_action(rand_float(-600., 100.))

            states[i, j] = engine.get_state()
            actions[i, j] = engine.get_action()
            viss[i, j] = engine.get_vis(states[i, j])

            # From the second step on, the rate components are finite
            # differences of the stored values.
            if j > 0:
                states[i, j, :, 3:] = (states[i, j, :, :3] - states[i, j - 1, :, :3]) / dt
                actions[i, j, :, 1] = (actions[i, j, :, 0] - actions[i, j - 1, :, 0]) / dt

            data = [states[i, j], actions[i, j], viss[i, j]]
            store_data(data_names, data, os.path.join(rollout_dir, str(j) + '.h5'))

            engine.step()

        datas = [states[i].astype(np.float64), actions[i].astype(np.float64)]

        # Fold this rollout's mean/std/count into the running statistics.
        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    return stats
def scrape_links(time_before_new_changed,
                 title=None,
                 links=None,
                 unscraped_links_filename=os.path.join(
                     '..', 'data', 'links', 'links_unscraped.txt'),
                 done_links_filename=os.path.join(
                     '..', 'data', 'links', 'done_links.txt')):
    """Scrape links from pages on candidate URLs and retrieve any synonyms.

    Args:
        time_before_new_changed: seconds after which the pending links are
            refreshed through `get_recent_changes`.
        title: ignored on entry; overwritten by the first popped link.
        links: pending titles (must support `pop` and `add`); loaded from
            `unscraped_links_filename` when None.
        unscraped_links_filename: file read for pending links.
        done_links_filename: file each processed title is appended to.

    Returns:
        ``(links, done_links)`` once `links` is exhausted or a
        KeyboardInterrupt is caught.
    """

    def _percent(part, whole):
        # Guard the progress report against an empty denominator.
        return round(100 * part / whole, 1) if whole else 0.0

    start_time = time.time()
    done_links = get_done_links(done_links_filename)
    if links is None:
        links = get_unscraped_links(unscraped_links_filename, done_links)
    syn_count = len(os.listdir(os.path.join('..', 'data', 'synonyms_new')))
    print('Found {} synonym-files at start of while-loop.\n'.format(syn_count))

    while links:
        if time.time() > start_time + time_before_new_changed:
            print('Time {} seconds exceeded; getting new changed links.'.
                  format(time_before_new_changed))
            links = get_recent_changes(links, done_links)
            start_time = time.time()
        title = links.pop()
        try:
            page, _, synonyms, new_links = S.main(title)
        except KeyboardInterrupt:
            print('''\nWe met with KeyboardInterrupt; title: {}. '''.
                  format(title))
            traceback.print_exc()
            return links, done_links
        except TypeError:
            # TypeError: 'NoneType' object is not iterable
            # Usually because "HTTP Error 404: Not Found", so restore title.
            # But temporarily we are leaving titles unrestored, as we think
            # some of these were previously unfiltered `redlink=1" cases.
            links.add(title)
            try:
                done_links.remove(title)
            except KeyError:
                pass
            print(' {}'.format(title))
            continue
        except Exception:
            print('\nWe met with Exception; title: {}.'.
                  format(title))
            traceback.print_exc()
            print('\n')
            continue

        # Do not examine whether title in done_links;
        # would prevent utility of "get_recent_changes()".
        # Assume new links are checked only when received from S.main().
        with open(done_links_filename, 'a') as f:
            f.write('\n' + title)
        if synonyms:
            _ = U.store_data(
                json.dumps(synonyms).encode(), title,
                target_dir='synonyms_new', tar=False)
            syn_count = len(
                os.listdir(os.path.join('..', 'data', 'synonyms_new')))
        links, new_links, done_links = update_links(
            links, new_links, done_links, title)

        # `synonyms` may be falsy (e.g. None); report 0 instead of crashing.
        n_synonyms = len(synonyms) if synonyms else 0
        print('''T: {}; links: + {:>3} => {:>}; done: {} ({}%); '''
              '''syn: + {} => {} ({}%);\n {}'''.
              format(int(time.time() - start_time),
                     len(new_links),
                     len(links),
                     len(done_links),
                     _percent(len(done_links), len(done_links) + len(links)),
                     n_synonyms,
                     syn_count,
                     _percent(syn_count, len(done_links)),
                     title))
        # To save whole pages (compressed), call here:
        # _ = U.store_data(page, title, target_dir='html_new', tar=True)
    return links, done_links
def scrape_links(time_before_new_changed,
                 title=None,
                 links=None,
                 unscraped_links_filename=os.path.join('..', 'data', 'links',
                                                       'links_unscraped.txt'),
                 done_links_filename=os.path.join('..', 'data', 'links',
                                                  'done_links.txt')):
    """Scrape links from pages on candidate URLs and retrieve any synonyms.

    Args:
        time_before_new_changed: seconds after which the pending links are
            refreshed through `get_recent_changes`.
        title: ignored on entry; overwritten by the first popped link.
        links: pending titles (must support `pop` and `add`); loaded from
            `unscraped_links_filename` when None.
        unscraped_links_filename: file read for pending links.
        done_links_filename: file each processed title is appended to.

    Returns:
        ``(links, done_links)`` once `links` is exhausted or a
        KeyboardInterrupt is caught.
    """

    def _percent(part, whole):
        # Guard the progress report against an empty denominator.
        return round(100 * part / whole, 1) if whole else 0.0

    synonyms_dir = os.path.join('..', 'data', 'synonyms_new')

    start_time = time.time()
    done_links = get_done_links(done_links_filename)
    if links is None:
        links = get_unscraped_links(unscraped_links_filename, done_links)
    syn_count = len(os.listdir(synonyms_dir))
    print('Found {} synonym-files at start of while-loop.\n'.format(syn_count))

    while links:
        if time.time() > start_time + time_before_new_changed:
            print(
                'Time {} seconds exceeded; getting new changed links.'.format(
                    time_before_new_changed))
            links = get_recent_changes(links, done_links)
            start_time = time.time()
        title = links.pop()
        try:
            page, _, synonyms, new_links = S.main(title)
        except KeyboardInterrupt:
            print('''\nWe met with KeyboardInterrupt; title: {}. '''.format(
                title))
            traceback.print_exc()
            return links, done_links
        except TypeError:
            # TypeError: 'NoneType' object is not iterable
            # Usually because "HTTP Error 404: Not Found", so restore title.
            # But temporarily we are leaving titles unrestored, as we think
            # some of these were previously unfiltered `redlink=1" cases.
            links.add(title)
            try:
                done_links.remove(title)
            except KeyError:
                pass
            print(' {}'.format(title))
            continue
        except Exception:
            print('\nWe met with Exception; title: {}.'.format(title))
            traceback.print_exc()
            print('\n')
            continue

        # Do not examine whether title in done_links;
        # would prevent utility of "get_recent_changes()".
        # Assume new links are checked only when received from S.main().
        with open(done_links_filename, 'a') as f:
            f.write('\n' + title)
        if synonyms:
            _ = U.store_data(json.dumps(synonyms).encode(),
                             title,
                             target_dir='synonyms_new',
                             tar=False)
            syn_count = len(os.listdir(synonyms_dir))
        links, new_links, done_links = update_links(links, new_links,
                                                    done_links, title)

        # `synonyms` may be falsy (e.g. None); report 0 instead of crashing.
        n_synonyms = len(synonyms) if synonyms else 0
        done_pct = _percent(len(done_links), len(done_links) + len(links))
        syn_pct = _percent(syn_count, len(done_links))
        print('''T: {}; links: + {:>3} => {:>}; done: {} ({}%); '''
              '''syn: + {} => {} ({}%);\n {}'''.format(
                  int(time.time() - start_time), len(new_links), len(links),
                  len(done_links), done_pct, n_synonyms, syn_count, syn_pct,
                  title))
        # To save whole pages (compressed), call here:
        # _ = U.store_data(page, title, target_dir='html_new', tar=True)
    return links, done_links
def quit_game(self):
    """Store the game's data, log the exit, shut `pg` down and terminate.

    Raises:
        SystemExit: always, to end the process.
    """
    # Local import: the bare `exit()` builtin is injected by the `site`
    # module and is missing under `python -S` or in frozen builds.
    import sys

    utils.store_data(self)
    utils.log('Quit the game.')
    pg.quit()
    sys.exit()
date = parse_date(content) # Si existe el fichero ya el parte se ha registrado if record_exist(date): logger.debug(f'"{str(date)}.json" exist, continue') continue entries = dom.find('div.themeform').find('p') total, new = parse_confirmed_total(entries) persons = [] entries = dom.find('div.themeform').find('li') checker = new for entry in entries: person = parse_infected_info(entry) if person: persons.append(person) checker -= 1 if checker == 0: break # print({'day': date, 'total': total, 'new': new, 'persons': persons}) # open(f'{str(date)}.json', 'w').write(str({'day': date, 'total': total, 'new': new, 'persons': persons}) store_data(f'{str(date)}.json', { 'day': date, 'total': total, 'new': new, 'persons': persons }) except IndexError: logger.debug('An index error has been found')
def gen_data(self):
    """Generate rollouts in parallel and build or load dataset statistics.

    One `info` dict is built per worker and mapped over a process pool with
    `gen_Cradle`, `gen_Rope` or `gen_Box`. When the phase is 'train' and
    `self.args.gen_stat` is set, the per-worker stats are merged with
    `combine_stat` and stored at `self.stat_path`; otherwise the stats are
    loaded from that path.

    Raises:
        AssertionError: if `self.args.env` is not one of 'Cradle', 'Rope',
            'Box'.
    """
    # if the data hasn't been generated, generate the data
    n_rollout, n_particle = self.n_rollout, self.args.n_particle
    time_step, dt = self.args.time_step, self.args.dt

    print("Generating data ... n_rollout=%d, time_step=%d" % (n_rollout, time_step))

    infos = []
    for i in range(self.args.num_workers):
        info = {
            'thread_idx': i,
            'data_dir': self.data_dir,
            'data_names': self.data_names,
            'n_particle': n_particle,
            'n_rollout': n_rollout // self.args.num_workers,
            'time_step': time_step,
            'dt': dt,
            'args': self.args
        }
        infos.append(info)

    cores = self.args.num_workers
    env = self.args.env

    # Pick the worker before creating the pool so an unknown env does not
    # leave worker processes behind.
    if env == 'Cradle':
        worker = gen_Cradle
    elif env == 'Rope':
        worker = gen_Rope
    elif env == 'Box':
        worker = gen_Box
    else:
        raise AssertionError("Unknown env")

    # The context manager tears the pool down once map() has returned.
    with mp.Pool(processes=cores) as pool:
        data = pool.map(worker, infos)

    print("Training data generated, warpping up stats ...")

    if self.phase == 'train' and self.args.gen_stat:
        if env in ['Cradle']:
            self.stat = [
                init_stat(self.args.attr_dim),
                init_stat(self.args.state_dim)
            ]
        elif env in ['Rope']:
            self.stat = [
                init_stat(self.args.attr_dim),
                init_stat(self.args.state_dim),
                init_stat(self.args.action_dim)
            ]
        elif env in ['Box']:
            self.stat = [
                init_stat(self.args.state_dim),
                init_stat(self.args.action_dim)
            ]

        for worker_stats in data:
            for j in range(len(self.stat)):
                self.stat[j] = combine_stat(self.stat[j], worker_stats[j])

        store_data(self.data_names[:len(self.stat)], self.stat, self.stat_path)
    else:
        print("Loading stat from %s ..." % self.stat_path)

        if env in ['Cradle', 'Rope']:
            self.stat = load_data(self.data_names, self.stat_path)
        elif env in ['Box']:
            # Only the first two data names are loaded for Box.
            self.stat = load_data(self.data_names[:2], self.stat_path)
def gen_Ball(info):
    """Generate ball rollouts with random actions and return running stats.

    Args:
        info: dict providing 'thread_idx', 'data_dir', 'data_names',
            'n_rollout', 'time_step', 'dt', 'video', 'image', 'draw_edge',
            'args', 'phase', 'n_ball' and 'save_type'.

    Returns:
        `stats` when `save_type == 0`, otherwise ``(stats, graph, traj)``.
        `stats` is ``[attr_stat, state_stat, action_stat]``, `graph` is the
        relation-attribute array of the last rollout, and `traj` is the list
        of per-rollout state arrays.
    """
    thread_idx, data_dir, data_names = info['thread_idx'], info['data_dir'], info['data_names']
    n_rollout, time_step = info['n_rollout'], info['time_step']
    dt, video, image, draw_edge, args, phase = info['dt'], info['video'], info['image'], info['draw_edge'], info['args'], info['phase']
    n_ball = info['n_ball']
    save_type = info['save_type']

    # Seed from wall-clock time and worker index so workers differ.
    np.random.seed(round(time.time() * 1000 + thread_idx) % 2 ** 32)

    attr_dim = args.attr_dim  # radius
    state_dim = args.state_dim  # x, y, xdot, ydot
    action_dim = 2  # ddx, ddy

    stats = [init_stat(attr_dim), init_stat(state_dim), init_stat(action_dim)]
    traj = []
    graph = None

    engine = BallEngine(dt, state_dim, action_dim=2)

    # bar = ProgressBar()
    for i in range(n_rollout):
        rollout_idx = thread_idx * n_rollout + i
        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        # The rollout directory is only created when saving to disk.
        if save_type==0:
            os.system('mkdir -p ' + rollout_dir)

        engine.init(n_ball,param_load=args.load_rels) # changed this

        n_obj = engine.num_obj
        attrs_all = np.zeros((time_step, n_obj, attr_dim))
        states_all = np.zeros((time_step, n_obj, state_dim))
        actions_all = np.zeros((time_step, n_obj, action_dim))
        rel_attrs_all = np.zeros((time_step, engine.param_dim, 2))

        act = np.zeros((n_obj, 2))
        for j in range(time_step):
            state = engine.get_state()

            vel_dim = state_dim // 2
            pos = state[:, :vel_dim]
            vel = state[:, vel_dim:]

            # From the second step on, velocity is the finite difference of
            # the stored positions.
            if j > 0:
                vel = (pos - states_all[j - 1, :, :vel_dim]) / dt

            attrs = np.zeros((n_obj, attr_dim))
            attrs[:] = engine.radius

            attrs_all[j] = attrs
            states_all[j, :, :vel_dim] = pos
            states_all[j, :, vel_dim:] = vel
            rel_attrs_all[j] = engine.param

            # Random perturbation of the action, damped by the previous
            # action and by state[:, 2:], then clipped to [-1000, 1000].
            act += (np.random.rand(n_obj, 2) - 0.5) * 600 - act * 0.1 - state[:, 2:] * 0.1
            act = np.clip(act, -1000, 1000)
            engine.step(act)

            actions_all[j] = act.copy()

        datas = [attrs_all, states_all, actions_all, rel_attrs_all]
        traj.append(states_all.astype(np.float64))
        graph = rel_attrs_all  # overwritten each rollout; the last one is returned
        # NOTE(review): the collapsed source does not show whether
        # engine.render was nested under this `if`; it is assumed so because
        # rollout_dir is only created when save_type == 0. Confirm against
        # the original file.
        if save_type == 0:
            store_data(data_names, datas, rollout_dir + '.h5') # stores the rollout
            engine.render(states_all, actions_all, engine.get_param(), video=video, image=image, path=rollout_dir, draw_edge=draw_edge, verbose=True)

        datas = [datas[i].astype(np.float64) for i in range(len(datas))]

        # Fold this rollout's mean/std/count into the running statistics.
        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    if save_type == 0:
        return stats
    else:
        return stats, graph, traj
def gen_Swim(info):
    """Generate swimmer rollouts and return running attr/state/action stats.

    Args:
        info: dict providing 'thread_idx', 'data_dir', 'data_names',
            'n_rollout', 'time_step', 'dt', 'video', 'args' and 'phase'.

    Returns:
        ``[attr_stat, state_stat, action_stat]`` accumulated over this
        worker's rollouts with `combine_stat`.

    Raises:
        AssertionError: if the rollout counts are not divisible by the group
            size / number of splits, or if `args.attr_dim != 3`.

    Rollouts in the same group share engine parameters through
    ``<data_dir>/<group_idx>.param``: the first rollout of a group writes the
    file and the others wait for it. Every time step is stored to
    ``<data_dir>/<rollout_idx>/<j>.h5``.
    """
    thread_idx, data_dir, data_names = info['thread_idx'], info['data_dir'], info['data_names']
    n_rollout, time_step = info['n_rollout'], info['time_step']
    dt, video, args, phase = info['dt'], info['video'], info['args'], info['phase']

    # Seed from wall-clock time and worker index so workers differ.
    np.random.seed(round(time.time() * 1000 + thread_idx) % 2**32)

    attr_dim = args.attr_dim    # actuated, soft, rigid
    state_dim = args.state_dim  # x, y, xdot, ydot
    action_dim = args.action_dim
    param_dim = args.param_dim  # n_box, k, damping, init_p

    act_scale = 500.
    act_delta = 250.

    # attr, state, action
    stats = [init_stat(attr_dim), init_stat(state_dim), init_stat(action_dim)]

    engine = SwimEngine(dt, state_dim, action_dim, param_dim)

    group_size = args.group_size
    sub_dataset_size = n_rollout * args.num_workers // args.n_splits
    print('group size', group_size, 'sub_dataset_size', sub_dataset_size)
    assert n_rollout % group_size == 0
    assert args.n_rollout % args.n_splits == 0

    bar = ProgressBar()
    for i in bar(range(n_rollout)):
        rollout_idx = thread_idx * n_rollout + i
        group_idx = rollout_idx // group_size
        sub_idx = rollout_idx // sub_dataset_size

        num_obj_range = args.num_obj_range if phase in {'train', 'valid'} else args.extra_num_obj_range
        num_obj = num_obj_range[sub_idx]

        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        param_file = os.path.join(data_dir, str(group_idx) + '.param')

        # Direct directory creation instead of shelling out to `mkdir -p`.
        os.makedirs(rollout_dir, exist_ok=True)

        if rollout_idx % group_size == 0:
            init_p = None if not args.regular_data else sample_init_p_flight(
                n_box=num_obj, aug=True, train=phase == 'train')
            engine.init(param=(num_obj, None, None, init_p))
            # Write to a temporary name and rename atomically so a worker
            # polling for `param_file` never loads a half-written file.
            tmp_param_file = param_file + '.tmp'
            torch.save(engine.get_param(), tmp_param_file)
            os.replace(tmp_param_file, param_file)
        else:
            # Wait until the group's first rollout has published its params.
            while not os.path.isfile(param_file):
                time.sleep(0.5)
            param = torch.load(param_file)
            engine.init(param=param)

        # Per-box action parameters, sampled at the first time step.
        act_t_param = np.zeros((engine.n_box, 3))

        for j in range(time_step):
            box_type = engine.init_p[:, 2]
            act_t = np.zeros((engine.n_box, action_dim))

            for k in range(engine.n_box):
                if box_type[k] == 0:
                    # if this is an actuated box
                    if j == 0:
                        act_t_param[k] = np.array([
                            rand_float(0., 1.),
                            rand_float(1., 2.5),
                            rand_float(0, np.pi * 2)
                        ])

                    if act_t_param[k, 0] < 0.3:
                        # using smooth action
                        if j == 0:
                            act_t[k] = rand_float(-act_delta, act_delta)
                        else:
                            lo = max(actions_all[j - 1, k] - act_delta, -act_scale - 20)
                            hi = min(actions_all[j - 1, k] + act_delta, act_scale + 20)
                            act_t[k] = rand_float(lo, hi)
                            act_t[k] = np.clip(act_t[k], -act_scale, act_scale)

                    elif act_t_param[k, 0] < 0.6:
                        # using random action
                        act_t[k] = rand_float(-act_scale, act_scale)

                    else:
                        # using sin action
                        act_t[k] = np.sin(j / act_t_param[k, 1] + act_t_param[k, 2]) * \
                            rand_float(act_scale / 2., act_scale)

            engine.set_action(act_t)

            states = engine.get_state()
            actions = engine.get_action()

            pos = states[:, :8].copy()
            vec = states[:, 8:].copy()

            # reset velocity: finite difference of stored positions
            if j > 0:
                vec = (pos - states_all[j - 1, :, :8]) / dt

            # Buffers are allocated on the first step of each rollout.
            if j == 0:
                attrs_all = np.zeros((time_step, num_obj, attr_dim))
                states_all = np.zeros((time_step, num_obj, state_dim))
                actions_all = np.zeros((time_step, num_obj, action_dim))

            # attrs: one-hot over actuated/soft/rigid
            assert attr_dim == 3
            attrs = np.zeros((num_obj, attr_dim))

            for k in range(engine.n_box):
                attrs[k, int(engine.init_p[k, 2])] = 1

            assert np.sum(attrs[:, 0]) == np.sum(engine.init_p[:, 2] == 0)
            assert np.sum(attrs[:, 1]) == np.sum(engine.init_p[:, 2] == 1)
            assert np.sum(attrs[:, 2]) == np.sum(engine.init_p[:, 2] == 2)

            attrs_all[j] = attrs
            states_all[j, :, :8] = pos
            states_all[j, :, 8:] = vec
            actions_all[j] = actions

            data = [attrs, states_all[j], actions_all[j]]

            store_data(data_names, data, os.path.join(rollout_dir, str(j) + '.h5'))

            engine.step()

        datas = [
            attrs_all.astype(np.float64),
            states_all.astype(np.float64),
            actions_all.astype(np.float64)
        ]

        # Fold this rollout's mean/std/count into the running statistics.
        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    return stats
def gen_Rope(info):
    """Simulate Rope rollouts for one worker and store every time step.

    For each rollout a directory ``<data_dir>/<rollout_idx>`` is created and
    one ``<step>.h5`` file per time step is written through ``store_data``.
    Rollouts are organised in groups of ``args.group_size``: the first
    rollout of a group initialises the engine and saves its parameters to
    ``<data_dir>/<group_idx>.param``; the remaining rollouts of the group
    wait for that file and load it.

    Args:
        info: dict with the keys ``thread_idx``, ``data_dir``,
            ``data_names``, ``n_rollout``, ``time_step``, ``dt``, ``args``
            and ``phase``. ``args`` must expose ``attr_dim``, ``state_dim``,
            ``action_dim``, ``param_dim``, ``group_size``, ``num_workers``,
            ``n_splits``, ``n_rollout``, ``num_obj_range`` and
            ``extra_num_obj_range``. (``info['video']`` was read but never
            used by earlier versions and is no longer required.)

    Returns:
        list: ``[attr_stat, state_stat, action_stat]``, each produced by
        ``init_stat`` and folded over all rollouts with ``combine_stat``.

    Raises:
        AssertionError: if ``n_rollout`` is not a multiple of the group
            size, ``args.n_rollout`` is not a multiple of ``args.n_splits``,
            or ``attr_dim != 2``.
        OSError: if a rollout directory cannot be created.
    """
    thread_idx = info['thread_idx']
    data_dir = info['data_dir']
    data_names = info['data_names']
    n_rollout = info['n_rollout']
    time_step = info['time_step']
    dt = info['dt']
    args = info['args']
    phase = info['phase']

    # Seed from wall-clock milliseconds plus the worker index so that
    # concurrently started workers get different random streams.
    np.random.seed(round(time.time() * 1000 + thread_idx) % 2**32)

    attr_dim = args.attr_dim        # root, child
    state_dim = args.state_dim      # x, y, xdot, ydot
    action_dim = args.action_dim
    param_dim = args.param_dim      # n_ball, init_x, k, damping, gravity

    act_scale = 2.
    ret_scale = 1.

    # attr, state, action
    stats = [init_stat(attr_dim), init_stat(state_dim), init_stat(action_dim)]

    engine = RopeEngine(dt, state_dim, action_dim, param_dim)

    group_size = args.group_size
    sub_dataset_size = n_rollout * args.num_workers // args.n_splits
    print('group size', group_size, 'sub_dataset_size', sub_dataset_size)
    assert n_rollout % group_size == 0
    assert args.n_rollout % args.n_splits == 0

    bar = ProgressBar()
    for i in bar(range(n_rollout)):
        rollout_idx = thread_idx * n_rollout + i
        group_idx = rollout_idx // group_size
        sub_idx = rollout_idx // sub_dataset_size

        if phase in {'train', 'valid'}:
            num_obj_range = args.num_obj_range
        else:
            num_obj_range = args.extra_num_obj_range
        num_obj = num_obj_range[sub_idx]

        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        param_file = os.path.join(data_dir, str(group_idx) + '.param')
        # Portable and shell-free replacement for `mkdir -p`.
        os.makedirs(rollout_dir, exist_ok=True)

        if rollout_idx % group_size == 0:
            # Group leader: create the scene and publish its parameters.
            engine.init(param=(num_obj, None, None, None, None))
            torch.save(engine.get_param(), param_file)
        else:
            # Group member: poll until the leader's parameter file exists.
            # NOTE(review): the file may exist before it is fully written;
            # confirm torch.save/torch.load cannot race here.
            while not os.path.isfile(param_file):
                time.sleep(0.5)
            param = torch.load(param_file)
            engine.init(param=param)

        # Buffers are sized from the engine's object count after init.
        n_obj = engine.num_obj
        attrs_all = np.zeros((time_step, n_obj, attr_dim))
        states_all = np.zeros((time_step, n_obj, state_dim))
        actions_all = np.zeros((time_step, n_obj, action_dim))

        for j in range(time_step):
            states_ctl = engine.get_state()[0]

            # Only object 0 is driven, along action component 0: a uniform
            # random term in [-act_scale, act_scale) minus a term
            # proportional to the object's first state entry.
            act_t = np.zeros((engine.num_obj, action_dim))
            act_t[0, 0] = (np.random.rand() * 2 - 1.) * act_scale \
                - states_ctl[0] * ret_scale

            engine.set_action(action=act_t)

            states = engine.get_state()
            actions = engine.get_action()

            n_obj = engine.num_obj

            pos = states[:, :2].copy()
            vec = states[:, 2:].copy()

            # reset velocity: after the first step use the finite difference
            # of the stored positions instead of the engine's value
            if j > 0:
                vec = (pos - states_all[j - 1, :, :2]) / dt

            # attrs: [1, 0] => root; [0, 1] => child
            assert attr_dim == 2
            attrs = np.zeros((n_obj, attr_dim))
            # category: the first ball is fixed
            attrs[0, 0] = 1
            attrs[1:, 1] = 1

            assert np.sum(attrs[:, 0]) == 1
            assert np.sum(attrs[:, 1]) == engine.num_obj - 1

            attrs_all[j] = attrs
            states_all[j, :, :2] = pos
            states_all[j, :, 2:] = vec
            actions_all[j] = actions

            data = [attrs, states_all[j], actions_all[j]]
            store_data(data_names, data,
                       os.path.join(rollout_dir, str(j) + '.h5'))

            engine.step()

        datas = [
            attrs_all.astype(np.float64),
            states_all.astype(np.float64),
            actions_all.astype(np.float64)
        ]

        # Fold this rollout's mean / std / step count into the running stats.
        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    return stats
def gen_Rope(info):
    """Simulate Rope rollouts with extra fixed objects and store every step.

    Each rollout resets the scene with ``n_particle`` particles, drives them
    with accumulated random actions, and writes one ``<step>.h5`` file per
    time step into ``<data_dir>/<rollout_idx>`` through ``store_data``.
    Every stored array has ``n_particle + 2`` rows: the particles followed
    by two extra objects whose positions come from ``engine.c_positions``.

    Args:
        info: dict with the keys ``thread_idx``, ``data_dir``,
            ``data_names``, ``n_rollout``, ``n_particle``, ``time_step``,
            ``dt`` and ``args``. ``args`` must expose ``attr_dim`` (3),
            ``state_dim`` (4) and ``action_dim`` (2).

    Returns:
        list: ``[attr_stat, state_stat, action_stat]``, each produced by
        ``init_stat`` and folded over all rollouts with ``combine_stat``.

    Raises:
        AssertionError: if the dimensions in ``args`` differ from 3/4/2 or
            the number of rows flagged as fixed is not 3.
        OSError: if a rollout directory cannot be created.
    """
    thread_idx = info['thread_idx']
    data_dir = info['data_dir']
    data_names = info['data_names']
    n_rollout = info['n_rollout']
    n_particle = info['n_particle']
    time_step = info['time_step']
    dt = info['dt']
    args = info['args']

    # Seed from wall-clock milliseconds plus the worker index so that
    # concurrently started workers get different random streams.
    np.random.seed(round(time.time() * 1000 + thread_idx) % 2**32)

    attr_dim = args.attr_dim        # moving flag, fixed flag, radius
    state_dim = args.state_dim      # x, y, xdot, ydot
    action_dim = args.action_dim    # xddot, yddot
    assert attr_dim == 3
    assert state_dim == 4
    assert action_dim == 2

    act_scale = 15

    # attr, state, action
    stats = [init_stat(attr_dim), init_stat(state_dim), init_stat(action_dim)]

    engine = RopeEngine(dt, state_dim, action_dim)

    n_total = n_particle + 2
    attrs = np.zeros((n_rollout, time_step, n_total, attr_dim))
    states = np.zeros((n_rollout, time_step, n_total, state_dim))
    actions = np.zeros((n_rollout, time_step, n_total, action_dim))

    bar = ProgressBar()
    for i in bar(range(n_rollout)):
        rollout_idx = thread_idx * n_rollout + i
        rollout_dir = os.path.join(data_dir, str(rollout_idx))
        # Portable and shell-free replacement for `mkdir -p`.
        os.makedirs(rollout_dir, exist_ok=True)

        engine.reset_scene(n_particle)

        act = np.zeros((n_particle, action_dim))
        for j in range(time_step):
            # Particle k receives the running sum of the first k + 1 random
            # draws, so neighbouring particles get correlated actions.
            f = np.zeros(action_dim)
            for k in range(n_particle):
                f += (np.random.rand(action_dim) * 2 - 1) * act_scale
                act[k] = f

            engine.set_action(action=act)

            state = engine.get_state()
            action = engine.get_action()

            states[i, j, :n_particle] = state
            states[i, j, n_particle:, :2] = engine.c_positions
            actions[i, j, :n_particle] = action

            # reset velocity: after the first step use the finite difference
            # of the stored positions for every row
            if j > 0:
                states[i, j, :, 2:] = (
                    states[i, j, :, :2] - states[i, j - 1, :, :2]) / dt

            # attrs: column 0 => moving, column 1 => fixed, column 2 => radius
            n_obj = attrs.shape[2]
            attr = np.zeros((n_obj, attr_dim))
            attr[0, 1] = 1              # the first ball is fixed
            attr[1:n_particle, 0] = 1   # the rest of the balls is free to move
            attr[n_particle:, 1] = 1    # the trailing extra objects are fixed
            attr[:n_particle, 2] = engine.radius
            attr[n_particle:, 2] = engine.c_radius

            # first ball + the two extra objects
            assert np.sum(attr[:, 1]) == 3

            attrs[i, j] = attr

            data = [attr, states[i, j], actions[i, j]]
            store_data(data_names, data,
                       os.path.join(rollout_dir, str(j) + '.h5'))

            engine.step()

        datas = [
            attrs[i].astype(np.float64),
            states[i].astype(np.float64),
            actions[i].astype(np.float64)
        ]

        # Fold this rollout's mean / std / step count into the running stats.
        for j in range(len(stats)):
            stat = init_stat(stats[j].shape[0])
            stat[:, 0] = np.mean(datas[j], axis=(0, 1))[:]
            stat[:, 1] = np.std(datas[j], axis=(0, 1))[:]
            stat[:, 2] = datas[j].shape[0]
            stats[j] = combine_stat(stats[j], stat)

    return stats