def request_current_weather(self):
    try:
        self.tz = self.bs.timezone["id"]
        self.day_part_idx = utils.get_day_part(self.tz)
        self.set_last_update(self.bs.find("lastupdate"))
        time_frames = self.bs.find_all("time")
        temperature = []
        wind_direction = []
        wind_speed = []
        precipitation = []
        weather = []
        forecast_time = []
        for i in range(self.time_frame_count):
            forecast_time.append(utils.get_time(time_frames[i]['from']))
            forecast_time.append(utils.get_time(time_frames[i]['to']))
            temperature.append(time_frames[i].temperature['value'])
            wind_direction.append(
                self.translate(time_frames[i].winddirection['code'], "direction"))
            wind_speed.append(time_frames[i].windspeed['mps'])
            precipitation.append(time_frames[i].precipitation['value'])
            weather.append(
                self.translate(time_frames[i].symbol['number'], "weather"))
            self.set_forecast_string(i, forecast_time, temperature,
                                     wind_direction, wind_speed,
                                     precipitation, weather)
    except Exception as e:
        print("Error with lxml parsing: {0}".format(str(e)))
        self.forecast_string = None

def main():
    logging.info('-' * 45 + ' BEGIN: ' + utils.get_time() + ' ' + '-' * 45)
    exclude = [
        'check_epoch', 'log_file', 'model_path', 'path', 'pin_memory',
        'regenerate', 'sep', 'train', 'verbose'
    ]
    logging.info(utils.format_arg_str(args, exclude_lst=exclude))  # format and log the training arguments

    # Random seed
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    torch.backends.cudnn.deterministic = True

    # GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    logging.info('cuda available: {}'.format(torch.cuda.is_available()))
    logging.info('# cuda devices: {}'.format(torch.cuda.device_count()))

    # Read data
    corpus_path = os.path.join(args.path, args.dataset, model_name.reader + '.pkl')
    if not args.regenerate and os.path.exists(corpus_path):
        logging.info('Load corpus from {}'.format(corpus_path))
        corpus = pickle.load(open(corpus_path, 'rb'))
    else:
        corpus = reader_name(args)
        logging.info('Save corpus to {}'.format(corpus_path))
        pickle.dump(corpus, open(corpus_path, 'wb'))  # pickle the corpus object so later runs can reload it

    # Define model
    model = model_name(args, corpus)
    logging.info(model)
    model.apply(model.init_weights)  # apply() runs init_weights on every submodule
    model.actions_before_train()
    model.to(model.device)

    # Run model
    data_dict = dict()
    for phase in ['train', 'dev', 'test']:
        data_dict[phase] = model_name.Dataset(model, corpus, phase)
    runner = runner_name(args)
    logging.info('Test Before Training: ' + runner.print_res(model, data_dict['test']))
    if args.load > 0:
        model.load_model()
    if args.train > 0:
        runner.train(model, data_dict)
    logging.info(os.linesep + 'Test After Training: ' + runner.print_res(model, data_dict['test']))

    model.actions_after_train()
    logging.info(os.linesep + '-' * 45 + ' END: ' + utils.get_time() + ' ' + '-' * 45)

def get(self):
    for id in states:
        status = states[id].poll()
        if status is None:
            id_to_urls[id]['status'] = 'In progress'
        elif status == 0:
            id_to_urls[id]['status'] = 'Done'
            id_to_urls[id]['stopped_at'] = get_time()
        else:
            id_to_urls[id]['status'] = STATUS_CODES.get(status, "Failed")
            id_to_urls[id]['stopped_at'] = get_time()
    return jsonify(id_to_urls)

def train(self, model: nn.Module,
          data_dict: Dict[str, BaseModel.Dataset]) -> NoReturn:
    main_metric_results, dev_results, test_results = list(), list(), list()
    self._check_time(start=True)
    try:
        for epoch in range(self.epoch):
            # Fit
            self._check_time()
            loss = self.fit(model, data_dict['train'], epoch=epoch + 1)
            training_time = self._check_time()

            # Observe selected tensors
            if len(model.check_list) > 0 and self.check_epoch > 0 \
                    and epoch % self.check_epoch == 0:
                utils.check(model.check_list)

            # Record dev and test results
            dev_result = self.evaluate(model, data_dict['dev'], self.topk[:1], self.metrics)
            test_result = self.evaluate(model, data_dict['test'], self.topk[:1], self.metrics)
            testing_time = self._check_time()
            dev_results.append(dev_result)
            test_results.append(test_result)
            main_metric_results.append(dev_result[self.main_metric])
            logging.info(
                "Epoch {:<5} loss={:<.4f} [{:<.1f} s]\t dev=({}) test=({}) [{:<.1f} s] ".format(
                    epoch + 1, loss, training_time,
                    utils.format_metric(dev_result),
                    utils.format_metric(test_result), testing_time))

            # Save model and early stop
            if max(main_metric_results) == main_metric_results[-1] or \
                    (hasattr(model, 'stage') and model.stage == 1):
                model.save_model()
            if self.early_stop and self.eval_termination(main_metric_results):
                logging.info("Early stop at %d based on dev result." % (epoch + 1))
                break
    except KeyboardInterrupt:
        logging.info("Early stop manually")
        exit_here = input("Exit completely without evaluation? (y/n) (default n):")
        if exit_here.lower().startswith('y'):
            logging.info(os.linesep + '-' * 45 + ' END: ' + utils.get_time() + ' ' + '-' * 45)
            exit(1)

    # Find the best dev result across iterations
    best_epoch = main_metric_results.index(max(main_metric_results))
    logging.info(
        os.linesep + "Best Iter(dev)={:>5}\t dev=({}) test=({}) [{:<.1f} s] ".format(
            best_epoch + 1, utils.format_metric(dev_results[best_epoch]),
            utils.format_metric(test_results[best_epoch]),
            self.time[1] - self.time[0]))
    model.load_model()

def retranslateUi(self):
    _translate = QtCore.QCoreApplication.translate
    self.lab_date.setText("%s" % get_date())
    self.lab_time.setText("{}".format(get_time()))
    # self.lab_title.setText("""Still deciding what to show here; grab a coffee first.\nJust lie down; maybe the requirement will disappear.""")
    self.btn_pass.setText("Pass")
    self.btn_later.setText("Later 10 min")

def update_board(self, board_id, cursor):
    unit_updates = self.gm.unit_updates.find({
        "created": {"$gt": cursor},
        "board_id": board_id
    })
    unit_ids = [u["unit_id"] for u in unit_updates]

    # update timer
    self.cursor = utils.get_time()

    updated_units = []
    removed_ids = []
    for unit_id in unit_ids:
        unit = self.gm.units.find_one({
            "short_id": unit_id,
            "board_id": board_id
        })
        if unit["hidden"] is False:
            updated_units.append(self.gm._get_extended_unit(board_id, unit_id))
        else:
            removed_ids.append(unit_id)

    return {
        "removed_ids": removed_ids,      # deleted
        "updated_units": updated_units,  # added, edited
    }

def get(self):
    id = request.args.get('id')
    if id not in states:
        return {'Error': 'Invalid ID'}, 404
    status = states[id].poll()
    if status is None:
        id_to_urls[id]['status'] = 'In progress'
    elif status == 0:
        id_to_urls[id]['status'] = 'Done'
        id_to_urls[id]['stopped_at'] = get_time()
    else:
        id_to_urls[id]['status'] = STATUS_CODES.get(status, "Failed")
        id_to_urls[id]['stopped_at'] = get_time()
    return {'Status': id_to_urls[id]['status']}, 200

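# A minimal client sketch for the status endpoint above. The route path
# ('/status') and host are assumptions for illustration; substitute whatever
# path this service actually registers the resource under.
import time

import requests

def wait_until_done(job_id, base_url='http://localhost:5000'):
    """Poll the crawl status endpoint until the job leaves 'In progress'."""
    while True:
        resp = requests.get('{}/status'.format(base_url), params={'id': job_id})
        resp.raise_for_status()
        status = resp.json()['Status']
        if status != 'In progress':
            return status
        time.sleep(5)  # be gentle; the crawl runs in a separate worker process
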
def main():
    logging.info('-' * 45 + ' BEGIN: ' + utils.get_time() + ' ' + '-' * 45)
    exclude = [
        'check_epoch', 'log_file', 'model_path', 'path', 'pin_memory',
        'regenerate', 'sep', 'train', 'verbose'
    ]
    logging.info(utils.format_arg_str(args, exclude_lst=exclude))  # format and log the arguments

    # Random seed
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    torch.backends.cudnn.deterministic = True

    # GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    logging.info('cuda available: {}'.format(torch.cuda.is_available()))
    logging.info('# cuda devices: {}'.format(torch.cuda.device_count()))

    # Read data
    corpus_path = os.path.join(args.path, args.dataset, model_name.reader + '.pkl')
    corpus = pickle.load(open(corpus_path, 'rb'))

    # Define model
    model = model_name(args, corpus)
    logging.info(model)
    model.load_state_dict(torch.load(args.model_path, map_location="cpu"))
    model.eval()

    # Run model
    data_dict = dict()
    history_items = [509, 515, 666, 691]
    data_dict["test"] = {
        'user_id': torch.tensor([6185]),
        'item_id': torch.tensor(
            [sorted(list(set(corpus.all_df["item_id"].values.tolist())))]),
        'history_items': torch.tensor([history_items]),
        'lengths': torch.tensor([4]),
        'batch_size': 1,
        'phase': 'test'
    }
    runner = runner_name(args)
    prediction = model(data_dict["test"])
    result = torch.topk(prediction['prediction'], 10)[1].tolist()[0]  # indices of the top-10 items
    result_list = []
    for i in result:  # remove items the user has already bought
        if i not in history_items:
            result_list.append(i)
    logging.info(f"Top-10 recommendation result: {result_list}")

def main():
    logging.info('-' * 45 + ' BEGIN: ' + utils.get_time() + ' ' + '-' * 45)
    logging.info(utils.format_arg_str(args))

    # Random seed
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    # GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    logging.info("# cuda devices: {}".format(torch.cuda.device_count()))

    # Load data
    corpus_path = os.path.join(args.path, args.dataset, 'Corpus.pkl')
    if not args.regenerate and os.path.exists(corpus_path):
        logging.info('Load corpus from {}'.format(corpus_path))
        corpus = pickle.load(open(corpus_path, 'rb'))
    else:
        corpus = loader_name(args)
        logging.info('Save corpus to {}'.format(corpus_path))
        pickle.dump(corpus, open(corpus_path, 'wb'))

    # Define model
    model = model_name(args, corpus)
    logging.info(model)
    model = model.double()
    model.apply(model.init_weights)
    model.actions_before_train()
    if torch.cuda.device_count() > 0:
        model = model.cuda()

    # Run model
    runner = runner_name(args)
    logging.info('Test Before Training: ' + runner.print_res(model, corpus))
    if args.load > 0:
        model.load_model()
    if args.train > 0:
        runner.train(model, corpus)
    logging.info(os.linesep + 'Test After Training: ' + runner.print_res(model, corpus))

    model.actions_after_train()
    logging.info(os.linesep + '-' * 45 + ' END: ' + utils.get_time() + ' ' + '-' * 45)

def main(args):
    logging.info('-' * 45 + ' BEGIN: ' + utils.get_time() + ' ' + '-' * 45)
    exclude = [
        'check_epoch', 'log_file', 'model_path', 'path', 'pin_memory',
        'regenerate', 'sep', 'train', 'verbose'
    ]
    logging.info(utils.format_arg_str(args, exclude_lst=exclude))

    # Random seed
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

    # GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    logging.info("# cuda devices: {}".format(torch.cuda.device_count()))

    # Load data
    corpus = load_corpus(args)

    # Define model
    model = model_name(args, corpus)
    logging.info(model)
    model = model.double()
    model.apply(model.init_weights)
    model.actions_before_train()
    if torch.cuda.device_count() > 0:
        model = model.cuda()

    runner = runner_name(args)
    logging.info('Test Before Training: ' + runner.print_res(model, corpus))
    if args.load > 0:
        model.load_model()
    if args.train > 0:
        runner.train(model, corpus)
    logging.info('\nTest After Training: ' + runner.print_res(model, corpus))

    model.actions_after_train()
    logging.info(os.linesep + '-' * 45 + ' END: ' + utils.get_time() + ' ' + '-' * 45)

def play(self, load_model=True, test_ep=None, num_step=100000,
         num_episodes=200, display=True):
    model_q = Model()
    model_target_q = Model(is_target_q=True)
    end_points_q = model_q.model_def(self.inputs, self.env, name='main_q')
    _ = model_target_q.model_def(self.target_inputs, self.env, name='target_q')

    init = tf.global_variables_initializer()
    self.saver = tf.train.Saver(max_to_keep=None)

    if load_model:
        utils.load_model(self.saver, self.sess, self.model_dir)
    else:
        self.sess.run(init)

    if test_ep is None:
        test_ep = self.cfg.ep_test

    if not display:
        gym_dir = '/tmp/%s-%s' % (self.cfg.env_name, utils.get_time())
        self.env.env.monitor.start(gym_dir)

    best_reward, best_episode = 0, 0
    for episode in range(num_episodes):
        screen, reward, action, terminal = self.env.new_random_game()
        current_reward = 0

        for _ in range(self.cfg.history_length):
            self.history.add(screen)

        for t in tqdm(range(num_step), ncols=70):
            # 1. predict
            action = self.predict(end_points_q['pred_action'],
                                  self.history.get(), ep=test_ep)
            # 2. act
            screen, reward, terminal = self.env.act(action, is_training=False)
            # 3. observe
            self.history.add(screen)
            current_reward += reward
            if terminal:
                break

        if current_reward > best_reward:
            best_reward = current_reward
            best_episode = episode

        print(" [%d] Best reward : %d" % (best_episode, best_reward))

    if not display:
        self.env.env.monitor.close()

def model_evaluate_and_save(self, _actor, _critic, _class_name):
    # self.model_actor.compile(optimizer='rmsprop', loss=_loss_func, metrics=['accuracy'])
    # loss, accuracy = self.model_actor.evaluate(self.eval_x, self.eval_y)
    #
    # _, best_loss = self.get_best_loss_file(_class_name)
    # if best_loss > loss:
    today = utils.get_today()
    time_now = utils.get_time()
    path = self.get_model_weight_path(_class_name)
    file_path = path + _class_name + '_' + today + '_' + time_now + '_'
    _actor.save_weights(file_path + 'actor.h5')
    _critic.save_weights(file_path + 'critic.h5')

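# A minimal sketch of restoring a checkpoint written by
# model_evaluate_and_save(). Keras can only load weights into a model with
# the same architecture, so `build_actor`/`build_critic` are hypothetical
# stand-ins for whatever constructors this project uses; the path prefix
# mirrors the '<class>_<today>_<time>_' naming above.
def load_actor_critic_weights(build_actor, build_critic, file_path):
    actor = build_actor()
    critic = build_critic()
    actor.load_weights(file_path + 'actor.h5')
    critic.load_weights(file_path + 'critic.h5')
    return actor, critic
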
class Link(Document):
    """
    Link representation.
    """
    meta = {'collection': 'links'}

    short_id = StringField(default=lambda: utils.gen_key())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    created = StringField(default=lambda: utils.get_time())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    board_id = StringField()
    """
    :type: *str*
    :required: True
    :default: None
    """

    pith = StringField(required=True)
    """
    :type: *str*
    :required: True
    :default: None
    """

    source = StringField()
    """
    :type: *str*
    :required: True
    :default: None
    """

    target = StringField()
    """
    :type: *str*
    :required: True
    :default: None
    """

    id = StringField(default="", primary_key=True)

class Discussion(Document):
    """
    Discussion representation.
    """
    meta = {'collection': 'discussions'}

    short_id = StringField(default=lambda: utils.gen_key())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    created = StringField(default=lambda: utils.get_time())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    board_id = StringField()
    """
    :type: *str*
    :required: True
    :default: None
    """

    chat = ListField(StringField(), default=[])  # unit ids
    """
    :type: *List[str]*
    :required: False
    :default: []
    """

    pinned = ListField(StringField(), default=[])  # unit ids
    """
    :type: *List[str]*
    :required: False
    :default: []
    """

    focused = ListField(StringField(), default=[])  # unit ids
    """
    :type: *List[str]*
    :required: False
    :default: []
    """

    id = StringField(default="", primary_key=True)

async def on_message_delete(self, message):
    try:
        embed = discord.Embed(
            description=f'Message sent by {message.author.mention} '
                        f'deleted in {message.channel.mention}',
            color=0xff0000)
        embed.set_author(name=message.author, icon_url=message.author.avatar_url)
        embed.add_field(name="Content", value=message.content)
        embed.set_footer(text=utils.get_time())
        channel = await self.bot.fetch_channel(int(utils.load_settings()['chn_log']))
        await channel.send(embed=embed)
    except errors.HTTPException:
        pass

def post(self):
    baseline_url = request.json['baseline_url']
    updated_url = request.json['updated_url']
    max_depth = request.json['max_depth']
    max_urls = request.json['max_urls']
    prefix = request.json['prefix']
    auth_baseline_username = request.json.get('auth_baseline_username', '')
    auth_baseline_password = request.json.get('auth_baseline_password', '')
    auth_updated_username = request.json.get('auth_updated_username', '')
    auth_updated_password = request.json.get('auth_updated_password', '')

    if not check_unique_prefix(prefix, id_to_urls):
        return {'Error': 'Please choose a different prefix'}, 406

    # Popen arguments must be strings; cast the numeric limits in case the
    # JSON payload carries them as integers.
    p = Popen([
        'python3', 'worker_crawl.py',
        '--baseline-url', baseline_url,
        '--updated-url', updated_url,
        '--max-depth', str(max_depth),
        '--max-urls', str(max_urls),
        '--prefix', prefix,
        '--auth-baseline-username', auth_baseline_username,
        '--auth-baseline-password', auth_baseline_password,
        '--auth-updated-username', auth_updated_username,
        '--auth-updated-password', auth_updated_password
    ])
    if p.poll() is not None and p.poll() > 0:
        return {'Error': 'Failed to launch crawler'}, 406

    id = uuid.uuid4().hex
    states[id] = p
    id_to_urls[id] = {
        'baseline_url': baseline_url,
        'updated_url': updated_url,
        'status': 'Starting',
        'started_at': get_time(),
        'stopped_at': 'None',
        'prefix': prefix,
        'max_depth': max_depth,
        'max_urls': max_urls,
        'auth_baseline_username': auth_baseline_username,
        'auth_baseline_password': auth_baseline_password,
        'auth_updated_username': auth_updated_username,
        'auth_updated_password': auth_updated_password
    }
    return {'id': id}, 200

def save_state(self, val_loss, map05, to_save_folder=False, model_only=False):
    if to_save_folder:
        save_path = self.conf.work_space/'save'
    else:
        save_path = self.conf.work_space/'model'
    time = get_time()
    torch.save(
        self.state_dict(),
        save_path / ('model_{}_val_loss:{}_map05:{}_step:{}.pth'.format(
            time, val_loss, map05, self.step)))
    if not model_only:
        torch.save(
            self.optimizer.state_dict(),
            save_path / ('optimizer_{}_val_loss:{}_map05:{}_step:{}.pth'.format(
                time, val_loss, map05, self.step)))

async def on_message_edit(self, before, after):
    try:
        embed = discord.Embed(
            description=f'Message sent by {before.author.mention} '
                        f'edited in {before.channel.mention}',
            color=0xff5900)
        embed.set_author(name=before.author, icon_url=before.author.avatar_url)
        embed.add_field(name="Before", value=f'{before.content}', inline=False)
        embed.add_field(name="After", value=f'{after.content}', inline=False)
        embed.set_footer(text=utils.get_time())
        channel = await self.bot.fetch_channel(int(utils.load_settings()['chn_log']))
        await channel.send(embed=embed)
    except errors.HTTPException:
        pass

async def update_board_job(ctx):
    gm = ctx["manager"]
    cursor = ctx["cursor"]
    ctx["cursor"] = get_time()

    boards = [b for b in gm.boards.find()]
    for board in boards:
        board_id = board["short_id"]
        # use cursor to get update
        product = gm.board_manager.update_board(board_id=board_id, cursor=cursor)
        result = dumps(product, cls=DictEncoder)
        # emit to every user in board
        await gm.sio.emit("update_board", result, room=board_id, namespace='/board')

class Board(Document):
    """
    Board representation.
    """
    meta = {'collection': 'boards'}

    short_id = StringField(default=lambda: utils.gen_key())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    created = StringField(default=lambda: utils.get_time())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    id = StringField(default="", primary_key=True)

class User(Document):
    """
    User representation.
    """
    meta = {'collection': 'users'}

    short_id = StringField(default=lambda: utils.gen_key())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    created = StringField(default=lambda: utils.get_time())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    board_id = StringField()
    """
    :type: *str*
    :required: True
    :default: None
    """

    discussion_id = StringField()
    """
    :type: *str*
    :required: False
    :default: None
    """

    nickname = StringField(required=True)
    """
    :type: *str*
    :required: True
    :default: None
    """

    id = StringField(default="", primary_key=True)

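# A minimal usage sketch for the Document models above, assuming a standard
# MongoEngine setup. The database name is hypothetical, and the explicit
# `id` values are an assumption: since `id` is a primary key defaulting to
# "", the project presumably assigns it when creating documents.
from mongoengine import connect

connect('discussions_db')  # hypothetical database name

board = Board(id=utils.gen_key())
board.save()

user = User(id=utils.gen_key(), board_id=board.short_id, nickname='ada')
user.save()
print(user.created)  # populated by the utils.get_time() default
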
def main(args):
    logging.info('-' * 45 + ' BEGIN: ' + utils.get_time() + ' ' + '-' * 45)
    exclude = [
        'check_epoch', 'log_file', 'model_path', 'path', 'pin_memory',
        'regenerate', 'sep', 'train', 'verbose', 'load', 'buffer'
    ]
    logging.info(utils.format_arg_str(args, exclude_lst=exclude))

    # Random seed
    np.random.seed(args.random_seed)

    # Read Data
    dataloader = DataLoader.DataLoader(args)
    dataloader._load_data()

    # Define Model
    model = model_name(args)

    # Run Model
    evaluations_list = {}
    for i in range(5):
        model.fit(dataloader.train_feature[i], dataloader.train_label[i])
        evaluations = model.print_res(dataloader.test_feature[i], dataloader.test_label[i])
        evaluation_results = model.evaluate(dataloader.test_feature[i], dataloader.test_label[i])
        for key in evaluation_results:
            if key not in evaluations_list:
                evaluations_list[key] = []
            evaluations_list[key].append(evaluation_results[key])
        logging.info('Test Results at {} times: {}'.format(i, evaluations))

    evaluations_all = {}
    for key in evaluations_list:
        evaluations_all[key] = np.mean(evaluations_list[key])
    logging.info("Average results: {}".format(utils.format_metric(evaluations_all)))

class UnitUpdate(Document):
    """
    Unit-update representation.
    """
    meta = {'collection': 'unit_updates'}

    short_id = StringField(default=lambda: utils.gen_key())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    created = StringField(default=lambda: utils.get_time())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    board_id = StringField()
    """
    :type: *str*
    :required: True
    :default: None
    """

    unit_id = StringField()
    """
    :type: *str*
    :required: True
    :default: None
    """

    id = StringField(default="", primary_key=True)

def get_time(request):
    return HttpResponse(utils.get_time())

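# A minimal sketch of wiring the view above into a Django URLconf. The
# module path and route name are assumptions; adjust to this project's
# actual urls.py.
from django.urls import path

from . import views

urlpatterns = [
    path('time/', views.get_time, name='get_time'),
]
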
def main(args):
    """main"""
    model_config = UNIMOConfig(args.unimo_config_path)
    model_config.print_config()

    gpu_id = 0
    gpus = fluid.core.get_cuda_device_count()
    if args.is_distributed and os.getenv("FLAGS_selected_gpus") is not None:
        gpu_list = os.getenv("FLAGS_selected_gpus").split(",")
        gpus = len(gpu_list)
        gpu_id = int(gpu_list[0])

    if args.use_cuda:
        place = fluid.CUDAPlace(gpu_id)
        dev_count = gpus
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    tokenizer = GptBpeTokenizer(vocab_file=args.unimo_vocab_file,
                                encoder_json_file=args.encoder_json_file,
                                vocab_bpe_file=args.vocab_bpe_file,
                                do_lower_case=args.do_lower_case)

    data_reader = RegressionReader(tokenizer, args)
    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        trainers_num = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
        train_data_generator = data_reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            dev_count=trainers_num,
            shuffle=True,
            phase="train")

        num_train_examples = data_reader.get_num_examples(args.train_set)

        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // trainers_num
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // trainers_num

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d, gpu_id: %d" % (dev_count, gpu_id))
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args, pyreader_name='train_reader', config=model_config)
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    beta1=args.beta1,
                    beta2=args.beta2,
                    epsilon=args.epsilon)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test or args.do_pred:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, graph_vars = create_model(
                    args, pyreader_name='test_reader', config=model_config)
        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    print("args.is_distributed:", args.is_distributed)
    if args.is_distributed:
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)
        print("worker_endpoints:{} trainers_num:{} current_endpoint:{} "
              "trainer_id:{}".format(worker_endpoints, trainers_num,
                                     current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        if args.nccl_comm_num > 1:
            config.nccl_comm_num = args.nccl_comm_num
        if args.use_hierarchical_allreduce and trainers_num > args.hierarchical_allreduce_inter_nranks:
            config.use_hierarchical_allreduce = args.use_hierarchical_allreduce
            config.hierarchical_allreduce_inter_nranks = args.hierarchical_allreduce_inter_nranks
            assert config.hierarchical_allreduce_inter_nranks > 1
            assert trainers_num % config.hierarchical_allreduce_inter_nranks == 0
            config.hierarchical_allreduce_exter_nranks = \
                trainers_num / config.hierarchical_allreduce_inter_nranks

        t = fluid.DistributeTranspiler(config=config)
        t.transpile(
            trainer_id,
            trainers=worker_endpoints_env,
            current_endpoint=current_endpoint,
            program=train_program if args.do_train else test_prog,
            startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print("WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                  "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe, args.init_checkpoint, main_program=train_program)
        elif args.init_pretraining_params:
            init_pretraining_params(exe, args.init_pretraining_params,
                                    main_program=train_program)
    elif args.do_val or args.do_test or args.do_pred:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or testing!")
        init_checkpoint(exe, args.init_checkpoint, main_program=startup_prog)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            loss_name=graph_vars["loss"].name,
            exec_strategy=exec_strategy,
            main_program=train_program,
            num_trainers=nccl2_num_trainers,
            trainer_id=nccl2_trainer_id)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    test_exe = exe
    if args.do_val or args.do_test or args.do_pred:
        if args.use_multi_gpu_test:
            test_exe = fluid.ParallelExecutor(
                use_cuda=args.use_cuda,
                main_program=test_prog,
                share_vars_from=train_exe)

    dev_ret_history = []  # (steps, key_eval, eval)
    if args.do_train:
        train_pyreader.start()
        steps = 0
        if warmup_steps > 0:
            graph_vars["learning_rate"] = scheduled_lr

        time_begin = time.time()
        skip_steps = args.skip_steps
        while True:
            try:
                steps += 1
                if steps % skip_steps == 0:
                    train_fetch_list = [graph_vars["loss"].name]
                    if "learning_rate" in graph_vars:
                        train_fetch_list.append(graph_vars["learning_rate"].name)
                    res = train_exe.run(fetch_list=train_fetch_list)

                    outputs = {"loss": np.mean(res[0])}
                    if "learning_rate" in graph_vars:
                        outputs["learning_rate"] = float(res[1][0])

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size()
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)

                    current_example, current_epoch = data_reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("%s - epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                          "speed: %f steps/s" %
                          (get_time(), current_epoch, current_example,
                           num_train_examples, steps, outputs["loss"],
                           args.skip_steps / used_time))
                    time_begin = time.time()
                else:
                    train_exe.run(fetch_list=[])

                if nccl2_trainer_id == 0:
                    if steps % args.save_steps == 0 and args.save_checkpoints:
                        save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                        fluid.io.save_persistables(exe, save_path, train_program)

                    if steps % args.validation_steps == 0:
                        # evaluate dev set
                        if args.do_val:
                            test_pyreader.decorate_tensor_provider(
                                data_reader.data_generator(
                                    args.dev_set,
                                    batch_size=args.batch_size,
                                    epoch=1,
                                    dev_count=1,
                                    shuffle=False))
                            outputs = evaluate(args, test_exe, test_prog,
                                               test_pyreader, graph_vars, "dev")
                            dev_ret_history.append(
                                (steps, outputs['key_eval'],
                                 outputs[outputs['key_eval']]))
                        # evaluate test set
                        if args.do_test:
                            test_pyreader.decorate_tensor_provider(
                                data_reader.data_generator(
                                    args.test_set,
                                    batch_size=args.batch_size,
                                    epoch=1,
                                    dev_count=1,
                                    shuffle=False))
                            outputs = evaluate(args, test_exe, test_prog,
                                               test_pyreader, graph_vars, "test")
                        if args.do_pred:
                            test_pyreader.decorate_tensor_provider(
                                data_reader.data_generator(
                                    args.test_set,
                                    batch_size=args.batch_size,
                                    epoch=1,
                                    dev_count=1,
                                    shuffle=False))
                            qids, preds, probs = predict(test_exe, test_prog,
                                                         test_pyreader,
                                                         graph_vars, dev_count=1)
                            save_path = args.pred_save + '.test.' + str(steps) + '.txt'
                            print("testing {}, save to {}".format(args.test_set, save_path))
                            with open(save_path, 'w') as f:
                                for id, s, p in zip(qids, preds, probs):
                                    f.write('{}\t{}\t{}\n'.format(id, s, p))
            except fluid.core.EOFException:
                if args.save_checkpoints:
                    save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    if nccl2_trainer_id == 0:
        # final eval on dev set
        if args.do_val:
            test_pyreader.decorate_tensor_provider(
                data_reader.data_generator(
                    args.dev_set,
                    batch_size=args.batch_size,
                    epoch=1,
                    dev_count=1,
                    shuffle=False))
            print("Final validation result:")
            outputs = evaluate(args, test_exe, test_prog, test_pyreader,
                               graph_vars, "dev")
            dev_ret_history.append(
                (steps, outputs['key_eval'], outputs[outputs['key_eval']]))
            dev_ret_history = sorted(dev_ret_history, key=lambda a: a[2], reverse=True)
            print("Best validation result: step %d %s %f" %
                  (dev_ret_history[0][0], dev_ret_history[0][1],
                   dev_ret_history[0][2]))

        # final eval on test set
        if args.do_test:
            test_pyreader.decorate_tensor_provider(
                data_reader.data_generator(
                    args.test_set,
                    batch_size=args.batch_size,
                    epoch=1,
                    dev_count=1,
                    shuffle=False))
            print("Final test result:")
            outputs = evaluate(args, test_exe, test_prog, test_pyreader,
                               graph_vars, "test")

        # final predictions on test set
        if args.do_pred:
            test_pyreader.decorate_tensor_provider(
                data_reader.data_generator(
                    args.test_set,
                    batch_size=args.batch_size,
                    epoch=1,
                    dev_count=1,
                    shuffle=False))
            qids, preds, probs = predict(test_exe, test_prog, test_pyreader,
                                         graph_vars, dev_count=1)
            save_path = args.pred_save + '.' + str(steps) + '.txt'
            print("testing {}, save to {}".format(args.test_set, save_path))
            with open(save_path, 'w') as f:
                for id, s, p in zip(qids, preds, probs):
                    f.write('{}\t{}\t{}\n'.format(id, s, p))

def update_ui(self):
    self.lab_date.setText("%s" % get_date())
    self.lab_time.setText("{}".format(get_time()))
    logging.debug('update date')

class Unit(Document):
    """
    Unit representation. Text-searchable over `pith`.
    """
    meta = {'collection': 'units'}

    short_id = StringField(default=lambda: utils.gen_key())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    created = StringField(default=lambda: utils.get_time())
    """
    :type: *str*
    :required: False
    :default: Automatically generated.
    """

    board_id = StringField()
    """
    :type: *str*
    :required: True
    :default: None
    """

    pith = StringField(required=True)
    """
    :type: *str*
    :required: True
    :default: None
    """

    position = EmbeddedDocumentField(Position)

    chat = BooleanField(default=False)  # versus in document
    """
    :type: *bool*
    :required: False
    :default: False
    """

    author = StringField()
    """
    :type: *str*
    :required: False
    :default: None
    """

    author_name = StringField()
    """
    :type: *str*
    :required: False
    :default: None
    """

    flairs = ListField(StringField(), default=[])
    """
    :type: *List[str]*
    :required: False
    :default: []
    """

    hidden = BooleanField(default=False)
    """
    :type: *bool*
    :required: False
    :default: False
    """

    notice = BooleanField(default=False)
    """
    :type: *bool*
    :required: False
    :default: False
    """

    id = StringField(default="", primary_key=True)

def main(args):
    """main"""
    model_config = UNIMOConfig(args.unimo_config_path)
    model_config.print_config()

    gpu_id = 0
    gpus = fluid.core.get_cuda_device_count()
    if args.is_distributed and os.getenv("FLAGS_selected_gpus") is not None:
        gpu_list = os.getenv("FLAGS_selected_gpus").split(",")
        gpus = len(gpu_list)
        gpu_id = int(gpu_list[0])

    if args.use_cuda:
        place = fluid.CUDAPlace(gpu_id)
        dev_count = gpus
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    tokenizer = GptBpeTokenizer(vocab_file=args.unimo_vocab_file,
                                encoder_json_file=args.encoder_json_file,
                                vocab_bpe_file=args.vocab_bpe_file,
                                do_lower_case=args.do_lower_case)

    if not (args.do_train or args.do_val or args.do_test or args.do_test_hard):
        raise ValueError(
            "For args `do_train`, `do_val`, `do_test`, `do_test_hard`, at "
            "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    trainers_num = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))

    if args.do_train:
        train_data_reader = ClassifyReader(args.train_filelist, args.max_seq_len, tokenizer)
        train_data_generator = train_data_reader.data_generator(
            batch_size=args.batch_size, epoch=args.epoch, phase="train")

        if args.num_train_examples:
            num_train_examples = args.num_train_examples
        else:
            num_train_examples = train_data_reader.get_num_examples()

        step_num_per_epoch = num_train_examples // args.batch_size // trainers_num
        max_train_steps = args.epoch * step_num_per_epoch
        warmup_steps = int(max_train_steps * args.warmup_proportion)

        print("Device count: %d, gpu_id: %d" % (dev_count, gpu_id))
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    config=model_config,
                    pyreader_name="train_reader",
                    is_train=True)
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    beta1=args.beta1,
                    beta2=args.beta2,
                    epsilon=args.epsilon)

    if args.do_val or args.do_test or args.do_test_hard:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, test_graph_vars = create_model(
                    args,
                    config=model_config,
                    pyreader_name="dev_reader",
                    is_train=False)
        test_prog = test_prog.clone(for_test=True)

        if args.do_val:
            dev_data_reader = ClassifyReader(args.dev_filelist, args.max_seq_len, tokenizer)
            dev_data_generator = dev_data_reader.data_generator(
                batch_size=args.test_batch_size, epoch=1, phase="dev")

        if args.do_test:
            test_data_reader = ClassifyReader(args.test_filelist, args.max_seq_len, tokenizer)
            test_data_generator = test_data_reader.data_generator(
                batch_size=args.test_batch_size, epoch=1, phase="test")

        if args.do_test_hard:
            test_hard_data_reader = ClassifyReader(args.test_hard_filelist, args.max_seq_len, tokenizer)
            test_hard_data_generator = test_hard_data_reader.data_generator(
                batch_size=args.test_batch_size, epoch=1, phase="test_hard")

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    print("args.is_distributed:", args.is_distributed)
    if args.is_distributed:
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)
        print("worker_endpoints:{} trainers_num:{} current_endpoint:{} "
              "trainer_id:{}".format(worker_endpoints, trainers_num,
                                     current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        if args.nccl_comm_num > 1:
            config.nccl_comm_num = args.nccl_comm_num
        if args.use_hierarchical_allreduce and trainers_num > args.hierarchical_allreduce_inter_nranks:
            config.use_hierarchical_allreduce = args.use_hierarchical_allreduce
            config.hierarchical_allreduce_inter_nranks = args.hierarchical_allreduce_inter_nranks
            assert config.hierarchical_allreduce_inter_nranks > 1
            assert trainers_num % config.hierarchical_allreduce_inter_nranks == 0
            config.hierarchical_allreduce_exter_nranks = \
                trainers_num / config.hierarchical_allreduce_inter_nranks

        t = fluid.DistributeTranspiler(config=config)
        t.transpile(trainer_id,
                    trainers=worker_endpoints_env,
                    current_endpoint=current_endpoint,
                    program=train_program if args.do_train else test_prog,
                    startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print("WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                  "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe, args.init_checkpoint, main_program=train_program)
        elif args.init_pretraining_params:
            init_pretraining_params(exe, args.init_pretraining_params,
                                    main_program=train_program)
    elif args.do_val or args.do_test or args.do_test_hard:
        args.init_checkpoint = args.init_pretraining_params
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or testing!")
        init_checkpoint(exe, args.init_checkpoint, main_program=startup_prog)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = 4 if args.use_fp16 else 2
        exec_strategy.num_iteration_per_drop_scope = min(
            args.num_iteration_per_drop_scope, args.skip_steps)

        build_strategy = fluid.BuildStrategy()
        build_strategy.remove_unnecessary_lock = False

        if args.use_fuse:
            build_strategy.fuse_all_reduce_ops = True

        train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                           loss_name=graph_vars["loss"].name,
                                           build_strategy=build_strategy,
                                           exec_strategy=exec_strategy,
                                           main_program=train_program,
                                           num_trainers=nccl2_num_trainers,
                                           trainer_id=nccl2_trainer_id)
        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    if args.do_val or args.do_test or args.do_test_hard:
        test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                          main_program=test_prog,
                                          share_vars_from=train_exe)

    dev_ret_history = []        # (steps, key_eval, eval)
    test_ret_history = []       # (steps, key_eval, eval)
    test_hard_ret_history = []  # (steps, key_eval, eval)
    steps = 0

    if args.do_train:
        train_pyreader.start()
        time_begin = time.time()
        skip_steps = args.skip_steps
        while True:
            try:
                steps += 1
                if steps % skip_steps == 0:
                    train_fetch_list = [graph_vars["loss"].name, scheduled_lr.name]
                    res = train_exe.run(fetch_list=train_fetch_list)
                    outputs = {
                        "loss": np.mean(res[0]),
                        'learning_rate': float(res[1][0])
                    }
                    if args.verbose:
                        verbose = "train pyreader queue size: %d, learning_rate: %.10f" % \
                            (train_pyreader.queue.size(), outputs['learning_rate'])
                        print(verbose)

                    current_epoch, current_example, current_file_index, total_file, current_file = \
                        train_data_reader.get_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("%s - epoch: %d, progress: %d/%d, %d/%d, step: %d, ave loss: %f, "
                          "speed: %f steps/s" %
                          (get_time(), current_epoch, current_example,
                           num_train_examples, current_file_index, total_file,
                           steps, outputs["loss"], args.skip_steps / used_time))
                    time_begin = time.time()
                else:
                    train_exe.run(fetch_list=[])

                if nccl2_trainer_id == 0:
                    if steps % args.save_steps == 0 and args.save_checkpoints:
                        save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                        fluid.io.save_persistables(exe, save_path, train_program)

                    if steps % args.validation_steps == 0:
                        # evaluate dev set
                        if args.do_val:
                            test_pyreader.decorate_tensor_provider(dev_data_generator)
                            outputs = evaluate(args, test_exe, test_pyreader,
                                               test_graph_vars, "dev",
                                               trainers_num, nccl2_trainer_id)
                            if nccl2_trainer_id == 0:
                                dev_ret_history.append(
                                    (steps, outputs['key_eval'],
                                     outputs[outputs['key_eval']]))
                        # evaluate test set
                        if args.do_test:
                            test_pyreader.decorate_tensor_provider(test_data_generator)
                            outputs = evaluate(args, test_exe, test_pyreader,
                                               test_graph_vars, "test",
                                               trainers_num, nccl2_trainer_id)
                            if nccl2_trainer_id == 0:
                                test_ret_history.append(
                                    (steps, outputs['key_eval'],
                                     outputs[outputs['key_eval']]))
                        # evaluate test_hard set
                        if args.do_test_hard:
                            test_pyreader.decorate_tensor_provider(test_hard_data_generator)
                            outputs = evaluate(args, test_exe, test_pyreader,
                                               test_graph_vars, "test_hard",
                                               trainers_num, nccl2_trainer_id)
                            if nccl2_trainer_id == 0:
                                test_hard_ret_history.append(
                                    (steps, outputs['key_eval'],
                                     outputs[outputs['key_eval']]))
            except fluid.core.EOFException:
                if args.save_checkpoints:
                    save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on dev set
    if args.do_val:
        test_pyreader.decorate_tensor_provider(dev_data_generator)
        outputs = evaluate(args, test_exe, test_pyreader, test_graph_vars,
                           "dev", trainers_num, nccl2_trainer_id)
        if nccl2_trainer_id == 0:
            dev_ret_history.append(
                (steps, outputs['key_eval'], outputs[outputs['key_eval']]))

    # final eval on test set
    if args.do_test:
        test_pyreader.decorate_tensor_provider(test_data_generator)
        outputs = evaluate(args, test_exe, test_pyreader, test_graph_vars,
                           "test", trainers_num, nccl2_trainer_id)
        if nccl2_trainer_id == 0:
            test_ret_history.append(
                (steps, outputs['key_eval'], outputs[outputs['key_eval']]))

    # final eval on test_hard set
    if args.do_test_hard:
        test_pyreader.decorate_tensor_provider(test_hard_data_generator)
        outputs = evaluate(args, test_exe, test_pyreader, test_graph_vars,
                           "test_hard", trainers_num, nccl2_trainer_id)
        if nccl2_trainer_id == 0:
            test_hard_ret_history.append(
                (steps, outputs['key_eval'], outputs[outputs['key_eval']]))

    if nccl2_trainer_id == 0:
        if args.do_val:
            dev_ret_history = sorted(dev_ret_history, key=lambda a: a[2], reverse=True)
            print("Best validation result: step %d %s %f" %
                  (dev_ret_history[0][0], dev_ret_history[0][1],
                   dev_ret_history[0][2]))

async def startup(ctx):
    utils.logging.info("Starting new worker...")
    # dedicate a manager for the worker
    ctx["cursor"] = utils.get_time()
    ctx["manager"] = GlobalManager()
    ctx["manager"].start()  # no Redis

def check_for_update(url):
    response = requests.get(url)
    bs = BeautifulSoup(response.content, 'lxml')
    last_update = utils.get_time(bs.find("lastupdate").text)
    return last_update

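# A hedged usage sketch: check_for_update() expects an XML weather feed that
# contains a <lastupdate> element. The URL below is purely illustrative;
# point it at whatever feed this project actually polls.
if __name__ == '__main__':
    feed_url = 'https://example.com/weather/forecast.xml'  # hypothetical feed
    print('Feed last updated at:', check_for_update(feed_url))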