def bert_epoch_step(self, state, current):
    model_to_save = state['model']
    if self.save_best_only:
        if self.monitor_op(current, self.best):
            logger.info(f"\nEpoch {state['epoch']}: {self.monitor} improved from {self.best:.5f} to {current:.5f}")
            self.best = current
            state['best'] = self.best
            model_to_save.save_pretrained(str(self.base_path))
            output_config_file = self.base_path / 'config.json'
            with open(str(output_config_file), 'w') as f:
                f.write(model_to_save.config.to_json_string())
            state.pop("model")
            torch.save(state, self.base_path / 'checkpoint_info.bin')
    else:
        if state['epoch'] % self.epoch_freq == 0:
            save_path = self.base_path / f"checkpoint-epoch-{state['epoch']}"
            save_path.mkdir(exist_ok=True)
            logger.info(f"\nEpoch {state['epoch']}: save model to disk.")
            model_to_save.save_pretrained(save_path)
            output_config_file = save_path / 'config.json'
            with open(str(output_config_file), 'w') as f:
                f.write(model_to_save.config.to_json_string())
            state.pop("model")
            torch.save(state, save_path / 'checkpoint_info.bin')
async def login(client, username, password):
    from io import BytesIO  # needed below to wrap the captcha bytes for PIL

    def detect(Img):
        logger.info(f"Detect: Detecting...")
        i = transform(Img).unsqueeze(0)
        predict_label1, predict_label2 = model(i)
        predict_label = LabeltoStr(
            [
                np.argmax(predict_label1.data.numpy()[0]),
                np.argmax(predict_label2.data.numpy()[0]),
            ]
        )
        logger.info(f"Detect: Result {predict_label}")
        return predict_label

    home = await client.get(DK_URL)
    soup = BeautifulSoup(home.content, "lxml")
    captcha = await client.get(CAPTCHA_URL)
    # The response object itself is not file-like, so wrap the body bytes for PIL
    # (assumes an httpx-style async client where .content holds the raw bytes).
    valid_code = detect(Image.open(BytesIO(captcha.content)))
    execution = soup.find("input", attrs={"type": "hidden", "name": "execution"})
    data = {
        "username": username,
        "password": password,
        "validateCode": valid_code,
        "execution": execution.get("value"),
        "_eventId": "submit",
        "geolocation": "",
    }
    post = await client.post(home.url, data=data)
    logger.info(f"Login: {username} Login...")
    return post.url == DK_URL
def create_examples(self, lines, example_type, cached_examples_file):
    '''
    Creates examples for data
    '''
    pbar = ProgressBar(n_total=len(lines))
    if cached_examples_file.exists():
        logger.info("Loading examples from cached file %s", cached_examples_file)
        examples = torch.load(cached_examples_file)
    else:
        examples = []
        for i, line in enumerate(lines):
            guid = '%s-%d' % (example_type, i)
            text_a = line[0]
            label = line[1]
            # np.float was removed in NumPy 1.24+; the builtin float is equivalent here
            if isinstance(label, str):
                label = [float(x) for x in label.split(",")]
            else:
                label = [float(x) for x in list(label)]
            text_b = None
            example = InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)
            examples.append(example)
            pbar.batch_step(step=i, info={}, bar_type='create examples')
        logger.info("Saving examples into cached file %s", cached_examples_file)
        torch.save(examples, cached_examples_file)
    return examples
def start(update, context):
    message = update.message
    chat = message.forward_from_chat if message.forward_from_chat else message.chat
    jobs = [t.name for t in context.job_queue.jobs()]
    message.reply_markdown(
        f"Usage:\n"
        f"/add <username> <password> \\[region-num]\n"
        f"region-num:\n"
        f"1 - 上海\n2 - 湖北\n3 - 其他中国地区\n5 - 国外\n"
        f"/del <username>\n"
        f"CHAT ID: `{chat.id}`\n"
        f"Current Jobs: {jobs}"
    )
    logger.info(f"Start command: Current Jobs: {jobs}")
def detect(Img):
    logger.info(f"Detect: Detecting...")
    i = transform(Img).unsqueeze(0)
    predict_label1, predict_label2 = model(i)
    predict_label = LabeltoStr([
        np.argmax(predict_label1.data.numpy()[0]),
        np.argmax(predict_label2.data.numpy()[0]),
    ])
    logger.info(f"Detect: Result {predict_label}")
    return predict_label
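# Hedged usage sketch for detect(): it relies on the module-level `transform`,
# `model`, and `LabeltoStr` already being initialised; "captcha_sample.png" is a
# hypothetical local file used purely for illustration.
if __name__ == "__main__":
    sample = Image.open("captcha_sample.png")
    print(detect(sample))  # prints the decoded label string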
def add(update, context):
    message = update.message
    chat = message.chat
    data = message.text.split(" ")
    if len(data) < 3:
        message.reply_text("用法:\n"
                           "添加数字平台账户:\n"
                           "/add <学号> <密码> \\[地区]\n"
                           "地区:\n"
                           "1 - 上海\n"
                           "2 - 湖北\n"
                           "3 - 其他中国地区\n"
                           "5 - 国外")
        return
    username, password = data[1], data[2]
    region = 1 if len(data) <= 3 else data[3]
    chat_id = chat.id if len(data) <= 4 else data[4]
    for job in context.job_queue.get_jobs_by_name(username):
        job.schedule_removal()
    jobs = [t.name for t in context.job_queue.jobs()]
    context.job_queue.run_daily(
        checkin_queue,
        datetime.time(
            0,
            min(3 + len(jobs), 59),
            SystemRandom().randrange(60),
            SystemRandom().randrange(1000000),
            datetime.timezone(datetime.timedelta(hours=8)),
        ),
        context={
            "username": username,
            "password": password,
            "region": region,
            "chat": chat_id,
        },
        name=username,
    )
    jobs.append(username)
    context.job_queue.run_once(
        checkin_queue,
        1,
        context={
            "username": username,
            "password": password,
            "region": region,
            "chat": chat_id,
        },
    )
    message.reply_text(
        f"添加成功!\n学号: {username}\n密码: {password}\n地区: {region}\n现在的任务列表: {jobs}"
    )
    logger.info(f"Added Jobs: {username}, Current Jobs: {jobs}")
def main():
    parser = get_argparse()
    parser.add_argument("--fine_tunning_model", type=str, required=True,
                        help="fine_tuning model path")
    args = parser.parse_args()
    print(json.dumps(vars(args), sort_keys=True, indent=4,
                     separators=(', ', ': '), ensure_ascii=False))
    init_logger(log_file="./log/{}.log".format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    seed_everything(args.seed)

    # save path
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    # device
    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # tokenizer
    tokenizer = BertTokenizerFast.from_pretrained(args.model_name_or_path)

    # Dataset & Dataloader
    test_dataset = MrcDataset(args, json_path="./data/test1.json", tokenizer=tokenizer)
    test_iter = DataLoader(test_dataset,
                           shuffle=False,
                           batch_size=args.per_gpu_eval_batch_size,
                           collate_fn=collate_fn,
                           num_workers=24)
    logger.info("The number of test_dataset examples is {}".format(len(test_dataset.examples)))
    logger.info("The number of test_dataset features is {}".format(len(test_dataset)))

    # model
    model = MRC_model(args.model_name_or_path)
    model.to(args.device)
    model.load_state_dict(torch.load(args.fine_tunning_model))

    # predict test
    model.eval()
    evaluate(args, test_iter, model, prefix="test")
def create_dataset(self, features, is_sorted=False):
    if is_sorted:
        logger.info("sorted data by the length of input")
        features = sorted(features, key=lambda x: x.input_len, reverse=True)
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_ids for f in features], dtype=torch.long)
    dataset = TensorDataset(all_input_ids, all_input_ids, all_input_ids, all_label_ids)
    return dataset
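# A minimal sketch of consuming create_dataset's output; `processor` and
# `features` are assumed to exist (e.g. built by create_examples above), and the
# four-tensor batch layout mirrors the TensorDataset constructed in create_dataset.
from torch.utils.data import DataLoader, SequentialSampler

dataset = processor.create_dataset(features, is_sorted=True)
loader = DataLoader(dataset, sampler=SequentialSampler(dataset), batch_size=32)
for input_ids, _, _, label_ids in loader:
    pass  # feed input_ids / label_ids to the model here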
def load_json(filename="config.json"):
    try:
        with open(filename, "r") as file:
            config = json.load(file)
    except FileNotFoundError:
        try:
            filename = f"{os.path.split(os.path.realpath(__file__))[0]}/{filename}"
            with open(filename, "r") as file:
                config = json.load(file)
        except FileNotFoundError:
            logger.exception(f"Cannot find {filename}.")
            sys.exit(1)
    logger.info(f"Json: Loaded {filename}")
    return config
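# Hedged usage sketch for load_json: the "token" key is an assumption shown only
# to illustrate reading values from the loaded dict; load_json itself falls back
# to the script's own directory when the file is missing from the working directory.
config = load_json("config.json")
TOKEN = config.get("token")  # hypothetical key, for illustration only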
def train_val_split(self, X: list, y: list, valid_size: float,
                    data_name=None, data_dir=None, save=True):
    logger.info('split train data into train and valid')
    Xy = []
    for i in range(len(X)):
        Xy.append((X[i], y[i]))
    train, valid = train_test_split(Xy, test_size=valid_size, random_state=42)
    if save:
        train_path = data_dir / "{}.train.pkl".format(data_name)
        valid_path = data_dir / "{}.valid.pkl".format(data_name)
        save_pickle(data=train, file_path=train_path)
        save_pickle(data=valid, file_path=valid_path)
    return train, valid
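# Hedged usage sketch for train_val_split with toy data; `processor` is an
# assumed instance of the class defining the method, and save=False skips the
# pickle files so data_dir/data_name are not needed.
X = ["sentence one", "sentence two", "sentence three", "sentence four"]
y = [[1, 0], [0, 1], [1, 1], [0, 0]]
train, valid = processor.train_val_split(X, y, valid_size=0.25, save=False)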
def checkin(s, username, region):
    data = {
        "xgh": username,
        "lon": "",
        "lat": "",
        "region": region,
        "rylx": 4,
        "status": 0,
    }
    s.post(CHECKIN_URL, data=data)
    logger.info(f"Checkin: {username} Checkin...")
    home = s.get(DK_URL)
    soup = BeautifulSoup(home.content, "lxml")
    return "success" in str(soup.find("div", attrs={"class": "form-group"}))
def build_filter_sets(imgs, num_classes, mode, id2trainid=None):
    if mode != "train":
        return imgs

    json_fn = os.path.join(cfg.DATASET.PCL_DIR, "filter_set.json")
    if os.path.isfile(json_fn):
        logger.info("[*] Loading Filter sets file: {}".format(json_fn))
        with open(json_fn, "r") as f:
            records = json.load(f)
    else:
        logger.info("[*] Didn't find {}, so building it.".format(json_fn))
        records = generate_filter_all(imgs, num_classes, id2trainid)
        with open(json_fn, "w") as f:
            json.dump(records, f, indent=4)
    return records
def login(s, username, password):
    home = s.get(DK_URL)
    soup = BeautifulSoup(home.content, "lxml")
    captcha = s.get(CAPTCHA_URL, stream=True)
    valid_code = detect(Image.open(captcha.raw))
    execution = soup.find("input", attrs={"type": "hidden", "name": "execution"})
    data = {
        "username": username,
        "password": password,
        "validateCode": valid_code,
        "execution": execution.get("value"),
        "_eventId": "submit",
        "geolocation": "",
    }
    post = s.post(home.url, data=data)
    logger.info(f"Login: {username} Login...")
    return post.url == DK_URL
def checkin_queue(context): job = context.job username, password, region, chat = ( job.context.get("username"), job.context.get("password"), job.context.get("region"), job.context.get("chat"), ) s = requests.Session() s.headers.update({ "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36" }) retry_count = 5 for i in range(retry_count): try: if login(s, username, password): logger.info(f"Login: {username} Success!") break except: continue logger.warning(f"Login: {username} Fail {i}") for i in range(retry_count): try: if checkin(s, username, region): logger.info(f"Checkin: {username} Success!") context.bot.send_message(chat, f"任务: {username} 执行成功!", disable_notification=True) return except: continue logger.warning(f"Checkin: {username} Fail {i}") context.bot.send_message(chat, f"任务: {username} 执行失败!预计下个小时将继续执行。") context.job_queue.run_once( checkin_queue, SystemRandom().randint(1800, 3600), context={ "username": username, "password": password, "region": region, "chat": chat, }, ) logger.warning(f"Job: {username} fail -> run in next hour")
def save_pretrained(self, save_directory):
    """ Save a model and its configuration file to a directory, so that it can be
        re-loaded using the `:func:`~transformers.PreTrainedModel.from_pretrained`` class method.
    """
    assert os.path.isdir(save_directory), \
        "Saving path should be a directory where the model and configuration can be saved"

    # Only save the model itself if we are using distributed training
    model_to_save = self.module if hasattr(self, 'module') else self

    # Save configuration file
    # model_to_save.config.save_pretrained(save_directory)

    # If we save using the predefined names, we can load using `from_pretrained`
    output_model_file = os.path.join(save_directory, "pytorch_model.bin")
    torch.save(model_to_save.state_dict(), output_model_file)
    logger.info("Model weights saved in {}".format(output_model_file))
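# Hedged usage sketch for save_pretrained: the directory must already exist
# because of the isdir assertion above; `model` is an assumed instance of the
# class defining this method, and "./output/checkpoint" is illustrative.
os.makedirs("./output/checkpoint", exist_ok=True)
model.save_pretrained("./output/checkpoint")
# the weights can later be restored with:
# model.load_state_dict(torch.load("./output/checkpoint/pytorch_model.bin"))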
def main():
    model = ResNet(ResidualBlock)
    model.eval()
    model.load_state_dict(torch.load("model/best.pkl", map_location=device))
    logger.info("Valid: loaded model")
    predict_dataloader = get_predict_data_loader()
    for i, (images, labels) in enumerate(predict_dataloader):
        predict_label1, predict_label2 = model(images)
        predict_label = LabeltoStr([
            np.argmax(predict_label1.data.numpy()[0]),
            np.argmax(predict_label2.data.numpy()[0]),
        ])
        true_label = LabeltoStr(labels.data.numpy()[0])
        logger.info(
            f"Test: {i}, Expect: {true_label}, Predict: {predict_label}, Result: {true_label == predict_label}"
        )
def start(update, context):
    message = update.message
    jobs = [t.name for t in context.job_queue.jobs()]
    message.reply_text("用法:\n"
                       "添加数字平台账户:\n"
                       "/add <学号> <密码> [地区]\n"
                       "地区(默认上海):\n"
                       "1 - 上海\n"
                       "2 - 湖北\n"
                       "3 - 其他中国地区\n"
                       "5 - 国外\n"
                       "移除数字平台账户:\n"
                       "/del <学号>\n"
                       "立即运行:\n"
                       "/run [学号]\n"
                       f"现在的任务列表: {jobs}")
    logger.info(
        f"Start command: Current Jobs: {[t.context for t in context.job_queue.jobs()]}"
    )
def delete(update, context): message = update.message chat = message.chat data = message.text.split(" ") if len(data) < 2: message.reply_text("用法:\n移除数字平台账户:\n/del <学号>") return username = data[1] deleted_flag = False jobs = [t.name for t in context.job_queue.jobs()] for job in context.job_queue.get_jobs_by_name(username): if job.context.get("chat") in [chat.id, ADMIN]: deleted_flag = True job.schedule_removal() logger.info(f"Deleted Jobs: {username}, Current Jobs: {jobs}") if deleted_flag: message.reply_text(f"删除成功!\n学号: {username}\n现在的任务列表: {jobs}") else: message.reply_text("您没有删除此账户的权限.")
def go(self):
    if self.IS_HEROKU_MODE:
        logger.info('Heroku Mode start')
        # Get Heroku app name from config vars.
        name = os.environ.get('APP_NAME')
        # Get Heroku app port from config vars.
        port = os.environ.get('PORT')
        webhook_url = "https://{}.herokuapp.com/{}".format(name, self.TOKEN)
        self.updater.start_webhook(listen="0.0.0.0",
                                   # port=int(port),
                                   port=80,
                                   url_path=self.TOKEN)
        self.updater.bot.setWebhook(webhook_url)
    else:
        logger.info('Local Mode start')
        self.updater.start_polling()
    self.updater.idle()
def build_filter_sets(imgs, num_classes, mode, id2trainid=None): if not (mode == "train"): return imgs json_fn = os.path.join(cfg.DATASET.PCL_DIR, "classwised_set.json") if os.path.isfile(json_fn): logger.info("[*] Loading Class-wised file: {}".format(json_fn)) with open(json_fn, "r") as f: records = json.load(f) records = {int(k): v for k, v in records.items()} logger.info("[*] Found {} classes.".format(len(records))) else: logger.info("[*] Didn\'t find {}, so building it.".format(json_fn)) records = uniform.generate_classwised_all(imgs, num_classes, id2trainid) with open(json_fn, "w") as f: json.dump(records, f, indent=4) loss_fn = os.path.join(cfg.DATASET.PCL_DIR, "loss_info.json") if os.path.isfile(loss_fn): logger.info("[*] Loading Loss info file: {}".format(loss_fn)) with open(loss_fn, "r") as f: loss_info = json.load(f) new_records = defaultdict(list) for k, v in records.items(): for item in v: img_n = os.path.basename(item[0]).split(".")[0] loss = loss_info[img_n]["ce_loss"] new_records[k].append((*item, loss)) else: logger.info("[*] Didn\'t find {}, so didn\'t use it.") new_records = defaultdict(list) for k, v in records.items(): for item in v: new_records[k].append((*item, 0)) records = new_records return records
def build_epoch(imgs, records, num_classes, mode): if not (mode == "train"): return imgs one_class_filter_pct = cfg.DATASET.ONE_CLASS_FILTER_PCT logger.info("[*] One Class Filter Percentage: {}".format( str(one_class_filter_pct))) num_imgs = int(len(imgs)) logger.info("[*] Number of images: {}".format(str(num_imgs))) imgs_one_classes = records["True"] num_one_class = int(len(imgs_one_classes) * one_class_filter_pct) imgs_multi_classes = records["False"] one_class_uniform = ramdom_sampling(records["True"], num_one_class) imgs_multi_classes.extend(one_class_uniform) return imgs_multi_classes
def epoch_step(self, state, current):
    '''
    :param state: the information to checkpoint
    :param current: the metric currently being monitored
    :return:
    '''
    if self.save_best_only:
        if self.monitor_op(current, self.best):
            logger.info(f"\nEpoch {state['epoch']}: {self.monitor} improved from {self.best:.5f} to {current:.5f}")
            self.best = current
            state['best'] = self.best
            best_path = self.base_path / self.model_name
            torch.save(state, str(best_path))
    else:
        filename = self.base_path / f"epoch_{state['epoch']}_{state[self.monitor]}_{self.arch}_model.bin"
        if state['epoch'] % self.epoch_freq == 0:
            logger.info(f"\nEpoch {state['epoch']}: save model to disk.")
            torch.save(state, str(filename))
def delete(update, context): message = update.message chat = message.chat data = message.text.split(" ") if len(data) < 2: message.reply_text("Usage:\n/del <username>") return username = data[1] deleted_flag = False jobs = [t.name for t in context.job_queue.jobs()] for job in context.job_queue.get_jobs_by_name(username): if job.context.get("chat") == chat.id: deleted_flag = True job.schedule_removal() logger.info(f"Deleted Jobs: {username}, Current Jobs: {jobs}") if deleted_flag: message.reply_text( f"Deleted successfully!\nusername: {username}\nCurrent Jobs: {jobs}" ) else: message.reply_text("You cannot delete it.")
def build_epoch(imgs, records, num_classes, mode): if not (mode == "train"): return imgs logger.info("[*] Filter the data with high loss.") records = loss_filter(records, cfg.DATASET.LOSS_UPPER_BOUND) logger.info("[*] Sampling the image with max num of image.") imgs_sampling = [] class_counter = 0 for class_id in range(num_classes): num_sampling = cfg.DATASET.NUM_IMG_PER_CLASS records_len = len(records[class_id]) if records_len == 0: pass else: class_records = random_sampling(records[class_id], num_sampling) imgs_sampling.extend(class_records) class_counter += 1 logger.info("[*] Sampling including {} classes.".format(class_counter)) imgs_sampling = filter_duplicate(imgs_sampling) return imgs_sampling
def add(update, context): message = update.message chat = message.chat data = message.text.split(" ") if len(data) < 3: message.reply_text( "Usage:\n/add <username> <password> [region-num]\nregin-num: \n1 - 上海\n2 - 湖北\n3 - 其他中国地区\n5 - 国外\n" ) return username, password = data[1], data[2] region = 1 if len(data) == 3 else data[3] for job in context.job_queue.get_jobs_by_name(username): job.schedule_removal() jobs = [t.name for t in context.job_queue.jobs()] context.job_queue.run_once( checkin_queue, 1, context={ "username": username, "password": password, "region": region, "chat": chat.id, }, ) context.job_queue.run_daily( checkin_queue, datetime.time( 0, min(2 + len(jobs), 59), 0, 0, datetime.timezone(datetime.timedelta(hours=8)), ), context={"username": username, "password": password, "chat": chat.id}, name=username, ) message.reply_text( f"Added successfully!\nusername: {username}\npassword: {password}\nCurrent Jobs: {jobs}" ) logger.info(f"Added Jobs: {username}, Current Jobs: {jobs}")
def build_epoch(imgs, records, num_classes, mode): """ Generate an epoch of image using uniform sampling Will not apply uniform sampling if not train or class uniform is off. Args: imgs: list of images: (img_fn, mask_fn) records: dict of classes which is list including img_fn, mask_fn, class_id num_classes: int mode: str Returns: imgs: list of images """ class_uniform_pct = cfg.DATASET.CLASS_UNIFORM_PCT if not (mode == "train" and class_uniform_pct): return imgs logger.info("[*] Class Uniform Percentage: {}".format( str(class_uniform_pct))) num_epoch = int(len(imgs)) logger.info("[*] Class Uniform items per Epoch: {}".format(str(num_epoch))) num_per_class = int((num_epoch * class_uniform_pct) / num_classes) class_uniform_count = num_per_class * num_classes num_rand = num_epoch - class_uniform_count imgs_uniform = ramdom_sampling(imgs, num_rand) for class_id in range(num_classes): num_per_class_biased = num_per_class records_len = len(records[class_id]) if records_len == 0: pass else: class_records = ramdom_sampling(records[class_id], num_per_class_biased) imgs_uniform.extend(class_records) return imgs_uniform
def checkin_queue(context): job = context.job username, password, region, chat = ( job.context.get("username"), job.context.get("password"), job.context.get("region"), job.context.get("chat"), ) s = requests.Session() s.headers.update( { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36" } ) retry_count = 5 message = context.bot.send_message(chat, f"Job: Running for {username}") for i in range(retry_count): result = login(s, username, password) if result: append_text = f"Login: {username} Successful!" logger.info(append_text) message = message.edit_text(f"{message.text}\n{append_text}") break else: append_text = f"Login: {username} Fail {i}" logger.warning(append_text) message = message.edit_text(f"{message.text}\n{append_text}") for i in range(retry_count): result = checkin(s, username, region) if result: append_text = f"Checkin: {username} Successful!" logger.info(append_text) message = message.edit_text(f"{message.text}\n{append_text}") break else: append_text = f"Checkin: {username} Fail {i}" logger.warning(append_text) message = message.edit_text(f"{message.text}\n{append_text}")
def checkin():
    data = {
        "xgh": USERNAME,
        "lon": "",
        "lat": "",
        "region": 1,
        "rylx": 4,
        "status": 0,
    }
    post = s.post(CHECKIN_URL, data=data)
    data = {
        "xgh": USERNAME,
        "alwaysinsh": 1,
        "fromaddr": "",
        "fromtime": "",
        "totime": "",
        "jtgj": "",
        "status": 0,
    }
    post = s.post(ARRSH_URL, data=data)
    logger.info("Checkin: Checkin...")
    soup = BeautifulSoup(post.content, "lxml")
    return "success" in str(soup.find("div", attrs={"class": "form-group"}))
def main():
    model = ResNet(ResidualBlock).to(device)
    model.reload()
    model.train()
    logger.info("Train: Init model")
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler_after = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
    scheduler = GradualWarmupScheduler(optimizer, 8, 10, after_scheduler=scheduler_after)
    train_dataloader = get_train_data_loader()
    loss_best = 1
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(tqdm(train_dataloader)):
            images = images.to(device)
            labels = labels.to(device)
            labels = labels.long()
            label1, label2 = labels[:, 0], labels[:, 1]
            optimizer.zero_grad()
            y1, y2 = model(images)
            loss1, loss2 = criterion(y1, label1), criterion(y2, label2)
            loss = loss1 + loss2
            # outputs = model(images)
            # loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        scheduler.step()
        logger.info(f"epoch: {epoch}, step: {i}, loss: {loss.item()}")
        model.save()
        if loss_best > loss.item():
            loss_best = loss.item()
            torch.save(model.state_dict(), "model/best.pkl")
            logger.info("Train: Saved best model")
    torch.save(model.state_dict(), "model/final.pkl")
    logger.info("Train: Saved last model")
def build_classwised_sets(imgs, num_classes, mode, id2trainid=None):
    """
    Build the Class-wised image sets
    """
    if not (mode == "train" and cfg.DATASET.CLASS_UNIFORM_PCT):
        return []

    json_fn = os.path.join(cfg.DATASET.PCL_DIR, "classwised_set.json")
    if os.path.isfile(json_fn):
        logger.info("[*] Loading Class-wised file: {}".format(json_fn))
        with open(json_fn, "r") as f:
            records = json.load(f)
        records = {int(k): v for k, v in records.items()}
        logger.info("[*] Found {} classes".format(len(records)))
    else:
        logger.info("[*] Didn't find {}, so building it.".format(json_fn))
        records = generate_classwised_all(imgs, num_classes, id2trainid)
        with open(json_fn, "w") as f:
            json.dump(records, f, indent=4)
    return records