def write_data(data_dir, idx, W, L, T, D, N, phi, alpha, beta, chains):
    B = generate_lda(T, W, D, N, phi, alpha)
    # Pickle needs binary mode; use a context manager so the file is closed.
    with open(os.path.join(data_dir, 'counts.pkl'), 'wb') as f:
        pickle.dump(B, f)
    write_pb(data_dir, idx, W, T, D, alpha, beta, B)
    write_stan(data_dir, idx, W, T, D, alpha, beta, B, chains=chains)
    write_prism(data_dir, idx, W, T, D, alpha, beta, B)
    write_txt(data_dir, idx, B, T, alpha, beta)
def main():
    txt_path = './data.txt'
    txt_list = load_txt(txt_path)
    data_path = './dataset.txt'
    dataset_txt = load_dataset(data_path)
    fin_result_list = []
    # Process each segmented sentence in turn
    print('{} sentences to do.'.format(len(txt_list)))
    for txt_line in txt_list:
        result_list = []
        seg_list = seg_words(txt_line)
        print('Sentence index : {}.'.format(txt_list.index(txt_line) + 1))
        for seg_word in tqdm.tqdm(list(set(seg_list))):
            if len(seg_word) <= 1:
                continue
            result = judge_txt(seg_word, dataset_txt)
            result_list.append((seg_word, result))
            # print('{} : {}'.format(seg_word, result))
        fin_result_list.extend(result_list)
    write_txt(fin_result_list, './result.txt')

    result_list = load_txt('./result.txt')
    result_list = [
        result.replace('(', '').replace(')', '').replace(' ', '').split(',')
        for result in result_list
    ]
    top_list = pridict(result_list, 10)
    print(top_list)
def __create_concat_txt(files, subTxt):
    """Generate concat.txt.

    Arguments:
        files {[string]} -- paths of the files to concatenate
        subTxt {[type]} -- name of the generated concat.txt
    """
    subs = []
    sub = "file '{0}'\n"
    for f in files:
        subs.append(sub.format(f))
    utils.write_txt(subTxt, subs)
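# Hedged usage sketch: the "file '...'" lines written by __create_concat_txt match the
# input format of ffmpeg's concat demuxer, so the list can presumably be consumed as
# below. The helper name and output path are illustrative, not part of the original code.
def _concat_with_ffmpeg(concat_txt, output_path='merged.mp4'):
    import subprocess
    subprocess.run(
        ['ffmpeg', '-f', 'concat', '-safe', '0', '-i', concat_txt,
         '-c', 'copy', output_path],
        check=True)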
def save_checkpoint(self, state, stage, index, is_best):
    # Save the weights once per epoch; if this checkpoint is the best so far, also copy
    # it to the "best" checkpoint. `index` distinguishes different cross-validation folds.
    pth_path = os.path.join(self.save_path,
                            '%s_%d_%d.pth' % (self.model_type, stage, index))
    torch.save(state, pth_path)
    if is_best:
        print('Saving Best Model.')
        write_txt(self.save_path, 'Saving Best Model.')
        shutil.copyfile(
            pth_path,
            os.path.join(self.save_path,
                         '%s_%d_%d_best.pth' % (self.model_type, stage, index)))
def modify_annotation(self, annotation_file):
    # Rewrite the CAD annotation file into the same format as VD.
    new_file = annotation_file.split('.txt')[0] + '_new.txt'
    open(new_file, 'w')  # create/truncate the output file
    f = open(annotation_file)
    lines = f.readlines()
    cur_frameId = int(lines[0].split('\t')[0])
    content_str = ''
    action_count = np.zeros([5])
    sep = ' '
    num = 0
    for line in lines:
        keywords = line.split('\t')
        frame_id = int(keywords[0])
        action = int(keywords[5])
        if frame_id % 10 == 1 and action != 1:
            action = action - 2
        x, y, w, h = int(keywords[1]), int(keywords[2]), int(keywords[3]), int(keywords[4])
        x = 0 if x < 0 else x
        y = 0 if y < 0 else y
        if w <= 0 or h <= 0:
            print('error!')
            break
        anno_str = sep + str(x) + sep + str(y) + sep + str(w) + sep + str(h) + sep + str(action)
        '''if frame_id == cur_frameId:
            action_label_count[action_label] += 1
            content_str = content_str + anno_str'''
        # A new frame id means the previous frame is complete: write it out and reset.
        if frame_id != cur_frameId:
            activity = np.argmax(action_count)
            content_str = str(cur_frameId) + sep + str(activity) + content_str + '\n'
            utils.write_txt(new_file, content_str, 'a')
            num += 1
            cur_frameId = frame_id
            content_str = ''
            action_count = np.zeros([5])
        action_count[action] += 1
        content_str = content_str + anno_str
    # Flush the last frame.
    activity = np.argmax(action_count)
    content_str = str(cur_frameId) + sep + str(activity) + content_str + '\n'
    utils.write_txt(new_file, content_str, 'a')
    num += 1
    return num
def get_src_trace(detailed_config, out_folder):
    # process the cmd
    trace_cmd = detailed_config['trace_cmd']
    poc = detailed_config['poc']
    replace_idx = np.where(np.asarray(trace_cmd) == '***')[0]
    cmd = dc(trace_cmd)
    replace_num = len(replace_idx)
    for id in range(replace_num):
        cmd[replace_idx[id]] = poc[id]
    # write the cmd
    cmd_path = os.path.join(out_folder, 'cmd.txt')
    utils.write_txt(cmd_path, [' '.join(cmd)])
    # get binary path
    bin_path = detailed_config['bin_path']
    # get the source trace
    tmp_folder = './tempDr'
    if not os.path.exists(tmp_folder):
        os.mkdir(tmp_folder)
    my_parser = parse_dwarf.DwarfParser(bin_path)
    flineNumberDict, fileBoundRangesList, fileBoundIndexList, src_filepath = my_parser.get_main_addr()
    ifSrcList = tracer.findIfSrcInOrderDyn(bin_path, src_filepath, flineNumberDict,
                                           fileBoundRangesList, fileBoundIndexList,
                                           cmdFile=cmd_path)
    logging.info("Got the source trace!")
    # process the source trace
    insn2src = {}
    src2insn = {}
    for item in ifSrcList:
        insn = item[0]
        src = '-'.join(item[1:3])
        if insn not in insn2src:
            insn2src[insn] = src
        if src in src2insn:
            src2insn[src].add(insn)
        else:
            src2insn[src] = {insn}
    info = {'raw': ifSrcList, 'insn2src': insn2src, 'src2insn': src2insn}
    # write the source trace
    output_path = os.path.join(out_folder, 'poc_source_trace.pkl')
    utils.write_pkl(output_path, info)
    logging.info("Recorded the source trace -> %s" % output_path)
    return insn2src, src2insn
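# Hedged read-back sketch for the trace written above, assuming utils.write_pkl is a
# plain pickle dump (not shown here). Only the output path and dict keys come from
# get_src_trace; the helper name is illustrative.
def load_src_trace(out_folder):
    import pickle
    with open(os.path.join(out_folder, 'poc_source_trace.pkl'), 'rb') as f:
        info = pickle.load(f)
    return info['insn2src'], info['src2insn']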
def load_checkpoint(self, load_optimizer=True):
    # Load the pretrained Encoder
    weight_path = os.path.join(self.save_path, self.resume)
    if os.path.isfile(weight_path):
        checkpoint = torch.load(weight_path)
        # Restore the model parameters, learning rate, optimizer state,
        # starting epoch and best dice from the checkpoint.
        if torch.cuda.is_available():
            self.unet.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.unet.load_state_dict(checkpoint['state_dict'])
        self.start_epoch = checkpoint['epoch']
        self.max_dice = checkpoint['max_dice']
        if load_optimizer:
            self.lr = checkpoint['lr']
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        print('%s is Successfully Loaded from %s' % (self.model_type, weight_path))
        write_txt(self.save_path,
                  '%s is Successfully Loaded from %s' % (self.model_type, weight_path))
    else:
        raise FileNotFoundError(
            "Can not find weight file in {}".format(weight_path))
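# Hedged round-trip sketch for save_checkpoint/load_checkpoint. The `solver` object and
# the concrete stage/index values are illustrative; the state-dict keys follow the
# training loops further below.
# state = {
#     'epoch': epoch,
#     'state_dict': solver.unet.module.state_dict(),
#     'max_dice': solver.max_dice,
#     'optimizer': solver.optimizer.state_dict(),
#     'lr': solver.lr,
# }
# solver.save_checkpoint(state, stage=1, index=0, is_best=True)
# # Later, with solver.resume pointing at the saved .pth file:
# solver.load_checkpoint(load_optimizer=True)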
def put_index(movie_id, total, filename):
    utils.write_txt('%s,%s' % (movie_id, total), filename)
async def writelog(message):
    id = str(message.author.id)
    if not user_info[id]['isAdmin']:
        msg = f"({str(message.channel)}) {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {user_info[id]['username']} : {message.content.strip().lower()}"
        write_txt('test_collect.txt', msg)
predict = mlp_model.predict(validation_data)

'''
torch_mlp = model.torch_train(train_data, train_label, validation_data,
                              validation_label)
test_label = model.torch_predict(test_data, torch_mlp)
predict_torch = model.torch_predict(validation_data, torch_mlp)
for i in range(predict.shape[0]):
    print(str(predict[i]) + str(predict_torch[i]) + str(validation_label[i]))
'''

# Linear regression
linear_model = model.Linear_model_build(train_data, train_label)
predict_linear = linear_model.predict(validation_data)
print("linear_loss:" + str(mse(predict_linear, validation_label)))
linear_test_label = linear_model.predict(test_data)
utils.write_txt(linear_test_label)

# Drop training samples whose linear-regression residual exceeds 1, then refit the model.
predict = linear_model.predict(train_data)
delete_list = []
for i in range(train_data.shape[0]):
    if abs(predict[i] - train_label[i]) > 1:
        delete_list.append(i)
data = []
label = []
for i in range(train_data.shape[0]):
    if i not in delete_list:
        data.append(list(train_data[i]))
        label.append(train_label[i])
train_data = np.array(data)
train_label = np.reshape(np.array(label), [-1, 1])
linear_model = model.Linear_model_build(train_data, train_label)
def validation(self, stage=1):
    # eval() is required during validation: it switches the BN and dropout layers to
    # evaluation mode. torch.no_grad() skips gradient bookkeeping, which speeds things
    # up and allows a larger batch size.
    d_h, d_l = 0, 0
    self.unet.eval()
    tbar = tqdm.tqdm(self.valid_loader)
    loss_sum, dice_sum = 0, 0
    if stage == 1:
        criterion = self.criterion
    elif stage == 2:
        criterion = self.criterion_stage2
    elif stage == 3:
        criterion = self.criterion_stage3
    with torch.no_grad():
        for i, (images, masks) in enumerate(tbar):
            images = images.to(self.device)
            masks = masks.to(self.device)

            net_output = self.unet(images)
            net_output_flat = net_output.view(net_output.size(0), -1)
            masks_flat = masks.view(masks.size(0), -1)
            loss_set = criterion(net_output_flat, masks_flat)

            try:
                loss_num = len(loss_set)
            except TypeError:
                loss_num = 1

            # The criterion may return a single loss or a tuple of losses.
            if loss_num > 1:
                loss = loss_set[0]
            else:
                loss = loss_set
            loss_sum += loss.item()

            # Dice coefficient: apply sigmoid to the logits and threshold (default 0.5).
            net_output_flat_sign = (torch.sigmoid(net_output_flat) > 0.5).float()
            dice = self.dice_overall(net_output_flat_sign, masks_flat).mean()

            # if dice > 0.8:
            #     # masks_i = masks.view((masks.shape[0], -1, masks.shape[1], masks.shape[2]))
            #     # img_show = torch.cat((images, masks, net_output))
            #     self.writer.add_images('images_Dice>0.8', images, d_h)
            #     self.writer.add_images('masks_Dice>0.8', masks_i, d_h)
            #     self.writer.add_images('preds_Dice>0.8', net_output, d_h)
            #     d_h += 1
            # if dice < 0.15:
            #     # masks_i = masks.view((masks.shape[0], -1, masks.shape[1], masks.shape[2]))
            #     # img_show = torch.cat((images, masks, net_output))
            #     self.writer.add_images('images_Dice<0.15', images, d_l)
            #     self.writer.add_images('masks_Dice<0.15', masks_i, d_l)
            #     self.writer.add_images('preds_Dice<0.15', net_output, d_l)
            #     d_l += 1

            dice_sum += dice.item()

            descript = "Val Loss: {:.7f}, dice: {:.7f}".format(loss.item(), dice.item())
            tbar.set_description(desc=descript)

    loss_mean, dice_mean = loss_sum / len(tbar), dice_sum / len(tbar)
    print("Val Loss: {:.7f}, dice: {:.7f}".format(loss_mean, dice_mean))
    write_txt(self.save_path,
              "Val Loss: {:.7f}, dice: {:.7f}".format(loss_mean, dice_mean))
    return loss_mean, dice_mean
def train_stage2(self, index):
    # for param in self.unet.module.encoder.parameters():
    #     param.requires_grad = False
    # self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.unet.module.parameters()),
    #                             self.lr_stage2, weight_decay=self.weight_decay)

    # Freeze the BN layers, see https://zhuanlan.zhihu.com/p/65439075 and
    # https://www.kaggle.com/c/siim-acr-pneumothorax-segmentation/discussion/100736591271
    # for more information.
    def set_bn_eval(m):
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            m.eval()

    # self.optimizer = optim.Adam([{'params': self.unet.decoder.parameters(), 'lr': 1e-5},
    #                              {'params': self.unet.encoder.parameters(), 'lr': 1e-7}])
    # self.optimizer = optim.Adam(self.unet.module.parameters(), self.lr_stage2,
    #                             weight_decay=self.weight_decay_stage2)
    # self.optimizer = NoamLR(self.unet.module.parameters(), 10)
    self.optimizer = optim.SGD(self.unet.module.parameters(),
                               lr=self.lr_stage2,
                               momentum=self.momentum,
                               weight_decay=self.weight_decay_stage2)

    # Two resume scenarios: stage 2 has not been trained yet and the stage-1 parameters
    # must be loaded, or stage 2 was interrupted halfway and must be continued.
    if self.resume:
        # Stage 2 was interrupted halfway, so reload it. TODO
        if self.resume.split('_')[-3] == '2':
            # load_optimizer=True would also restore the learning rate and optimizer.
            self.load_checkpoint(load_optimizer=False)
            '''
            CosineAnnealingLR: if ['initial_lr'] exists, decay starts from initial_lr;
            if it does not, CosineAnnealingLR adds an initial_lr key to
            optimizer.param_groups whose value equals lr.
            Reset the initial learning rate: load_checkpoint restores the optimizer, but
            its initial_lr is still the old one, so overwrite it with self.lr so that
            decay starts from self.lr.
            '''
            self.optimizer.param_groups[0]['initial_lr'] = self.lr
        # Stage 1 finished but stage 2 was not started right away (there was a pause).
        elif self.resume.split('_')[-3] == '1':
            self.load_checkpoint(load_optimizer=False)
            self.start_epoch = 0
            self.max_dice = 0
    # Stage 2 follows stage 1 directly, with no pause in between.
    else:
        self.start_epoch = 0
        self.max_dice = 0

    # Keep the logs from being overwritten when training resumes after an interruption.
    global_step_before = self.start_epoch * len(self.train_loader)

    stage2_epoches = self.epoch_stage2 - self.start_epoch
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer, 30)

    d_h, d_l = 0, 0
    for epoch in range(self.start_epoch, self.epoch_stage2):
        # if self.epoch >= 20:
        #     self.unet.apply(set_bn_eval)
        epoch += 1
        self.unet.train(True)
        epoch_loss = 0
        self.reset_grad()  # needed when accumulating gradients
        tbar = tqdm.tqdm(self.train_loader)
        for i, (images, masks) in enumerate(tbar):
            # GT : Ground Truth
            images = images.to(self.device)
            masks = masks.to(self.device)
            assert images.size(2) == 512

            # SR : Segmentation Result
            net_output = self.unet(images)
            net_output_flat = net_output.view(net_output.size(0), -1)
            masks_flat = masks.view(masks.size(0), -1)
            loss_set = self.criterion_stage2(net_output_flat, masks_flat)

            try:
                loss_num = len(loss_set)
            except TypeError:
                loss_num = 1

            # The criterion may return a single loss or a tuple of losses.
            if loss_num > 1:
                for loss_index, loss_item in enumerate(loss_set):
                    if loss_index > 0:
                        loss_name = 'stage2_loss_%d' % loss_index
                        self.writer.add_scalar(loss_name, loss_item.item(),
                                               global_step_before + i)
                loss = loss_set[0]
            else:
                loss = loss_set
            epoch_loss += loss.item()

            # Backprop + optimize, see
            # https://discuss.pytorch.org/t/why-do-we-need-to-set-the-gradients-manually-to-zero-in-pytorch/4903/20
            # for Accumulating Gradients
            if epoch <= self.epoch_stage2 - self.epoch_stage2_accumulation:
                self.reset_grad()
                loss.backward()
                self.optimizer.step()
            else:
                # loss = loss / self.accumulation_steps  # Normalize our loss (if averaged)
                loss.backward()  # Backward pass
                if (i + 1) % self.accumulation_steps == 0:  # Wait for several backward steps
                    self.optimizer.step()  # Now we can do an optimizer step
                    self.reset_grad()

            params_groups_lr = str()
            for group_ind, param_group in enumerate(self.optimizer.param_groups):
                params_groups_lr = params_groups_lr + 'params_group_%d' % (group_ind) + \
                                   ': %.12f, ' % (param_group['lr'])

            # Log to tensorboard, one point per step.
            self.writer.add_scalar('Stage2_train_loss', loss.item(),
                                   global_step_before + i)

            descript = "Train Loss: %.7f, lr: %s" % (loss.item(), params_groups_lr)
            tbar.set_description(desc=descript)

        # Advance global_step_before for the next epoch.
        global_step_before += len(tbar)

        # Print the log info
        print('Finish Stage2 Epoch [%d/%d], Average Loss: %.7f' %
              (epoch, self.epoch_stage2, epoch_loss / len(tbar)))
        write_txt(self.save_path,
                  'Finish Stage2 Epoch [%d/%d], Average Loss: %.7f' %
                  (epoch, self.epoch_stage2, epoch_loss / len(tbar)))

        # Validate the model, save the weights and write the log.
        loss_mean, dice_mean = self.validation(stage=2)
        if dice_mean > self.max_dice:
            is_best = True
            self.max_dice = dice_mean
        else:
            is_best = False

        self.lr = lr_scheduler.get_lr()
        state = {
            'epoch': epoch,
            'state_dict': self.unet.module.state_dict(),
            'max_dice': self.max_dice,
            'optimizer': self.optimizer.state_dict(),
            'lr': self.lr
        }
        self.save_checkpoint(state, 2, index, is_best)

        self.writer.add_scalar('Stage2_val_loss', loss_mean, epoch)
        self.writer.add_scalar('Stage2_val_dice', dice_mean, epoch)
        self.writer.add_scalar('Stage2_lr', self.lr[0], epoch)

        # Learning rate decay
        lr_scheduler.step()
def train(self, index):
    # for param in self.unet.module.encoder.parameters():
    #     param.requires_grad = False
    # self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.unet.module.parameters()),
    #                             self.lr, weight_decay=self.weight_decay)
    self.optimizer = optim.Adam(self.unet.module.parameters(), self.lr,
                                weight_decay=self.weight_decay)

    # If training was interrupted, the previously trained parameters and the learning
    # rate need to be reloaded. TODO: the resumed learning rate is not picked up
    # correctly, so resume cannot be used for now.
    # if self.resume:
    #     self.load_checkpoint(load_optimizer=True)
    #     '''
    #     CosineAnnealingLR: if ['initial_lr'] exists, decay starts from initial_lr;
    #     if it does not, CosineAnnealingLR adds an initial_lr key to
    #     optimizer.param_groups whose value equals lr.
    #     Reset the initial learning rate: load_checkpoint restores the optimizer, but
    #     its initial_lr is still the old one, so overwrite it with self.lr so that
    #     decay starts from self.lr.
    #     '''
    #     self.optimizer.param_groups[0]['initial_lr'] = self.lr

    stage1_epoches = self.epoch_stage1 - self.start_epoch
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer, 30)
    # lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode='max', factor=0.2,
    #                                                     patience=5, verbose=True, threshold=0.0001,
    #                                                     threshold_mode='rel', cooldown=0, min_lr=0,
    #                                                     eps=1e-08)

    # Keep the logs from being overwritten when training resumes after an interruption.
    global_step_before = self.start_epoch * len(self.train_loader)

    for epoch in range(self.start_epoch, self.epoch_stage1):
        epoch += 1
        self.unet.train(True)

        # Learning rate restart
        if epoch >= 18:
            def set_bn_eval(m):
                classname = m.__class__.__name__
                if classname.find('BatchNorm') != -1:
                    m.eval()
            self.unet.apply(set_bn_eval)
        # if epoch == 25:
        #     self.optimizer.param_groups[0]['initial_lr'] = 0.00005
        #     lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer, 25)

        epoch_loss = 0
        tbar = tqdm.tqdm(self.train_loader)
        for i, (images, masks) in enumerate(tbar):
            # GT : Ground Truth
            images = images.to(self.device)
            masks = masks.to(self.device)

            # SR : Segmentation Result
            net_output = self.unet(images)
            net_output_flat = net_output.view(net_output.size(0), -1)
            masks_flat = masks.view(masks.size(0), -1)
            loss_set = self.criterion(net_output_flat, masks_flat)

            try:
                loss_num = len(loss_set)
            except TypeError:
                loss_num = 1

            # The criterion may return a single loss or a tuple of losses.
            if loss_num > 1:
                for loss_index, loss_item in enumerate(loss_set):
                    if loss_index > 0:
                        loss_name = 'stage1_loss_%d' % loss_index
                        self.writer.add_scalar(loss_name, loss_item.item(),
                                               global_step_before + i)
                loss = loss_set[0]
            else:
                loss = loss_set
            epoch_loss += loss.item()

            # Backprop + optimize
            self.reset_grad()
            loss.backward()
            self.optimizer.step()

            params_groups_lr = str()
            for group_ind, param_group in enumerate(self.optimizer.param_groups):
                params_groups_lr = params_groups_lr + 'params_group_%d' % (group_ind) + \
                                   ': %.12f, ' % (param_group['lr'])

            # Log to tensorboard, one point per step.
            self.writer.add_scalar('Stage1_train_loss', loss.item(),
                                   global_step_before + i)

            descript = "Train Loss: %.7f, lr: %s" % (loss.item(), params_groups_lr)
            tbar.set_description(desc=descript)

        # Advance global_step_before for the next epoch.
        global_step_before += len(tbar)

        # Print the log info
        print('Finish Stage1 Epoch [%d/%d], Average Loss: %.7f' %
              (epoch, self.epoch_stage1, epoch_loss / len(tbar)))
        write_txt(self.save_path,
                  'Finish Stage1 Epoch [%d/%d], Average Loss: %.7f' %
                  (epoch, self.epoch_stage1, epoch_loss / len(tbar)))

        # Validate the model, save the weights and write the log.
        loss_mean, dice_mean = self.validation(stage=1)
        if dice_mean > self.max_dice:
            is_best = True
            self.max_dice = dice_mean
        else:
            is_best = False

        self.lr = lr_scheduler.get_lr()
        state = {
            'epoch': epoch,
            'state_dict': self.unet.module.state_dict(),
            'max_dice': self.max_dice,
            'optimizer': self.optimizer.state_dict(),
            'lr': self.lr
        }
        self.save_checkpoint(state, 1, index, is_best)

        self.writer.add_scalars('Stage1_val_loss_dice',
                                {'val_loss': loss_mean, 'val_dice': dice_mean}, epoch)
        self.writer.add_scalar('Stage1_lr', self.lr[0], epoch)

        # Learning rate decay
        lr_scheduler.step()
def write_pb_txt(data_dir, idx, W, L, T, D, N, phi, alpha, beta):
    # B = generate_lda(T, W, D, N, phi, alpha)
    B, phi = gen_lda2(T, W, D, N, alpha)
    write_pb(data_dir, idx, W, T, D, alpha, beta, B)
    write_txt(data_dir, idx, B)
def write_data(data_dir, idx, W, L, T, D, N, phi, alpha, beta, chains):
    B = generate_lda(T, W, D, N, phi, alpha)
    write_pb(data_dir, idx, W, T, D, alpha, beta, B, write_params=False)
    write_stan(data_dir, idx, W, T, D, alpha, beta, B, chains=chains, write_params=False)
    write_prism(data_dir, idx, W, T, D, alpha, beta, B, write_params=False)
    write_txt(data_dir, idx, B, T, alpha, beta, write_params=False)
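# Hedged usage sketch for write_data above. Every value below is illustrative, and the
# assumption that phi is a T x W topic-word distribution consumed by generate_lda is not
# confirmed by this file (note that L is not used inside write_data itself).
# W, L, T, D, N = 1000, 0, 10, 50, 100   # vocabulary size, unused, topics, docs, words per doc
# alpha, beta, chains = 0.1, 0.01, 4
# phi = np.random.dirichlet([beta] * W, size=T)   # one word distribution per topic
# write_data('./lda_data', idx=0, W=W, L=L, T=T, D=D, N=N,
#            phi=phi, alpha=alpha, beta=beta, chains=chains)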
def save_setting(dc):
    """Save the settings to the JSON file.

    Booleans are represented with the strings 1 or 0.
    """
    s = json.dumps(dc, ensure_ascii=False, indent=2)
    utils.write_txt(setting_file, s)
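# Hedged companion sketch: reading the settings back. The helper name is an assumption;
# only the JSON layout (and the 1/0 convention for booleans) comes from save_setting.
def load_setting():
    """Load the settings written by save_setting from the JSON file."""
    with open(setting_file, encoding='utf-8') as f:
        return json.loads(f.read())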