Example #1
def write_data(data_dir, idx, W, L, T, D, N, phi, alpha, beta, chains):
    B = generate_lda(T, W, D, N, phi, alpha)
    with open(os.path.join(data_dir, 'counts.pkl'), 'wb') as f:
        pickle.dump(B, f)
    write_pb(data_dir, idx, W, T, D, alpha, beta, B)
    write_stan(data_dir, idx, W, T, D, alpha, beta, B, chains=chains)
    write_prism(data_dir, idx, W, T, D, alpha, beta, B)
    write_txt(data_dir, idx, B, T, alpha, beta)
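
The helpers generate_lda, write_pb, write_stan, write_prism and write_txt come from the surrounding project and are not shown. For orientation, here is a minimal sketch of the standard LDA generative process that a generate_lda with this signature presumably implements; the function name, seeding and return shape are illustrative assumptions.

import numpy as np

def generate_lda_sketch(T, W, D, N, phi, alpha, seed=0):
    # Hypothetical sketch of the standard LDA generative process (an assumption,
    # not the project's actual generate_lda). phi is a T x W topic-word matrix,
    # alpha a symmetric Dirichlet prior over topics, N the number of words per document.
    rng = np.random.default_rng(seed)
    B = np.zeros((D, W), dtype=int)           # document-word count matrix
    for d in range(D):
        theta = rng.dirichlet([alpha] * T)    # per-document topic proportions
        for _ in range(N):
            z = rng.choice(T, p=theta)        # draw a topic
            w = rng.choice(W, p=phi[z])       # draw a word from that topic
            B[d, w] += 1
    return B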
Example #2
def main():
    txt_path = './data.txt'
    txt_list = load_txt(txt_path)
    data_path = './dataset.txt'
    dataset_txt = load_dataset(data_path)
    fin_result_list = []
    # process each segmented sentence one by one
    print('{} sentences to do.'.format(len(txt_list)))
    for idx, txt_line in enumerate(txt_list, start=1):
        result_list = []
        seg_list = seg_words(txt_line)
        print('Sentence index : {}.'.format(idx))
        for seg_word in tqdm.tqdm(list(set(seg_list))):
            if len(seg_word) <= 1:
                continue
            result = judge_txt(seg_word, dataset_txt)
            result_list.append((seg_word, result))
            # print('{} : {}'.format(seg_word, result))
        fin_result_list.extend(result_list)
    write_txt(fin_result_list, './result.txt')
    result_list = load_txt('./result.txt')
    result_list = [
        result.replace('(', '').replace(')', '').replace(' ', '').split(',')
        for result in result_list
    ]
    top_list = pridict(result_list, 10)
    print(top_list)
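
seg_words, judge_txt, load_txt, load_dataset, write_txt and pridict are project helpers that are not shown. A minimal sketch of what seg_words might look like, assuming jieba is used for Chinese word segmentation (an assumption; the original segmenter is not given):

import jieba

def seg_words(sentence):
    # Hypothetical segmenter for illustration only: split a Chinese sentence
    # into words with jieba and drop empty tokens.
    return [w.strip() for w in jieba.lcut(sentence) if w.strip()]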
Example #3
def __create_concat_txt(files, subTxt):
    """生成 concat.txt

    Arguments:
        files {[string]} --  要合并的文件地址
        subTxt {[type]} --  concat.txt 的名称
    """
    subs = []
    sub = "file '{0}'\n"
    for f in files:
        subs.append(sub.format(f))
    utils.write_txt(subTxt, subs)
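
The `file '...'` lines written here follow the input format of ffmpeg's concat demuxer, so the generated list can presumably be consumed like this (a usage sketch; the file names and output path are made up):

import subprocess

__create_concat_txt(['part1.mp4', 'part2.mp4'], 'concat.txt')
# Merge the listed files without re-encoding, using the concat demuxer.
subprocess.run(
    ['ffmpeg', '-f', 'concat', '-safe', '0', '-i', 'concat.txt', '-c', 'copy', 'merged.mp4'],
    check=True)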
Example #4
    def save_checkpoint(self, state, stage, index, is_best):
        # Save the weights once per epoch; if this is the best model so far, also copy
        # them to the best-weights file. `index` distinguishes different cross-validation folds.
        pth_path = os.path.join(
            self.save_path, '%s_%d_%d.pth' % (self.model_type, stage, index))
        torch.save(state, pth_path)
        if is_best:
            print('Saving Best Model.')
            write_txt(self.save_path, 'Saving Best Model.')
            shutil.copyfile(
                pth_path,
                os.path.join(
                    self.save_path,
                    '%s_%d_%d_best.pth' % (self.model_type, stage, index)))
Example #5
    def modify_annotation(self, annotation_file):
        # modify the annotation for CAD, in the same way as for VD
        new_file = annotation_file.split('.txt')[0] + '_new.txt'
        open(new_file, 'w').close()  # create / truncate the output file

        with open(annotation_file) as f:
            lines = f.readlines()
        cur_frameId = int(lines[0].split('\t')[0])
        content_str = ''
        action_count = np.zeros([5])
        sep = ' '
        num = 0
        for line in lines:
            keywords = line.split('\t')
            frame_id = int(keywords[0])
            action = int(keywords[5])
            if frame_id % 10 == 1 and action != 1:
                action = action - 2
                x, y, w, h = int(keywords[1]), int(keywords[2]), int(
                    keywords[3]), int(keywords[4])
                x = 0 if x < 0 else x
                y = 0 if y < 0 else y
                if w <= 0 or h <= 0:
                    print('error!')
                    break
                anno_str = sep + str(x) + sep + str(y) + sep + str(
                    w) + sep + str(h) + sep + str(action)
                '''if frame_id == cur_frameId:
                    action_label_count[action_label] += 1 
                    content_str = content_str + anno_str'''
                if frame_id != cur_frameId:
                    activity = np.argmax(action_count)
                    content_str = str(cur_frameId) + sep + str(
                        activity) + content_str + '\n'
                    utils.write_txt(new_file, content_str, 'a')
                    num += 1
                    cur_frameId = frame_id
                    content_str = ''
                    action_count = np.zeros([5])

                action_count[action] += 1
                content_str = content_str + anno_str
        activity = np.argmax(action_count)
        content_str = str(cur_frameId) + sep + str(
            activity) + content_str + '\n'
        utils.write_txt(new_file, content_str, 'a')
        num += 1
        return num
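
From the indexing above (keywords[0], keywords[1..4], keywords[5]) each annotation line is tab-separated as frame_id, x, y, w, h, action. A small parsing illustration with a made-up line:

# Assumed input layout (tab-separated), inferred from the code above:
# frame_id    x    y    w    h    action    ...
sample_line = '101\t23\t45\t60\t120\t3\n'
fields = sample_line.split('\t')
frame_id, action = int(fields[0]), int(fields[5])
x, y, w, h = (int(v) for v in fields[1:5])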
Example #6
def get_src_trace(detailed_config, out_folder):
    # process the cmd
    trace_cmd = detailed_config['trace_cmd']
    poc = detailed_config['poc']
    replace_idx = np.where(np.asarray(trace_cmd) == '***')[0]
    cmd = dc(trace_cmd)
    replace_num = len(replace_idx)
    for i in range(replace_num):
        cmd[replace_idx[i]] = poc[i]
    # write the cmd
    cmd_path = os.path.join(out_folder, 'cmd.txt')
    utils.write_txt(cmd_path, [' '.join(cmd)])
    # get binary path
    bin_path = detailed_config['bin_path']
    # get the source trace
    tmp_folder = './tempDr'
    if not os.path.exists(tmp_folder):
        os.mkdir(tmp_folder)
    my_parser = parse_dwarf.DwarfParser(bin_path)
    flineNumberDict, fileBoundRangesList, fileBoundIndexList, src_filepath = \
        my_parser.get_main_addr()
    ifSrcList = tracer.findIfSrcInOrderDyn(bin_path,
                                           src_filepath,
                                           flineNumberDict,
                                           fileBoundRangesList,
                                           fileBoundIndexList,
                                           cmdFile=cmd_path)
    logging.info("Got the source trace!")
    # process the source trace
    insn2src = {}
    src2insn = {}
    for item in ifSrcList:
        insn = item[0]
        src = '-'.join(item[1:3])
        if insn not in insn2src:
            insn2src[insn] = src
        if src in src2insn:
            src2insn[src].add(insn)
        else:
            src2insn[src] = {insn}
    info = {'raw': ifSrcList, 'insn2src': insn2src, 'src2insn': src2insn}
    # write the source trace
    output_path = os.path.join(out_folder, 'poc_source_trace.pkl')
    utils.write_pkl(output_path, info)
    logging.info("Recorded the source trace -> %s" % output_path)
    return insn2src, src2insn
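
The loop at the top of get_src_trace replaces every '***' placeholder in trace_cmd with the corresponding entry of poc before the command line is written out. A standalone illustration of that substitution, assuming dc is copy.deepcopy as the alias suggests (the command and poc values are made up):

import numpy as np
from copy import deepcopy as dc

trace_cmd = ['./target_bin', '-o', 'out', '***']
poc = ['crash_input.poc']
replace_idx = np.where(np.asarray(trace_cmd) == '***')[0]
cmd = dc(trace_cmd)
for i, pos in enumerate(replace_idx):
    cmd[pos] = poc[i]
assert cmd == ['./target_bin', '-o', 'out', 'crash_input.poc']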
Example #7
    def load_checkpoint(self, load_optimizer=True):
        # Load the pretrained Encoder
        weight_path = os.path.join(self.save_path, self.resume)
        if os.path.isfile(weight_path):
            checkpoint = torch.load(weight_path)
            # load the model weights, learning rate, optimizer state, start epoch, best dice, etc.
            if torch.cuda.is_available():
                self.unet.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.unet.load_state_dict(checkpoint['state_dict'])
            self.start_epoch = checkpoint['epoch']
            self.max_dice = checkpoint['max_dice']
            if load_optimizer:
                self.lr = checkpoint['lr']
                self.optimizer.load_state_dict(checkpoint['optimizer'])

            print('%s is Successfully Loaded from %s' %
                  (self.model_type, weight_path))
            write_txt(
                self.save_path, '%s is Successfully Loaded from %s' %
                (self.model_type, weight_path))
        else:
            raise FileNotFoundError(
                "Can not find weight file in {}".format(weight_path))
Example #8
def put_index(movie_id, total, filename):
    utils.write_txt('%s,%s' % (movie_id, total), filename)
Example #9
async def writelog(message):
    user_id = str(message.author.id)

    if not user_info[user_id]['isAdmin']:
        msg = f"({str(message.channel)}) {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {user_info[user_id]['username']} : {message.content.strip().lower()}"
        write_txt('test_collect.txt', msg)
Example #10
predict = mlp_model.predict(validation_data)
'''
torch_mlp = model.torch_train(train_data, train_label, validation_data, validation_label)
test_label = model.torch_predict(test_data, torch_mlp)
predict_torch = model.torch_predict(validation_data, torch_mlp)
for i in range(predict.shape[0]):
	print(str(predict[i]) + str(predict_torch[i]) + str(validation_label[i]))
'''

# linear regression
linear_model = model.Linear_model_build(train_data, train_label)
predict_linear = linear_model.predict(validation_data)
print("linear_loss:" + str(mse(predict_linear, validation_label)))
linear_test_label = linear_model.predict(test_data)
utils.write_txt(linear_test_label)

predict = linear_model.predict(train_data)
delete_list = []
for i in range(train_data.shape[0]):
    if abs(predict[i] - train_label[i]) > 1:
        delete_list.append(i)
data = []
label = []
for i in range(train_data.shape[0]):
    if i not in delete_list:
        data.append(list(train_data[i]))
        label.append(train_label[i])
train_data = np.array(data)
train_label = np.reshape(np.array(label), [-1, 1])
linear_model = model.Linear_model_build(train_data, train_label)
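
The filtering loops above discard every training sample whose absolute residual under the first linear fit exceeds 1 and then refit. The same selection can be written with a numpy boolean mask; this is a sketch applied to the pre-filtering arrays, under the assumption that predict and train_label line up element-wise:

residual = np.abs(predict.reshape(-1) - train_label.reshape(-1))
keep = residual <= 1                       # samples to retain
train_data = train_data[keep]
train_label = train_label[keep].reshape(-1, 1)
linear_model = model.Linear_model_build(train_data, train_label)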
Example #11
    def validation(self, stage=1):
        d_h, d_l = 0, 0
        # During validation, train(False)/eval() is required so that the BN and dropout layers run in eval mode.
        # torch.no_grad() is optional: inside this context manager no gradients are tracked, which speeds things up and allows a larger batch size.
        self.unet.eval()
        tbar = tqdm.tqdm(self.valid_loader)
        loss_sum, dice_sum = 0, 0
        if stage == 1:
            criterion = self.criterion
        elif stage == 2:
            criterion = self.criterion_stage2
        elif stage == 3:
            criterion = self.criterion_stage3
        with torch.no_grad():
            for i, (images, masks) in enumerate(tbar):
                images = images.to(self.device)
                masks = masks.to(self.device)
                net_output = self.unet(images)
                net_output_flat = net_output.view(net_output.size(0), -1)
                masks_flat = masks.view(masks.size(0), -1)

                loss_set = criterion(net_output_flat, masks_flat)
                try:
                    loss_num = len(loss_set)
                except TypeError:
                    loss_num = 1

                # handle the cases according to how many losses were returned
                if loss_num > 1:
                    loss = loss_set[0]
                else:
                    loss = loss_set
                loss_sum += loss.item()

                # Compute the Dice coefficient: the predictions go through a sigmoid and a threshold (default 0.5)
                net_output_flat_sign = (torch.sigmoid(net_output_flat) >
                                        0.5).float()
                dice = self.dice_overall(net_output_flat_sign,
                                         masks_flat).mean()

                # if dice>0.8:
                #
                #     masks_i = masks.view((masks.shape[0],-1,masks.shape[1],masks.shape[2]))
                #     # img_show = torch.cat((images,masks,net_output))
                #     self.writer.add_images('images_Dice>0.8',images, d_h)
                #     self.writer.add_images('masks_Dice>0.8',masks_i, d_h)
                #     self.writer.add_images('preds_Dice>0.8',net_output, d_h)
                #     d_h += 1
                #
                # if dice<0.15:
                #
                #     masks_i = masks.view((masks.shape[0],-1,masks.shape[1],masks.shape[2]))
                #     # img_show = torch.cat((images, masks, net_output))
                #     self.writer.add_images('images_Dice<0.15', images, d_l)
                #     self.writer.add_images('masks_Dice<0.15', masks_i, d_l)
                #     self.writer.add_images('preds_Dice<0.15', net_output, d_l)
                #     d_l += 1
                dice_sum += dice.item()

                descript = "Val Loss: {:.7f}, dice: {:.7f}".format(
                    loss.item(), dice.item())
                tbar.set_description(desc=descript)

        loss_mean, dice_mean = loss_sum / len(tbar), dice_sum / len(tbar)
        print("Val Loss: {:.7f}, dice: {:.7f}".format(loss_mean, dice_mean))
        write_txt(
            self.save_path,
            "Val Loss: {:.7f}, dice: {:.7f}".format(loss_mean, dice_mean))
        return loss_mean, dice_mean
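
dice_overall is defined elsewhere in the class; a minimal per-sample Dice coefficient that is consistent with how it is used above (binary tensors flattened to (batch, -1)) could look like the sketch below. The smoothing constant is an assumption.

import torch

def dice_overall_sketch(preds, targets, eps=1e-7):
    # Per-sample Dice = 2*|P ∩ T| / (|P| + |T|) on (batch, n) binary tensors,
    # matching the thresholded predictions and flattened masks above.
    intersection = (preds * targets).sum(dim=1)
    union = preds.sum(dim=1) + targets.sum(dim=1)
    return (2.0 * intersection + eps) / (union + eps)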
Example #12
    def train_stage2(self, index):
        # for param in self.unet.module.encoder.parameters():
        #     param.requires_grad = False
        # self.optimizer = optim.Adam(filter(lambda p:p.requires_grad, self.unet.module.parameters()),self.lr_stage2,weight_decay=self.weight_decay)

        # Freeze the BN layers, see https://zhuanlan.zhihu.com/p/65439075 and https://www.kaggle.com/c/siim-acr-pneumothorax-segmentation/discussion/100736591271 for more information
        def set_bn_eval(m):
            classname = m.__class__.__name__
            if classname.find('BatchNorm') != -1:
                m.eval()

        # self.optimizer = optim.Adam([{'params': self.unet.decoder.parameters(), 'lr': 1e-5}, {'params': self.unet.encoder.parameters(), 'lr': 1e-7},])
        # self.optimizer = optim.Adam(self.unet.module.parameters(), self.lr_stage2, weight_decay=self.weight_decay_stage2
        #                             )
        # self.optimizer = NoamLR(self.unet.module.parameters(),10)
        self.optimizer = optim.SGD(self.unet.module.parameters(),
                                   lr=self.lr_stage2,
                                   momentum=self.momentum,
                                   weight_decay=self.weight_decay_stage2)
        # Resuming covers two cases: stage 2 has not started yet and the stage-1 weights are loaded, or stage 2 was interrupted halfway and training continues from there.
        if self.resume:
            # If stage 2 was interrupted halfway, reload it  TODO
            if self.resume.split('_')[-3] == '2':
                self.load_checkpoint(
                    load_optimizer=False)  # load_optimizer=True would also reload the learning rate and optimizer
                '''
                CosineAnnealingLR: if ['initial_lr'] already exists, the decay starts from initial_lr;
                if it does not, CosineAnnealingLR adds an initial_lr key to optimizer.param_groups equal to lr.

                Reset the initial learning rate: load_checkpoint restores the optimizer, but its initial_lr
                is still the old one, so overwrite it with self.lr so the decay starts from self.lr.
                '''
                self.optimizer.param_groups[0]['initial_lr'] = self.lr

            # Stage 1 finished but stage 2 did not start right away; there was a pause in between
            elif self.resume.split('_')[-3] == '1':
                self.load_checkpoint(load_optimizer=False)
                self.start_epoch = 0
                self.max_dice = 0

        # Stage 2 follows stage 1 directly, with no pause in between
        else:
            self.start_epoch = 0
            self.max_dice = 0

        # keep the global step, so the logs are not overwritten when training is resumed halfway
        global_step_before = self.start_epoch * len(self.train_loader)

        stage2_epoches = self.epoch_stage2 - self.start_epoch
        lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer, 30)
        d_h, d_l = 0, 0
        for epoch in range(self.start_epoch, self.epoch_stage2):
            # if self.epoch >= 20:
            # self.unet.apply(set_bn_eval)
            epoch += 1
            self.unet.train(True)
            epoch_loss = 0

            self.reset_grad()  # needed when accumulating gradients

            tbar = tqdm.tqdm(self.train_loader)
            for i, (images, masks) in enumerate(tbar):
                # GT : Ground Truth
                images = images.to(self.device)
                masks = masks.to(self.device)
                assert images.size(2) == 512

                # SR : Segmentation Result
                net_output = self.unet(images)
                net_output_flat = net_output.view(net_output.size(0), -1)
                masks_flat = masks.view(masks.size(0), -1)
                loss_set = self.criterion_stage2(net_output_flat, masks_flat)

                try:
                    loss_num = len(loss_set)
                except TypeError:
                    loss_num = 1
                # handle the cases according to how many losses were returned
                if loss_num > 1:
                    for loss_index, loss_item in enumerate(loss_set):
                        if loss_index > 0:
                            loss_name = 'stage2_loss_%d' % loss_index
                            self.writer.add_scalar(loss_name, loss_item.item(),
                                                   global_step_before + i)
                    loss = loss_set[0]
                else:
                    loss = loss_set
                epoch_loss += loss.item()

                # Backprop + optimize, see https://discuss.pytorch.org/t/why-do-we-need-to-set-the-gradients-manually-to-zero-in-pytorch/4903/20 for Accumulating Gradients
                if epoch <= self.epoch_stage2 - self.epoch_stage2_accumulation:
                    self.reset_grad()
                    loss.backward()
                    self.optimizer.step()
                else:
                    # loss = loss / self.accumulation_steps                # Normalize our loss (if averaged)
                    loss.backward()  # Backward pass
                    if (i + 1) % self.accumulation_steps == 0:  # wait for several backward steps
                        self.optimizer.step()  # now we can do an optimizer step
                        self.reset_grad()

                params_groups_lr = str()
                for group_ind, param_group in enumerate(
                        self.optimizer.param_groups):
                    params_groups_lr = params_groups_lr + 'params_group_%d' % (
                        group_ind) + ': %.12f, ' % (param_group['lr'])

                # log to tensorboard, one value per step
                self.writer.add_scalar('Stage2_train_loss', loss.item(),
                                       global_step_before + i)

                descript = "Train Loss: %.7f, lr: %s" % (loss.item(),
                                                         params_groups_lr)
                tbar.set_description(desc=descript)
            # update global_step_before for the next epoch
            global_step_before += len(tbar)

            # Print the log info
            print('Finish Stage2 Epoch [%d/%d], Average Loss: %.7f' %
                  (epoch, self.epoch_stage2, epoch_loss / len(tbar)))
            write_txt(
                self.save_path,
                'Finish Stage2 Epoch [%d/%d], Average Loss: %.7f' %
                (epoch, self.epoch_stage2, epoch_loss / len(tbar)))

            # validate the model, save the weights, and write the log
            loss_mean, dice_mean = self.validation(stage=2)
            if dice_mean > self.max_dice:
                is_best = True
                self.max_dice = dice_mean
            else:
                is_best = False

            self.lr = lr_scheduler.get_lr()
            state = {
                'epoch': epoch,
                'state_dict': self.unet.module.state_dict(),
                'max_dice': self.max_dice,
                'optimizer': self.optimizer.state_dict(),
                'lr': self.lr
            }

            self.save_checkpoint(state, 2, index, is_best)

            self.writer.add_scalar('Stage2_val_loss', loss_mean, epoch)
            self.writer.add_scalar('Stage2_val_dice', dice_mean, epoch)
            self.writer.add_scalar('Stage2_lr', self.lr[0], epoch)

            # learning-rate decay
            lr_scheduler.step()
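
As the comment block in the resume branch explains, CosineAnnealingLR only fills in initial_lr when the key is missing from optimizer.param_groups; a stale value carried over from a restored optimizer therefore shifts where the cosine decay starts, which is why it is overwritten with self.lr above. A small standalone illustration (the numbers are made up):

import torch
from torch import optim

param = torch.nn.Parameter(torch.zeros(1))
optimizer = optim.SGD([param], lr=0.01)

# Simulate an optimizer restored from a checkpoint that still carries an old initial_lr.
optimizer.param_groups[0]['initial_lr'] = 0.1
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30)
print(scheduler.base_lrs)   # [0.1]  -> the decay would start from the stale value

# Overwriting initial_lr before constructing the scheduler fixes the starting point.
optimizer.param_groups[0]['initial_lr'] = 0.01
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30)
print(scheduler.base_lrs)   # [0.01]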
Example #13
    def train(self, index):
        # for param in self.unet.module.encoder.parameters():
        #     param.requires_grad = False
        # self.optimizer = optim.Adam(filter(lambda p:p.requires_grad, self.unet.module.parameters()),self.lr,weight_decay=self.weight_decay)

        self.optimizer = optim.Adam(self.unet.module.parameters(),
                                    self.lr,
                                    weight_decay=self.weight_decay)
        # If training was interrupted halfway, reload the previously trained weights and the previous learning rate.
        # TODO: resuming does not restore the learning rate correctly yet, so resume cannot be used for now.
        # if self.resume:
        #     self.load_checkpoint(load_optimizer=True)
        #     '''
        #     CosineAnnealingLR: if ['initial_lr'] already exists, the decay starts from initial_lr;
        #     if it does not, CosineAnnealingLR adds an initial_lr key to optimizer.param_groups equal to lr.
        #     Reset the initial learning rate: load_checkpoint restores the optimizer, but its initial_lr is still the old one, so overwrite it with self.lr so the decay starts from self.lr.
        #     '''
        #     self.optimizer.param_groups[0]['initial_lr'] = self.lr

        stage1_epoches = self.epoch_stage1 - self.start_epoch
        lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer, 30)
        # lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode='max', factor=0.2, patience=5, verbose=True,
        #                                            threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0,
        #                                            eps=1e-08)
        # keep the global step, so the logs are not overwritten when training is resumed halfway
        global_step_before = self.start_epoch * len(self.train_loader)

        for epoch in range(self.start_epoch, self.epoch_stage1):
            epoch += 1
            self.unet.train(True)

            # learning-rate restart

            if epoch >= 18:

                def set_bn_eval(m):
                    classname = m.__class__.__name__
                    if classname.find('BatchNorm') != -1:
                        m.eval()

                self.unet.apply(set_bn_eval)
            # if epoch == 25:
            #     self.optimizer.param_groups[0]['initial_lr'] = 0.00005
            #     lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer, 25)
            epoch_loss = 0
            tbar = tqdm.tqdm(self.train_loader)
            for i, (images, masks) in enumerate(tbar):
                # GT : Ground Truth
                images = images.to(self.device)
                masks = masks.to(self.device)

                # SR : Segmentation Result
                net_output = self.unet(images)
                net_output_flat = net_output.view(net_output.size(0), -1)
                masks_flat = masks.view(masks.size(0), -1)
                loss_set = self.criterion(net_output_flat, masks_flat)

                try:
                    loss_num = len(loss_set)
                except TypeError:
                    loss_num = 1
                # handle the cases according to how many losses were returned
                if loss_num > 1:
                    for loss_index, loss_item in enumerate(loss_set):
                        if loss_index > 0:
                            loss_name = 'stage1_loss_%d' % loss_index
                            self.writer.add_scalar(loss_name, loss_item.item(),
                                                   global_step_before + i)
                    loss = loss_set[0]
                else:
                    loss = loss_set
                epoch_loss += loss.item()

                # Backprop + optimize
                self.reset_grad()
                loss.backward()
                self.optimizer.step()

                params_groups_lr = str()
                for group_ind, param_group in enumerate(
                        self.optimizer.param_groups):
                    params_groups_lr = params_groups_lr + 'params_group_%d' % (
                        group_ind) + ': %.12f, ' % (param_group['lr'])

                # log to tensorboard, one value per step
                self.writer.add_scalar('Stage1_train_loss', loss.item(),
                                       global_step_before + i)

                descript = "Train Loss: %.7f, lr: %s" % (loss.item(),
                                                         params_groups_lr)
                tbar.set_description(desc=descript)
            # update global_step_before for the next epoch
            global_step_before += len(tbar)

            # Print the log info
            print('Finish Stage1 Epoch [%d/%d], Average Loss: %.7f' %
                  (epoch, self.epoch_stage1, epoch_loss / len(tbar)))
            write_txt(
                self.save_path,
                'Finish Stage1 Epoch [%d/%d], Average Loss: %.7f' %
                (epoch, self.epoch_stage1, epoch_loss / len(tbar)))

            # validate the model, save the weights, and write the log
            loss_mean, dice_mean = self.validation(stage=1)
            if dice_mean > self.max_dice:
                is_best = True
                self.max_dice = dice_mean
            else:
                is_best = False

            self.lr = lr_scheduler.get_lr()
            state = {
                'epoch': epoch,
                'state_dict': self.unet.module.state_dict(),
                'max_dice': self.max_dice,
                'optimizer': self.optimizer.state_dict(),
                'lr': self.lr
            }

            self.save_checkpoint(state, 1, index, is_best)

            self.writer.add_scalars('Stage1_val_loss_dice', {
                'val_loss': loss_mean,
                'val_dice': dice_mean
            }, epoch)
            self.writer.add_scalar('Stage1_lr', self.lr[0], epoch)

            # learning-rate decay
            lr_scheduler.step()
Example #14
def write_pb_txt(data_dir, idx, W, L, T, D, N, phi, alpha, beta):
    # B = generate_lda(T, W, D, N, phi, alpha)
    B, phi = gen_lda2(T, W, D, N, alpha)
    write_pb(data_dir, idx, W, T, D, alpha, beta, B)
    write_txt(data_dir, idx, B)
Example #15
def write_data(data_dir, idx, W, L, T, D, N, phi, alpha, beta, chains):
    B = generate_lda(T, W, D, N, phi, alpha)
    write_pb(data_dir, idx, W, T, D, alpha, beta, B, write_params=False)
    write_stan(data_dir, idx, W, T, D, alpha, beta, B, chains=chains, write_params=False)
    write_prism(data_dir, idx, W, T, D, alpha, beta, B, write_params=False)
    write_txt(data_dir, idx, B, T, alpha, beta, write_params=False)
Example #16
def save_setting(dc):
    """保存设置到json文件
    bool 类型用字符串 1 或 0来表示
    """
    s = json.dumps(dc, ensure_ascii=False, indent=2)
    utils.write_txt(setting_file, s)
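
Only the save path is shown in the original; a hypothetical counterpart that reads the settings back could look like this (load_setting is not part of the source; setting_file is the same module-level path used above):

import json

def load_setting():
    # Hypothetical reader (not in the original project): parse the JSON text
    # written by save_setting and return the settings dict.
    with open(setting_file, encoding='utf-8') as f:
        return json.load(f)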