def main(**kwargs):
    """Train the network named by ``opt.model`` with validation and LR decay.

    ``opt.parse`` is called twice with the same ``kwargs``: once before the
    hard-coded sub-model configuration below and once after it (presumably
    so that explicit overrides win over the hard-coded values; confirm
    against ``opt.parse``).

    Training loop summary:

    * The loss is computed against the float labels scaled by ``opt.weight``.
    * Every ``opt.plot_every`` batches the metrics of the current batch are
      computed with ``get_score`` and sent to ``vis``.
    * Every ``opt.decay_every`` batches the model is evaluated with ``val``.
      A new best score is saved with ``model.save``; a score below the best
      one reloads the best checkpoint, decays ``lr`` and ``lr2`` and asks the
      model for a new optimizer.
    * Training stops as soon as ``lr`` drops below ``opt.min_lr``.

    Args:
        **kwargs: option overrides forwarded unchanged to ``opt.parse``.
    """
    opt.parse(kwargs, print_=False)
    if opt.debug:
        import ipdb
        ipdb.set_trace()

    # Sub-model names and their checkpoint paths (the lists look
    # index-aligned and are presumably consumed by the model constructor;
    # confirm).  The last entry has no checkpoint.
    opt.model_names = [
        'MultiCNNTextBNDeep',
        'LSTMText',
        'MultiCNNTextBNDeep-boost',
    ]
    opt.model_paths = [
        'checkpoints/MultiCNNTextBNDeep_word_0.410330780091',
        'checkpoints/LSTMText_word_0.411994005382',
        None,
    ]
    # Only recorded on opt; this function does not load it itself.
    opt.model_path = 'checkpoints/BoostModel2_word_0.410618920827'

    model = getattr(models, opt.model)(opt).cuda()
    print(model)

    opt.parse(kwargs, print_=True)

    vis.reinit(opt.env)
    lr, lr2 = opt.lr, opt.lr2
    loss_function = getattr(models, opt.loss)()
    dataset_cls = ZhihuALLData if opt.all else ZhihuData
    dataset = dataset_cls(opt.train_data_path,
                          opt.labels_path,
                          type_=opt.type_)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    optimizer = model.get_optimizer(opt.lr, opt.lr2, 0)
    loss_meter = tnt.meter.AverageValueMeter()
    score_meter = tnt.meter.AverageValueMeter()
    best_score = 0

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        score_meter.reset()
        for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            # Each of title/content arrives as a pair of tensors; move every
            # tensor to the GPU and wrap it for autograd.
            title = (Variable(title[0].cuda()), Variable(title[1].cuda()))
            content = (Variable(content[0].cuda()),
                       Variable(content[1].cuda()))
            label = Variable(label.cuda())

            optimizer.zero_grad()
            score = model(title, content)
            loss = loss_function(score, opt.weight * label.float())
            loss_meter.add(loss.data[0])
            loss.backward()
            optimizer.step()

            if ii % opt.plot_every == opt.plot_every - 1:
                # Creating the file named by opt.debug_file drops into the
                # debugger at the next plotting step.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

                # Indices of the 5 highest scores per sample.
                predict = score.data.topk(5, dim=1)[1].cpu().tolist()
                # The 5 largest target values per sample and their indices;
                # only indices whose target value is > 0 count as marked.
                true_label, true_index = label.data.float().topk(5, dim=1)
                predict_label_and_marked_label_list = [
                    (predict[jj], true_index[jj][true_label[jj] > 0].tolist())
                    for jj in range(label.size(0))
                ]
                score_, prec_, recall_, _ss = get_score(
                    predict_label_and_marked_label_list)
                score_meter.add(score_)
                vis.vis.text('prec:%s,recall:%s,score:%s,a:%s' %
                             (prec_, recall_, score_, _ss),
                             win='tmp')
                vis.plot('scores', score_meter.value()[0])
                vis.plot('loss', loss_meter.value()[0])

            if ii % opt.decay_every == opt.decay_every - 1:
                # Drop the reference to the last loss before validating.
                del loss
                scores, prec_, recall_, _ss = val(model, dataset)
                if scores > best_score:
                    best_score = scores
                    best_path = model.save(name=str(scores), new=True)

                vis.log({
                    ' epoch:': epoch,
                    ' lr: ': lr,
                    'scores': scores,
                    'prec': prec_,
                    'recall': recall_,
                    'ss': _ss,
                    'scores_train': score_meter.value()[0],
                    'loss': loss_meter.value()[0]
                })
                if scores < best_score:
                    # Validation got worse: go back to the best checkpoint
                    # and continue from there with smaller learning rates.
                    model.load(best_path, change_opt=False)
                    lr = lr * opt.lr_decay
                    if lr2 == 0:
                        lr2 = 2e-4
                    else:
                        lr2 = lr2 * opt.lr_decay
                    # NOTE(review): a new optimizer is requested from the
                    # model, which presumably resets optimizer state such as
                    # momentum - confirm against get_optimizer.
                    optimizer = model.get_optimizer(lr, lr2, 0)

                loss_meter.reset()
                score_meter.reset()

                if lr < opt.min_lr:
                    # Stop training altogether.  A bare `break` would only
                    # leave the batch loop, and the next epoch would resume
                    # training below the minimum learning rate.
                    return
# Example #2
# 0
def main(**kwargs):
    """Train the network named by ``opt.model`` on ``ZhihuALLData``.

    ``opt.parse`` is called twice with the same ``kwargs``: once before the
    hard-coded sub-model configuration below and once after it (presumably
    so that explicit overrides win over the hard-coded values; confirm
    against ``opt.parse``).

    Training loop summary:

    * Every ``opt.plot_every`` batches the metrics of the current batch are
      computed with ``get_score`` and sent to ``vis``.
    * Every ``opt.decay_every`` batches the model is evaluated with ``val``.
      A new best score is saved with ``model.save``; a score below the best
      one reloads the best checkpoint, decays ``lr`` by ``opt.lr_decay``,
      halves ``lr2`` (or sets it to 1e-4 when it is 0) and asks the model
      for a new optimizer.

    Args:
        **kwargs: option overrides forwarded unchanged to ``opt.parse``.

    Raises:
        ValueError: if ``opt.all`` is false; only the ``ZhihuALLData``
            dataset is configured in this entry point.
    """
    opt.parse(kwargs, print_=False)
    if opt.debug:
        import ipdb
        ipdb.set_trace()

    # Sub-model names and their checkpoint paths (the lists look
    # index-aligned and are presumably consumed by the model constructor;
    # confirm).
    opt.model_names = [
        'MultiCNNTextBNDeep',
        'FastText3',
        'LSTMText',
        'CNNText_inception',
    ]
    opt.model_paths = [
        'checkpoints/MultiCNNTextBNDeep_word_0.41124002492',
        'checkpoints/FastText3_word_0.40810787337',
        'checkpoints/LSTMText_word_0.413681107036',
        'checkpoints/CNNText_tmp_char_0.402429167301',
    ]

    model = getattr(models, opt.model)(opt).cuda()
    if opt.model_path:
        model.load(opt.model_path)
    print(model)

    opt.parse(kwargs, print_=True)

    vis.reinit(opt.env)
    lr, lr2 = opt.lr, opt.lr2
    loss_function = getattr(models, opt.loss)()
    if not opt.all:
        # Without this guard `dataset` would stay unbound and the
        # DataLoader construction below would fail with an opaque
        # UnboundLocalError.
        raise ValueError(
            'main() requires opt.all to be true: no dataset is configured '
            'for opt.all=False')
    dataset = ZhihuALLData(opt.train_data_path,
                           opt.labels_path,
                           type_=opt.type_,
                           augument=opt.augument)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    # NOTE(review): the initial call omits the third argument that the
    # rebuild inside the loop passes as 0 - confirm the default matches.
    optimizer = model.get_optimizer(opt.lr, opt.lr2)
    loss_meter = tnt.meter.AverageValueMeter()
    score_meter = tnt.meter.AverageValueMeter()
    best_score = 0

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        score_meter.reset()
        for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            # Each of title/content arrives as a pair of tensors; move every
            # tensor to the GPU and wrap it for autograd.
            title = (Variable(title[0].cuda()), Variable(title[1].cuda()))
            content = (Variable(content[0].cuda()),
                       Variable(content[1].cuda()))
            label = Variable(label.cuda())

            optimizer.zero_grad()
            score = model(title, content)
            loss = loss_function(score, label.float())
            loss_meter.add(loss.data[0])
            loss.backward()
            optimizer.step()

            if ii % opt.plot_every == opt.plot_every - 1:
                # Creating the file named by opt.debug_file drops into the
                # debugger at the next plotting step.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

                # Indices of the 5 highest scores per sample.
                predict = score.data.topk(5, dim=1)[1].cpu().tolist()
                # The 5 largest target values per sample and their indices;
                # only indices whose target value is > 0 count as marked.
                true_label, true_index = label.data.float().topk(5, dim=1)
                predict_label_and_marked_label_list = [
                    (predict[jj], true_index[jj][true_label[jj] > 0].tolist())
                    for jj in range(label.size(0))
                ]
                score_, prec_, recall_, _ss = get_score(
                    predict_label_and_marked_label_list)
                score_meter.add(score_)
                vis.vis.text('prec:%s,recall:%s,score:%s,a:%s' %
                             (prec_, recall_, score_, _ss),
                             win='tmp')
                vis.plot('scores', score_meter.value()[0])
                vis.plot('loss', loss_meter.value()[0])

            if ii % opt.decay_every == opt.decay_every - 1:
                # Drop the reference to the last loss before validating.
                del loss
                scores, prec_, recall_, _ss = val(model, dataset)
                vis.log({
                    ' epoch:': epoch,
                    ' lr: ': lr,
                    'scores': scores,
                    'prec': prec_,
                    'recall': recall_,
                    'ss': _ss,
                    'scores_train': score_meter.value()[0],
                    'loss': loss_meter.value()[0]
                })

                if scores > best_score:
                    best_score = scores
                    best_path = model.save(name=str(scores), new=True)

                if scores < best_score:
                    # Validation got worse: go back to the best checkpoint
                    # and continue from there with smaller learning rates.
                    model.load(best_path, change_opt=False)
                    lr = lr * opt.lr_decay
                    if lr2 == 0:
                        lr2 = 1e-4
                    else:
                        lr2 = lr2 * 0.5
                    optimizer = model.get_optimizer(lr, lr2, 0)

                loss_meter.reset()
                score_meter.reset()