Пример #1
0
def eval(model, data_eval, voc_size, epoch):
    model.eval()

    ja, prauc, avg_p, avg_r, avg_f1 = [[] for _ in range(5)]
    smm_record = []
    med_cnt, visit_cnt = 0, 0

    for step, input in enumerate(data_eval):
        y_gt = []
        y_pred = []
        y_pred_prob = []
        y_pred_label = []

        for adm_index, adm in enumerate(input):
            output_logits = model(adm)

            y_gt_tmp = np.zeros(voc_size[2])
            y_gt_tmp[adm[2]] = 1
            y_gt.append(y_gt_tmp)

            # prediction prod
            output_logits = output_logits.detach().cpu().numpy()

            # prediction med set
            out_list, sorted_predict = sequence_output_process(
                output_logits, [voc_size[2], voc_size[2] + 1])
            y_pred_label.append(sorted(sorted_predict))
            y_pred_prob.append(np.mean(output_logits[:, :-2], axis=0))

            # prediction label
            y_pred_tmp = np.zeros(voc_size[2])
            y_pred_tmp[out_list] = 1
            y_pred.append(y_pred_tmp)
            visit_cnt += 1
            med_cnt += len(sorted_predict)

        smm_record.append(y_pred_label)

        adm_ja, adm_prauc, adm_avg_p, adm_avg_r, adm_avg_f1 = \
                sequence_metric(np.array(y_gt), np.array(y_pred), np.array(y_pred_prob), np.array(y_pred_label))
        ja.append(adm_ja)
        prauc.append(adm_prauc)
        avg_p.append(adm_avg_p)
        avg_r.append(adm_avg_r)
        avg_f1.append(adm_avg_f1)
        llprint('\rtest step: {} / {}'.format(step, len(data_eval)))

    # ddi rate
    ddi_rate = ddi_rate_score(smm_record,
                              path='../data/output/ddi_A_final.pkl')

    llprint(
        '\nDDI Rate: {:.4}, Jaccard: {:.4},  PRAUC: {:.4}, AVG_PRC: {:.4}, AVG_RECALL: {:.4}, AVG_F1: {:.4}, AVG_MED: {:.4}\n'
        .format(ddi_rate, np.mean(ja), np.mean(prauc), np.mean(avg_p),
                np.mean(avg_r), np.mean(avg_f1), med_cnt / visit_cnt))

    return ddi_rate, np.mean(ja), np.mean(prauc), np.mean(avg_p), np.mean(
        avg_r), np.mean(avg_f1), med_cnt / visit_cnt
Пример #2
0
def eval(model, data_eval, voc_size, epoch):
    # evaluate
    print('')
    model.eval()

    ja, prauc, avg_p, avg_r, avg_f1 = [[] for _ in range(5)]
    records = []
    med_cnt, visit_cnt = 0, 0
    for step, input in enumerate(data_eval):
        y_gt = []
        y_pred = []
        y_pred_prob = []
        y_pred_label = []
        i1_state, i2_state, i3_state = None, None, None
        for adm in input:
            y_gt_tmp = np.zeros(voc_size[2])
            y_gt_tmp[adm[2]] = 1
            y_gt.append(y_gt_tmp)

            output_logits, i1_state, i2_state, i3_state = model(
                adm, i1_state, i2_state, i3_state)
            output_logits = output_logits.detach().cpu().numpy()

            out_list, sorted_predict = sequence_output_process(
                output_logits, [voc_size[2], voc_size[2] + 1])

            y_pred_label.append(sorted_predict)
            y_pred_prob.append(np.mean(output_logits[:, :-2], axis=0))

            y_pred_tmp = np.zeros(voc_size[2])
            y_pred_tmp[out_list] = 1
            y_pred.append(y_pred_tmp)
            visit_cnt += 1
            med_cnt += len(y_pred_tmp)
        records.append(y_pred_label)

        adm_ja, adm_prauc, adm_avg_p, adm_avg_r, adm_avg_f1 = sequence_metric(
            np.array(y_gt), np.array(y_pred), np.array(y_pred_prob),
            np.array(y_pred_label))
        ja.append(adm_ja)
        prauc.append(adm_prauc)
        avg_p.append(adm_avg_p)
        avg_r.append(adm_avg_r)
        avg_f1.append(adm_avg_f1)

        llprint('\rEval--Epoch: %d, Step: %d/%d' %
                (epoch, step, len(data_eval)))

    # ddi rate
    ddi_rate = ddi_rate_score(records)
    llprint(
        '\tDDI Rate: %.4f, Jaccard: %.4f,  PRAUC: %.4f, AVG_PRC: %.4f, AVG_RECALL: %.4f, AVG_F1: %.4f, AVG_Med: %.4f\n'
        % (ddi_rate, np.mean(ja), np.mean(prauc), np.mean(avg_p),
           np.mean(avg_r), np.mean(avg_f1), med_cnt / visit_cnt))
    return ddi_rate, np.mean(ja), np.mean(prauc), np.mean(avg_p), np.mean(
        avg_r), np.mean(avg_f1)
Пример #3
0
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    print_loss_total = 0  # Reset every print_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    training_pairs = [tensorsFromPair(random.choice(train_pairs))
                      for i in range(n_iters)]
    criterion = nn.CrossEntropyLoss()
    history = defaultdict(list)
    for epoch in range(30):
        for iter in range(1, n_iters + 1):
            training_pair = training_pairs[iter - 1]
            input_tensor = training_pair[0]
            target_tensor = training_pair[1]

            loss = train(input_tensor, target_tensor, encoder,
                        decoder, encoder_optimizer, decoder_optimizer, criterion)
            print_loss_total += loss
            llprint('\rTrain--Epoch: %d, Step: %d/%d' % (epoch, iter, n_iters))

        print_loss_avg = print_loss_total / n_iters
        print_loss_total = 0

        #eval
        y_gt = []
        y_pred = []
        y_pred_prob = []
        y_pred_label = []
        for pair in eval_pairs:
            y_gt_tmp = np.zeros(len(med_voc.idx2word))
            y_gt_tmp[np.array(pair[1])[:-1]-2] = 1
            y_gt.append(y_gt_tmp)

            input_tensor, output_tensor = tensorsFromPair(pair)
            output_logits = evaluate(encoder, decoder, input_tensor)
            output_logits = F.softmax(output_logits)
            output_logits = output_logits.detach().cpu().numpy()
            out_list, sorted_predict = sequence_output_process(output_logits, [SOS_token, EOS_token])

            y_pred_label.append(np.array(sorted_predict)-2)
            y_pred_prob.append(np.mean(output_logits[:, 2:], axis=0))

            y_pred_tmp = np.zeros(len(med_voc.idx2word))
            if len(out_list) != 0 :
                y_pred_tmp[np.array(out_list) - 2] = 1
            y_pred.append(y_pred_tmp)

        ja, prauc, avg_p, avg_r, avg_f1 = sequence_metric(np.array(y_gt), np.array(y_pred),
                                                        np.array(y_pred_prob),
                                                        np.array(y_pred_label))
        # ddi rate
        ddi_A = dill.load(open('../data/ddi_A_final.pkl', 'rb'))
        all_cnt = 0
        dd_cnt = 0
        for adm in y_pred_label:
            med_code_set = adm
            for i, med_i in enumerate(med_code_set):
                for j, med_j in enumerate(med_code_set):
                    if j <= i:
                        continue
                    all_cnt += 1
                    if ddi_A[med_i, med_j] == 1 or ddi_A[med_j, med_i] == 1:
                        dd_cnt += 1
        ddi_rate = dd_cnt / all_cnt

        history['ja'].append(ja)
        history['ddi_rate'].append(ddi_rate)
        history['avg_p'].append(avg_p)
        history['avg_r'].append(avg_r)
        history['avg_f1'].append(avg_f1)
        history['prauc'].append(prauc)
        llprint('\n\tDDI Rate: %.4f, Jaccard: %.4f,  PRAUC: %.4f, AVG_PRC: %.4f, AVG_RECALL: %.4f, AVG_F1: %.4f\n' % (
            ddi_rate, ja, prauc, avg_p, avg_r, avg_f1
        ))

        dill.dump(history, open(os.path.join('saved', model_name, 'history.pkl'), 'wb'))

        torch.save(encoder.state_dict(),
                   open(
                       os.path.join('saved', model_name, 'encoder_Epoch_%d_JA_%.4f_DDI_%.4f.model' % (epoch, ja, dd_cnt/all_cnt)),
                       'wb'))
        torch.save(decoder.state_dict(),
                   open(
                       os.path.join('saved', model_name, 'decoder_Epoch_%d_JA_%.4f_DDI_%.4f.model' % (epoch, ja, dd_cnt/all_cnt)),
                       'wb'))
Пример #4
0
        input_tensor, output_tensor = tensorsFromPair(pair)
        output_logits = evaluate(encoder1, decoder1, input_tensor)
        output_logits = F.softmax(output_logits)
        output_logits = output_logits.detach().cpu().numpy()
        out_list, sorted_predict = sequence_output_process(output_logits, [SOS_token, EOS_token])

        y_pred_label.append(np.array(sorted_predict) - 2)
        y_pred_prob.append(np.mean(output_logits[:, 2:], axis=0))

        y_pred_tmp = np.zeros(len(med_voc.idx2word))
        if len(out_list) != 0:
            y_pred_tmp[np.array(out_list) - 2] = 1
        y_pred.append(y_pred_tmp)

    ja, prauc, avg_p, avg_r, avg_f1 = sequence_metric(np.array(y_gt), np.array(y_pred),
                                                    np.array(y_pred_prob),
                                                    np.array(y_pred_label))
    # ddi rate
    ddi_A = dill.load(open('../data/ddi_A_final.pkl', 'rb'))
    all_cnt = 0
    dd_cnt = 0
    for adm in y_pred_label:
        med_code_set = adm
        for i, med_i in enumerate(med_code_set):
            for j, med_j in enumerate(med_code_set):
                if j <= i:
                    continue
                all_cnt += 1
                if ddi_A[med_i, med_j] == 1 or ddi_A[med_j, med_i] == 1:
                    dd_cnt += 1
    ddi_rate = dd_cnt / all_cnt
Пример #5
0
def eval(model, data_eval, voc_size, epoch):
    model.eval()

    ja, prauc, avg_p, avg_r, avg_f1 = [[] for _ in range(5)]
    smm_record = []
    med_cnt, visit_cnt = 0, 0
    add_list, delete_list = [], []

    for step, input in enumerate(data_eval):
        y_gt, y_pred, y_pred_prob, y_pred_label = [], [], [], []
        if len(input) < 2: continue
        add_temp_list, delete_temp_list = [], []

        for adm_idx, adm in enumerate(input):
            if adm_idx == 0: 
                previous_set = adm[2] 
                continue
            output_logits = model(adm)

            y_gt_tmp = np.zeros(voc_size[2])
            y_gt_tmp[adm[2]] = 1
            y_gt.append(y_gt_tmp)

            # prediction prod
            output_logits = output_logits.detach().cpu().numpy()

            # prediction med set
            out_list, sorted_predict = sequence_output_process(output_logits, [voc_size[2], voc_size[2]+1])
            y_pred_label.append(sorted(sorted_predict))
            y_pred_prob.append(np.mean(output_logits[:, :-2], axis=0))

            # prediction label
            y_pred_tmp = np.zeros(voc_size[2])
            y_pred_tmp[out_list] = 1
            y_pred.append(y_pred_tmp)
            visit_cnt += 1
            med_cnt += len(sorted_predict)

            #### add or delete
            add_gt = set(np.where(y_gt_tmp == 1)[0]) - set(previous_set)
            delete_gt = set(previous_set) - set(np.where(y_gt_tmp == 1)[0])

            add_pre = set(np.where(y_pred_tmp == 1)[0]) - set(previous_set)
            delete_pre = set(previous_set) - set(np.where(y_pred_tmp == 1)[0])
            
            add_distance = len(set(add_pre) - set(add_gt)) + len(set(add_gt) - set(add_pre))
            delete_distance = len(set(delete_pre) - set(delete_gt)) + len(set(delete_gt) - set(delete_pre))
            ####

            add_temp_list.append(add_distance)
            delete_temp_list.append(delete_distance)

            previous_temp_set = out_list

        if len(add_temp_list) > 1:
            add_list.append(np.mean(add_temp_list))
            delete_list.append(np.mean(delete_temp_list))
        else:
            add_list.append(add_temp_list[0])
            delete_list.append(delete_temp_list[0])

        smm_record.append(y_pred_label)

        adm_ja, adm_prauc, adm_avg_p, adm_avg_r, adm_avg_f1 = \
                sequence_metric(np.array(y_gt), np.array(y_pred), np.array(y_pred_prob), np.array(y_pred_label))
        ja.append(adm_ja)
        prauc.append(adm_prauc)
        avg_p.append(adm_avg_p)
        avg_r.append(adm_avg_r)
        avg_f1.append(adm_avg_f1)
        llprint('\rtest step: {} / {}'.format(step, len(data_eval)))

    # ddi rate
    ddi_rate = ddi_rate_score(smm_record, path='../data/output/ddi_A_final.pkl')

    llprint('\nDDI Rate: {:.4}, Jaccard: {:.4},  AVG_F1: {:.4}, Add: {:.4}, Delete: {:.4}, AVG_MED: {:.4}\n'.format(
        np.float(ddi_rate), np.mean(ja), np.mean(avg_f1), np.mean(add_list), np.mean(delete_list), med_cnt / visit_cnt
    ))

    return np.float(ddi_rate), np.mean(ja), np.mean(prauc), np.mean(avg_p), np.mean(avg_r), np.mean(avg_f1), np.mean(add_list), np.mean(delete_list), med_cnt / visit_cnt