Example #1
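# Evaluate a news-recommendation model: encode all test news and users once,
# then score every logged impression and report mean AUC, MRR, nDCG@5 and
# nDCG@10.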
 def test(self):
     with torch.no_grad():
         self.model.eval()
         AUC_list, MRR_list, nDCG5_list, nDCG10_list = [], [], [], []
         total_news_id = {}
         news_vectors = []
         with tqdm(total=len(self.test_news_loader),
                   desc='Generating test news vector') as p:
             for news_id_batch, news_batch in self.test_news_loader:
                 news_vectors.append(
                     self.model.model.get_news_vector(news_batch))
                 for news_id in news_id_batch:
                     total_news_id[news_id] = len(total_news_id)
                 p.update(1)
         # Concatenate once; repeated torch.cat inside the loop is quadratic.
         total_test_news = torch.cat(news_vectors)
         test_user_loader = DataLoader(TestUserDataset(
             self.args, total_test_news, total_news_id),
                                       batch_size=self.args.batch_size,
                                       shuffle=False,
                                       num_workers=self.args.n_threads,
                                       pin_memory=False)
         user_vectors = []
         with tqdm(total=len(test_user_loader),
                   desc='Generating test user vector') as p:
             for batch in test_user_loader:
                 user_vectors.append(self.model.model.get_user_vector(batch))
                 p.update(1)
         total_test_user = torch.cat(user_vectors)
         behaviors = pd.read_table(os.path.join(self.args.data_dir,
                                                'test_behaviors.csv'),
                                   na_filter=False,
                                   usecols=[2, 3],
                                   names=['impressions', 'y_true'],
                                   header=0)
         with tqdm(total=len(behaviors), desc='Predicting') as p:
             for row in behaviors.itertuples():
                 user_vector = total_test_user[row.Index]  # rows align with the user loader order
                 impressed_ids = row.impressions.split(' ')
                 news_vector = torch.cat([
                     total_test_news[total_news_id[news_id]].unsqueeze(dim=0)
                     for news_id in impressed_ids
                 ])
                 y_true = [int(x) for x in row.y_true.split(' ')]
                 predict = torch.matmul(news_vector, user_vector).tolist()
                 AUC_list.append(AUC(y_true, predict))
                 MRR_list.append(MRR(y_true, predict))
                 nDCG5_list.append(nDCG(y_true, predict, 5))
                 nDCG10_list.append(nDCG(y_true, predict, 10))
                 p.update(1)
         print('AUC:', np.mean(AUC_list))
         print('MRR:', np.mean(MRR_list))
         print('nDCG@5:', np.mean(nDCG5_list))
         print('nDCG@10:', np.mean(nDCG10_list))
Example #2

     cfg = edict(json.load(f))
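# Ensemble setup: three DenseNet-121 branches trained with BCE-with-logits
# loss and a full classification metric suite.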

if isinstance(cfg.batch_size, list) and isinstance(cfg.long_side, list):
    list_batch = cfg.batch_size
    list_res = cfg.long_side
elif isinstance(cfg.batch_size, int) and isinstance(cfg.long_side, int):
    list_batch = [cfg.batch_size]
    list_res = [cfg.long_side]
else:
    raise TypeError("'batch_size' and 'long_side' in the config file must both be ints or both be lists!")

loss_func = BCEWithLogitsLoss()

# data_dir = '/home/tungthanhlee/bdi_xray/data/images'
data_dir = '/home/dual1/thanhtt/assigned_jpeg'
metrics_dict = {
    'acc': ACC(),
    'auc': AUC(),
    'precision': Precision(),
    'recall': Recall(),
    'specificity': Specificity(),
    'f1': F1()
}

model_names = [
    'dense',
    'dense',
    'dense',
    # 'resnet',
    # 'dense',
    # 'efficient',
    # 'resnest'
]
ids = [
    '121',
    '121',
    '121',
    # '101',
Example #3
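# Greedy forward feature selection: at each step and for each class, add the
# candidate feature column whose inclusion maximizes AUC against the labels.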
selected_sum = [0] * num_classes  # running sum of selected feature columns, per class

print(num_results)
for i in np.arange(num_results):
    print(i)
    st = time.time()
    sub_auc = [[] for _ in np.arange(num_classes)]  # was hard-coded to six lists

    if i > 0:
        for j in np.arange(num_classes):
            selected_sum[j] = selected_sum[j] + data[j][:, selected_idcs[j][-1]]
    
    for k in np.arange(num_classes):
        s = time.time()
        for idx in candidate_idcs[k]:
            sub_auc[k].append(AUC(labels[:, k], selected_sum[k] + data[k][:, idx]))
        e = time.time()
        print(e - s)
        max_idx = np.argmax(np.asarray(sub_auc[k]))
        selected_idcs[k].append(candidate_idcs[k][max_idx])
        candidate_idcs[k].remove(candidate_idcs[k][max_idx])
        auc[i, k] = sub_auc[k][max_idx]
        selected_features[k].append(results[selected_idcs[k][-1]])
        
    et = time.time()
    print "elapsed time is %f seconds, auc is: " % (et - st)
    print auc[i]
    np.save('selection_result.npy', [auc, selected_idcs, selected_features])
Example #4
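# Train/validation loaders for a CheXpert-style model trained with MSE loss
# and tracked by AUC, sensitivity, specificity and F1.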
train_loader = create_loader(cfg.train_csv,
                             data_dir,
                             cfg,
                             mode='train',
                             dicom=False,
                             type=cfg.type)
val_loader = create_loader(cfg.dev_csv,
                           data_dir,
                           cfg,
                           mode='val',
                           dicom=False,
                           type=cfg.type)

# loss_func = BCELoss()
# loss_func = BCEWithLogitsLoss()
loss_func = MSELoss()

metrics_dict = {
    'auc': AUC(),
    'sensitivity': Recall(),
    'specificity': Specificity(),
    'f1': F1()
}
loader_dict = {'train': train_loader, 'val': val_loader}

#------------------------------- additional config for ensemble ---------------------------------------
model_names = [
    'dense',
    'resnet',
    'dense',
    # 'efficient',
    #'resnest'
]
ids = [
Example #5
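# Choose between the full- and standard-image dataset classes, then train a
# CheXpert model while logging ACC and AUC to TensorBoard.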
    data_class = ImageDataset_full
else:
    data_class = ImageDataset

train_loader = DataLoader(data_class(cfg.train_csv, cfg, mode='train'),
                          num_workers=4,
                          drop_last=True,
                          shuffle=True,
                          batch_size=cfg.batch_size)
val_loader = DataLoader(data_class(cfg.dev_csv, cfg, mode='dev'),
                        num_workers=4,
                        drop_last=False,
                        shuffle=False,
                        batch_size=cfg.batch_size)

metrics_dict = {'acc': ACC(), 'auc': AUC()}
loader_dict = {'train': train_loader, 'val': val_loader}

chexpert_model = CheXpert_model(cfg, loss_func, metrics_dict)

# chexpert_model.load_ckp(cfg.ckp_path)
# chexpert_model.freeze_backbone()

writer = SummaryWriter(os.path.join('experiment', cfg.log_dir))
ckp_dir = os.path.join('experiment', cfg.log_dir, 'checkpoint')

chexpert_model.train(train_loader,
                     val_loader,
                     epochs=cfg.epochs,
                     iter_log=cfg.iter_log,
                     writer=writer,
Example #6

N_VALIDATION = 6000
N_VAL_GOOD = N_VALIDATION * 0.9
N_VAL_BAD = N_VALIDATION * 0.1
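# 90/10 split: 5400 'good' and 600 'bad' validation samples.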

# constants
IF_DATA_AUGMENTATION = True
NUM_CLASSES = 2
IMAGE_WIDTH = IMAGE_HEIGHT = 224
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)
IMAGE_CHANNELS = 1
INPUT_SHAPE = [IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS]

METRICS = [
    keras.metrics.BinaryAccuracy(name='accuracy'),
    AUC(name='auc_good_0')
    # AUC(name='auc_bad_1')  # AUC with 'bad' as the positive class
]


def main():
    """ Use tensorflow version 2 """
    assert tf.__version__[0] == "2"
    """ Load Config """
    with open('./config/config.json', 'r') as f:
        CONFIG = json.load(f)
    BATCH_SIZE = 32  # CONFIG["BATCH_SIZE"]
    ROOT_PATH = CONFIG["ROOT_PATH"]
    TRAIN_DATA_DIR = CONFIG["TRAIN_DATA_DIR"]
    VALIDATION_DATA_DIR = CONFIG["VALIDATION_DATA_DIR"]
    TRAIN_DATA_DIR = os.path.join(ROOT_PATH, TRAIN_DATA_DIR)
Example #7
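# Compare three link-prediction similarity matrices on wikivote (two
# matrix-product path counts and the IP measure); the trailing 10000 is
# presumably the number of node pairs sampled for each AUC estimate.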
if __name__ == '__main__':

    # NetData = np.loadtxt("./Datasets/raw/wikivote.txt")
    f = open("./Datasets/raw/wikivote.txt", "r")
    # train, test = train_test_split.k_fold_split(f, 10)
    # sim = np.dot(train, train)
    # score = AUC.Calculation_AUC(train, test, sim, train.shape[0])
    # print(score)

    # train, test = train_test_split.time_based_split(f, 0.9)
    f_tr = open("train.txt", "r")
    f_te = open("test.txt", "r")
    train, test = train_test_split.read_from_txt(f_tr, f_te)
    sim_1 = np.dot(train, train)
    score_1 = AUC.Calculation_AUC(train, test, sim_1, train.shape[0], 10000)
    print(score_1)
    sim_2 = np.dot(np.dot(train, train.T), train)
    score_2 = AUC.Calculation_AUC(train, test, sim_2, train.shape[0], 10000)
    print(score_2)
    sim_3 = basic_measures.IP(train, 0.3)
    score_3 = AUC.Calculation_AUC(train, test, sim_3, train.shape[0], 10000)
    print(score_3)

    # n_folds = 10
    # linklist = []
    # train_list = []
    # test_list = []
    # f = open("./Datasets/temporal_sort/email-Eu-core-temporal_sorted.txt", "r")
    # train_list, test_list = train_test_split.train_test_split(f, 0.8)
    # # f_t = open("train.txt", "r")
Example #8
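# Mean AUC and precision of the Bifan similarity index across five
# pre-divided 90/10 train/test splits of the PB network.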
from preprocess import train_test_split
from metrics import evaluationMetric
from metrics import AUC
from algorithms import basic_measures
import numpy as np

if __name__ == '__main__':
    # f = open("./Datasets/raw/wikivote.txt", "r")
    # linklist = prehandle_dataset.gen_linklist_from_txt(f)

    # f = open("linklist.txt", "r")
    # linklist = prehandle_dataset.gen_linklist_from_txt(f)
    # adj_train, adj_test = train_test_split.k_fold_split(linklist, 10)

    network = "PB"
    N_exp = 5

    auc = np.zeros((1, N_exp))
    pre = np.zeros((1, N_exp))
    for ith_exp in range(N_exp):
        f_tr = open("./divided_dataset/"+network+"_tr_0.9_"+str(ith_exp+1)+".txt", "r")
        f_te = open("./divided_dataset/"+network+"_te_0.9_"+str(ith_exp+1)+".txt", "r")
        adj_train, adj_test = train_test_split.read_from_txt(f_tr, f_te)
        sim = basic_measures.Bifan(adj_train)
        auc[0, ith_exp] = AUC.Calculation_AUC(adj_train, adj_test, sim, adj_train.shape[0], 10000)
        pre[0, ith_exp] = evaluationMetric.cal_precision(adj_train, adj_test, sim, 100)
    print(auc.mean())
    print(pre.mean())