def load_raw_interaction_data(name, natts, eatts, tvt):
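    """Load molecular graphs and drug-drug interaction edges for the DrugBank
    or DrugCombo dataset and assemble them into a BiGNNDataset."""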
    if 'drugbank' in name:
        dir_name = 'DrugBank'
        drugbank_dir = join(get_data_path(), dir_name)
        interaction_dir = join(drugbank_dir, 'ddi_data')
        graph_data = load(join(drugbank_dir, 'klepto', 'graph_data.klepto'))
        fname_to_gid_func = lambda fname: int(fname[2:])
        interaction_fname = 'ddi_snap.tsv'
        parse_edge_func = parse_edges_biosnap
        data_dir = join(interaction_dir, 'drugs_snap')
        if 'small' in name:
            data_dir = join(interaction_dir, 'drugs_small')
        interaction_file_path = join(interaction_dir, interaction_fname)
        edge_types_edge_list, nodes = get_interaction_edgelist(
            interaction_file_path,
            parse_edge_func,
            False,
            fname_to_gid_func)

    elif 'drugcombo' in name:
        dir_name = 'DrugCombo'
        drugcombo_dir = join(get_data_path(), dir_name)
        graph_data = load(join(drugcombo_dir, "klepto", 'graph_data.klepto'))
        data_dir = drugcombo_dir
        interaction_dir = join(drugcombo_dir, 'ddi_data')
        interaction_file_path = join(interaction_dir, 'Syner&Antag_voting.csv')
        drugname_to_cid = load(join(drugcombo_dir, 'klepto', 'drug_name_to_cid'))
        edge_to_gid_func = lambda x: int(drugname_to_cid[x.lower()][4:])
        fname_to_gid_func = lambda x: int(x[4:])

        num_pairs_synergy_antagonism = count_pairs_synergy_antagonism(
            interaction_file_path)

        edge_types_edge_list, nodes = get_interaction_edgelist(
            interaction_file_path,
            parse_edges_drugcombo,
            True,
            edge_to_gid_func,
            skip_first_line=True,
            num_pairs_synergy_antagonism=num_pairs_synergy_antagonism)
    else:
        raise NotImplementedError

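    # Build a graph per drug, pair the graphs according to the interaction
    # edge list, and compute sparse molecular node features.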
    graphs = iterate_get_graphs(data_dir, graph_data, nodes,
                                fname_to_gid_func, natts=natts)
    pairs, graph_ids, edge_types_edge_list_filtered = get_graph_pairs(
        edge_types_edge_list, graphs)
    hyper_edge_labels = {'interaction': 1, 'no_interaction': 0}
    sparse_node_feat, gid_to_idx = get_molecular_node_feats(graph_data,
                                                            graph_ids,
                                                            fname_to_gid_func)
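    # DrugCombo distinguishes antagonism from synergy, so relabel antagonistic
    # pairs and extend the hyper-edge label map to three classes.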
    if 'drugcombo' in name:
        for pair in pairs.values():
            if next(iter(pair.edge_types)) == 'antagonism':
                pair.true_label = 2
        hyper_edge_labels = {'antagonism': 2, 'synergy': 1, 'no_interaction': 0}

    graphs = [graphs[gid] for gid in sorted(graph_ids)]
    return BiGNNDataset(name, graphs, natts, hyper_edge_labels, eatts,
                        pairs, tvt, sparse_node_feat)
Example #2
def load_pair_tvt_splits():
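    """Load (or build and cache) the train/val/test pair splits, keyed by a
    file name derived from the dataset, seed, split ratios and feature flags."""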
    split_dir = join(get_save_path(), 'pairs_tvt_split')
    train_ratio = int(FLAGS.tvt_ratio[0] * 100)
    val_ratio = int(FLAGS.tvt_ratio[1] * 100)
    test_ratio = 100 - train_ratio - val_ratio
    ensure_train_connectivity_str = 'ensure_train_connectivity_{}'\
        .format(str(FLAGS.ensure_train_connectivity).lower())

    num_folds = 1 if FLAGS.cross_val is None else FLAGS.num_folds

    sfn = '{}_{}_seed_{}_folds_{}_train_{}_val_{}_test_{}_num_negative_pairs_' \
          '{}_{}_feat_size_{}_{}'.format(
            FLAGS.dataset,
            FLAGS.random_seed,
            num_folds,
            train_ratio,
            val_ratio,
            test_ratio,
            ensure_train_connectivity_str,
            FLAGS.num_negative_samples if FLAGS.negative_sample else 0,
            '_'.join(get_flags_with_prefix_as_list('node_fe', FLAGS)),
            FLAGS.feat_size,
            '_'.join([node_feat.replace('_', '') for node_feat in FLAGS.node_feats])
    )

    tp = join(split_dir, sfn)
    rtn = load(tp)
    if rtn:
        tvt_pairs_dict = rtn
    else:
        tvt_pairs_dict = _load_pair_tvt_splits_helper()
        save(tvt_pairs_dict, tp)
    return tvt_pairs_dict
Example #3
def load_dataset(dataset_name, tvt, node_feats, edge_feats):
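    """Load a cached BiGNNDataset from disk if available; otherwise build it
    from the raw interaction data and cache it for later runs."""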
    if tvt not in ['train', 'val', 'test', 'all']:
        raise ValueError('Unknown tvt specifier {}'.format(tvt))
    name_list = [dataset_name, tvt]
    name_list.append('_'.join(
        [node_feat.replace('_', '') for node_feat in node_feats]))
    f_name = '_'.join(name_list)
    f_path = join(get_save_path(), 'dataset', f_name)
    ld = load(f_path)

    if ld:
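        # Reconstruct the dataset directly from the cached attribute dict.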
        dataset = BiGNNDataset(None, None, None, None, None, None, None, None,
                               ld)
    else:
        try:
            dataset = load_raw_interaction_data(dataset_name, node_feats,
                                                edge_feats, tvt)
        except Exception as e:
            print(e)
            raise FileNotFoundError(
                f'Please get {f_name} from Google Drive') from e

        gc.collect()
        save(dataset.__dict__, f_path)

    return dataset
Example #4
def main():
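    """Restore a trained Network from args.model_path and report test metrics."""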
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    if args.gpu == -1:
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:{}'.format(args.gpu))

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)

    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    #model = Network(args.init_channels, dataset_classes, args.layers, args.auxiliary, genotype)
    model = Network(args)
    model = model.to(device)
    util.load(model, args.model_path)

    logging.info("param size = %fMB", util.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    test_data = MyDataset(args=args, subset='test')

    test_queue = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=2)

    model.drop_path_prob = args.drop_path_prob
    test_acc, test_obj, test_fscores, test_MIoU = infer(
        test_queue, model, criterion)
    logging.info('test_acc %f _fscores %f _MIoU %f', test_acc, test_fscores,
                 test_MIoU)
Example #5
with tf.Session(config=config) as sess:
    # summaries
    summary_writer = tf.summary.FileWriter(summary_dir, graph=sess.graph)

    # initialize weights
    sess.run(tf.global_variables_initializer())
    print('Init successfully!')

    # tf saver
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=None)
    restore_var = [v for v in tf.global_variables()]
    loader = tf.train.Saver(var_list=restore_var)
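    # Restore weights either from the latest checkpoint inside a directory or
    # from an explicitly given checkpoint path.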
    if os.path.isdir(snapshot_dir):
        ckpt = tf.train.get_checkpoint_state(snapshot_dir)
        if ckpt and ckpt.model_checkpoint_path:
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found.')
    else:
        load(loader, sess, snapshot_dir)

    print('Start training ...')

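    # Training loop: run one generator update per step and log every 10 steps.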
    _step, _loss, _summaries = 0, None, None
    while _step < iterations:
        try:
            _, _step, _g_loss, _p_psnr, _summaries = \
                sess.run([g_train_op, g_step, g_loss, train_positive_psnr, summary_op])

            if _step % 10 == 0:
                print('Iteration = {}, global loss = {:.6f}, positive psnr = {:.6f}'.format(_step, _g_loss, _p_psnr))
        except tf.errors.OutOfRangeError:
            # The excerpt is truncated here; this handler is assumed so the
            # try block is syntactically complete. The original may differ.
            print('Data queue exhausted, stopping training.')
            break
Example #6
    def inference_func(ckpt, dataset_name, evaluate_name):
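        """Restore the given checkpoint, compute per-frame PSNR for every test
        video, pickle the PSNR records and evaluate them."""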
        load(loader, sess, ckpt)

        psnr_records = []
        total = 0
        timestamp = time.time()

        if const.INTERPOLATION:
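            # Sample the PSNR once every vol_size frames and fill in the
            # remaining frames by interpolation further below.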
            vol_size = num_his + 1
            for v_id, (video_name, video) in enumerate(video_info.items()):
                length = video['length']
                total += length
                gts = frame_masks[v_id]

                x_ids = np.arange(0, length, vol_size)
                x_ids[-1] = length - 1
                psnrs_ids = np.empty(shape=(len(x_ids),), dtype=np.float32)

                for i, t in enumerate(x_ids):
                    if t == length - 1:
                        start = length - vol_size
                        end = length
                    else:
                        start = t
                        end = t + vol_size

                    video_clip = data_loader.get_video_clip(video_name, start, end)
                    psnr = sess.run(psnr_tensor, feed_dict={video_clips_tensor: video_clip[np.newaxis, ...]})
                    psnrs_ids[i] = psnr

                    print('video = {} / {}, i = {} / {}, psnr = {:.6f}, gt = {}'.format(
                        video_name, num_videos, t, length, psnr, gts[end - 1]))

                # interpolation: fill in PSNR values for the unsampled frames
                inter_func = interpolate.interp1d(x_ids, psnrs_ids)
                ids = np.arange(0, length)
                psnrs = inter_func(ids)
                psnr_records.append(psnrs)

        else:
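            # No interpolation: compute the PSNR of every frame after the
            # first num_his history frames.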
            for v_id, (video_name, video) in enumerate(video_info.items()):
                length = video['length']
                total += length
                psnrs = np.empty(shape=(length,), dtype=np.float32)
                gts = frame_masks[v_id]

                for i in range(num_his, length):
                    video_clip = data_loader.get_video_clip(video_name, i - num_his, i + 1)
                    psnr = sess.run(psnr_tensor, feed_dict={video_clips_tensor: video_clip[np.newaxis, ...]})
                    psnrs[i] = psnr

                    print('video = {} / {}, i = {} / {}, psnr = {:.6f}, gt = {}'.format(
                        video_name, num_videos, i, length, psnr, gts[i]))

                psnrs[0:num_his] = psnrs[num_his]
                psnr_records.append(psnrs)

        result_dict = {'dataset': dataset_name, 'psnr': psnr_records, 'diff_mask': [], 'frame_mask': frame_masks}

        used_time = time.time() - timestamp
        print('total time = {}, fps = {}'.format(used_time, total / used_time))

        # TODO: specify the actual name of the ckpt.
        pickle_path = os.path.join(psnr_dir, os.path.split(ckpt)[-1])
        with open(pickle_path, 'wb') as writer:
            pickle.dump(result_dict, writer, pickle.HIGHEST_PROTOCOL)

        results = evaluate.evaluate(evaluate_name, pickle_path)
        print(results)