    def val_score(self):
        """
        Compute MAP@5 score for validation dataset
        """
        val_known = load_pickle_file(val_known_file)
        tmp = load_pickle_file(val_submit_file)
        val_submit = [ img for (img, _) in tmp ]
        y_true = [ w for (_, w) in tmp ]
        del tmp

        fknown = self.branch_model.predict_generator(FeatureGen(val_known, self.img_gen.read_for_testing), max_queue_size=20, workers=8, verbose=0)
        fsubmit = self.branch_model.predict_generator(FeatureGen(val_submit, self.img_gen.read_for_testing), max_queue_size=20, workers=8, verbose=0)
        score = self.head_model.predict_generator(ScoreGen(fknown, fsubmit), max_queue_size=20, workers=8, verbose=0)
        score = self.score_reshape(score, fknown, fsubmit)

        img2whale = load_pickle_file(img2whale_file)

        best_5 = []
        for i, _ in enumerate(tqdm(val_submit)):
            t = []
            s = set()
            a = score[i,:]
            for j in list(reversed(np.argsort(a))):
                img = val_known[j]
                whale = img2whale[img]
                if whale not in s:
                    s.add(whale)
                    t.append(whale)
                if len(t) == 5:
                    break
            assert len(t) == 5 and len(s) == 5
            best_5.append(t)

        map_5 = self.map5(best_5, y_true)
        return map_5
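For reference, self.map5 is assumed to compute the standard MAP@5 metric over the top-5 predictions built above; a minimal sketch of such a metric (the name and argument order mirror the call above, but this implementation is an assumption, not the project's own code):

import numpy as np

def map5_sketch(best_5, y_true):
    """Mean average precision at 5 with a single ground-truth label per sample:
    score 1/rank of the first correct prediction within the top 5, else 0."""
    scores = []
    for preds, true_label in zip(best_5, y_true):
        score = 0.0
        for rank, pred in enumerate(preds[:5], start=1):
            if pred == true_label:
                score = 1.0 / rank
                break
        scores.append(score)
    return float(np.mean(scores))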
    def __init__(self, lr, l2, model_name, histories=None, img_shape=(384, 384, 1), step=0, use_val=True, small_dataset=False):
        self.model, self.branch_model, self.head_model = build_model(lr, l2)
        # Avoid a mutable default argument for histories
        self.histories = histories if histories is not None else []
        self.step = step
        self.img_shape = img_shape
        self.img_gen = ImageGenerator()
        self.best_map5 = 0
        self.model_name = model_name
        # Make callback list
        self.callback_list = self.make_callback_list()
        # Load training and validation examples (optionally the small dataset)
        if small_dataset:
            print('SMALL DATASET')
            self.train = load_pickle_file(train_examples_small_file)
            validation_data = load_pickle_file(validation_examples_small_file)
        else:
            self.train = load_pickle_file(train_examples_file)
            validation_data = load_pickle_file(validation_examples_file)
        if use_val:
            self.validation = ValData(validation_data, self.img_gen.read_for_testing, batch_size=16)
        else:
            self.validation = None
        # Map each whale id to its training images
        self.w2ts = self.make_w2ts()
    def test_prepare_for_wikidata_function(self):
        the_so_called_correspondance = utils.load_pickle_file(
            "../tests/cellosaurus_informations_to_wikidata_ids.pickle")

        species = the_so_called_correspondance["species"]
        references = the_so_called_correspondance["references"]
        categories = utils.get_cell_line_category_to_wikidata("../project/category.txt")
        diseases = the_so_called_correspondance["diseases"]
        cellosaurus_dump_in_dictionary_format = utils.format_cellosaurus_dump_as_dictionary("../project/test_cellosaurus.txt")
        wikidata = utils.query_wikidata_for_cell_lines()
        releaseID = "Q87574023"
        login = wdi_login.WDLogin(WDUSER, WDPASS)

        cell_line = utils.CellossaurusCellLine(wdi_login_object=login,
                                      release_qid=releaseID,
                                      cellosaurus_dump=cellosaurus_dump_in_dictionary_format,
                                      wikidata_dictionary_with_existing_cell_lines=wikidata,
                                      references=references,
                                      species=species,
                                      cell_line_categories=categories,
                                      diseases=diseases,
                                      cell_line_id="CVCL_2260")
        data, data_to_delete = cell_line.prepare_for_wikidata()
        print(data)
        print(data_to_delete)

        self.assertEqual(1, 1)
        self.assertEqual(cell_line.cell_line_id, "CVCL_2260")
    def test_save__load_pickle(self):
        test_dictionary = {"a": 1, "b": 2}

        utils.save_pickle_file(test_dictionary, "/tmp/test.pickle")
        test_dictionary_after_processing = utils.load_pickle_file("/tmp/test.pickle")

        os.remove("/tmp/test.pickle")

        self.assertEqual(test_dictionary, test_dictionary_after_processing)
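The test above exercises utils.save_pickle_file and utils.load_pickle_file; a minimal sketch of what such helpers typically look like (the signatures mirror the calls above, the bodies are assumptions):

import pickle

def save_pickle_file(obj, path):
    # Serialize obj to path (argument order follows the test above)
    with open(path, "wb") as f:
        pickle.dump(obj, f)

def load_pickle_file(path):
    # Deserialize and return the object stored at path
    with open(path, "rb") as f:
        return pickle.load(f)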
Example #5
def load_model(KB_id):
    """loads all necessary Data"""

    KB_id = 1
    path_live = Path(ROOT) / "models" / f"KB_id_{KB_id}" / "live"

    # get the configuration
    config = configparser.ConfigParser()
    config.read(path_live / "config.cfg")
    config_set = "DEFAULT"
    without_stopwords = config[config_set].getboolean("without_stopwords")
    num_of_sentences = config[config_set].getint("num_of_sentences")
    all_docs_kb_filepath = path_live / config[config_set]["all_docs_kb_filename"]
    
    # load model and scaler
    model = load(path_live / "logreg_model.joblib")
    scaler = load_pickle_file(path_live / "std_scaler.pkl")

    # load the kb that the model was built on
    logger.info("# load the kb that the model was built on")
    all_docs_kb = load_json_file(all_docs_kb_filepath, logger)

    # load kb with vectors
    data_kb_with_vectors = load_pickle_file(path_live / DATA_KB_WITH_VECTORS_FILE)
    
    # load the two dictionaries
    qid_to_class, class_to_qid = (
        load_pickle_file(path_live / "qid_to_class.pkl"),
        load_pickle_file(path_live / "class_to_qid.pkl"),
    )

    return (
        model,
        scaler,
        all_docs_kb,
        data_kb_with_vectors,
        qid_to_class,
        class_to_qid,
        without_stopwords,
        num_of_sentences,
    )
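A hypothetical call site unpacking the tuple returned above (the variable names are illustrative and mirror the return order):

(model, scaler, all_docs_kb, data_kb_with_vectors,
 qid_to_class, class_to_qid, without_stopwords, num_of_sentences) = load_model(KB_id=1)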
    def test_make_statement(self):
        the_so_called_correspondance = utils.load_pickle_file(
            "../tests/cellosaurus_informations_to_wikidata_ids.pickle")
        references = the_so_called_correspondance["references"]
        statement = utils.make_statement(statement_property="P31",
                                         statement_value="Q5",
                                         references=references)

        statement_native_wdi = wdi_core.WDItemID(value="Q5",
                                                 prop_nr="P31",
                                                 references=references)
        self.assertEqual(statement, statement_native_wdi)
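The assertion above implies that utils.make_statement is a thin wrapper around wdi_core.WDItemID; a minimal sketch under that assumption:

from wikidataintegrator import wdi_core

def make_statement(statement_property, statement_value, references):
    # Assumed wrapper: build an item-valued statement with the given property and references
    return wdi_core.WDItemID(value=statement_value,
                             prop_nr=statement_property,
                             references=references)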
def main():
    tic = time.time()

    known = load_pickle_file(train_known_file)
    submit = load_pickle_file(train_submit_file)

    print('inference HWI')
    args = parse_args()
    model_path = models_path + args.model_filename
    #submission_path = output_path + args.output_filename
    threshold = args.threshold

    # Load model
    weights = load_model(model_path).get_weights()
    # weights =  keras.models.load_model(model_path, custom_objects={'contrastive_loss': contrastive_loss}).get_weights()
    model = Model(0, 0, 'submission', use_val=False)
    model.model.set_weights(weights)

    # Evaluate model
    fknown = model.branch_model.predict_generator(FeatureGen(
        known, model.img_gen.read_for_testing),
                                                  max_queue_size=20,
                                                  workers=8,
                                                  verbose=0)
    fsubmit = model.branch_model.predict_generator(FeatureGen(
        submit, model.img_gen.read_for_testing),
                                                   max_queue_size=20,
                                                   workers=8,
                                                   verbose=0)
    score = model.head_model.predict_generator(ScoreGen(fknown, fsubmit),
                                               max_queue_size=20,
                                               workers=8,
                                               verbose=0)
    score = model.score_reshape(score, fknown, fsubmit)

    # Generate submission file
    prepare_submission(threshold, args.output_filename, score, known, submit,
                       args.model_filename)
    toc = time.time()
    print("Inference time: ", (toc - tic) / 60, 'mins')
def prepare_submission(threshold, filename, score, known, submit, model_file):
    """
    Generate Kaggle submission file.
    @param threshold: scores below this are predicted as 'new_whale'
    @param filename: submission file name
    """
    image2whale = load_pickle_file(img2whale_file)

    # Create scores dir if doesn't exist
    scores_dir = callback_path + 'scores/'
    os.makedirs(scores_dir, exist_ok=True)

    # Create model_dir if doesn't exist
    model_dir = output_path + model_file.split('/')[0] + '/'
    os.makedirs(model_dir, exist_ok=True)

    new_whale = 'new_whale'

    # Prepare file paths
    score_file = scores_dir + filename.replace('.h5', '.score')
    output_file = model_dir + filename

    # Code for creating submission file, 5 best scores for each whale image
    with open(score_file, 'w+') as sf:
        with open(output_file, 'wt', newline='\n') as f:
            f.write('Image,Id\n')
            for i, p in enumerate(tqdm(submit)):
                t = []
                s = set()
                a = score[i, :]
                probs = []
                for j in list(reversed(np.argsort(a))):
                    img = known[j]
                    if a[j] < threshold and new_whale not in s:
                        s.add(new_whale)
                        t.append(new_whale)
                        probs.append(a[j])
                        if len(t) == 5:
                            break
                    for w in image2whale[img]:
                        assert w != new_whale
                        if w not in s:
                            s.add(w)
                            t.append(w)
                            probs.append(a[j])
                            if len(t) == 5:
                                break
                    if len(t) == 5:
                        break
                assert len(t) == 5 and len(s) == 5
                f.write(p + ',' + ' '.join(t[:5]) + '\n')
                sf.write(p + ',' + ' '.join(map(str, probs)) + '\n')
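For illustration, with made-up values: the submission file starts with the Image,Id header and then has one line per test image with its five space-separated predictions, e.g.
    0001f9222.jpg,new_whale w_23a388d w_9b5109b w_0369a5c w_9c506f6
while the matching line of the .score file records the raw scores at which those predictions were taken, e.g.
    0001f9222.jpg,0.41 0.38 0.31 0.27 0.22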
Example #9
def main():
    datapath = "../dataset/data_2/redial/"
    train_file = datapath + "train_data.jsonl"
    test_file = datapath + "test_data.jsonl"
    valid_file = datapath + "valid_data.jsonl"

    entity2entityId = load_pickle_file(datapath + "entity2entityId.pkl")
    text_dict = load_pickle_file(datapath + "text_dict.pkl")
    id2entity = load_pickle_file(datapath + "id2entity.pkl")

    train_dataset = read_data(train_file, text_dict, entity2entityId,
                              id2entity)
    test_dataset = read_data(test_file, text_dict, entity2entityId, id2entity)
    valid_dataset = read_data(valid_file, text_dict, entity2entityId,
                              id2entity)

    with open(datapath + "dataset_train.pkl", "wb") as f:
        pkl.dump(train_dataset, f)
    with open(datapath + "dataset_test.pkl", "wb") as f:
        pkl.dump(test_dataset, f)
    with open(datapath + "dataset_valid.pkl", "wb") as f:
        pkl.dump(valid_dataset, f)
    def make_w2ts(self):
        w2ts = {}
        whale2imgs = load_pickle_file(whale2imgs_file)
        for whale, imgs in tqdm(whale2imgs.items()):
            for img in imgs:
                if img in self.train:
                    if whale not in w2ts:
                        w2ts[whale] = []
                    if img not in w2ts[whale]:
                        w2ts[whale].append(img)
        for w, ts in w2ts.items():
            w2ts[w] = np.array(ts)

        return w2ts
    def __init__(self, path):
        with open("model_files/output.data", 'rb') as f:
            self.all_articles = pickle.load(f)['articles']
        self.all_results = load_pickle_file(path)
        self.inner_results = {}
        self.directory_results = {}
def make_dicts(reset_all, make_val):

    # Create the meta directory for all dictionaries generated below, if it doesn't already exist
    os.makedirs(meta_dir, exist_ok=True)

    train_data = dict([
        (img, whale) for (_, img, whale) in read_csv(train_csv).to_records()
    ])
    test_data = [img for (_, img, _) in read_csv(sample_csv).to_records()]

    # Load whale_to_imgs dictionary if exists, or create it otherwise
    if isfile(whale2imgs_file) and not reset_all:
        whale2imgs = load_pickle_file(whale2imgs_file)
    else:
        whale2imgs = {}
        for img, whale in tqdm(train_data.items()):
            if whale not in whale2imgs:
                whale2imgs[whale] = []
            if img not in whale2imgs[whale]:
                whale2imgs[whale].append(img)

        save_to_pickle(whale2imgs_file, whale2imgs)

    if not isfile(img2whale_file) or reset_all:
        # Find elements from the training set other than 'new_whale'
        img2whale = {}
        for img, whale in tqdm(train_data.items()):
            if whale != 'new_whale':
                if img not in img2whale:
                    img2whale[img] = whale
        train_known = sorted(list(img2whale.keys()))

        save_to_pickle(img2whale_file, img2whale)
        save_to_pickle(train_known_file, train_known)
        save_to_pickle(train_submit_file, test_data)

    if not (isfile(train_examples_file)
            and isfile(validation_examples_file)) or reset_all:
        train_examples = []
        validation_examples = []
        lonely = []
        new_whale = []
        val_match = []
        lonely_count = len([x for x in whale2imgs.values()
                            if len(x) == 1])  # 2073
        couple_count = len([x for x in whale2imgs.values()
                            if len(x) == 2])  # 1285
        new_count = len([x for x in train_data.values()
                         if x == 'new_whale'])  # 9664
        # additional matching whale count needed for creating a balanced validation dataset (same number of matching and non-matching examples)
        extra_count = lonely_count - couple_count  # 2073 - 1285 = 788

        val_known = []
        val_submit = []
        matching_count = 0
        small_train_examples = []
        small_count = 0

        if make_val:
            for whale, imgs in tqdm(whale2imgs.items()):
                if whale == 'new_whale':
                    new_whale += imgs
                elif len(imgs) == 1:
                    lonely += imgs
                    val_known += imgs
                elif len(imgs) == 2:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[1])
                    val_submit.append((imgs[0], whale))
                elif len(imgs) >= 4 and matching_count < extra_count:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[0])
                    val_submit.append((imgs[1], whale))
                    matching_count += 1
                    train_examples += imgs[2:]
                    if (small_count + 2) % 10 < 2:
                        small_train_examples += imgs[2:]
                        small_count += 2
                else:
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                        small_count += len(imgs)
        else:
            for whale, imgs in tqdm(whale2imgs.items()):
                if whale == 'new_whale':
                    new_whale += imgs
                elif len(imgs) == 1:
                    lonely += imgs
                    val_known += imgs
                elif len(imgs) == 2:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[1])
                    val_submit.append((imgs[0], whale))
                    train_examples += imgs
                    if (small_count + 2) % 10 < 2:
                        small_train_examples += imgs
                        small_count += 2
                elif len(imgs) >= 4 and matching_count < extra_count:
                    val_match.append((imgs[0], imgs[1], 1))
                    val_known.append(imgs[0])
                    val_submit.append((imgs[1], whale))
                    matching_count += 1
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                        small_count += len(imgs)
                else:
                    train_examples += imgs
                    if (small_count + len(imgs)) % 10 < len(imgs):
                        small_train_examples += imgs
                        small_count += len(imgs)

        print('lonely whales count: ', lonely_count)
        print('new whales count: ', new_count)
        print('couple whales count: ', couple_count)
        print('extra whales count: ', extra_count)

        random.shuffle(lonely)
        val_unmatch = list(
            zip(lonely,
                np.random.choice(new_whale, size=lonely_count, replace=False),
                np.zeros(lonely_count, dtype=np.int8)))
        validation_examples = val_match + val_unmatch
        random.shuffle(validation_examples)
        random.shuffle(train_examples)

        # small_train_size = len(train_examples) // 10
        # small_train_examples = train_examples[:small_train_size]

        small_validation_size = len(validation_examples) // 10
        small_validation_examples = validation_examples[:small_validation_size]

        # print('TRAIN')
        # print(train_examples[:10])
        # print('VALIDATION')
        # print(validation_examples[:10])

        print('Train size: ', len(train_examples))
        print('Validation size: ', len(validation_examples))

        print('val_known size: ', len(val_known))
        print('val_submit size: ', len(val_submit))

        save_to_pickle(train_examples_file, train_examples)
        save_to_pickle(validation_examples_file, validation_examples)

        save_to_pickle(train_examples_small_file, small_train_examples)
        save_to_pickle(validation_examples_small_file,
                       small_validation_examples)

        save_to_pickle(val_known_file, val_known)
        save_to_pickle(val_submit_file, val_submit)
Example #13
            break

    return shift_keypoints


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pickle_file",
                        default="pickles/image_data_normalized.p")
    parser.add_argument("--cluster", default="basic")
    parser.add_argument("--save_folder", default="save_results")

    args = parser.parse_args()
    pickle_file = args.pickle_file
    cluster = args.cluster
    images, keypoints = load_pickle_file(pickle_file)
    keypoints = np.array(keypoints)

    keypoints = keypoints.reshape(keypoints.shape[0], keypoints.shape[1], 2)

    if cluster == "basic":
        save_path = args.save_folder
        cluster_basic(keypoints, save_path)

    elif cluster == "kmeans":
        grps, centers = cluster_kmeans(keypoints, DEFAULT_K_VALUE,
                                       args.save_folder)
        #sse = compute_sse(k, keypoints,grps, centers)
    elif cluster == "mean_shift":
        save_path = args.save_folder
        new_points = mean_shift(keypoints)
#!/usr/bin/env python3

import os
import cv2
import shutil

from utils import load_pickle_file

allowed_width = 256
allowed_height = 192

images, labels = load_pickle_file("pickles/image_data_normalized.p")

if not os.path.exists("test_images"):
    os.makedirs("test_images")

for img_path in images:
    img = cv2.imread(img_path)
    resized_img = cv2.resize(img, (allowed_width, allowed_height))

    parts = img_path.split("/")
    save_path = os.path.join("test_images", "_".join(parts[-2:]))
    cv2.imwrite(save_path, resized_img)
Example #15
    def __init__(self, opt):
        self.entity2entityId = load_pickle_file(opt["entity2entityId"])
        self.relation2relationId = load_pickle_file(opt["relation2relationId"])
Example #16
    def __init__(self, opt, transform):
        self.transform = transform
        self.entity2entityId = load_pickle_file(opt["entity2entityId"])
        self.relation2relationId = load_pickle_file(opt["relation2relationId"])
        self.dataset = load_pickle_file(opt["dataset"])
        self.movie_ids = load_pickle_file(opt["movie_ids"])