Exemplo n.º 1
0
def prepare_data(k, frt, feature, master):
    """Build an image-image similarity matrix in a reduced feature space.

    The feature matrix is min-max scaled, passed through ``reducer`` with
    ``k`` and ``frt``, and turned into a similarity matrix defined as
    ``1 / (euclidean_distance + 1)`` with the diagonal forced to zero.

    Args:
        k: Forwarded to ``reducer`` (presumably the number of latent
            semantics to keep -- confirm against ``reducer``).
        frt: Forwarded to ``reducer`` (presumably the name of the feature
            reduction technique -- confirm against ``reducer``).
        feature: Forwarded to ``get_full_matrix`` to select the data.
        master: When truthy, a single ``get_full_matrix(feature,
            master=True)`` call supplies the data. Otherwise the default
            call and the ``unlabelled=True`` call are stacked, default first.

    Returns:
        A ``(images, meta, img_img)`` tuple, where ``img_img`` is the
        similarity matrix described above.
    """
    scaler = MinMaxScaler()

    if master:
        images, meta, raw = get_full_matrix(feature, master=True)
    else:
        l_images, meta, l_matrix = get_full_matrix(feature)
        u_images, u_meta, u_matrix = get_full_matrix(feature, unlabelled=True)
        # The metadata returned for the default set is extended in place
        # with the unlabelled entries; on key clashes the unlabelled wins.
        meta.update(u_meta)
        images = l_images + u_images
        raw = np.vstack((l_matrix, u_matrix))

    reduced, _, _ = reducer(scaler.fit_transform(raw), k, frt)

    # Image-image similarity: distance 0 maps to 1, larger distances decay
    # towards 0. Self-similarity is zeroed out on the diagonal.
    similarity = 1 / (euclidean_distances(reduced) + 1)
    np.fill_diagonal(similarity, 0)

    return images, meta, similarity
Exemplo n.º 2
0
def svm_feedback(relevant_images, irrelevant_images, images_to_display, query,
                 results):
    """Re-rank ``results`` with an SVM trained on user relevance feedback.

    Relevant images are labelled ``1`` and irrelevant images ``-1``. The
    classifier is fitted on their reduced vectors, then used to classify the
    vectors fetched for ``results + [query]``. Images predicted as ``1`` are
    ordered by the dot product of their vector with ``svclassifier.w``
    (largest first).

    Args:
        relevant_images: Paths marked relevant by the user.
        irrelevant_images: Paths marked irrelevant by the user. When empty,
            a message is printed and ``relevant_images`` is returned as is.
        images_to_display: Total number of images wanted in the output.
        query: Path of the query image; classified together with ``results``.
        results: Paths of the previously returned images.

    Returns:
        Up to ``images_to_display - len(irrelevant_images)`` top-scoring
        positive images, followed by all of ``irrelevant_images``.
    """
    if not irrelevant_images:
        print("Please provide irrelevant images set for svm to work properly.")
        return relevant_images

    classifier_cfg = settings.SVM.CLASSIFIER
    model, k, frt = classifier_cfg.MODEL, classifier_cfg.K, classifier_cfg.FRT

    def fetch(paths):
        # Same lookup for every path set: filter by path in the master DB.
        return get_all_vectors(
            model, f={'path': {
                '$in': paths
            }}, master_db=True)

    _, rel_matrix = fetch(relevant_images)
    _, irel_matrix = fetch(irrelevant_images)
    test_images, test_matrix = fetch(results + [query])

    train_reduced, _, _, test_reduced = reducer(
        np.vstack((rel_matrix, irel_matrix)),
        k,
        frt,
        query_vector=test_matrix)

    # Both the training and the test vectors are multiplied by 2 before use.
    x_train = np.array(train_reduced) * 2
    y_train = np.concatenate((
        np.array([1] * len(rel_matrix)),
        np.array([-1] * len(irel_matrix)),
    ))

    svclassifier = SVM()
    svclassifier.fit(np.array(x_train), np.array(y_train))

    test_reduced = np.array(test_reduced) * 2
    predictions = svclassifier.predict(test_reduced)

    # Keep only the images classified as relevant, keyed by image path.
    positives = {}
    for idx, label in enumerate(predictions):
        if label == 1:
            positives[test_images[idx]] = test_reduced[idx]

    scores = {
        img: np.dot(vec, svclassifier.w)
        for img, vec in positives.items()
    }
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)

    # The irrelevant images are always appended, so they consume part of
    # the display budget.
    slots = images_to_display - len(irrelevant_images)
    top = [img for rank, (img, _) in enumerate(ranked) if rank < slots]

    return top + irrelevant_images
Exemplo n.º 3
0
def prepare_data(k, frt, feature, paths=None):
    """Load, min-max scale and reduce the data matrix for ``feature``.

    Args:
        k: Forwarded to ``reducer``.
        frt: Forwarded to ``reducer``.
        feature: Forwarded to ``get_data_matrix`` to select the data.
        paths: Optional collection of image paths. When truthy, it is passed
            to ``get_data_matrix`` as the filter
            ``{'path': {'$in': paths}}``; otherwise no filter is passed.

    Returns:
        A ``(images, meta, matrix)`` tuple, where ``matrix`` is the first
        value returned by ``reducer`` for the scaled data.
    """
    scaler = MinMaxScaler()

    # Only send a filter when the caller actually restricted the paths.
    extra = {'f': {'path': {'$in': paths}}} if paths else {}
    images, meta, raw = get_data_matrix(feature, **extra)

    reduced, _, _ = reducer(scaler.fit_transform(raw), k, frt)
    return images, meta, reduced
Exemplo n.º 4
0
def run_svm(evaluate, model='lbp', k=30, frt='pca'):
    """Train the SVM on the labelled data and classify the unlabelled data.

    Args:
        evaluate: When truthy, the test set comes from ``build_unlabelled``
            (which also supplies the expected labels) and a classification
            report is printed. Otherwise the test vectors are fetched with
            ``get_all_vectors(..., unlabelled_db=True)``.
        model: Feature model name passed to the data loaders.
        k: Forwarded to ``reducer``.
        frt: Forwarded to ``reducer``.

    Returns:
        A ``(test_paths, y_pred)`` tuple with the test image paths and the
        labels predicted for them.
    """
    x_train, y_train = build_labelled(model)

    expected = None
    if evaluate:
        x_test, expected, paths = build_unlabelled(model)
    else:
        paths, x_test = get_all_vectors(model, f={}, unlabelled_db=True)

    train_reduced, _, _, test_reduced = reducer(
        x_train, k, frt, query_vector=x_test)

    # Both reduced sets are doubled in place before fitting and predicting.
    train_reduced *= 2
    test_reduced *= 2

    classifier = SVM()
    classifier.fit(train_reduced, y_train)
    predictions = classifier.predict(test_reduced)

    if evaluate:
        print(classification_report(expected, predictions))

    return paths, predictions
Exemplo n.º 5
0
    def prepare_data(cls, feature, k_latent_semantics, frt_technique,
                     ignore_metadata):
        """Reduce the labelled and unlabelled matrices in one shared space.

        The unlabelled set is loaded first, then the labelled one. Both
        matrices are stacked (labelled rows on top), reduced together with
        ``reducer`` and split back into their labelled / unlabelled parts.

        Args:
            feature: Forwarded to ``cls.get_data_matrix``.
            k_latent_semantics: Forwarded to ``reducer``.
            frt_technique: Forwarded to ``reducer``.
            ignore_metadata: Forwarded to ``cls.get_data_matrix``.

        Returns:
            ``(l_images, u_images, l_meta, u_meta, r_l_matrix, r_u_matrix)``
            where the last two items are the reduced rows belonging to the
            labelled and unlabelled images respectively.
        """
        # The vectors are expected to exist already for both data sets.
        u_images, u_meta, u_matrix = cls.get_data_matrix(
            feature, unlabelled=True, ignore_metadata=ignore_metadata)
        l_images, l_meta, l_matrix = cls.get_data_matrix(
            feature, ignore_metadata=ignore_metadata)

        stacked = np.vstack((l_matrix, u_matrix))
        reduced, _, _ = reducer(stacked, k_latent_semantics, frt_technique)

        # Labelled rows were stacked first, so they occupy the leading rows.
        split = len(l_images)
        return (l_images, u_images, l_meta, u_meta,
                reduced[:split], reduced[split:])
Exemplo n.º 6
0
def feedback_probab(relevant, irrelevant, t, query, prev_results):
    """Re-rank previous results with probabilistic relevance feedback.

    The vectors of ``prev_results + [query]`` are reduced, scaled and
    binarised. For every reduced feature ``i`` the smoothed estimates

        p_i = (r_i + 0.5) / (R + 1)
        u_i = (n_i - r_i + 0.5) / (N - R + 1)

    are computed, where ``r_i`` / ``n_i`` count the relevant / all images
    whose binary feature is 1, ``R`` is the number of relevant images and
    ``N`` the number of candidates. Each image is scored by the dot product
    of its vector with ``log2(p_i (1 - u_i) / (u_i (1 - p_i)))``.

    NOTE(review): ``model``, ``k`` and ``frt`` are not defined in this
    function; presumably they are module-level settings -- confirm.

    Args:
        relevant: Paths marked relevant. When empty, a message is printed
            and ``prev_results`` is returned unchanged. Every path must be
            part of ``prev_results + [query]`` (``KeyError`` otherwise).
        irrelevant: Unused by this model; kept for a uniform feedback
            interface.
        t: Maximum number of images to return.
        query: Path of the query image.
        prev_results: Paths returned by the previous search round.

    Returns:
        At most ``t`` image paths, highest score first.
    """
    if not relevant:
        print("Probabilistic model requires relevant images for re-ordering.")
        return prev_results

    img_all, img_all_vec = get_all_vectors(
        model, f={'path': {
            '$in': prev_results + [query]
        }}, master_db=True)

    img_all_vec_red, _, __ = reducer(img_all_vec, k, frt)
    img_all_vec_red = scale(img_all_vec_red, 0, 1)

    # Image path -> scaled, reduced vector.
    dict_all_red = {
        name: img_all_vec_red[i] for i, name in enumerate(img_all)
    }

    img_rel_vec_red = np.array([dict_all_red[name] for name in relevant])

    bin_all = np.asarray(makeArrayBinary(img_all_vec_red,
                                         img_all_vec_red.shape[0],
                                         img_all_vec_red.shape[1]))
    bin_rel = np.asarray(makeArrayBinary(img_rel_vec_red,
                                         img_rel_vec_red.shape[0],
                                         img_rel_vec_red.shape[1]))

    R = bin_rel.shape[0]
    N = len(img_all)

    # Per-feature counts over every column of the binary matrices, so the
    # weight vector always has the same length as the image vectors.
    r_counts = (bin_rel == 1).sum(axis=0).tolist()
    n_counts = (bin_all == 1).sum(axis=0).tolist()

    weights = []
    for r_i, n_i in zip(r_counts, n_counts):
        p_i = (r_i + 0.5) / (R + 1)
        # The non-relevant estimate subtracts the relevant *count* r_i,
        # not the probability p_i.
        u_i = (n_i - r_i + 0.5) / (N - R + 1)
        odds = (p_i * (1 - u_i)) / (u_i * (1 - p_i))
        # A feature with non-positive odds gets a neutral weight instead
        # of being dropped, which would misalign weights and features.
        weights.append(math.log(odds, 2) if odds > 0 else 0.0)
    weights = np.array(weights)

    new_result = sorted(
        ((name, np.dot(vec, weights)) for name, vec in dict_all_red.items()),
        key=lambda x: x[1],
        reverse=True)

    # Slicing tolerates t being larger than the number of candidates.
    return [name for name, _ in new_result[:max(t, 0)]]
Exemplo n.º 7
0
    
    # Only 1 input to be taken, that is k (latent semantics), number of features to be extracted 
    k_each = args.Features
            
    # On extensive testing, the best feature extraction model was found out to be SIFT
    model = settings.TASK1_CONFIG.MODEL

    # On extensive testing, the best feature reduction technique was found to be PCA
    feature = settings.TASK1_CONFIG.FRT

    # Generating the vectors for Dorsal Labelled, Palmar labelled and the test vectors
    # Also fetching the labels of unlabelled images so as to check accuracy later
    dorsal_vectors, palmar_vectors, test_data, test_data_paths = generate_vec()

    # Applying PCA to Dorsal Images and fetching the 'k' latent semantics 
    reduced_dorsal_vectors, _, _, _, dorsal_pca = reducer(dorsal_vectors,k_each,feature,get_scaler_model=True)
    dorsal_variance_ratio = dorsal_pca.explained_variance_ratio_
    print("Computed ",k_each," Latent Semantics for Dorsal")

    # Applying PCA to Palmar Images and fetching the 'k' latent semantics 
    reduced_palmar_vectors, _, _, _, palmar_pca = reducer(palmar_vectors,k_each,feature,get_scaler_model=True)
    palmar_variance_ratio = palmar_pca.explained_variance_ratio_
    print("Computed ",k_each," Latent Semantics for Palmar")
    
    # Applying PCA to Test Images and fetching the 'k' latent semantics 
    reduced_test_data, _, _, _, test_pca = reducer(test_data,k_each,feature,get_scaler_model=True)
    test_variance_ratio = test_pca.explained_variance_ratio_

    # Initiate List that will store the total dorsal dot product scores for each test image   
    dorsal = []
Exemplo n.º 8
0
    images, feature_space = utils.get_all_vectors(
        settings.PPR.CLASSIFIER.FEATURE)
    feature_space = min_max_scaler.fit_transform(feature_space)

    meta = utils.get_metadata()
    meta = {m['path']: m for m in meta}
    """
    u_images, u_feature_space = utils.get_all_vectors(
        settings.PPR.CLASSIFIER.FEATURE, unlabelled_db=True)
    u_feature_space = min_max_scaler.fit_transform(u_feature_space)

    matrix = np.vstack((
        feature_space,
        u_feature_space,
    ))

    matrix, eigen_values, latent_vs_old = reducer(
        matrix,
    """
    matrix, eigen_values, latent_vs_old = reducer(feature_space,
                                                  settings.PPR.CLASSIFIER.K,
                                                  settings.PPR.CLASSIFIER.FRT)

    dm = helper.build_matrix_with_labels(matrix, images, meta)
    """
    dm = helper.build_labelled_matrix(matrix, images + u_images,
                                      'aspectOfHand')
    """
    evaluate(dm)
Exemplo n.º 9
0
        sub_meta[meta[img]['id']] = meta[img]

    # sub id to order in matrix
    sub_to_idx = {sub: idx for idx, sub in enumerate(subs)}
    # index to sub id
    idx_to_sub = [0] * len(sub_to_idx)
    for sub in sub_to_idx:
        idx_to_sub[sub_to_idx[sub]] = sub
    # A subject subject similarity index
    sub_sub = np.zeros((len(subs), len(subs),))

    for sub1 in sub_to_idx:
        for sub2 in sub_to_idx:
            sub_sub[sub_to_idx[sub1], sub_to_idx[sub2]] = img_img[subs[sub1],:].take(subs[sub2], axis=1).mean()

    w, _, h = reducer(sub_sub, args.k_latent_semantics, "nmf")

    # Print term-weight pairs
    get_term_weight_pairs(w, "task7_{}.csv".format(args.k_latent_semantics))
    sub_weight = [
        sorted([("z{}".format(idx), weight,) for idx, weight in enumerate(row)], key=lambda x: x[1])
        for row in w
    ]

    output.write_to_file("visualize_task7.html",
                         "task7-{}.html".format(args.k_latent_semantics),
                         vectors=sub_weight,
                         subs=subs,
                         idx_to_sub=idx_to_sub,
                         images=images,
                         sub_meta=sub_meta,
Exemplo n.º 10
0
    img_meta = []

    try:
        for m in meta:
            images.append(m['path'])
            img_meta.append([
                m["age"], mapping[m["gender"]], mapping[m["skinColor"]],
                mapping[m["accessories"]], m["nailPolish"],
                mapping[m["aspectOfHand"].split()[0]],
                mapping[m["aspectOfHand"].split()[1]], m["irregularities"]
            ])
    except KeyError:
        raise Exception("Invalid metadata detected")

    vectors, eigen_values, latent_vs_old = reducer(img_meta,
                                                   args.k_latent_semantics,
                                                   "nmf")

    get_term_weight_pairs(vectors,
                          "task8_{}.csv".format(args.k_latent_semantics))
    get_term_weight_pairs(latent_vs_old,
                          "task8_{}.csv".format(args.k_latent_semantics))

    # Extra Credit
    # image path with a vector in the latent semantic space
    data_z = zip(images, vectors)
    # image path for each latent semantic in h
    feature_z = [(idx, images[np.argmax(np.dot(img_meta, i))])
                 for idx, i in enumerate(latent_vs_old)]

    output.write_to_file("visualize_data_z.html",