Exemplo n.º 1
0
def generate_representor(data_dir, dictionary_path, subclass, num_coding_labels=10):
    """Build one bag-of-visual-words representation per case.

    Patches of each case are grouped by their coding label; each group is
    matched against the vocabulary for that label, and every patch votes for
    its nearest vocabulary word. Votes accumulate into a
    (num_coding_labels, vocabulary_size) histogram that is flattened per case.

    Args:
        data_dir: directory handed to extract_patches_multidir.
        dictionary_path: path handed to load_vocabulary.
        subclass: single subclass name to extract patches for.
        num_coding_labels: number of coding-label rows in the per-case
            histogram. Default 10 preserves the previously hard-coded value.

    Returns:
        (representers, labeles): list of flattened per-case histograms and
        the per-case labels returned by extract_patches_multidir.
    """
    vocabulary_dict = load_vocabulary(dictionary_path)
    # NOTE(review): assumes every vocabulary in the dict has the same word
    # count as entry 0 -- confirm against load_vocabulary.
    vocabulary_size = np.shape(vocabulary_dict[0])[0]
    representers = []
    patches, coding_labeles, labeles = extract_patches_multidir(
        data_dir, subclasses=[subclass], return_flag=True)
    for case_index, cur_patches in enumerate(patches):
        cur_case_representor = np.zeros([num_coding_labels, vocabulary_size])
        # Group this case's patches by their coding label.
        patches_coding_labeles = {}
        for patch_index, cur_patch in enumerate(cur_patches):
            cur_coding_label = coding_labeles[case_index][patch_index]
            patches_coding_labeles.setdefault(cur_coding_label, []).append(cur_patch)
        for key, cur_patches_coding_label in patches_coding_labeles.items():
            cur_vocabulary = vocabulary_dict[key]
            distance_arr = cal_distance(cur_patches_coding_label,
                                        cur_vocabulary)
            # One vote per patch for its nearest vocabulary word.
            for row in distance_arr:
                cur_case_representor[int(key), np.argmin(row)] += 1
        representers.append(cur_case_representor.flatten())
    return representers, labeles
Exemplo n.º 2
0
def generate_representor_version2(data_dir, dictionary_path, subclass, vectorizer=None):
    """Represent each case as char n-gram (1..3) term-frequency features.

    Patches are either loaded from a per-case .npy cache under save_dir or
    freshly extracted (and then written to the cache). All patches are
    assigned k-means cluster ids, each case's id sequence is serialized to a
    string by convert2str, and a TfidfVectorizer (idf disabled, so pure term
    frequencies) turns the strings into n-gram features.

    Args:
        data_dir: directory handed to extract_patches_multidir.
        dictionary_path: path to a joblib-pickled k-means model.
        subclass: single subclass name; also names the cache sub-directory.
        vectorizer: optional pre-fitted TfidfVectorizer (e.g. from the
            training split); a new one is fitted here when None.

    Returns:
        (features, labeles, vectorizer): float32 feature matrix, per-case
        labels, and the (possibly newly fitted) vectorizer.
    """
    save_dir = '/home/give/PycharmProjects/ICPR2018/LeaningBased/BoVW-NGram/patches'
    save_dir = os.path.join(save_dir, subclass)
    paths = glob(os.path.join(save_dir, '*.npy'))

    kmeans_model = joblib.load(dictionary_path)
    if len(paths) == 0:
        patches, _, labeles = extract_patches_multidir(data_dir, subclasses=[subclass], return_flag=True,
                                                       patch_size=3)
    else:
        # Cache filename convention: <case_index>_<label>.npy
        patches = [np.load(path) for path in paths]
        labeles = [int(os.path.basename(path).split('.npy')[0].split('_')[1]) for path in paths]
    all_patches = []
    counts = []
    for case_index, cur_patches in enumerate(patches):
        if len(paths) == 0:
            # Populate the cache so the next call can skip extraction.
            np.save(os.path.join(save_dir, str(case_index) + '_' + str(labeles[case_index])), cur_patches)
        all_patches.extend(cur_patches)
        counts.append(len(cur_patches))
    # Single-argument print form works under both Python 2 and 3.
    print('all patches shape are ' + str(np.shape(all_patches)))
    predicted_labels = kmeans_model.predict(all_patches)
    strs = convert2str(predicted_labels, counts)
    if vectorizer is None:
        vectorizer = TfidfVectorizer(analyzer='char', min_df=1, ngram_range=(1, 3), use_idf=False, stop_words=None)
        vectorizer = vectorizer.fit(strs)
    crs_matrix = vectorizer.transform(strs).toarray()
    return np.asarray(crs_matrix, np.float32), labeles, vectorizer
Exemplo n.º 3
0
def generate_representor(data_dir, dictionary_path, subclass, phase_name):
    """Build one vocabulary-sized word histogram per case (single dictionary).

    All patches from all cases are compared against one vocabulary in a single
    cal_distance call; each patch votes for its nearest word, and the votes
    are split back per case using the recorded per-case patch counts.

    Args:
        data_dir: directory handed to extract_patches_multidir.
        dictionary_path: path handed to load_vocabulary.
        subclass: single subclass name to extract patches for.
        phase_name: phase selector forwarded to extract_patches_multidir.

    Returns:
        (representers, labeles): list of per-case count histograms (shape
        (vocabulary_size,)) and the per-case labels.
    """
    dictionary = load_vocabulary(dictionary_path)
    vocabulary_size = np.shape(dictionary)[0]
    representers = []
    patches, coding_labeles, labeles = extract_patches_multidir(
        data_dir,
        subclasses=[subclass],
        return_flag=True,
        phase_name=phase_name)
    all_patches = []
    counts = []
    for cur_patches in patches:
        print(np.shape(cur_patches))
        all_patches.extend(cur_patches)
        counts.append(len(cur_patches))
    # One distance computation for every patch against the whole vocabulary.
    all_distance_arr = cal_distance(all_patches, dictionary)
    start = 0
    for count in counts:
        distance_arr = all_distance_arr[start:start + count]
        cur_case_representor = np.zeros([1, vocabulary_size])
        # Each patch votes for its nearest vocabulary word.
        for row in distance_arr:
            cur_case_representor[0, np.argmin(row)] += 1
        representers.append(cur_case_representor.squeeze())
        start += count
    return representers, labeles
Exemplo n.º 4
0
def generate_representor_version2(data_dir, dictionary_path, subclass):
    """Represent each case as a normalized histogram of k-means assignments.

    All patches are assigned cluster ids by the loaded k-means model; the ids
    are split back per case and histogrammed into vocabulary_size bins.

    Args:
        data_dir: directory handed to extract_patches_multidir.
        dictionary_path: path to a joblib-pickled k-means model.
        subclass: single subclass name to extract patches for.

    Returns:
        (representers, labeles): list of per-case density histograms and the
        per-case labels.
    """
    kmeans_model = joblib.load(dictionary_path)
    vocabulary_size = np.shape(kmeans_model.cluster_centers_)[0]
    representers = []
    patches, coding_labeles, labeles = extract_patches_multidir(data_dir, subclasses=[subclass], return_flag=True)
    all_patches = []
    counts = []
    for cur_patches in patches:
        print(np.shape(cur_patches))
        all_patches.extend(cur_patches)
        counts.append(len(cur_patches))
    predicted_labels = kmeans_model.predict(all_patches)
    start = 0
    for count in counts:
        cur_predicted_label = predicted_labels[start: start + count]
        # `normed` was removed from np.histogram in NumPy >= 1.24;
        # `density=True` is the supported equivalent.
        # NOTE(review): without an explicit range= the bin edges span only the
        # cluster ids present in this case; range=(0, vocabulary_size) looks
        # intended -- confirm before changing, as it alters feature values.
        representer = np.histogram(cur_predicted_label, bins=vocabulary_size, density=True)[0]
        representers.append(np.array(representer).squeeze())
        start += count
    return representers, labeles
Exemplo n.º 5
0
def generate_representor(data_dir, dictionary_path, subclass):
    """Build one vocabulary-sized word histogram per case (single dictionary).

    All patches from all cases are compared against one vocabulary in a single
    cal_distance call; each patch votes for its nearest word, and the votes
    are split back per case using the recorded per-case patch counts.

    Args:
        data_dir: directory handed to extract_patches_multidir.
        dictionary_path: path handed to load_vocabulary.
        subclass: single subclass name to extract patches for.

    Returns:
        (representers, labeles): list of per-case count histograms (shape
        (vocabulary_size,)) and the per-case labels.
    """
    dictionary = load_vocabulary(dictionary_path)
    vocabulary_size = np.shape(dictionary)[0]
    representers = []
    patches, coding_labeles, labeles = extract_patches_multidir(
        data_dir, subclasses=[subclass], return_flag=True)
    all_patches = []
    counts = []
    for cur_patches in patches:
        print(np.shape(cur_patches))
        all_patches.extend(cur_patches)
        counts.append(len(cur_patches))
    # One distance computation for every patch against the whole vocabulary.
    all_distance_arr = cal_distance(all_patches, dictionary)
    start = 0
    for count in counts:
        distance_arr = all_distance_arr[start:start + count]
        cur_case_representor = np.zeros([1, vocabulary_size])
        # Each patch votes for its nearest vocabulary word.
        for row in distance_arr:
            cur_case_representor[0, np.argmin(row)] += 1
        representers.append(cur_case_representor.squeeze())
        start += count
    return representers, labeles