Пример #1
0
def classify0(to_test_matrix, training_matrix, labels, k):
    """Return the label that occurs most often among the first k ranked rows.

    The input is repeated once per training row, handed together with the
    training data to ``distances_of_two_matrixes``, and the labels found at
    the first ``k`` indices of the result's ``argsort()`` are tallied with
    ``count_by``.

    NOTE(review): ``tile``, ``.shape`` and ``.argsort()`` look like NumPy, in
    which case the first ``k`` indices are the ``k`` smallest distances --
    confirm. ``distances_of_two_matrixes`` and ``count_by`` are defined
    elsewhere; presumably they yield one distance per training row and a
    label -> count mapping respectively.

    Args:
        to_test_matrix: The sample to classify.
        training_matrix: Training samples; ``shape[0]`` is used as the row
            count.
        labels: Indexable collection; ``labels[i]`` is the label of row ``i``.
        k: Number of ranked rows that take part in the vote.

    Returns:
        The label with the highest vote count. The sort is stable, so on a
        tie the label that comes first in the tally's item order is returned.
    """
    row_total = training_matrix.shape[0]
    # One copy of the input per training row, so both operands line up.
    repeated_input = tile(to_test_matrix, (row_total, 1))

    ranking = distances_of_two_matrixes(repeated_input, training_matrix).argsort()
    voters = ranking[:k]

    votes = count_by(lambda idx: labels[idx], voters)
    ranked_votes = list(votes.items())
    ranked_votes.sort(key=lambda pair: pair[1], reverse=True)

    return ranked_votes[0][0]
def scan_D(D, Ck, min_support):
    """Keep the candidate itemsets whose support in the dataset is high enough.

    A candidate is matched once for every row of ``D`` that contains all of
    its elements; its support is that match count divided by ``len(D)``.

    The matches are kept in a real list because they are traversed twice
    (once to count, once to filter). A lazy ``map`` iterator -- which is what
    ``map`` returns on Python 3 -- would be exhausted by the counting pass and
    leave nothing to filter, so no itemset would ever be retained.

    NOTE(review): ``count_by`` and ``identity`` are defined elsewhere;
    ``count_by(identity, items)`` is presumably a mapping from each distinct
    item to its number of occurrences -- confirm.

    Args:
        D: Sized collection of rows; each row is an iterable of hashable items.
        Ck: Iterable of candidate itemsets.
        min_support: Inclusive lower bound on the support of a retained
            itemset.

    Returns:
        A ``(Lk, support_data)`` tuple. ``Lk`` is a list holding every
        retained itemset as a list; ``support_data`` maps each retained
        itemset (as a ``frozenset``) to its support.
    """
    candidates = [frozenset(c) for c in Ck]

    matched_c_items = []
    for row in D:
        row_items = set(row)  # built once per row instead of once per candidate
        matched_c_items.extend(c for c in candidates if c.issubset(row_items))

    item_counts = count_by(identity, matched_c_items)

    def support(item):
        return item_counts[item] / float(len(D))

    # The set removes the duplicates produced by a candidate matching many rows.
    Lk = list({item for item in matched_c_items
               if support(item) >= min_support})

    support_data = {item: support(item) for item in Lk}

    # A concrete list (not a one-shot ``map`` object) so callers can index it
    # or iterate it more than once.
    return [list(item) for item in Lk], support_data
Пример #3
0
def calc_shannon_entropy(dataset):
    """Compute the base-2 Shannon entropy of the last column of ``dataset``.

    Rows are grouped by their final element (``row[-1]``) with ``count_by``;
    each group contributes ``-p * log(p, 2)`` where ``p`` is the group's count
    divided by ``len(dataset)``.

    NOTE(review): ``count_by`` is defined elsewhere; presumably it returns a
    mapping from each distinct key to its number of occurrences -- confirm.

    Args:
        dataset: Sized collection of indexable rows whose last element is the
            class label.

    Returns:
        The entropy as a float; ``0.0`` when there are no label groups.
    """
    label_counts = count_by(lambda row: row[-1], dataset)

    entropy = 0.0
    for label in label_counts:
        probability = label_counts[label] / float(len(dataset))
        entropy = entropy - probability * log(probability, 2)
    return entropy
Пример #4
0
def majority_class(classes):
    """Return the value that appears most often in ``classes``.

    The values are tallied with ``count_by`` and the tally is sorted by count,
    highest first. The sort is stable, so on a tie the value that comes first
    in the tally's item order is returned.

    NOTE(review): ``count_by`` is defined elsewhere; presumably it returns a
    mapping from each distinct value to its number of occurrences -- confirm.

    Args:
        classes: Iterable of class values.

    Returns:
        The most frequent class value.

    Raises:
        IndexError: If the tally is empty.
    """
    def same(value):
        return value

    tally = count_by(same, classes)
    ranked = list(tally.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[0][0]
Пример #5
0
def words_to_bag_vector(all_words, test_words):
    """Build a bag-of-words count vector for ``test_words``.

    The result has one entry per word of ``all_words``, in the same order:
    the tally of that word in ``test_words``, or ``0`` when it does not occur.

    Membership is tested against the tally rather than against ``test_words``
    itself. That avoids a linear scan of ``test_words`` per vocabulary word
    and stays correct when ``test_words`` is a one-shot iterable that the
    tallying pass has already consumed.

    NOTE(review): ``count_by`` is defined elsewhere; presumably it returns a
    mapping from each distinct word to its number of occurrences -- confirm.

    Args:
        all_words: Iterable of vocabulary words; fixes the length and order of
            the result.
        test_words: Iterable of the words to count.

    Returns:
        A list of counts aligned with ``all_words``. It is a concrete list
        (not a one-shot ``map`` object) so it can be indexed and re-iterated.
    """
    word_counts = count_by(lambda w: w, test_words)
    return [word_counts[word] if word in word_counts else 0
            for word in all_words]