Example #1
0
def auto():
    """Run the automatic vectorizer-evaluation pipeline on the BBC corpus."""
    corpus = get_data(bbc_dataset)
    train_split, test_split = divide_data(corpus)
    label_encoder = get_labels(list(corpus.keys()))
    # Fit one vectorizer per configuration on the training split only.
    vecs = create_vectorizers(train_split)
    features, targets = create_dataset_auto(test_split, label_encoder, vecs)
    evaluate_auto(features, targets, label_encoder)
def main():
    """Train, persist, and evaluate an SVM text classifier on the BBC corpus.

    Loads the dataset, encodes topic labels, splits into train/test,
    fits a vectorizer on the training text only, trains the SVM,
    pickles the model, and reports evaluation metrics plus a prediction
    for one new example.
    """
    data_dict = get_data(bbc_dataset)
    le = get_labels(list(data_dict.keys()))
    df = create_dataset(data_dict, le)
    (X_train, X_test, y_train, y_test) = split_dataset(df, 'text', 'label')
    vectorizer = create_and_fit_vectorizer(X_train)
    X_train = vectorizer.transform(X_train).todense()
    X_test = vectorizer.transform(X_test).todense()
    clf = train_svm_classifier(X_train, y_train)
    # Context manager ensures the pickle file handle is closed
    # (the original `pickle.dump(clf, open(...))` leaked the handle).
    with open("ch4/bbc_svm.pkl", "wb") as f:
        pickle.dump(clf, f)
    # To reuse a previously trained model instead of retraining:
    # with open("ch4/bbc_svm.pkl", "rb") as f:
    #     clf = pickle.load(f)
    evaluate(clf, X_test, y_test, le)
    test_new_example(new_example, clf, vectorizer, le)
def main():
    """Cluster BBC articles with KMeans and report clustering quality.

    Flattens the per-topic document lists into training and test corpora,
    fits a TF-style vectorizer and a 5-cluster KMeans on the training
    documents, evaluates predictions on the held-out test split, prints
    the most common words per cluster, and pickles the fitted model.
    """
    data_dict = get_data(bbc_dataset)
    (train_dict, test_dict) = divide_data(data_dict)
    # Flatten per-topic document lists into flat corpora.
    # extend() avoids the quadratic cost of repeated list concatenation.
    all_training = []
    for docs in train_dict.values():
        all_training.extend(docs)
    all_test = []
    for docs in test_dict.values():
        # BUG FIX: the original read train_dict here, so the "test"
        # corpus silently duplicated training documents instead of
        # using the held-out split.
        all_test.extend(docs)
    vectorizer = create_vectorizer(all_training)
    matrix = vectorizer.transform(all_training)
    # Fixed random_state keeps cluster assignments reproducible.
    km = KMeans(n_clusters=5, init='k-means++', random_state=0)
    km.fit(matrix)
    predicted_data = make_predictions(test_dict, vectorizer, km)
    print_report(predicted_data)
    print_most_common_words_by_cluster(all_training, km)
    # Context manager closes the file handle (original open() leaked it).
    with open("ch4/bbc_kmeans.pkl", "wb") as f:
        pickle.dump(km, f)
Example #4
0
def main():
    """Build a feature/label dataset from the BBC corpus and evaluate it."""
    corpus = get_data(bbc_dataset)
    features, targets = create_dataset(corpus)
    evaluate(features, targets)
def main():
    """Encode topic labels, assemble the BBC dataframe, and train the model."""
    corpus = get_data(bbc_dataset)
    label_encoder = get_labels(list(corpus.keys()))
    frame = create_dataset(corpus, label_encoder)
    train_model(frame, label_encoder)