示例#1
0
def test_model(notes, predictions, clf):
    print("\tGetting note vectors...")
    note_vectors = [note_to_vec(note) for note in notes]
    print("\tSplitting train test...")
    X_train, X_test, y_train, y_test = train_test_split(note_vectors,
                                                        predictions,
                                                        test_size=0.2)
    print("\tFitting...")
    clf.fit(X_train, y_train)
    print("\tPredicting...")
    generated_predictions = clf.predict(X_test)
    return accuracy_score(y_test, generated_predictions)
示例#2
0
def visualize_pca_by_lang(notes):
    _prepare_plots()
    pca = __get_pca_model(notes)

    langs = [note.get_lang() for note in notes]
    langs = sorted(set(langs))
    for lang in langs:
        lang_notes = [note for note in notes if note.get_lang() == lang]
        lang_notes_vectors = [note_to_vec(note) for note in lang_notes]
        pca_lang_notes = pca.transform(lang_notes_vectors)
        plt.scatter(pca_lang_notes[:, 0], pca_lang_notes[:, 1], alpha=0.7, label=lang)
    plt.legend()
    _save_plot('lang')
示例#3
0
def visualize_pca_by_growth(notes):
    _prepare_plots()
    pca = __get_pca_model(notes)

    growths = [note.growth for note in notes]
    growths = sorted(set(growths))

    colors = iter(['#e74c3c', '#2ecc71'])

    for growth in growths:
        growth_notes = [note for note in notes if note.growth == growth]
        growth_notes_vectors = [note_to_vec(note) for note in growth_notes]
        pca_growth_notes = pca.transform(growth_notes_vectors)
        plt.scatter(pca_growth_notes[:, 0],
                    pca_growth_notes[:, 1], label=growth, alpha=0.7, color=next(colors))
    plt.legend()
    _save_plot('growth')
示例#4
0
def visualize_pca_by_currency(notes):
    _prepare_plots()
    pca = __get_pca_model(notes)

    currencies = [note.currency for note in notes]
    currencies = sorted(set(currencies))

    colors = iter(sns.color_palette("hls", len(currencies)).as_hex())

    for currency in currencies:
        currency_notes = [note for note in notes if note.currency == currency]
        currency_notes_vectors = [note_to_vec(note) for note in currency_notes]
        pca_currency_notes = pca.transform(currency_notes_vectors)
        plt.scatter(pca_currency_notes[:, 0],
                    pca_currency_notes[:, 1], label=currency, alpha=0.7, color=next(colors))
    plt.legend()
    _save_plot('currency')
示例#5
0
def __get_pca_model(notes):
    pca = PCA(n_components=2)
    notes_vectors = [note_to_vec(note) for note in notes]
    pca.fit(notes_vectors)
    return pca