Пример #1
0
def treenimine(m):
    """Cross-validate every model in ``m`` on the n-gram data and plot the scores.

    Relies on names defined outside this function: ``n`` (a mapping read for
    the keys ``"tuup"``, ``"tahis"``, ``"ngram_size"`` and ``"df_size"``),
    ``vektorid`` (only its ``shape`` is printed), ``X_train`` / ``Y_train``
    (the data handed to ``cross_val_score``) and the ``nimi`` helper that
    builds the output file name.

    Args:
        m: Iterable of ``(name, model)`` pairs. Each model is evaluated with
            10-fold stratified cross-validation scored by accuracy.

    Side effects:
        Prints one line per model (mean accuracy, standard deviation and the
        elapsed time) plus the total run time, then saves a box plot of the
        per-fold accuracies. Nothing is plotted when ``m`` yields no models.
    """
    algus = datetime.now()

    print(f"\n\nN-grammidega ennustamine {n['tuup']} {n['tahis']}")
    print(f"{time.ctime(time.time())}")
    print(f"ngram size: {n['ngram_size']}, df size: {n['df_size']}")
    print("andmepunktid: {0[0]}, tunnused: {0[1]}".format(vektorid.shape))

    # The scaled features and flattened labels are the same for every model,
    # so compute them once instead of once per loop iteration.
    X_skaleeritud = preprocessing.scale(X_train, with_mean=False)
    y_lame = np.ravel(Y_train)

    results = []
    names = []

    for name, model in m:
        algus2 = datetime.now()

        # Fixed random_state: every model is scored on the same folds.
        kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
        cv_results = cross_val_score(
            model,
            X_skaleeritud,
            y_lame,
            cv=kfold,
            scoring="accuracy",
            n_jobs=-1,
        )
        results.append(cv_results)
        names.append(name)

        lõpp2 = datetime.now()
        aeg2 = lõpp2 - algus2

        print(
            "{:40s} {:3.06f} {:10s} {:20s}".format(
                f"{name}:",
                round(cv_results.mean(), 6),
                "({:1.06f})".format(round(cv_results.std(), 6)),
                f"(Aeg: {str(aeg2)})",
            )
        )

    lõpp = datetime.now()
    aeg = lõpp - algus

    print(f"Aeg: {aeg}\n\n")

    if not results:
        # No models were evaluated, so there is nothing to plot or to name
        # the output file after.
        return

    pyplot.boxplot(results, labels=names)
    pyplot.title("Algorithm Comparison / ngramm {} {}".format(n["tuup"], n["tahis"]))
    pyplot.xticks(rotation=90)
    pyplot.draw()

    # The file name has always carried the name of the LAST evaluated model
    # (the original read the leaked loop variable); keep that, but explicitly.
    viimane_nimi = names[-1]
    pyplot.savefig(
        nimi(
            "graafikud/{} eksimismaatriks ngramm {} {}".format(
                viimane_nimi, n["tuup"], n["tahis"]
            ),
            "png",
        ),
        bbox_inches="tight",
        dpi=100,
    )
Пример #2
0
def funktsioon(ds, m):
    """Fit each model on a train split and save its normalised confusion matrix.

    The first column of ``ds.values`` is taken as the label and all remaining
    columns as features. Features are standardised with ``StandardScaler``
    and split 80/20 into training and validation parts (shuffled, unseeded).

    Args:
        ds: Object exposing a 2-D ``values`` array (label in column 0).
        m: Iterable of ``(name, model)`` pairs; every model is fitted on the
            training part and evaluated on the validation part.

    Side effects:
        For every model, draws a row-normalised confusion matrix, saves it
        through the ``nimi`` helper and prints the matrix with the elapsed
        time; finally prints the total run time.
    """
    algus = datetime.now()

    array = ds.values
    x = np.array(array[:, 1:])
    y = np.array(array[:, :1])

    scaler = preprocessing.StandardScaler().fit(x)
    X_scaled = scaler.transform(x)

    X_train, X_validation, Y_train, Y_validation = train_test_split(
        X_scaled, y, test_size=0.20, shuffle=True
    )

    print("\n\nsõnatüübid")
    print(f"{time.ctime(time.time())}")
    print("andmepunktid: {}, tunnused: 17\n".format(len(X_train)))

    # Display labels are identical for every model, so build them once.
    names = ["eesti", "soome", "vene"]
    # ``y`` is sliced as a column vector; estimators expect a 1-D target
    # (the cross-validation routine in this file ravels it the same way).
    y_train_lame = np.ravel(Y_train)

    for name, model in m:
        algus2 = datetime.now()

        model.fit(X_train, y_train_lame)

        disp = plot_confusion_matrix(
            model,
            X_validation,
            Y_validation,
            display_labels=names,
            cmap=pyplot.cm.Blues,
            normalize="true",
        )
        disp.ax_.set_title(f"Normaliseeritud eksimismaatriks {name}")

        pyplot.draw()
        pyplot.savefig(
            # The f-prefix was missing, so the literal text "{name}" ended up
            # in the file name and every model shared the same base name.
            nimi(f"graafikud/Normaliseeritud eksimismaatriks {name} ", "png"),
            bbox_inches="tight",
            dpi=100,
        )

        lõpp2 = datetime.now()
        aeg2 = lõpp2 - algus2

        print(
            "{:40s} {:150s} {:20s}".format(
                f"{name}:",
                f"{list(disp.confusion_matrix)}",
                f"(Aeg: {str(aeg2)})",
            )
        )

    lõpp = datetime.now()
    aeg = lõpp - algus

    print(f"Aeg: {aeg}\n\n")
Пример #3
0
            y = np.ceil((df_suurused[1] - df_suurused[0]) / df_aste)
            pyplot.xticks(
                np.arange(x),
                list(range(ngrammi_suurused[0], ngrammi_suurused[1], ngrammi_aste)),
            )
            pyplot.yticks(
                np.arange(y), list(range(df_suurused[0], df_suurused[1], df_aste))
            )
            pyplot.title(
                str(f"N-grammi tüüp: {ngrammi_tahis}, N-grammi tüüp {ngrammi_tuup}")
            )
            pyplot.draw()

            pyplot.savefig(
                nimi(
                    f"graafikud/täpsused ngramm {ngrammi_tahis} {ngrammi_tuup}", "png"
                ),
                bbox_inches="tight",
                dpi=100,
            )

        except TypeError:
            print(
                ngrammi_tahis,
                ngrammi_tuup,
                "#######################################",
                toodeldud_andmed,
            )

        lõpp = datetime.now()
        aeg = lõpp - algus
# Regroup the results: tulemused[i][j] becomes uus_tulemused[j][i], so every
# entry of uus_tulemused collects the values stored at position j of each row.
uus_tulemused = [[] for _ in models]

for i, tulemuste_rida in enumerate(tulemused):
    for j, tulemus in enumerate(tulemuste_rida):
        uus_tulemused[j].append(tulemus)

# Subplot grid: korgus rows by laius columns.
laius = 3
korgus = 2

# One figure per entry of uus_tulemused, titled with nimed[j]; each subplot
# shows one matrix as an annotated heatmap labelled with `keeled` on both axes.
for j, maatriksid in enumerate(uus_tulemused):
    fig, axs = pyplot.subplots(korgus, laius, figsize=(15, 10))
    fig.suptitle(f"Normaliseeritud eksimismaatriks {nimed[j]}")

    for i in range(len(uus_tulemused[0])):
        # Fill the grid row by row.
        ruudu_rida, ruudu_veerg = divmod(i, laius)
        telg = axs[ruudu_rida, ruudu_veerg]

        telg.set_title(variandid[i])
        df_cm = pd.DataFrame(maatriksid[i], index=keeled, columns=keeled)
        sn.heatmap(df_cm, annot=True, cmap=pyplot.cm.Blues, ax=telg)

    pyplot.draw()
    faili_nimi = nimi(
        f"graafikud/Normaliseeritud eksimismaatriks {nimed[j]} ngrammid",
        "png",
    )
    pyplot.savefig(faili_nimi, bbox_inches="tight", dpi=100)

pyplot.show()
pyplot.close()
Пример #5
0
def treenimine(ds, m):
    """Cross-validate every model in ``m`` on ``ds`` and plot the accuracies.

    The first column of ``ds.values`` is taken as the label and all remaining
    columns as features. Features are standardised with ``StandardScaler``,
    split 80/20 (shuffled, unseeded), and only the training part is used for
    10-fold stratified cross-validation scored by accuracy.

    Args:
        ds: Object exposing a 2-D ``values`` array (label in column 0).
        m: Iterable of ``(name, model)`` pairs to evaluate.

    Side effects:
        Prints one line per model (mean accuracy, standard deviation, elapsed
        time), saves a box plot of the per-fold accuracies through the
        ``nimi`` helper and prints the total run time. A model whose
        cross-validation raises is reported and left out of the plot.
    """
    algus = datetime.now()

    array = ds.values
    x = np.array(array[:, 1:])
    y = np.array(array[:, :1])

    scaler = preprocessing.StandardScaler().fit(x)
    X_scaled = scaler.transform(x)

    X_train, X_validation, Y_train, Y_validation = train_test_split(
        X_scaled, y, test_size=0.20, shuffle=True)

    results = []
    names = []

    print("\n\nSõnatüüpidel ennustamine")
    print(f"{time.ctime(time.time())}")
    print("andmepunktid: {}, tunnused: 17\n".format(len(X_train)))

    # Identical for every model, so compute once instead of once per iteration.
    X_treening = preprocessing.scale(X_train)
    y_treening = np.ravel(Y_train)

    for name, model in m:
        algus2 = datetime.now()
        # Unseeded on purpose as in the original: each model gets fresh folds.
        kfold = StratifiedKFold(n_splits=10, shuffle=True)
        try:
            cv_results = cross_val_score(
                model,
                X_treening,
                y_treening,
                cv=kfold,
                scoring="accuracy",
                n_jobs=-1,
            )
        except Exception as viga:
            print(
                "================================| Error |================================"
            )
            print(f"{name}: {viga!r}")
            # Skip this model. Falling through would either raise NameError
            # (first model) or record the previous model's scores under this
            # model's name.
            continue

        results.append(cv_results)
        names.append(name)

        lõpp2 = datetime.now()
        aeg2 = lõpp2 - algus2

        print("{:40s} {:3.06f} {:10s} {:20s}".format(
            f"{name}:",
            round(cv_results.mean(), 6),
            "({:1.06f})".format(round(cv_results.std(), 6)),
            f"(Aeg: {str(aeg2)})",
        ))

    if results:
        pyplot.figure(figsize=(8, 6))
        pyplot.boxplot(results, labels=names)
        pyplot.title("Algoritmide täpsused")
        pyplot.xticks(rotation=90)

        # NOTE(review): absolute, machine-specific output path kept as-is;
        # consider making it configurable.
        pyplot.savefig(
            nimi(
                r"C:\Users\rasmu\OneDrive\Töölaud\Programmid\Python 3\Uurimistöö\Graafikud\Lõplikud\Sõnatüüpide efektiivsus ",
                "png",
            ),
            bbox_inches="tight",
            dpi=100,
        )

        pyplot.draw()

    lõpp = datetime.now()
    aeg = lõpp - algus

    print(f"Aeg: {aeg}\n\n")