Exemplo n.º 1
0
import numpy as np

from autokeras import TextClassifier
from autokeras.utils import read_csv_file


def convert_labels_to_one_hot(labels, num_labels):
    """Encode class labels as a 2-D one-hot array.

    Args:
        labels: iterable of values accepted by ``int()``; each one is used
            as the column index of the hot entry for its row.
        num_labels: number of columns in the resulting array.

    Returns:
        A ``numpy`` array of shape ``(len(labels), num_labels)`` filled with
        zeros except for a single ``1`` per row.
    """
    class_ids = list(map(int, labels))
    row_count = len(class_ids)
    encoded = np.zeros((row_count, num_labels))
    # Fancy indexing pairs each row with its class column in one assignment.
    row_ids = np.arange(row_count)
    encoded[row_ids, class_ids] = 1
    return encoded


if __name__ == "__main__":
    # Read the samples and turn their labels into 5-column one-hot rows.
    file_path = "../data/tmp_dataset.csv"
    x_test, y_test = read_csv_file(file_path)
    y_test = convert_labels_to_one_hot(y_test, num_labels=5)

    # Configure the classifier attributes before evaluating.
    # NOTE(review): presumably output_model_file points at an already
    # trained model, since no fit() call happens here -- confirm.
    clf = TextClassifier(verbose=True)
    clf.num_labels = 5
    clf.output_model_file = "../data/v1.h5"

    # Scale the evaluation result by 100 so it is reported as a percentage.
    accuracy_percent = 100 * clf.evaluate(x_test, y_test)
    print("Classification accuracy is: ", accuracy_percent, "%")
                    continue
                # At the first '.', drop everything up to and including the
                # character that follows it, then stop scanning the line.
                if line[i] == '.':
                    line = line[i+2:]
                    break

            # line[:-1] drops the last character of the line
            # (presumably the trailing newline -- TODO confirm).
            data_dict[line[:-1]] = len(labels)
            # NOTE(review): translit(..., "ru", reversed=True) presumably
            # converts the Russian text to Latin script -- confirm.
            data.append(translit(u"{}".format(line[:-1]), "ru", reversed=True))
            label.append(len(labels))
    # Add reinforcement (extra) samples.
    # The block below is wrapped in a string literal, so it does not run.
    '''
    
    for dir in labels:
        if dir in os.listdir('truck-link/Modern'):
            for text in os.listdir('truck-link/Modern/{0}'.format(dir)):
                data.append(text)
                label.append(labels.index(dir) + 1)
    '''




    # Split samples and labels with test_size=0.3 and a fixed random_state.
    trainX, testX, trainY, testY = train_test_split(data, label, test_size = 0.3, random_state = 42)




    # Fit, run the final fit with retraining, then print the evaluation
    # result scaled by 100.
    # NOTE(review): time_limit is presumably in seconds (3 hours) -- confirm.
    clf = TextClassifier(verbose=True)
    clf.fit(x=trainX, y=trainY, time_limit=3 * 60 * 60)
    clf.final_fit(trainX, trainY, testX, testY, retrain=True)
    y = clf.evaluate(testX, testY)
    print(y * 100)
Exemplo n.º 3
0
def convert_one_hot_to_labels(one_hots, num_labels):
    """Turn one-hot rows back into integer class labels.

    Args:
        one_hots: iterable of indexable rows.
        num_labels: number of leading positions inspected in each row.

    Returns:
        A list holding, for each row, the index of the first position equal
        to ``1``. A row with no ``1`` among its first ``num_labels``
        positions contributes nothing, so the result can be shorter than
        the input.
    """
    decoded = []
    for row in one_hots:
        # Lazily scan the row; stop at the first hot position.
        first_hot = next(
            (idx for idx in range(num_labels) if row[idx] == 1),
            None,
        )
        if first_hot is not None:
            decoded.append(first_hot)
    return decoded


if __name__ == "__main__":
    # NOTE(review): no `import csv` is visible in this file, so it is
    # imported locally to keep this script runnable on its own.
    import csv

    # Read the samples and turn their labels into 3-column one-hot rows.
    file_path = "../data/w_train_v3.csv"
    x_test, y_test = read_csv_file(file_path)
    y_test = convert_labels_to_one_hot(y_test, num_labels=3)

    # NOTE(review): presumably output_model_file points at an already
    # trained model, since no fit() call happens here -- confirm.
    clf = TextClassifier(verbose=True)
    clf.num_labels = 3
    clf.output_model_file = "../data/v2.h5"

    # Each output row is (sample, label decoded from one-hot, prediction).
    predictions = zip(x_test, convert_one_hot_to_labels(y_test, num_labels=3),
                      clf.predict(x_test))
    csvfile = "../data/w_test_output.csv"
    # newline="" lets the csv writer control line endings itself; without
    # it, platforms that translate "\n" end up with blank rows in the file.
    with open(csvfile, "w", newline="") as output:
        writer = csv.writer(output)
        writer.writerows(predictions)

    # print("Classification accuracy is: ", 100 * clf.evaluate(
    #     x_test,
    #     y_test
    #     ), "%")
Exemplo n.º 4
0
import pandas as pd

from autokeras import TextClassifier


def read_csv(file_path, *, text_column="review", label_column="sentiment",
             sep='\t'):
    """Read a delimited text file into parallel sample and label lists.

    The defaults reproduce the original hard-coded behavior: a tab-separated
    file with a ``review`` column and a ``sentiment`` column. Other datasets
    can be read by passing different column names or separator instead of
    editing this function.

    Args:
        file_path: path of the file to read.
        text_column: name of the column holding the input samples.
        label_column: name of the column holding the labels.
        sep: field separator passed to ``pandas.read_csv``.

    Returns:
        A tuple ``(x_train, y_train)`` of two lists with one entry per row
        of the file, in file order.

    Raises:
        KeyError: if either requested column is missing from the file.
    """

    print("reading data...")
    data_train = pd.read_csv(file_path, sep=sep)

    # Take the values positionally; this avoids a per-row label lookup and
    # keeps the element types of the underlying column values.
    x_train = list(data_train[text_column].values)
    y_train = list(data_train[label_column].values)
    return x_train, y_train


if __name__ == '__main__':
    # Load the samples and labels from the labeled TSV file.
    file_path = "labeledTrainData.tsv"
    x_train, y_train = read_csv(file_path=file_path)

    # NOTE(review): time_limit is presumably in seconds (12 hours) -- confirm.
    search_budget = 12 * 60 * 60
    clf = TextClassifier(verbose=True)
    clf.fit(x=x_train, y=y_train, time_limit=search_budget)