Примеры использования DataLoader на Python

Язык программирования: Python

Пространство имен/Пакет: ArgumentationClassifier

Класс/Тип: DataLoader

Примеров на hotexamples.com: 11

Python DataLoader — найдено 11 примеров. Это лучшие примеры кода на Python для ArgumentationClassifier.DataLoader, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

prepare_data_for_ZeroR(3)

loadData(2)

prepare_data_for_NN(2)

prepare_data_for_RF(2)

prepare_data_for_RNN(2)

balance_dataset(1)

loadStatistics(1)

supervised_split(1)

Пример #1

Показать файл

Файл: ADSClassifier.py Проект: Milzi/ArguE

def test_classifier(dataset, classifier):
    """Evaluate ``classifier`` on ``dataset`` and print the results.

    The feature preparation is selected from the classifier's type. Keras
    models are additionally scored with ``evaluate``. For every classifier a
    classification report and a table of true vs. predicted classes are
    printed.

    Parameters:
        dataset: data handed to the matching ``dl.prepare_data_for_*`` helper.
        classifier: trained model exposing ``predict``; Keras models must
            also expose ``evaluate`` and ``metrics_names``.

    Note: the labels returned by the helper are assumed to be one-hot rows,
    because ``y_test.shape[1]`` is read as the number of classes and every
    row is searched for its entry equal to 1.
    """

    print("-----TESTING CLASSIFIER-----")

    # Sequential has to be checked before the generic Model type: Sequential
    # is a subclass of Model, so testing Model first would route feed-forward
    # networks through the RNN data preparation and make this branch
    # unreachable.
    if isinstance(classifier, keras.models.Sequential):

        x_test, y_test = dl.prepare_data_for_NN(dataset)

        print("-----TEST SET SIZE: " + str(len(x_test)) + "-----")
        scores = classifier.evaluate(x_test, y_test)
        print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1] * 100))

    elif isinstance(classifier, keras.engine.training.Model):

        x_test, y_test = dl.prepare_data_for_RNN(dataset)

        # The RNN input is a mapping of named inputs, so the size is taken
        # from one of its arrays instead of len().
        print("-----TEST SET SIZE: " + str(x_test["sentence1"].shape) +
              "-----")
        scores = classifier.evaluate(x_test, y_test)
        print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1] * 100))

    elif isinstance(classifier,
                    sklearn.ensemble.forest.RandomForestClassifier):

        x_test, y_test = dl.prepare_data_for_RF(dataset)
        print("-----TEST SET SIZE: " + str(len(x_test)) + "-----")

    else:
        # Any other classifier is treated like the dummy baseline.
        x_test, y_test = dl.prepare_data_for_ZeroR(dataset)
        print("-----TEST SET SIZE: " + str(len(x_test)) + "-----")

    prediction = classifier.predict(x_test)

    numberOfClasses = y_test.shape[1]

    # Collapse the prediction to the winning class index, then re-encode it
    # as a one-hot row so it has the same layout as y_test.
    position = np.argmax(prediction, axis=-1)
    y_pred = np.identity(numberOfClasses)[position]

    target_names = ['nonrelated', 'related']
    print(classification_report(y_test, y_pred, target_names=target_names))

    # Convert the one-hot rows to class indices for the cross table.
    y_test = [np.where(r == 1)[0][0] for r in y_test]
    y_pred = [np.where(r == 1)[0][0] for r in y_pred]

    y_true = pd.Series(y_test)
    y_pred = pd.Series(y_pred)

    print(
        pd.crosstab(y_true,
                    y_pred,
                    rownames=['True'],
                    colnames=['Predicted'],
                    margins=True))

Пример #2

Показать файл

Файл: ADSClassifier.py Проект: Milzi/ArguE

def train_NN_classifier(dataset, epochs, singlePrint=False):
    """Train a feed-forward network with one hidden layer on ``dataset``.

    Parameters:
        dataset: data handed to ``dl.prepare_data_for_NN``.
        epochs: number of training epochs.
        singlePrint: when true, training runs silently and only the list
            stored under ``history.history["acc"]`` is printed afterwards.

    Returns:
        The trained Keras ``Sequential`` model.
    """

    print("-----TRAIN CLASSIFIER-----")

    features, labels = dl.prepare_data_for_NN(dataset)
    class_count = labels.shape[1]

    # 500 sigmoid units followed by a softmax over the classes.
    network = Sequential()
    hidden_layer = Dense(500, input_dim=len(features[0]), activation='sigmoid')
    output_layer = Dense(class_count, activation='softmax')
    network.add(hidden_layer)
    network.add(output_layer)
    network.compile(loss='binary_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

    # Both modes share the same fit settings; only the verbosity differs.
    fit_options = {"validation_split": 0.2, "epochs": epochs, "batch_size": 150}
    if singlePrint:
        fit_options["verbose"] = 0

    history = network.fit(features, labels, **fit_options)

    if singlePrint:
        print(history.history["acc"])

    plot_Training(history)

    print("-----TRAINING COMPLETE-----")
    return network

Пример #3

Показать файл

Файл: ADSClassifier.py Проект: Milzi/ArguE

def baseline_results(dataset):
    """Print the metrics of a random two-class baseline on ``dataset``.

    Every sample receives a label drawn by ``np.random.choice(2, ...)``. The
    result is reported with a classification report and a table of true vs.
    predicted classes.

    Parameters:
        dataset: data handed to ``dl.prepare_data_for_ZeroR``.

    Note: the labels returned by the helper are assumed to be one-hot rows,
    because ``y_test.shape[1]`` is read as the number of classes and every
    row is searched for its entry equal to 1.
    """

    x_test, y_test = dl.prepare_data_for_ZeroR(dataset)

    numberOfClasses = y_test.shape[1]

    # Draw a random class index (0 or 1) for every sample.
    position = np.random.choice(2, x_test.shape[0])

    # One-hot encode against the label width of y_test. Deriving the width
    # from the distinct values that happened to be drawn fails with an
    # IndexError whenever every draw is 1 (e.g. on a single-sample set).
    y_pred = np.identity(numberOfClasses)[position]

    target_names = ['nonrelated', 'related']
    print(classification_report(y_test, y_pred, target_names=target_names))

    # Convert the one-hot rows to class indices for the cross table.
    y_test = [np.where(r == 1)[0][0] for r in y_test]
    y_pred = [np.where(r == 1)[0][0] for r in y_pred]

    y_true = pd.Series(y_test)
    y_pred = pd.Series(y_pred)

    print(
        pd.crosstab(y_true,
                    y_pred,
                    rownames=['True'],
                    colnames=['Predicted'],
                    margins=True))

Пример #4

Показать файл

Файл: ADSClassifier.py Проект: Milzi/ArguE

def train_Dummy_classifier(train):
    """Fit a stratified ``DummyClassifier`` (seeded with 0) on ``train``.

    Parameters:
        train: data handed to ``dl.prepare_data_for_ZeroR``.

    Returns:
        The fitted ``DummyClassifier``.
    """

    features, labels = dl.prepare_data_for_ZeroR(train)

    baseline = DummyClassifier(strategy="stratified", random_state=0)
    baseline.fit(features, labels)
    return baseline

Пример #5

Показать файл

    def balance_data(self, dataset, balancing=0.5):
        """Balance the passed data by delegating to ``dl.balance_dataset``.

        Parameters:
            dataset: pandas dataframe containing the data
            balancing: percentage of the balancing -> 0.5 = equal 50-50
                balancing

        Returns:
            Whatever ``dl.balance_dataset`` returns for these arguments.
        """
        return dl.balance_dataset(dataset, balancing)

Пример #6

Показать файл

    def split_data(self, dataset, splitting=0.1):
        """Split the data into a training set and a test set.

        Parameters:
            dataset: pandas dataframe containing the training data
            splitting: percentage split of the data, 0.1 = 10% testing data

        Returns:
            The pair ``(trainset, testset)`` produced by
            ``dl.supervised_split``.
        """
        trainset, testset = dl.supervised_split(dataset, splitting)
        return trainset, testset

Пример #7

Показать файл

Файл: ADSClassifier.py Проект: Milzi/ArguE

def train_RF_classifier(dataset):
    """Train a random forest with 200 trees on ``dataset``.

    Parameters:
        dataset: data handed to ``dl.prepare_data_for_RF``.

    Returns:
        The fitted ``RandomForestClassifier``.
    """

    print("-----TRAIN CLASSIFIER-----")

    features, labels = dl.prepare_data_for_RF(dataset)

    forest = RandomForestClassifier(n_estimators=200)
    forest.fit(features, labels)

    print("-----TRAINING COMPLETE-----")
    return forest

Пример #8

Показать файл

    def load_Data(self, dataDirectory=None, store=None):
        """Load the data from the XML file and extract its features.

        Parameters:
            dataDirectory: directory of the XML file, passed to
                ``dl.loadData``.
            store: path of the .h5 file in which the loaded data (key
                ``'dataset'``) and the features (key ``'feature'``) are
                stored; nothing is written when it is ``None``.

        Returns:
            The feature dataset produced by ``self.generate_Features``.
        """

        data = dl.loadData(dataDirectory)
        dataset = self.generate_Features(data)

        if store is not None:
            # The context manager closes the HDF5 file even if a write fails,
            # so the handle is not leaked and the data is flushed to disk.
            with pd.HDFStore(store) as dataStore:
                dataStore['dataset'] = data
                dataStore['feature'] = dataset

        return dataset

Пример #9

Показать файл

def train_RNN_classifier(dataset, epochs, singlePrint=False):
    """Train the two-sentence LSTM model on ``dataset``.

    Each sentence input is encoded by its own 16-unit LSTM. Both encodings
    are concatenated with the shared features and passed through a 500-unit
    sigmoid layer and a softmax output layer.

    Parameters:
        dataset: data handed to ``dl.prepare_data_for_RNN``.
        epochs: number of training epochs.
        singlePrint: when true, training runs silently with batch size 150
            and only the list stored under ``history.history["acc"]`` is
            printed; otherwise batch size 500 and default verbosity are used.

    Returns:
        The trained Keras ``Model``.
    """

    print("-----TRAIN CLASSIFIER-----!")

    inputs, labels = dl.prepare_data_for_RNN(dataset)

    class_count = labels.shape[1]
    print("Number of classes:" + str(class_count))

    # Per-sample shapes (batch axis dropped) of the named inputs.
    sentence_shape = inputs["sentence1"].shape[1:]
    shared_shape = inputs["sharedFeatures"].shape[1:]

    first_sentence = Input(sentence_shape, name="sentence1")
    second_sentence = Input(sentence_shape, name="sentence2")
    shared_input = Input(shared_shape, name="sharedFeatures")

    first_encoding = LSTM(16, return_sequences=False)(first_sentence)
    second_encoding = LSTM(16, return_sequences=False)(second_sentence)
    merged = concatenate([first_encoding, second_encoding, shared_input],
                         axis=-1)
    hidden = Dense(500, activation='sigmoid')(merged)
    output = Dense(class_count, activation='softmax')(hidden)

    network = Model(inputs=[first_sentence, second_sentence, shared_input],
                    outputs=[output])
    network.compile(loss='binary_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

    # The two modes differ in batch size and verbosity only.
    fit_options = {"validation_split": 0.2, "epochs": epochs}
    if singlePrint:
        fit_options["batch_size"] = 150
        fit_options["verbose"] = 0
    else:
        fit_options["batch_size"] = 500

    history = network.fit(inputs, labels, **fit_options)

    if singlePrint:
        print(history.history["acc"])

    plot_Training(history)

    print("-----TRAINING COMPLETE-----")
    return network

Пример #10

Показать файл

    def show_dataset_statistics(self):
        """Print each corpus name, then its statistics via ``dl.loadStatistics``."""

        # (printed heading, corpus directory) in the order they are reported.
        corpora = (
            ("AraucariaDB", "resources/corpora/araucaria"),
            ("microtext", "resources/corpora/microtext"),
            ("rrd", "resources/corpora/rrd"),
            ("schemes", "resources/corpora/schemes"),
            ("STAB", "resources/corpora/studentEssays"),
            ("IBM", "resources/corpora/ibm"),
            ("ArguE", "resources/corpora/arguE"),
        )

        for heading, directory in corpora:
            print(heading)
            dl.loadStatistics(directory)

Пример #11

Показать файл

class main:
    """Script-style class: its body runs the whole pipeline at definition time.

    The body builds the ``se`` HDF5 stores if the training store is missing,
    trains an RNN classifier on the training store and evaluates it on the
    test store. Every name assigned below becomes a class attribute.
    """

    # Directory of the file at the top of the inspect stack, with a trailing
    # slash; all resource paths below are built relative to it.
    current_dir = os.path.dirname(inspect.stack()[0][1]) + '/'

    # Full feature stores, one HDF5 file per dataset.
    aif = current_dir + "resources/datasets/aif.h5"
    se = current_dir + "resources/datasets/se.h5"
    ibm = current_dir + "resources/datasets/ibm.h5"
    argu = current_dir + "resources/datasets/arguE.h5"

    # Train/test stores per dataset.
    aifTrain = current_dir + "resources/datasets/training/aifTrain.h5"
    aifTest = current_dir + "resources/datasets/testing/aifTest.h5"
    seTrain = current_dir + "resources/datasets/training/seTrain.h5"
    seTest = current_dir + "resources/datasets/testing/seTest.h5"
    ibmTrain = current_dir + "resources/datasets/training/ibmTrain.h5"
    ibmTest = current_dir + "resources/datasets/testing/ibmTest.h5"
    argueTrain = current_dir + "resources/datasets/training/argueTrain.h5"
    argueTest = current_dir + "resources/datasets/testing/argueTest.h5"

    arguE = ArguE()
    
    ####### Build resources if not existing #######
    # Only the training store is checked; the test store is (re)written
    # together with it.
    if not os.path.exists(seTrain):
        if not os.path.exists(se):
            # No feature store yet: load the raw data from the brat-project
            # directory, extract features and save them under key "feature".
            se_data = dl.loadData((current_dir + 'resources/datasets/brat-project/'))
            AFE = af.AdvancedFeatureExtractor()
            se_data = AFE.extractFeatures(se_data)
            store = pd.HDFStore(se,'w')
            store["feature"] = se_data
            store.close()
            print("SE generated")
        # Reload from the store, split, and write each part to its own file.
        se_data = arguE.load_Data_From_Store(se)
        se_train, se_test = arguE.split_data(se_data)
        store = pd.HDFStore(seTrain,'w')
        store["feature"] = se_train
        store.close()
        store = pd.HDFStore(seTest,'w')
        store["feature"] = se_test
        store.close()
        print("Train-test generated")
        

    #######################################################################

    ####### Training #######

    print("################## TRAINING:")

    # The stored training data is relabelled and balanced here, after loading.
    trainSet = arguE.load_Data_From_Store(seTrain)
    trainSet = arguE.change_labels(trainSet)
    trainSet = arguE.balance_data(trainSet)

    # Alternative classifiers, currently disabled:
    #OneR = arguE.train_Dummy_classifier(trainSet, current_dir + "resources/classifierModels/se_or.pkl")
    #RF = arguE.train_RF_classifier(trainSet, current_dir+ "resources/classifierModels/all_rf.pkl")
    RNN = arguE.train_RNN_classifier(trainSet, epochs=25, saveModel=current_dir + "resources/classifierModels/se_rnn.h5")

    ####### Testing #######

    print("################## TESTING:")

    # The test data is relabelled like the training data but not balanced.
    testSet = arguE.load_Data_From_Store(seTest)
    testSet = arguE.change_labels(testSet)

    #arguE.test_classifier(testSet, OneR)

    #arguE.test_classifier(testSet, RF)

    arguE.test_classifier(testSet, RNN)