Example no. 1
	# Fragment of a train-on-synthetic (TSTR) routine; the enclosing def is
	# outside this view. NOTE(review): batch_size, one_d, X_train/y_train,
	# X_val/y_val, X_test/y_test, date and folder come from the enclosing
	# scope — confirm against the full file.
	epochs = 500

	num_classes = 9

	# Model_ presumably builds the Keras classifiers; 100 looks like the
	# input sequence length — TODO confirm against Model_'s definition.
	m = Model_(batch_size, 100, num_classes)

	# Choose the 1-D or 2-D CNN variant.
	if one_d == True:
		model = m.cnn()
	else:
		model = m.cnn2()

	model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

## callbacks
	history = my_callbacks.Histories()
	rocauc = my_callbacks.ROC_AUC(X_train, y_train, X_test, y_test)
	inception = my_callbacks.Inception(X_test, num_classes)

	# Keep only the best weights by validation loss; stop after 10 epochs
	# without (essentially) any val_loss improvement.
	checkpoint = ModelCheckpoint('TSTR_'+ date +'/train/'+ folder +'/weights.best.trainonsynthetic.hdf5', monitor='val_loss', verbose=1, save_best_only=True, mode='min')
	earlyStopping = EarlyStopping(monitor='val_loss',min_delta = 0.00000001  , patience=10, verbose=1, mode='min') #0.00000001   patience 0

	model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data = (X_val, y_val),
		callbacks = [history,
					checkpoint,
					earlyStopping,
					rocauc,
					inception
					])

	# Persist the final (last-epoch) model; best weights are in the checkpoint file.
	model.save('TSTR_'+ date +'/train/'+ folder +'/trainonsynthetic_model.h5')
Example no. 2
def main(result_dict=None, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE=1.0, v=''):
    """Train a classifier on synthetic light curves, then test it on real ones (TSTR).

    Metrics are accumulated into ``result_dict`` under the disbalance-percentage
    key, so a caller can pass one dict across several runs.

    Args:
        result_dict: optional dict collecting 'training'/'testing' metrics per run.
        PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE: class-imbalance fraction,
            used to build dataset and output-folder names.
        v: variant tag inserted into the synthetic-data folder name.
    """
    # Fixed: the old `result_dict={}` mutable default was shared across calls;
    # use the None sentinel and create a fresh dict per call instead.
    if result_dict is None:
        result_dict = {}

    folder = '%s%s%.2f' % (BASE_REAL_NAME, v, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    dataset_real = '%s%.2f' % (BASE_REAL_NAME, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)

    PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY = str(PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY] = {'training': {}, 'testing': {}}

    def read_data(file):
        """Load a pickled (train, val, test) triple of generated light curves.

        Each pickle entry is a dict holding 'generated_magnitude' and 'class'.
        Returns 4-D inputs (N, length, 1, 1) and one-hot labels, each split
        shuffled with a fixed seed for reproducibility.
        """
        with open(file, 'rb') as f:
            data = pickle.load(f)

        def _prepare(split):
            # (N, L) magnitudes -> (N, L, 1, 1) as the 2-D CNN input layout.
            X = np.asarray(split['generated_magnitude'])
            X = X.reshape(X.shape[0], X.shape[1], 1, 1)
            # change_classes/to_categorical act elementwise, so applying them
            # before the seeded shuffle is equivalent to the previous per-split
            # orderings (shuffle keeps X/y pairs aligned either way).
            y = to_categorical(change_classes(np.asarray(split['class'])))
            return shuffle(X, y, random_state=42)

        X_train, y_train = _prepare(data[0])
        X_val, y_val = _prepare(data[1])
        X_test, y_test = _prepare(data[2])

        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_original_irr(file):
        """Load real irregularly-sampled series: magnitude + time as two channels.

        Stacks 'original_magnitude' and 'time' into a trailing axis of size 2
        and reshapes to (N, length, 1, 2). Labels are one-hot; each split is
        shuffled with a fixed seed.
        """
        with open(file, 'rb') as f:
            data = pickle.load(f)

        print(data[0].keys())

        def _prepare(split):
            # Two input channels: magnitude and observation time.
            mgt = np.asarray(split['original_magnitude'])
            t = np.asarray(split['time'])
            X = np.stack((mgt, t), axis=-1)
            X = X.reshape(X.shape[0], X.shape[1], 1, X.shape[2])
            # Elementwise label transforms commute with the seeded shuffle,
            # so this matches the previous per-split processing order.
            y = to_categorical(change_classes(np.asarray(split['class'])))
            return shuffle(X, y, random_state=42)

        X_train, y_train = _prepare(data[0])
        X_val, y_val = _prepare(data[1])
        X_test, y_test = _prepare(data[2])

        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_generated_irr(file):
        """Load synthetic irregularly-sampled series: generated magnitude + time.

        Same layout as read_data_original_irr — (N, length, 1, 2) inputs and
        one-hot labels — but reads the 'generated_magnitude' key.
        """
        with open(file, 'rb') as f:
            data = pickle.load(f)

        print(data[0].keys())

        def _prepare(split, report_classes=False):
            # Two input channels: generated magnitude and observation time.
            mgt = np.asarray(split['generated_magnitude'])
            t = np.asarray(split['time'])
            X = np.stack((mgt, t), axis=-1)
            X = X.reshape(X.shape[0], X.shape[1], 1, X.shape[2])
            y = np.asarray(split['class'])
            if report_classes:
                # Diagnostic: raw class labels present in the training split.
                print(np.unique(y))
            # Elementwise label transforms commute with the seeded shuffle,
            # so this matches the previous per-split processing order.
            y = to_categorical(change_classes(y))
            return shuffle(X, y, random_state=42)

        X_train, y_train = _prepare(data[0], report_classes=True)
        X_val, y_val = _prepare(data[1])
        X_test, y_test = _prepare(data[2])

        return X_train, y_train, X_val, y_val, X_test, y_test

    def change_classes(targets):
        """Map arbitrary class labels to dense integer ids 0..K-1.

        Ids follow the sorted order of the unique labels, exactly as the old
        argsort/searchsorted combination did — np.unique's inverse indices are
        that mapping directly (and avoid calling np.unique twice).
        """
        _, dense_targets = np.unique(targets, return_inverse=True)
        return dense_targets

    def open_data(file):
        """Load one pickled split of generated curves into (X, y) training arrays."""
        with open(file, 'rb') as f:
            data = pickle.load(f)

        print(len(data['generated_magnitude']))

        # (N, L) magnitudes -> (N, L, 1, 1); labels -> dense ids -> one-hot.
        features = np.asarray(data['generated_magnitude'])
        features = features.reshape(features.shape[0], features.shape[1], 1, 1)
        labels = np.asarray(data['class'])
        features, labels = shuffle(features, labels, random_state=42)
        labels = to_categorical(change_classes(labels))

        return features, labels

    def evaluation(X_test, y_test, n_classes):
        """Compute an Inception-Score-style metric from the trained model's outputs.

        Splits predicted class probabilities into 10 chunks; each chunk scores
        exp(mean KL(p(y|x) || p(y))), with p(y) that chunk's marginal.

        Args:
            X_test: model inputs.
            y_test: unused; kept for interface compatibility with callers.
            n_classes: unused now that the marginal is vectorized; kept for
                interface compatibility.

        Returns:
            (score, mean, std): one-element lists holding the per-chunk scores,
            their mean, and their standard deviation (list shape preserved for
            the downstream .npy consumers).
        """
        # `model` is the trained classifier from the enclosing scope.
        y_pred_prob = model.predict_proba(X_test)

        n = 10
        probs = np.array_split(y_pred_prob, n)

        score = []
        mean = []
        std = []

        ep = 1e-12  # guards log() against zero probabilities
        tmp = []
        for prob in probs:
            # Marginal class distribution of this chunk — replaces the old
            # hand-rolled per-class/per-sample accumulation loops.
            marginal = prob.mean(axis=0)
            kl = (prob * np.log((prob + ep) / marginal)).sum(axis=1)
            tmp.append(np.exp(np.mean(kl)))

        score.append(tmp)
        mean.append(np.mean(tmp))
        std.append(np.std(tmp))

        print('Inception Score:\nMean score : ', mean[-1])
        print('Std : ', std[-1])

        return score, mean, std

    def check_dir(directory):
        """Create `directory` (and any missing parents) if it does not exist."""
        # exist_ok avoids the check-then-create race of the old
        # os.path.exists test; idempotent on repeated calls.
        os.makedirs(directory, exist_ok=True)

    # Output directory layout: TSTR_<date>/{train,test}/<folder>/
    # Fixed: the '/train/' level was created twice (duplicate check_dir call).
    check_dir('TSTR_' + date)
    check_dir('TSTR_' + date + '/train/')
    check_dir('TSTR_' + date + '/train/' + folder)
    check_dir('TSTR_' + date + '/test/')
    check_dir('TSTR_' + date + '/test/' + folder)

    # If a trained model already exists, skip training and report cached metrics.
    if os.path.isfile('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_model.h5'):

        print('\nTrain metrics:')

        # NOTE(review): these .npy files are only written by np.save calls that
        # are currently commented out in the training branch — confirm they
        # exist (e.g. from earlier runs) before relying on this path.
        mean = np.load('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_is_mean.npy')

        std = np.load('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_is_std.npy')

        print('Training metrics:')
        print('Inception Score:\nMean score : ', mean[-1])
        print('Std : ', std[-1])

        # Per-epoch training histories, averaged for the summary printout.
        acc = np.load('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_history_acc.npy')
        val_acc = np.load('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_history_val_acc.npy')
        loss = np.load('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_history_loss.npy')
        val_loss = np.load('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_history_val_loss.npy')

        print('ACC : ', np.mean(acc))
        print('VAL_ACC : ', np.mean(val_acc))
        print('LOSS : ', np.mean(loss))
        print('VAL_LOSS : ', np.mean(val_loss))

        print('\nTest metrics:')

        score = np.load('TSTR_' + date + '/train/' + folder + '/testonreal_score.npy')
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

        roc = np.load('TSTR_' + date + '/train/' + folder + '/testonreal_rocauc.npy')
        print('auc roc', roc)

        # Expose the cached metrics through the caller-supplied dict.
        result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['training'] = {
            'IS Mean': mean[-1],
            'IS Std': std[-1], 'ACC': np.mean(acc), 'VAL_ACC': np.mean(val_acc),
            'LOSS': np.mean(loss), 'VAL_LOSS': np.mean(val_loss)
        }

        result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing'] = {
            'test loss': score[0], 'Test accuracy': score[1], 'auc roc': roc
        }



    else:

        irr = True
        dataset_syn = folder + '_generated'
        one_d = False

        ## Train on synthetic

        if irr == True:
            X_train, y_train, X_val, y_val, X_test, y_test = read_data_generated_irr(
                'TSTR_data/generated/' + folder + '/' + dataset_syn + '.pkl')
        else:
            X_train, y_train, X_val, y_val, X_test, y_test = read_data(
                '/TSTR_data/generated/' + folder + '/' + dataset_syn + '.pkl')

        print('')
        print('Training new model')
        print('')

        batch_size = 512
        epochs = 200

        num_classes = 3

        m = Model_(batch_size, 100, num_classes)

        if one_d == True:
            model = m.cnn()
        else:
            model = m.cnn2()

        # One-hot labels -> categorical cross-entropy with accuracy tracking.
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        ## callbacks
        history = my_callbacks.Histories()
        rocauc = my_callbacks.ROC_AUC(X_train, y_train, X_test, y_test)
        inception = my_callbacks.Inception(X_test, num_classes)

        # Best-by-val_acc checkpoint and early stopping. NOTE(review): the other
        # training example in this file monitors val_loss/min instead — confirm
        # which criterion is intended here.
        checkpoint = ModelCheckpoint('TSTR_' + date + '/train/' + folder + '/weights.best.trainonsynthetic.hdf5',
                                     monitor='val_acc', verbose=1, save_best_only=True, mode='max')
        earlyStopping = EarlyStopping(monitor='val_acc', min_delta=0.00000001, patience=10, verbose=1,
                                      mode='max')  # 0.00000001   patience 0

        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val),
                  callbacks=[history,
                             checkpoint,
                             earlyStopping,
                             rocauc,
                             inception
                             ])

        # Reload the best checkpoint (ModelCheckpoint saved the full model).
        model = load_model('TSTR_' + date + '/train/' + folder + '/weights.best.trainonsynthetic.hdf5')

        # Collect per-epoch metrics from the callbacks.
        # NOTE(review): every np.save below is commented out, yet the
        # cached-results branch earlier np.load's exactly these files —
        # re-enable the saves (or drop that branch) to make the paths consistent.
        ## Loss
        history_dictionary_loss = history.loss
        ##np.save('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_history_loss.npy', history_dictionary_loss)
        ## Val Loss
        history_dictionary_val_loss = history.val_loss
        #np.save('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_history_val_loss.npy',
        #        history_dictionary_val_loss)
        ## Acc
        history_dictionary_acc = history.acc
        #np.save('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_history_acc.npy', history_dictionary_acc)
        ## Val Acc
        history_dictionary_val_acc = history.val_acc
        #np.save('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_history_val_acc.npy',
        #        history_dictionary_val_acc)
        ## AUC ROC
        roc_auc_dictionary = rocauc.roc_auc
        #np.save('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_rocauc_dict.npy', roc_auc_dictionary)
        ## IS — per-epoch Inception Score statistics from the Inception callback.
        scores_dict = inception.score
        #np.save('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_is.npy', scores_dict)
        mean_scores_dict = inception.mean
        #np.save('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_is_mean.npy', mean_scores_dict)
        std_scores_dict = inception.std
        #np.save('TSTR_' + date + '/train/' + folder + '/trainonsynthetic_is_std.npy', std_scores_dict)

        ### Learning-curve plots (log-scale y), saved next to the checkpoints.
        def _save_curves(fig_id, train_series, val_series, title, ylabel, out_path):
            # Plot train vs validation series for one metric and save to disk.
            plt.figure(fig_id)
            plt.yscale("log")
            plt.plot(train_series)
            plt.plot(val_series)
            plt.title(title)
            plt.ylabel(ylabel)
            plt.xlabel('epoch')
            plt.legend(['train', 'val'], loc='upper right')
            plt.savefig(out_path)

        _save_curves(1, history.loss, history.val_loss, 'model loss', 'loss',
                     'TSTR_' + date + '/train/' + folder + '/trainonsynthetic_loss.png')
        _save_curves(2, history.acc, history.val_acc, 'model acc', 'Acc',
                     'TSTR_' + date + '/train/' + folder + '/trainonsynthetic_acc.png')

        # Console summary of the training run.
        print('Training metrics:')
        print('Inception Score:\nMean score : ', mean_scores_dict[-1])
        print('Std : ', std_scores_dict[-1])

        print('ACC : ', np.mean(history_dictionary_acc))
        print('VAL_ACC : ', np.mean(history_dictionary_val_acc))
        print('LOSS : ', np.mean(history_dictionary_loss))
        print('VAL_LOSS : ', np.mean(history_dictionary_val_loss))

        ## Test on real

        print('\nTest metrics:')

        # Load the REAL dataset matching the synthetic one's disbalance level.
        if irr == True:
            X_train, y_train, X_val, y_val, X_test, y_test = read_data_original_irr(
                'TSTR_data/datasets_original/REAL/' + dataset_real + '.pkl')
        else:
            X_train, y_train, X_val, y_val, X_test, y_test = read_data(
                'TSTR_data/datasets_original/REAL/' + dataset_real + '.pkl')

        # Inception-Score-style evaluation of the model's predictions on real data.
        sc, me, st = evaluation(X_test, y_test, num_classes)
        #np.save('TSTR_' + date + '/test/' + folder + '/testonreal_is.npy', sc)
        #np.save('TSTR_' + date + '/test/' + folder + '/testonreal_is_mean.npy', me)
        #np.save('TSTR_' + date + '/test/' + folder + '/testonreal_is_std.npy', st)

        # score = [loss, accuracy] per the compile() metrics.
        score = model.evaluate(X_test, y_test, verbose=1)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

        #np.save('TSTR_' + date + '/test/' + folder + '/testonreal_score.npy', score)

        # Multi-class ROC AUC on one-hot targets vs predicted probabilities.
        y_pred = model.predict(X_test)
        roc = roc_auc_score(y_test, y_pred)
        print('auc roc', roc)
        #np.save('TSTR_' + date + '/test/' + folder + '/testonreal_rocauc.npy', roc)

        # Expose the fresh-run metrics through the caller-supplied dict.
        result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['training'] = {
            'IS Mean': mean_scores_dict[-1],
            'IS Std': std_scores_dict[-1], 'ACC': np.mean(history_dictionary_acc),
            'VAL_ACC': np.mean(history_dictionary_val_acc),
            'LOSS': np.mean(history_dictionary_loss), 'VAL_LOSS': np.mean(history_dictionary_val_loss)
        }

        result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing'] = {
            'test loss': score[0], 'Test accuracy': score[1], 'auc roc': roc
        }