示例#1
0
def test():
    """Restore the saved text-CNN and print its accuracy on every test batch.

    The checkpoint recorded under ``./old_model/`` is restored into a fresh
    ``Test_CNN_NET``.  For each step ``i`` in ``text_cnn.config.test_steps``
    the sentences in ``./jieba_treat_test/<i>.txt`` and the labels in
    ``./jieba_treat_test/<i>_l.txt`` are converted through ``raw`` and fed to
    the network; the resulting accuracy is printed.

    Raises:
        IOError: if no checkpoint state is found under ``./old_model/``.
    """
    # Construct a Test_CNN_NET obj.
    text_cnn = Test_CNN_NET(Text_train.config)
    with tf.Session() as sess:
        # Load the model.
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state('./old_model/')
        if ckpt is None:
            # get_checkpoint_state returns None when nothing usable is found;
            # fail with a clear message instead of an AttributeError on None.
            raise IOError("no checkpoint found under './old_model/'")
        saver.restore(sess, ckpt.model_checkpoint_path)

        for i in range(text_cnn.config.test_steps):
            batch_prefix = './jieba_treat_test/' + str(i)
            # Context managers close both files even if parsing raises.
            with open(batch_prefix + '.txt', 'r') as content_file, \
                    open(batch_prefix + '_l.txt', 'r') as label_file:
                # Get sentences and convert them into vectors.
                all_sentence_words = raw.get_all_words(content_file)
                embeddings = np.array(raw.get_embeddings(all_sentence_words))
                labels = np.array(raw.get_labels(label_file))

            # Feed the data into the network.
            # NOTE(review): keep_prob comes from the shared config; if that is
            # the training dropout rate, evaluation should probably use 1.0
            # -- confirm.
            feed_dict = {
                text_cnn.input_x: embeddings,
                text_cnn.input_label: labels,
                text_cnn.keep_prob: text_cnn.config.keep_prob
            }
            # Compute the accuracy for this batch.
            acc = sess.run(text_cnn.accuracy, feed_dict)
            print("step {}, acc {:g}".format(i, acc))
示例#2
0
def train_model(model, max_len, get_cross_validation=False, non_zero=False):
  """Train and evaluate a model on the 0/1 frame segmentation task.

  Loads the feature tensor and the per-frame 0/1 labels, builds the ED_TCN
  network when asked to, then fits, evaluates and predicts either on a single
  80/20 split or across four cross-validation folds.

  Args:
    model: the ``ED_TCN`` builder, in which case the network is constructed
      here; any other object is used as-is (presumably an already compiled
      model -- TODO confirm).
    max_len: the number of frames for each video.
    get_cross_validation: whether to cross validate.
    non_zero: whether to filter the data through ``get_data.non_zero_data``
      first. Only honoured when ``get_cross_validation`` is false.

  Returns:
    loss_mean: loss for this model (mean over the folds when cross
      validating).
    acc_mean: accuracy for this model (mean over the folds when cross
      validating).
    classes: predictions. Covers all the videos when cross validating,
      otherwise only the held-out split.
    y_test: test ground truth. Equal to all labels when cross validating.
  """
  import sys

  x = get_data.get_feature_tensor(feature_dir, feature_name, max_len)
  y = get_data.get_frame_01_labels(feature_dir, feature_name, max_len)
  y_video = get_data.get_labels(label_dir, label_name)
  y = np.array(y)
  # Single-argument print: same output under Python 2 and Python 3.
  print('x %s y %s' % (x.shape, y.shape))
  # Never truncate printed arrays. sys.maxsize is the numeric equivalent of
  # the old threshold='nan' trick, which current NumPy rejects.
  np.set_printoptions(threshold=sys.maxsize)

  if model == ED_TCN:
    n_nodes = [512, 512]  #, 1024]
    pool_sizes = [2, 2]  #, 2]
    conv_lens = [10, 10]  #, 10]
    causal = False
    model = ED_TCN(n_nodes, pool_sizes, conv_lens, 2, 512, max_len,
                   causal=causal, activation='norm_relu', optimizer='rmsprop')
    model.summary()

  if not get_cross_validation:
    if non_zero:
      x, _, y = get_data.non_zero_data(x, y_video, max_len, y, use_y_frame=True)
    y_cat = np_utils.to_categorical(y, num_classes=2)
    y_cat = np.reshape(y_cat, (-1, max_len, 2))
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        x, y_cat, test_size=0.2, random_state=1)
    model.fit(x_train, y_train, validation_data=[x_test, y_test], epochs=5)
    loss_and_metrics = model.evaluate(x_test, y_test)
    loss_mean = loss_and_metrics[0]
    acc_mean = loss_and_metrics[1]
    classes = model.predict(x_test)
  else:
    # The data set is laid out as 4 folds of 50 videos (200 in total).
    n_folds = 4
    fold_size = 50
    loss = np.zeros(n_folds)
    acc = np.zeros(n_folds)
    classes = np.zeros((n_folds * fold_size, max_len, 2))
    y_cat = np_utils.to_categorical(y, num_classes=2)
    y_cat = np.reshape(y_cat, (n_folds * fold_size, max_len, 2))
    x_train_cro, y_train_cro, x_test_cro, y_test_cro = train.set_cross_validation(x, y_cat)
    # NOTE(review): the same model instance is fitted on every fold without
    # being re-initialised, so later folds start from earlier folds' weights.
    for i in range(n_folds):
      print(i)
      model.fit(x_train_cro[i], y_train_cro[i], batch_size=20)
      loss_and_metrics = model.evaluate(x_test_cro[i], y_test_cro[i])
      loss[i] = loss_and_metrics[0]
      acc[i] = loss_and_metrics[1]
      classes[i * fold_size:(i + 1) * fold_size] = model.predict(x_test_cro[i])
    loss_mean = np.mean(loss)
    acc_mean = np.mean(acc)
    y_test = y_cat

  print('loss_mean:  %s   acc_mean:  %s' % (loss_mean, acc_mean))
  return loss_mean, acc_mean, classes, y_test
示例#3
0
    def set_eval_data(self):
        """Load the evaluation samples and labels onto this instance.

        The indices come from ``self.generate_indices(self.TRAIN_SIZE,
        self.SAMPLES)``.  The mel-spectrograms and labels for them are read
        through ``gd`` and stored in ``self.eval_data`` and
        ``self.eval_labels``.  When ``args.verbose`` is set, the sample count
        and the per-class counts are printed.
        """
        eval_indices = self.generate_indices(self.TRAIN_SIZE, self.SAMPLES)
        self.eval_data = gd.get_melspectrograms(eval_indices)
        self.eval_labels = np.asarray(gd.get_labels(eval_indices))

        if args.verbose:
            # Report on the labels just stored on this instance rather than
            # on the module-level NN object.
            unique, counts = np.unique(self.eval_labels, return_counts=True)
            print('Eval samples: {}, classes: {}'.format(
                self.eval_labels.shape[0], dict(zip(unique, counts))))
示例#4
0
    def set_training_data(self):
        """Load the training samples and labels onto this instance.

        The indices come from ``self.generate_indices(0, self.TRAIN_SIZE)``.
        The mel-spectrograms and labels for them are read through ``gd`` and
        stored in ``self.train_data`` and ``self.train_labels``.  When
        ``args.verbose`` is set, the sample count and the per-class counts
        are printed.
        """
        train_indices = self.generate_indices(0, self.TRAIN_SIZE)
        self.train_data = gd.get_melspectrograms(train_indices)
        self.train_labels = np.asarray(gd.get_labels(train_indices))

        if args.verbose:
            # Report on the labels just stored on this instance rather than
            # on the module-level NN object.
            unique, counts = np.unique(self.train_labels, return_counts=True)
            print('Train samples: {}, classes: {}'.format(
                self.train_labels.shape[0], dict(zip(unique, counts))))
示例#5
0
def get_error_list(indices):
    """Collect the file paths of the samples the network gets wrong.

    Every index in ``indices`` is evaluated on its own through
    ``NN.evaluate``; when the reported ``'accuracy'`` is exactly 0.0 the
    matching ``'path'`` entry of ``./labels.csv`` is added to the result.

    Args:
        indices: iterable of sample indices to check.

    Returns:
        List of ``'path'`` values, in the order the indices were visited.
    """
    import pandas

    label_table = pandas.read_csv('./labels.csv', header=0)

    def _misclassified(sample_idx):
        # Evaluate a batch that holds only this one sample.
        outcome = NN.evaluate(gd.get_melspectrograms([sample_idx]),
                              np.asarray(gd.get_labels([sample_idx])))
        return outcome['accuracy'] == 0.0

    return [label_table['path'][sample_idx]
            for sample_idx in indices
            if _misclassified(sample_idx)]
示例#6
0
def train_frame_model(model, y_categorical, max_len, get_cross_validation):
    """Load data, fit and evaluate the frame-label model, and predict labels.

    Args:
        model: currently ignored -- the function always builds
            ``frame_labels_classification(6, max_len)``.
        y_categorical: whether to one-hot encode the labels. True for
            classification models, False for regression models.
        max_len: the number of frames for each video.
        get_cross_validation: whether to cross validate (four folds of 50
            videos) instead of using a single 80/20 split.

    Returns:
        classes: predictions. Covers all the videos when cross validating,
            otherwise only the held-out split.
        y_test: test ground truth. Equal to all labels when cross validating.
    """
    x = get_data.get_frame_labels(feature_dir, feature_name, max_len)
    y = np.array(get_data.get_labels(label_dir, label_name))
    if y_categorical:
        y = np_utils.to_categorical(y)
        # Single-argument print: same output under Python 2 and Python 3.
        print('%s %s' % (x.shape, y.shape))

    model = frame_labels_classification(6, max_len)
    if get_cross_validation:
        # The data set is laid out as 4 folds of 50 videos (200 in total).
        n_folds = 4
        fold_size = 50
        loss = np.zeros(n_folds)
        acc = np.zeros(n_folds)
        classes = np.zeros((n_folds * fold_size, 6))
        x_train_cro, y_train_cro, x_test_cro, y_test_cro = train.set_cross_validation(
            x, y)
        # Visit every fold. Stopping at three left the last 50 predictions at
        # zero and diluted the averaged loss/accuracy with an unused slot.
        # NOTE(review): the same model instance is fitted on every fold
        # without being re-initialised.
        for i in range(n_folds):
            model.fit(x_train_cro[i],
                      y_train_cro[i],
                      validation_data=[x_test_cro[i], y_test_cro[i]],
                      epochs=5)
            loss_and_metrics = model.evaluate(x_test_cro[i], y_test_cro[i])
            loss[i] = loss_and_metrics[0]
            acc[i] = loss_and_metrics[1]
            classes[i * fold_size:(i + 1) * fold_size] = model.predict(
                x_test_cro[i])
        # The means are computed for inspection only; they are not returned.
        loss_mean = np.mean(loss)
        acc_mean = np.mean(acc)
        y_test = y
    else:
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            x, y, test_size=0.2, random_state=1)
        model.fit(x_train, y_train, validation_data=[x_test, y_test], epochs=5)
        loss_mean, acc_mean = model.evaluate(x_test, y_test)
        classes = model.predict(x_test)

    return classes, y_test
示例#7
0
def train():
    """Train the text-CNN and save a checkpoint after every epoch.

    For each of ``config.num_epochs`` epochs and each of ``config.steps``
    steps ``j``, the sentences in ``./jieba_treat/<j>.txt`` and the labels in
    ``./jieba_treat/<j>_l.txt`` are converted through ``raw`` and used for one
    optimisation step.  The loss and accuracy of each step are appended to
    the module-level ``loss_file`` and ``acc_file`` and printed.  At most the
    five most recent checkpoints are kept.
    """
    # Construct a CNN_NET obj.
    text_cnn = CNN_NET(config.word_embedding_length, config.sentence_length,
                       config.learning_rate, config.filter_size,
                       config.num_class, config.regularization_rate)
    with tf.Session() as sess:
        # Define the saver and limit the number of models it keeps.
        saver = tf.train.Saver(max_to_keep=5)
        # Initialize all of the parameters.
        sess.run(tf.global_variables_initializer())
        for i in range(config.num_epochs):
            print('epoch {}'.format(i))
            for j in range(config.steps):
                batch_prefix = './jieba_treat/' + str(j)
                # Context managers close both files even if parsing raises.
                with open(batch_prefix + '.txt', 'r') as content_file, \
                        open(batch_prefix + '_l.txt', 'r') as label_file:
                    # Load training data and labels.
                    all_sentence_words = raw.get_all_words(content_file)
                    embeddings = np.array(
                        raw.get_embeddings(all_sentence_words))
                    labels = np.array(raw.get_labels(label_file))

                # Feed the data into the network.
                feed_dict = {
                    text_cnn.input_x: embeddings,
                    text_cnn.input_label: labels,
                    text_cnn.keep_prob: config.keep_prob
                }
                # Run one training step and fetch the loss and accuracy.
                loss, _, acc = sess.run(
                    [text_cnn.loss, text_cnn.train_op, text_cnn.accuracy],
                    feed_dict)
                loss_file.write(str(loss) + '\n')
                acc_file.write(str(acc) + '\n')
                print("step {}, loss {:g}, acc {:g}".format(j, loss, acc))
            # Save models
            saver.save(sess, MODELSAVEPATH + 'epoch_' + str(i) + '.ckpt')
from itertools import cycle
import matplotlib.pyplot as plt
import numpy as np
import feature_extraction, get_data
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import label_binarize
# setup plot details
colors = cycle(['navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal'])

# Training data: the first 5240 tag feature rows and the training labels.
X = feature_extraction.get_tags()[:5240]
y = get_data.get_labels()

# Fit a default MLP and predict on the training set itself.
classifier = MLPClassifier()
classifier.fit(X, y)
y_score = classifier.predict(X)

# from sklearn.metrics import average_precision_score
# average_precision = average_precision_score(get_data.get_validation_labels(), y_score)
#
# print('Average precision-recall score: {0:0.2f}'.format(
#       average_precision))

from sklearn.metrics import precision_recall_curve
from sklearn.metrics import plot_precision_recall_curve

# Plot the precision-recall curve on the validation split.  The labels
# accessor must be called: passing the function object itself hands the
# plotting helper a callable instead of the label data.
disp = plot_precision_recall_curve(classifier,
                                   get_data.get_validation_features(),
                                   get_data.get_validation_labels())
# disp.ax_.set_title('2-class Precision-Recall curve: '
#                    'AP={0:0.2f}'.format(average_precision))
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
from sklearn.preprocessing import StandardScaler
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import get_data

# Lift the pandas row limit so printed frames are not truncated.
pd.set_option('display.max_rows', None)

# Load every split through the project's get_data module
# (presumably pandas objects -- TODO confirm against get_data).
df_features = get_data.get_features()
df_labels = get_data.get_labels()
df_features_validate = get_data.get_validation_features()
df_labels_validate = get_data.get_validation_labels()
df_features_test = get_data.get_test_features()

# Concatenate the train, validation and test features into one frame so the
# clean-up below is applied to all three at once.
dataset_whole = pd.concat(
    [df_features, df_features_validate, df_features_test])

# Fix errors: replace missing titles with the placeholder 'N/A'.
dataset_whole['title'] = dataset_whole['title'].fillna('N/A')


def remove_errors(self):
    """Return 0 for a non-numeric string, otherwise the value unchanged.

    Despite its name, ``self`` is just the value being cleaned; the function
    is applied element-wise to a column.  Any string (including ``str``
    subclasses and the empty string) that does not consist solely of digits
    is replaced by 0.  Digit-only strings and non-string values are returned
    as they are.
    """
    # isinstance also covers str subclasses, which an exact type comparison
    # would let through untouched.
    if isinstance(self, str) and not self.isdigit():
        return 0
    return self


# Replace non-numeric year strings with 0 (see remove_errors), then cast every
# entry of the column to int.
dataset_whole['year'] = dataset_whole['year'].apply(remove_errors).apply(int)
示例#10
0
import os
import datetime
import keras
import pandas as pd

from get_data import get_generators, get_labels, get_class_weights
from model import model
from utils import show_batch, confusion_matrix_callback
import consts as C

if __name__ == "__main__":
    # Script entry point: load the label columns and data generators, then
    # set up the logging and learning-rate callbacks for a training run.
    labels_columns = get_labels().columns
    train_generator, validation_generator = get_generators()
    # imgs, labels = train_generator[0]
    # show_batch(imgs,labels,labels_columns)

    print("start training")

    #  -- callbacks --
    # Each run logs into its own directory named <timestamp><model name>.
    # NOTE(review): the log root is a hard-coded absolute Windows path;
    # consider making it configurable.
    log_root = r"C:\Users\User\PycharmProjects\PlantPathology\logs\fit"
    run_name = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + C.MODEL
    log_dir = os.path.join(log_root, run_name)
    # makedirs also creates missing parents (e.g. on a fresh checkout), where
    # os.mkdir would fail.
    os.makedirs(log_dir)

    # Per-epoch metrics go to training.log inside the run directory.
    csv_logger = keras.callbacks.CSVLogger(
        os.path.join(log_dir, 'training.log'))
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir,
                                                       histogram_freq=0)
    # Halve the learning rate after 3 epochs without val_loss improvement,
    # never going below C.MINIMUM_LR.
    reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                  factor=0.5,
                                                  patience=3,
                                                  min_lr=C.MINIMUM_LR,
                                                  verbose=True)
示例#11
0
def train_model(model,
                y_categorical,
                max_len,
                get_cross_validation=False,
                non_zero=False):
    """Load data, compile, fit, evaluate model, and predict labels.

    Args:
        model: model builder. One of ``TK_TCN_regression``,
            ``TK_TCN_resnet`` or ``TCN_V1`` .. ``TCN_V5``; any other object
            is compiled and used as-is.
        y_categorical: whether to one-hot encode the labels. True for
            classification models, False for regression models.
        max_len: the number of frames for each video.
        get_cross_validation: whether to cross validate (four folds of 50
            videos) instead of using a single 80/20 split.
        non_zero: whether to filter the data through
            ``get_data.non_zero_data`` before training.

    Returns:
        loss_mean: loss for this model (mean over the folds when cross
            validating).
        acc_mean: accuracy metric for this model (mean over the folds when
            cross validating).
        classes: predictions. Covers all the videos when cross validating,
            otherwise only the held-out split.
        y_test: test ground truth. Equal to all labels when cross validating.
    """
    # for label_numer = 'OPR', labels are [0,1,2,3,4,5]
    n_classes = 6
    x = get_data.get_feature_tensor(feature_dir, feature_name, max_len)
    y = get_data.get_labels(label_dir, label_name)
    if non_zero:
        x, y = get_data.non_zero_data(x, y, max_len, y)
    if y_categorical:
        y = np_utils.to_categorical(y)
    y = np.array(y)
    # Single-argument print: same output under Python 2 and Python 3.
    print('x %s y %s' % (x.shape, y.shape))

    # choose model
    if model == TK_TCN_regression:
        model = TK_TCN_regression(n_classes=n_classes,
                                  feat_dim=512,
                                  max_len=max_len)
        model.compile(loss='mean_absolute_error',
                      optimizer='sgd',
                      metrics=['accuracy'])
    else:
        if model == TK_TCN_resnet:
            model = TK_TCN_resnet(n_classes=n_classes,
                                  feat_dim=512,
                                  max_len=max_len)
        elif model == TCN_V1:
            model = TCN_V1(n_classes=n_classes, feat_dim=512, max_len=max_len)
        elif model == TCN_V2:
            model = TCN_V2(n_classes=n_classes, feat_dim=512, max_len=max_len)
        elif model == TCN_V3:
            model = TCN_V3(n_classes=n_classes, feat_dim=512, max_len=max_len)
        elif model == TCN_V4:
            model = TCN_V4(n_classes=n_classes, feat_dim=512, max_len=max_len)
        elif model == TCN_V5:
            model = TCN_V5(n_classes=n_classes, feat_dim=512, max_len=max_len)
        # compile model
        optimizer = Adam(lr=0.01,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=1e-08,
                         decay=0.0)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=['categorical_accuracy'])
        # model.compile(loss='mean_absolute_error', optimizer=optimizer,metrics=['categorical_accuracy'])

    # visualize
    # model.summary()

    if get_cross_validation:
        # The data set is laid out as 4 folds of 50 videos (200 in total).
        n_folds = 4
        fold_size = 50
        loss = np.zeros(n_folds)
        acc = np.zeros(n_folds)
        classes = np.zeros((n_folds * fold_size, n_classes))
        x_train_cro, y_train_cro, x_test_cro, y_test_cro = set_cross_validation(
            x, y)
        # Visit every fold. Stopping at three left the last 50 predictions at
        # zero and diluted the averaged loss/accuracy with an unused slot.
        # NOTE(review): the same model instance is fitted on every fold
        # without being re-initialised.
        for i in range(n_folds):
            model.fit(x_train_cro[i],
                      y_train_cro[i],
                      validation_data=[x_test_cro[i], y_test_cro[i]],
                      epochs=5)
            loss_and_metrics = model.evaluate(x_test_cro[i], y_test_cro[i])
            loss[i] = loss_and_metrics[0]
            acc[i] = loss_and_metrics[1]
            classes[i * fold_size:(i + 1) * fold_size] = model.predict(
                x_test_cro[i])
        loss_mean = np.mean(loss)
        acc_mean = np.mean(acc)
        y_test = y
    else:
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            x, y, test_size=0.2, random_state=1)
        model.fit(x_train, y_train, validation_data=[x_test, y_test], epochs=5)
        loss_mean, acc_mean = model.evaluate(x_test, y_test)
        classes = model.predict(x_test)

    return loss_mean, acc_mean, classes, y_test