def __init__(self, Examples, Labels, model_spec, n_folds=10, epochs=100): 
        """CrossValidator(Examples, Labels, model_spec, n_folds, epochs)
        Given a list of training examples in Examples and a corresponding
        set of class labels in Labels, train and evaluate a learner
        using cross validation.
        
        arguments:
        Examples:  feature matrix, each row is a feature vector
        Labels:  Class labels, one per feature vector
        model_spec:  specification of the model to learn, see
            feed_forward_model() for details and an example
        n_folds:  number of folds in the experiment
        epochs:  number of passes through the data set
        
        """
        
        # Create a plan for k-fold testing with shuffling of examples
        # http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html
        kfold = StratifiedKFold(n_folds, shuffle=True)
        
    
        foldidx = 0
        errors  = np.zeros([n_folds, 1])
        models = []
        losses = []
        timer = Timer()
        for (train_idx, test_idx) in kfold.split(Examples, Labels):
            (errors[foldidx], model, loss) = self.train_and_evaluate_model(
                Examples, Labels, train_idx, test_idx, model_spec)
            models.append(model)
            losses.append(loss)
# =============================================================================
#             print(
#                 "Fold {} error {}, cumulative cross-validation time {}".format(
#                     foldidx, errors[foldidx], timer.elapsed()))
# =============================================================================
            foldidx = foldidx + 1
        
        # Show architecture of last model (all are the same)
        print("Model summary")
        model.summary()
        
        print("Fold errors:  {}".format(errors))
        print("Mean error {} +- {}".format(np.mean(errors), np.std(errors)))
        
        print("Experiment time: {}".format(timer.elapsed()))
        
        self.errors = errors
        self.models = models
        self.losses = losses
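        # A minimal usage sketch (hypothetical names, assuming a feature
        # matrix X with one row per example, integer labels y, and a
        # model_spec in the feed_forward_model() format used below):
        #
        #   spec = [(Dense, [20], {'activation': 'relu', 'input_dim': X.shape[1]}),
        #           (Dense, [len(set(y))], {'activation': 'softmax'})]
        #   cv = CrossValidator(X, y, spec, n_folds=5, epochs=20)
        #   print(np.mean(cv.errors), np.std(cv.errors))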
# Example #2
def main():
    adv_ms = 10
    len_ms = 20
    offset_s = 0.35
    trdir = os.getcwd() + "/train"
    tedir = os.getcwd() + "/test"
    components = 28

    # get training files
    try:
        trainFiles = np.genfromtxt('trainFileList.txt', dtype='str')
        [train_Y, trainFiles] = get_labels_path(trainFiles)
        print('Train Files List Loaded')
    except IOError:
        trainFiles = sorted(get_corpus(trdir))
        [trainFiles, trSilences] = fileFiltSave(trainFiles,
                                                'trainFileList.txt', 'train')
        del trSilences
        [train_Y, trainFiles] = get_labels_path(trainFiles)
        print('Train Files List Saved')

    # get testing files
    try:
        testFiles = np.genfromtxt('testFileList.txt', dtype='str')
        testSilences = np.genfromtxt('teSilenceList.txt', dtype='str')
        print('Test Files List Loaded')
    except IOError:
        testFiles = np.array(sorted(get_corpus(tedir)))
        [testFiles, testSilences] = fileFiltSave(testFiles, 'testFileList.txt',
                                                 'teSilenceList.txt')
        print('Test Files List Saved')

    try:
        #load train data
        train_X = np.fromfile('trainData.dat', dtype=float)
        samplesN = trainFiles.shape[0]
        data_dim = components
        timesteps = int(train_X.shape[0] / samplesN / data_dim)

        train_X = np.reshape(train_X, (samplesN, timesteps, data_dim))
        print('Train Data Features Loaded')

        # load test data
        test_X = np.fromfile('testData.dat', dtype=float)
        test_samplesN = testFiles.shape[0]
        test_X = np.reshape(test_X, (test_samplesN, timesteps, data_dim))
        print('Test Data Features Loaded')

    except IOError:
        timer = Timer()
        pca = pca_analysis_of_spectra(trainFiles, adv_ms, len_ms, offset_s)
        print(
            "PCA feature generation and analysis time {}, feature extraction..."
            .format(timer.elapsed()))

        timer.reset()
        # Read features - each row is a feature vector

        train_X = extract_features_from_corpus(trainFiles, adv_ms, len_ms,
                                               offset_s, pca, components)

        print("Time to generate features {}".format(timer.elapsed()))
        timer.reset()
        [samplesN, data_dim] = train_X.shape
        timesteps = 1  # each freshly extracted example is a single flattened feature vector

        train_X.tofile('trainData.dat')
        print('Train Data Features Saved')
        train_X = train_X.flatten()
        train_X = np.reshape(train_X, (samplesN, timesteps, data_dim))

        # Read features - each row is a feature vector
        test_X = extract_features_from_corpus(testFiles, adv_ms, len_ms,
                                              offset_s, pca, components)

        [test_samplesN, data_dim] = test_X.shape

        test_X.tofile('testData.dat')
        print('Test Data Features Saved')
        test_X = test_X.flatten()
        test_X = np.reshape(test_X, (test_samplesN, timesteps, data_dim))
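    # Note: ndarray.tofile()/np.fromfile() store raw values without shape or
    # dtype metadata, which is why both branches reshape the flat data back
    # to (samples, timesteps, dim) using dimensions known at runtime.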

    num_classes = len(set(train_Y))

    try:
        json_file = open('model.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        model = model_from_json(loaded_model_json)
        # load weights into new model
        model.load_weights("LSTMgoogle.h5")
        print("Loaded model from disk")

        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])
    except IOError:

        model = Sequential()
        model.add(
            LSTM(256, return_sequences=True, input_shape=(
                timesteps,
                data_dim)))  # returns a sequence of vectors of dimension 256
        model.add(LSTM(256, return_sequences=True)
                  )  # returns a sequence of vectors of dimension 256
        model.add(LSTM(256, return_sequences=True)
                  )  # returns a sequence of vectors of dimension 256
        model.add(LSTM(256))  # return a single vector of dimension 256
        model.add(Dense(num_classes, activation='softmax'))

        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])

        kfold = StratifiedKFold(2, shuffle=True)

        for (train_idx, test_idx) in kfold.split(train_X, train_Y):
            onehotlabels = np_utils.to_categorical(train_Y)
            model.fit(train_X[train_idx],
                      onehotlabels[train_idx],
                      batch_size=256,
                      epochs=100,
                      validation_data=(train_X[test_idx],
                                       onehotlabels[test_idx]))
        model_json = model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        model.save_weights("LSTMgoogle.h5")
        print('Model has been saved')

    classmap = [
        'yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go'
    ]
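    # classmap holds the ten target words; below, test files flagged as
    # silence are labeled 'silence', and low-confidence or out-of-range
    # predictions fall back to 'unknown'.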

    filenames = []
    for f in testFiles:
        arr = f.split('/')
        filenames.append(arr[-1])

    result = model.predict(test_X)
    [n, bits] = result.shape
    print('Obtained {} total predictions'.format(n))
    dictionary = {}

    for f in testSilences:
        arr = f.split('/')
        name = arr[-1]
        dictionary[name] = 'silence'

    for i in range(0, n):
        classIdx = np.argmax(result[i, :])
        confidence = np.max(result[i, :])
        if classIdx < 10 and confidence > 0.95:
            dictionary[filenames[i]] = classmap[classIdx]
        else:
            dictionary[filenames[i]] = 'unknown'

    with open('submission.csv', 'w') as f:
        fieldnames = ['fname', 'label']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        data = [dict(zip(fieldnames, [k, v])) for k, v in dictionary.items()]
        writer.writerows(data)
# Example #3
def main():

    files = get_corpus("C:/users/corpora/tidigits/wav/train")
    # for testing
    if False:
        files[50:] = []  # truncate test for speed

    print("%d files" % (len(files)))

    adv_ms = 10
    len_ms = 20

    # If > 0, extract +/- offset_s about the center; if None, take everything
    # unless a voice activity detector is used
    offset_s = None

    print("Generating voice activity detection model")
    timer = Timer()
    vad = UnsupervisedVAD(files, adv_ms, len_ms)
    print("VAD training time {}, starting PCA analysis...".format(
        timer.elapsed()))
    timer.reset()

    pca = pca_analysis_of_spectra(files, adv_ms, len_ms, vad, offset_s)
    print("PCA feature generation and analysis time {}, feature extraction...".
          format(timer.elapsed()))
    timer.reset()

    # Read features - each row is a feature vector
    components = 40
    examples = extract_tensors_from_corpus(files, adv_ms, len_ms, vad,
                                           offset_s, pca, components)

    # Find the length of the longest time series
    max_frames = max([e.shape[0] for e in examples])
    print("Longest time series {} steps".format(max_frames))

    print("Time to generate features {}".format(timer.elapsed()))
    timer.reset()

    labels = get_class(files)

    outputN = len(set(labels))
    # Specify model architectures
    T = PaddedBatchGenerator.longest_sequence(examples)
    dim_recurrent = components
    dim_feedforward = T * components

    # This structure is used only for feed-forward networks
    # The RNN networks are built more traditionally as I need
    # to further develop the infrastructure for wrapping layers
    models_ff = [
        # Each list is a model that will be executed
        [
            (Dense, [30], {
                'activation': 'relu',
                'input_dim': dim_feedforward,
                'kernel_regularizer': regularizers.l2(0.01)
            }),
            #(Dropout, [.25], {}),
            (Dense, [30], {
                'activation': 'relu',
                'kernel_regularizer': regularizers.l2(0.01)
            }),
            #(Dropout, [.25], {}),
            (Dense, [outputN], {
                'activation': 'softmax',
                'kernel_regularizer': regularizers.l2(0.01)
            })
        ]
    ]
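    # build_model (defined elsewhere) is assumed to expand each
    # (layer_class, args, kwargs) entry roughly as follows (a sketch, not
    # the actual implementation):
    #   net = Sequential()
    #   for layer, args, kwargs in architecture:
    #       net.add(layer(*args, **kwargs))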
    models_rnn = []  # no recurrent architectures specified yet (the recurrent branch below trains nothing)

    print("Time to build matrix {}, starting cross validation".format(
        timer.elapsed()))

    # Use recurrent classifiers if true
    recurrent = True
    if recurrent:
        # Use the recurrent neural net list and train/evaluation fn
        models = models_rnn
        train_eval = classifier.recurrent.train_and_evaluate
    else:
        # Use the feed forward neural net list and train/evaluation fn
        models = models_ff
        train_eval = classifier.feedforward.train_and_evaluate

    batch_size = 100
    epochs = 60

    debug = False
    if debug:
        models = [models[-1]]  # Only test the last model in the list

    results = []
    for architecture in models:
        model = build_model(architecture)
        results.append(
            CrossValidator(examples,
                           labels,
                           model,
                           train_eval,
                           batch_size=batch_size,
                           epochs=epochs))
# Example #4
def main():

    files = get_corpus("C:/Users/vysha/Downloads/wav/train")
    # for testing
    if False:
        files[50:] = []  # truncate test for speed
    
    print("%d files"%(len(files)))
    
    adv_ms = 10
    len_ms = 20
    # We want to retain offset_s about the center
    offset_s = 0.25    

    timer = Timer()
    pca = pca_analysis_of_spectra(files, adv_ms, len_ms, offset_s)
    print("PCA feature generation and analysis time {}, feature extraction..."
          .format(timer.elapsed()))
    
    timer.reset()
    # Read features - each row is a feature vector
    components = 40
    examples = extract_features_from_corpus(
        files, adv_ms, len_ms, offset_s, pca, components)        
    print("Time to generate features {}".format(timer.elapsed()))
    timer.reset()
    
    labels = get_class(files)
    outputN = len(set(labels))
    
    # Specify model architectures
    models = [       
        # 3 layer 20x20xoutput baseline (problem set 3)
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dense, [20], {'activation':'relu', 'input_dim':20}),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':20})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dense, [40], {'activation':'relu', 'input_dim':20}),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':40})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dense, [20], {'activation':'relu', 'input_dim':20}),
         (Dense, [40], {'activation':'relu', 'input_dim':20}),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':40})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dropout, [0.3], {}),
         (Dense, [20], {'activation':'relu', 'input_dim':20}),
         (Dropout, [0.3], {}),
         (Dense, [40], {'activation':'relu', 'input_dim':20}),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':40})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dropout, [0.2], {}),
         (Dense, [20], {'activation':'relu', 'input_dim':20}),
         (Dropout, [0.2], {}),
         (Dense, [40], {'activation':'relu', 'input_dim':20}),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':40})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dense, [20], {'activation':'relu', 'input_dim':20,'kernel_regularizer': keras.regularizers.l1(0.01)}),
         (Dense, [40], {'activation':'relu', 'input_dim':20, 'kernel_regularizer': keras.regularizers.l1(0.01)} ),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':40})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dense, [20], {'activation':'relu', 'input_dim':20,'kernel_regularizer': keras.regularizers.l2(0.01)}),
         (Dense, [40], {'activation':'relu', 'input_dim':20, 'kernel_regularizer': keras.regularizers.l2(0.01)}),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':40})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dense, [20], {'activation':'relu', 'input_dim':20,'kernel_regularizer': keras.regularizers.l1(0.01)}),
         (Dense, [40], {'activation':'relu', 'input_dim':20, 'kernel_regularizer': keras.regularizers.l1(0.01)}),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':40})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dense, [20], {'activation':'relu', 'input_dim':10,'kernel_regularizer': keras.regularizers.l1(0.01)}),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':20})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dense, [20], {'activation':'relu', 'input_dim':10,'kernel_regularizer': keras.regularizers.l1(0.01)}),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':20})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dense, [20], {'activation':'relu', 'input_dim':10,'kernel_regularizer': keras.regularizers.l2(0.01)}),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':20,})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
         (Dense, [20], {'activation':'relu', 'input_dim':10,'kernel_regularizer': keras.regularizers.l2(0.01)}),
         (Dense, [outputN], {'activation':'softmax', 'input_dim':20})
        ],
        [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
          (Dense, [20], {'activation':'relu', 'input_dim':20}),
          (Dense, [outputN], {'activation':'softmax', 'input_dim':20})
         ],
         [(Dense, [10], {'activation':'relu', 'input_dim':examples.shape[1]}),
          (Dense, [10], {'activation':'relu', 'input_dim':10}),
          (Dense, [10], {'activation':'relu', 'input_dim':10}),
          (Dense, [10], {'activation':'relu', 'input_dim':10}),
          (Dense, [outputN], {'activation':'softmax', 'input_dim':10})
         ],
         [(Dense, [5], {'activation':'relu', 'input_dim':examples.shape[1]}),
          (Dense, [5], {'activation':'relu', 'input_dim':5}),
          (Dense, [5], {'activation':'relu', 'input_dim':5}),
          (Dense, [5], {'activation':'relu', 'input_dim':5}),
          (Dense, [10], {'activation':'relu', 'input_dim':5}),
          (Dense, [10], {'activation':'relu', 'input_dim':10}),
          (Dense, [outputN], {'activation':'softmax', 'input_dim':10})
         ]
        # Add more models here...  [(...), (...), ...], [(...), ...], ....
        ]
    
    print("Time to build matrix {}, starting cross validation".format(
        timer.elapsed()))
    
# =============================================================================
#     compare L1 and L2
# =============================================================================
# =============================================================================
#     models.clear()
#     models = [
#         [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1]}),
#          (Dense, [20], {'activation':'relu', 'input_dim':20}),
#          (Dense, [outputN], {'activation':'softmax', 'input_dim':20})
#         ],
#         [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1], 
#           'kernel_regularizer': keras.regularizers.l1(0.01)}),
#          (Dense, [20], {'activation':'relu', 'input_dim':10,'kernel_regularizer': keras.regularizers.l1(0.01)}),
#          (Dense, [outputN], {'activation':'softmax', 'input_dim':20})
#         ],
#         [(Dense, [20], {'activation':'relu', 'input_dim':examples.shape[1], 
#           'kernel_regularizer': keras.regularizers.l2(0.01)}),
#          (Dense, [20], {'activation':'relu', 'input_dim':10,'kernel_regularizer': keras.regularizers.l2(0.01)}),
#          (Dense, [outputN], {'activation':'softmax', 'input_dim':20,})
#         ]
#         ]
# =============================================================================
    
    debug = False
    c = []
    if debug: 
        c.append(CrossValidator(examples, labels, models[2], epochs=50))
    else:
        for architecture in models:
            c.append(CrossValidator(examples, labels, architecture, epochs=100))
    # do something useful with c... e.g. generate tables/graphs, etc.
    avg_Err = []
    std_Err = []
    count = 0

    for a in c:
        avg_Err.append(np.average(a.get_errors()))
        std_Err.append(np.std(a.get_errors()))
        count = count + 1

    layers = np.arange(count)
    print(avg_Err)
    
# =============================================================================
#     files_test = get_corpus("C:/Users/vysha/Downloads/wav/test")
#     print("%d test files"%(len(files_test)))
#     
#     examples_test = extract_features_from_corpus(
#         files_test, adv_ms, len_ms, offset_s, pca, components)        
#     
#     print("Time to generate test features {}".format(timer.elapsed()))
#     timer.reset()
#     
#     labels_test = np_utils.to_categorical(get_class(files_test))
#     
#     accu = []
#     avgAccu = []
#     for m in c:
#         model = m.models
#         for n in model:
#             y = n.evaluate(np.array(examples_test),np.array(labels_test),verbose = 0)
#             accu.append(1-y[1])
#         avgAccu.append(np.average(accu))
# =============================================================================
    
    
    mp.figure()
#    p1 = mp.bar(layers,avgAccu)
    p2 = mp.bar(layers, avg_Err, yerr=std_Err)
    mp.title('L1 and L2 analysis')
    mp.xlabel('Expt number')
    mp.ylabel('Error')
    mp.show()
# Example #5
    def __init__(self,
                 corpus,
                 keys,
                 model,
                 model_train_eval,
                 n_folds=10,
                 batch_size=100,
                 epochs=100):
        """CrossValidator(corpus, keys, model_spec, n_folds, batch_size, epochs)
        Cross validation for sequence models
        Given a corpus object capable of retrieving features and a list of
        keys to access the data and labels from the corpus object, run a
        cross validation experiment
        
        
        arguments:
        corpus - Object representing data and labels.  Must support
          methods get_features and get_labels which both take one of the 
          keys passed in.  See timit.corpus.Corpus for an example of a class
          that supports this interface.
          get_features returns a feature matrix
          get_labels returns a list of start and stop indices as well as
              labels.  start[i], end[i], label[i] means that label[i] is
              present for features between indices of start[i] and end[i] 
              (inclusive).  
        keys - values that can be passed to corpus.get_features and
            corpus.get_labels to return data.  These are what will be split
            for the cross validation

        model: Keras model to learn
            e.g. result of buildmodels.build_model()
            
        model_train_eval: function that can be called to train and test
            a model.  Must conform to an interface that expects the following
            arguments:
                corpus - corpus object
                trainkeys - keys used to train
                testkeys - keys used to test
                model - keras network to be used
                batch_size - # examples to process per batch
                epochs - Number of passes through training data
                name - test name
        n_folds - # of cross validation folds
        batch_size - # examples to process per batch
        epochs - Number of passes through training data  
        """

        # Create a plan for k-fold testing with shuffling of examples
        kfold = KFold(n_folds, shuffle=True)

        # HINT:  As you are not working with actual samples here, but rather
        # utterance keys, create a list of indices into the utterances
        # list and use those when iterating over kfold.split()

        # Store the models errors, and losses in lists

        # It is also suggested that you might want to create a Timer
        # to track how long things  are (not that it will help things go
        # any faster)
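        # One way to follow the hint above (a sketch; the loop further down
        # instead indexes the keys array directly):
        #   idx = np.arange(len(keys))
        #   for train_i, test_i in kfold.split(idx):
        #       trainkeys = [keys[j] for j in train_i]
        #       testkeys = [keys[j] for j in test_i]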

        # HR: test name passed for logging (a fold counter could be used instead)
        count = 'rnn'
        # HR: lists to hold each fold's model, error, and loss after training finishes
        model_list = []
        err_list = []
        loss_list = []
        # initialize a timer to measure elapsed time
        time = Timer()

        # HR: iterate over the folds, collecting errors, models, and losses
        keys = np.asarray(keys)  # allow indexing by the fold index arrays
        for (train_idx, test_idx) in kfold.split(keys):
            # HR: model_train_eval (e.g. train_and_evaluate() from recurrent.py)
            # returns a tuple (err, model, loss)
            (err, model, loss) = model_train_eval(corpus,
                                                  keys[train_idx],
                                                  keys[test_idx],
                                                  model,
                                                  batch_size=batch_size,
                                                  epochs=epochs)
            model_list.append(model)
            err_list.append(err)
            loss_list.append(loss)
            # count += 1

        # log the total cross-validation time
        print("Time Elapsed:", time.elapsed())
        time.reset()
        self.errors = err_list
        self.models = model_list
        self.losses = loss_list
# Example #6
    def __init__(self, Examples, Labels, model, model_train_eval,
                 n_folds=10, batch_size=100, epochs=100):
        """CrossValidator(Examples, Labels, model, model_train_eval, n_folds, batch_size, epochs)
        Given a list of training examples in Examples and a corresponding
        set of class labels in Labels, train and evaluate a learner
        using cross validation.

        arguments:
        Examples:  feature matrix, each row is a feature vector
        Labels:  Class labels, one per feature vector
        model: Keras model to learn
            e.g. result of buildmodels.build_model()
        model_train_eval: function that can be called to train and test
            a model.  Must conform to an interface that expects the following
            arguments:
                examples - list or tensor of examples
                labels - categories corresponding to examples
                train_idx - indices of examples, labels to be used
                    for training
                test_idx - indices of examples, labels to be used to
                    evaluate the model
                model - keras network to be used
                batch_size - # examples to process per batch
                epochs - Number of passes through training data
                name - test name
        n_folds - # of cross validation folds
        batch_size - # examples to process per batch
        epochs - Number of passes through training data
        """

        # Create a plan for k-fold testing with shuffling of examples
        # http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html
        kfold = StratifiedKFold(n_folds, shuffle=True)

        foldidx = 0
        errors = np.zeros([n_folds, 1])
        models = []
        losses = []
        timer = Timer()

        for (train_idx, test_idx) in kfold.split(Examples, Labels):
            (errors[foldidx], model, loss) = \
                model_train_eval(
                    Examples, Labels, train_idx, test_idx, model,
                    batch_size, epochs, name="f{}".format(foldidx))
            models.append(model)
            losses.append(loss)
            print(
                "Fold {} error {}, cumulative cross-validation time {}".format(
                    foldidx, errors[foldidx], timer.elapsed()))
            foldidx = foldidx + 1

        # Show architecture of last model (all are the same)
        print("Model summary")
        model.summary()

        print("Fold errors:  {}".format(errors))
        print("Mean error {} +- {}".format(np.mean(errors), np.std(errors)))

        print("Experiment time: {}".format(timer.elapsed()))

        self.errors = errors
        self.models = models
        self.losses = losses
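    # A hedged sketch (hypothetical helper, not part of this file) of a
    # model_train_eval callable satisfying the interface documented above:
    #
    #   def train_and_evaluate(examples, labels, train_idx, test_idx, model,
    #                          batch_size=100, epochs=100, name=""):
    #       onehot = np_utils.to_categorical(labels)
    #       history = model.fit(examples[train_idx], onehot[train_idx],
    #                           batch_size=batch_size, epochs=epochs, verbose=0)
    #       loss, acc = model.evaluate(examples[test_idx], onehot[test_idx],
    #                                  verbose=0)
    #       return 1 - acc, model, history.history['loss']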
# Example #7
def main():
    adv_ms = 10  # frame advance and length
    len_ms = 20

    TimitBaseDir = 'C:\\Users\\rashm\\Documents\\CS682\\Lab2\\timit-for-students'

    corpus = Corpus(TimitBaseDir, os.path.join(TimitBaseDir, 'wav'))

    phonemes = corpus.get_phonemes()  # List of phonemes
    phonemesN = len(phonemes)  # Number of categories

    # Get utterance keys
    devel = corpus.get_utterances('train')  # development corpus
    eval = corpus.get_utterances('test')  # evaluation corpus

    #For testing on smaller dataset
    if True:
        truncate_to_N = 50

        print("Truncating t %d files" % (truncate_to_N))
        devel = devel[:truncate_to_N]  # truncate test for speed
        eval = eval[:truncate_to_N]

    features = Features(adv_ms, len_ms, corpus.get_audio_dir())
    # set features storage location
    features.set_cacheroot(
        os.path.join(TimitBaseDir, 'feature_cache').replace("\\", "/"))
    corpus.set_feature_extractor(features)

    f = corpus.get_features(devel[0])
    input_dim = f.shape[1]

    #Model specification
    models_rnn = [
        lambda dim, width, dropout, l2:
        [(Masking, [], {
            "mask_value": 0.,
            "input_shape": [None, dim]
        }),
         (LSTM, [width], {
             "return_sequences": True,
             "kernel_regularizer": regularizers.l2(l2),
             "recurrent_regularizer": regularizers.l2(l2)
         }), (BatchNormalization, [], {}), (Dropout, [dropout], {}),
         (LSTM, [width * 2], {
             "return_sequences": True,
             "kernel_regularizer": regularizers.l2(l2),
             "recurrent_regularizer": regularizers.l2(l2)
         }), (BatchNormalization, [], {}), (Dropout, [dropout], {}),
         (Dense, [phonemesN], {
             'activation': 'softmax',
             'kernel_regularizer': regularizers.l2(l2)
         }, (TimeDistributed, [], {}))],
        lambda dim, width, dropout, l2: [
            (CuDNNLSTM, [width], {
                "return_sequences": True,
                "kernel_regularizer": regularizers.l2(l2),
                "recurrent_regularizer": regularizers.l2(l2),
                "input_shape": [None, dim]
            }), (Dropout, [dropout], {}), ((BatchNormalization, [], {})),
            (CuDNNLSTM, [width], {
                "return_sequences": True,
                "kernel_regularizer": regularizers.l2(l2),
                "recurrent_regularizer": regularizers.l2(l2)
            }), (Dropout, [dropout], {}), ((BatchNormalization, [], {})),
            (Dense, [phonemesN], {
                'activation': 'softmax',
                'kernel_regularizer': regularizers.l2(l2)
            }, (TimeDistributed, [], {}))
        ],
        lambda dim, width, dropout, l2: [
            (Masking, [], {
                "mask_value": 0.,
                "input_shape": [None, dim]
            }),
            (LSTM, [width], {
                "return_sequences": True,
                "kernel_regularizer": regularizers.l2(l2),
                "recurrent_regularizer": regularizers.l2(l2)
            }), (BatchNormalization, [], {}), (Dropout, [dropout], {}),
            (LSTM, [width * 2], {
                "return_sequences": True,
                "kernel_regularizer": regularizers.l2(l2),
                "recurrent_regularizer": regularizers.l2(l2)
            }), (BatchNormalization, [], {}), (Dropout, [dropout], {}),
            (LSTM, [width * 3], {
                "return_sequences": True,
                "kernel_regularizer": regularizers.l2(l2),
                "recurrent_regularizer": regularizers.l2(l2)
            }), (BatchNormalization, [], {}), (Dropout, [dropout], {}),
            (Dense, [width], {
                'activation': 'relu',
                'kernel_regularizer': regularizers.l2(l2)
            }, (TimeDistributed, [], {})),
            (Dense, [phonemesN], {
                'activation': 'softmax',
                'kernel_regularizer': regularizers.l2(l2)
            }, (TimeDistributed, [], {}))
        ],
    ]
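    # Each architecture is a lambda so that width, dropout, and l2 can be
    # filled in at call time, e.g. models_rnn[2](input_dim, 30, 0.1, 0.001)
    # below.  A 4-element tuple such as (Dense, ..., (TimeDistributed, [], {}))
    # is assumed to tell the model builder to wrap that layer, e.g.
    # TimeDistributed(Dense(...)).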

    #List of epochs for Grid Search
    epoch_search = [2, 5, 10]
    search_results_epoch = []

    n_folds = 3

    timer = Timer()
    for i in epoch_search:
        cv = CrossValidator(corpus,
                            devel,
                            models_rnn[2](input_dim, 30, 0.1, 0.001),
                            myclassifier.recurrent.train_and_evaluate,
                            batch_size=100,
                            epochs=i,
                            n_folds=n_folds)
        search_results_epoch.append(cv.get_errors())
        print('Training results for the {}-epoch model: {}\n'.format(
            i, cv.get_errors()))

    print("Total time to train: {}", timer.elapsed())
    min_err_epoch = []
    for i in range(len(search_results_epoch)):
        min_err_epoch.append(np.amin(search_results_epoch[i]))

    min_err_epoch_index = np.argmin(min_err_epoch)
    best_epoch = epoch_search[min_err_epoch_index]

    print("Best accuracy observed when number of epochs equals {}".format(
        best_epoch))
    print("Minimum error while training is {}".format(
        min_err_epoch[min_err_epoch_index]))

    # Plot the loss graph during training and validation
    plt.figure()
    loss_values = cv.get_losses()
    for i in range(0, len(loss_values)):
        plt.plot(np.arange(len(cv.get_losses()[i])),
                 cv.get_losses()[i],
                 label='Fold - {}'.format(i))
    plt.xlabel("Loss History Index per fold")
    plt.ylabel("Loss Value")
    savedir = os.path.join('.', 'loss_graph')
    pathlib.Path(savedir).mkdir(parents=True, exist_ok=True)
    plt.savefig("{}/Loss_Graph".format(savedir))
    plt.clf()

    models = cv.get_models()
    fold_errors = np.asarray(cv.get_errors())
    least_error_model_index = np.argmin(fold_errors)
    least_error_model = models[least_error_model_index]

    #Testing with the Test Data (eval) on the least_error_model
    test_gen = PaddedBatchGenerator(corpus, eval, len(eval))
    test_examples, test_labels = next(test_gen)

    loss, acc = least_error_model.evaluate(test_examples,
                                           test_labels,
                                           verbose=True)

    print("Total time to complete training and testing of the model: {}",
          timer.elapsed())
    print("Accuracy on Test Data", acc)
    y_pred = least_error_model.predict_generator(test_gen, steps=1, verbose=0)
    # Accumulate a single confusion matrix over all test utterances
    c = np.zeros((phonemesN, phonemesN), dtype=int)
    for i in range(0, y_pred.shape[0]):
        c += confusion_matrix(
            test_labels.argmax(axis=2)[i],
            y_pred.argmax(axis=2)[i],
            labels=np.arange(phonemesN))
    print('Confusion Matrix : \n', c)