Example #1
    def train(self):
        start_time = time.time()

        self.init_matrices()

        # transform the train set into the reservoir feature space
        state_matrix = self.compute_state_matrix(self.x_train)
        new_x_train = np.concatenate((self.x_train, state_matrix), axis=2).reshape(
            self.N * self.T, self.num_dim + self.N_x)

        # free memory
        state_matrix = None
        gc.collect()

        # repeat each label once per time step
        new_labels = np.repeat(self.y_train, self.T, axis=0)
        ridge_classifier = Ridge(alpha=self.lamda)
        ridge_classifier.fit(new_x_train, new_labels)

        # evaluate on the validation set
        state_matrix = self.compute_state_matrix(self.x_val)
        new_x_val = np.concatenate((self.x_val, state_matrix), axis=2).reshape(
            self.x_val.shape[0] * self.T, self.num_dim + self.N_x)

        y_pred_val = ridge_classifier.predict(new_x_val)
        y_pred_val = self.reshape_prediction(y_pred_val, self.x_val.shape[0], self.T)

        df_val_metrics = calculate_metrics(np.argmax(self.y_val, axis=1), y_pred_val, 0.0)

        # validation accuracy (kept under the name train_acc)
        train_acc = df_val_metrics['accuracy'][0]

        # evaluate on the test set
        state_matrix = self.compute_state_matrix(self.x_test)
        new_x_test = np.concatenate((self.x_test, state_matrix), axis=2).reshape(
            self.x_test.shape[0] * self.T, self.num_dim + self.N_x)
        state_matrix = None
        gc.collect()

        y_pred = ridge_classifier.predict(new_x_test)
        y_pred = self.reshape_prediction(y_pred, self.x_test.shape[0], self.T)

        duration = time.time() - start_time
        df_metrics = calculate_metrics(self.y_true, y_pred, duration)

        # keep the readout weights, then free the classifier
        self.W_out = ridge_classifier.coef_
        ridge_classifier = None
        gc.collect()

        df_metrics.to_csv(self.output_directory + 'df_metrics.csv', index=False)

        return df_metrics, train_acc
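
Examples #1 and #14 call compute_state_matrix and reshape_prediction without showing them. Below is a minimal sketch of what such helpers could look like for a leaky echo-state reservoir; the update rule, the argument shapes, and the alpha parameter are assumptions, not the original implementation.

import numpy as np

def compute_state_matrix(x, W_in, W, alpha=0.5):
    # x: (N, T, num_dim) input series -> (N, T, N_x) reservoir states,
    # assuming the standard leaky update h <- (1-a)h + a*tanh(W_in u + W h)
    N, T, _ = x.shape
    N_x = W.shape[0]
    states = np.zeros((N, T, N_x))
    h = np.zeros((N, N_x))
    for t in range(T):
        h = (1.0 - alpha) * h + alpha * np.tanh(x[:, t, :] @ W_in.T + h @ W.T)
        states[:, t, :] = h
    return states

def reshape_prediction(y_pred, num_instances, T):
    # average the per-time-step class scores of each series, then argmax
    return np.argmax(y_pred.reshape(num_instances, T, -1).mean(axis=1), axis=1)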
Example #2
    def predict(self,
                x_test,
                y_true,
                x_train,
                y_train,
                y_test,
                return_df_metrics=True):
        def loss1(y, yhat):
            return KB.sum(KB.abs(yhat - y))

        # model = keras.models.load_model(model_path, custom_objects={'custom_loss_function': custom_loss_function})

        start_time = time.time()
        model_path = self.output_directory + 'best_model.hdf5'
        model = keras.models.load_model(model_path,
                                        custom_objects={'loss1': loss1})
        y_pred = model.predict(x_test, batch_size=self.batch_size)
        if return_df_metrics:
            y_pred = np.argmax(y_pred, axis=1)
            df_metrics = calculate_metrics(y_true, y_pred, 0.0)
            return df_metrics
        else:
            test_duration = time.time() - start_time
            save_test_duration(self.output_directory + 'test_duration.csv',
                               test_duration)
            return y_pred
Example #3
 def predict(self, x_test, y_true, model_path, return_df_metrics=True):
     model = tf.keras.models.load_model(model_path)
     y_pred = model.predict(x_test)
     if return_df_metrics:
         y_pred = np.argmax(y_pred, axis=1)
         df_metrics = calculate_metrics(y_true, y_pred, 0.0)
         return df_metrics
     else:
         return y_pred
Example #4
 def predict(self, x_test, y_true, x_train, y_train, y_test, return_df_metrics=True):
     model_path = self.output_directory + 'best_model.hdf5'
     model = keras.models.load_model(model_path)
     y_pred = model.predict(x_test)
     if return_df_metrics:
         y_pred = np.argmax(y_pred, axis=1)
         df_metrics = calculate_metrics(y_true, y_pred, 0.0)
         return df_metrics
     else:
         return y_pred
Example #5
 def predict(self, x_test, y_true, return_df_metrics=True):
     start_time = time.time()
     model = keras.models.load_model(self.model_path)
     y_pred = model.predict(x_test, batch_size=self.batch_size)
     if return_df_metrics:
         y_pred = np.argmax(y_pred, axis=1)
         df_metrics = calculate_metrics(y_true, y_pred, 0.0)
         return df_metrics
     else:
         test_duration = time.time() - start_time
         save_test_duration(self.output_directory + 'test_duration.csv',
                            test_duration)
         return y_pred
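
Every snippet on this page delegates to calculate_metrics(y_true, y_pred, duration), which is never defined here. A plausible minimal version returning a one-row DataFrame; the column set is an assumption inferred from usages such as df_metrics['accuracy'][0].

import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score

def calculate_metrics(y_true, y_pred, duration):
    # one-row frame so that df_metrics['accuracy'][0] works as in the examples
    return pd.DataFrame({
        'precision': [precision_score(y_true, y_pred, average='macro')],
        'accuracy': [accuracy_score(y_true, y_pred)],
        'recall': [recall_score(y_true, y_pred, average='macro')],
        'duration': [duration],
    })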
Example #6
def run_iterations(args, augmentator, augmentator_name, tmp_output_directory,
                   iterations, datasets_dict, classifier_name, epochs, start):
    print('\t\twithout augmentation: ', augmentator_name)

    for dataset_name in dataset_names_for_archive[ARCHIVE_NAMES[0]]:

        print('\t\t\tdataset_name: ', dataset_name)

        upper_dir = tmp_output_directory + augmentator_name + '/' + dataset_name

        done = check_dir(upper_dir)

        if not done:
            #save all the predictions and the corresponding true class
            predicted_y = []
            expected_y = []

            for it in range(iterations):
                print('\t\t\t\titer', it)
                trr = '_itr_' + str(it)

                output_directory = upper_dir + '/' + trr + '/'
                #print(output_directory)

                create_directory(output_directory)

                y_pred, y_true = fit_classifier_aug(args, augmentator,
                                                    datasets_dict,
                                                    dataset_name,
                                                    classifier_name, epochs,
                                                    output_directory)

                print('\t\t\t\tDONE')

                # the creation of this directory means this iteration is done
                create_directory(output_directory + '/DONE')

                if (y_pred.shape == y_true.shape):
                    predicted_y.extend(y_pred)
                    expected_y.extend(y_true)
                else:
                    raise Exception("FALSE: y_pred.shape==y_true.shape.")

            totalduration = time.time() - start
            df_metrics = calculate_metrics(expected_y, predicted_y,
                                           totalduration)
            df_metrics.to_csv(upper_dir + '/avg_metrics.csv', index=False)
            create_directory(upper_dir + '/DONE')

            print('iterations DONE!')
            print(df_metrics)
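
check_dir and create_directory are also not shown; plausible minimal versions (an assumption, consistent with the DONE marker directory convention used above):

import os

def create_directory(path):
    # create the directory if it does not exist yet
    if not os.path.exists(path):
        os.makedirs(path)
    return path

def check_dir(path):
    # a run is considered finished once its DONE marker directory exists
    return os.path.exists(path + '/DONE')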
Example #7
def eval_model(epoch, is_save=True):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc_score = AverageMeter()
    model.eval()
    num_steps = len(eval_loader)
    print(f'total batches: {num_steps}')
    end = time.time()
    eval_criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for i, (XI, label) in enumerate(eval_loader):
            x = Variable(XI.cuda(device_id))
            # label = Variable(torch.LongTensor(label).cuda(device_id))
            label = Variable(label.cuda(device_id))

            # Forward pass: Compute predicted y by passing x to the model
            output = model(x)
            # Compute and print loss
            loss = eval_criterion(output, label)
            losses.update(loss.data.item(), x.size(0))
            # update metrics
            output = nn.Softmax(dim=1)(output)
            confs, predicts = torch.max(output.detach(), dim=1)
            acc_score.update(calculate_metrics(predicts.cpu(), label.cpu()), 1)

            lr = optimizer.param_groups[0]['lr']
            batch_time.update(time.time() - end)
            end = time.time()

            if i % LOG_FREQ == 0:
                print(f'{epoch} [{i}/{num_steps}]\t'
                      f'time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      f'loss {losses.val:.4f} ({losses.avg:.4f})\t'
                      f'acc {acc_score.val:.4f} ({acc_score.avg:.4f})\t'
                      f'lr {lr:.8f}')

    print(f' *  Eval loss {losses.avg:.4f}\t'
          f'acc({acc_score.avg:.4f})\t'
          f'total time {batch_time.sum}')
    if is_save:
        train_logger.log(phase="eval",
                         values={
                             'epoch': epoch,
                             'loss': format(losses.avg, '.4f'),
                             'acc': format(acc_score.avg, '.4f'),
                             'lr': optimizer.param_groups[0]['lr']
                         })
    scheduler.step()
    return losses.avg
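
Examples #7 and #8 rely on an AverageMeter utility that is not shown. A common minimal implementation matching the .val/.avg/.sum/.update() usage above (an assumption, mirroring the widely used PyTorch example helper):

class AverageMeter:
    """Tracks the latest value, running sum, count and average."""

    def __init__(self):
        self.val = self.avg = self.sum = self.count = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count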
Example #8
def train_model(epoch):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc_score = AverageMeter()
    model.train()
    num_steps = len(train_loader)
    print(f'total batches: {num_steps}')
    end = time.time()

    for i, (XI, label) in enumerate(train_loader):
        x = Variable(XI.cuda(device_id))
        # label = Variable(torch.LongTensor(label).cuda(device_id))
        label = Variable(label.cuda(device_id))
        # Forward pass: Compute predicted y by passing x to the model
        output = model(x)
        # Compute and print loss
        loss = criterion(output, label)
        # update metrics
        losses.update(loss.data.item(), x.size(0))
        confs, predicts = torch.max(output.detach(), dim=1)
        acc_score.update(calculate_metrics(predicts.cpu(), label.cpu()), 1)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        lr = optimizer.param_groups[0]['lr']
        batch_time.update(time.time() - end)
        end = time.time()

        if i % LOG_FREQ == 0:
            print(f'{epoch} [{i}/{num_steps}]\t'
                  f'time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  f'loss {losses.val:.4f} ({losses.avg:.4f})\t'
                  f'acc {acc_score.val:.4f} ({acc_score.avg:.4f})\t'
                  f'lr {lr:.8f}')

    print(f' *  Train loss {losses.avg:.4f}\t' f'acc({acc_score.avg:.4f})')
    train_logger.log(phase="train",
                     values={
                         'epoch': epoch,
                         'loss': format(losses.avg, '.4f'),
                         'acc': format(acc_score.avg, '.4f'),
                         'lr': optimizer.param_groups[0]['lr']
                     })
    scheduler.step()
    return losses.val
Example #9
    def fit(self, x_train, y_train, x_test, y_test, y_true):

        y_pred = np.zeros(shape=y_test.shape)

        n_models = 0

        for dataset in datasets_names:

            if dataset == self.dataset_name:
                continue

            curr_dir = self.transfer_directory + dataset + '/' + self.dataset_name + '/'

            predictions_file_name = curr_dir + 'y_pred.npy'

            if check_if_file_exits(predictions_file_name):
                # then load only the predictions from the file
                curr_y_pred = np.load(predictions_file_name)
            else:
                # predict from models saved
                model = keras.models.load_model(curr_dir + 'best_model.hdf5')
                curr_y_pred = model.predict(x_test)
                keras.backend.clear_session()
                np.save(predictions_file_name, curr_y_pred)

            y_pred = y_pred + curr_y_pred

            n_models += 1

            keras.backend.clear_session()

        y_pred = y_pred / n_models

        # save predictions
        np.save(self.output_directory + 'y_pred.npy', y_pred)

        # convert the predicted from binary to integer
        y_pred = np.argmax(y_pred, axis=1)

        df_metrics = calculate_metrics(y_true, y_pred, 0.0)

        df_metrics.to_csv(self.output_directory + 'df_metrics.csv',
                          index=False)

        print(self.dataset_name, df_metrics['accuracy'][0])

        gc.collect()
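
The ensembling above is plain soft voting: sum the per-model probability matrices, divide by the number of models, and take the argmax per sample. A toy illustration with made-up numbers:

import numpy as np

p1 = np.array([[0.9, 0.1], [0.2, 0.8]])  # model 1 class probabilities
p2 = np.array([[0.6, 0.4], [0.4, 0.6]])  # model 2 class probabilities
avg = (p1 + p2) / 2
print(np.argmax(avg, axis=1))            # [0 1]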
Example #10
    def fit(self, x_train, y_train, x_test, y_test, y_true):
        # Reshape x_train and x_test so they fit the classifier
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1]))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1]))

        # Get non-oneHot-encoded labels
        y_train = np.argmax(y_train, axis=1)
        y_test = np.argmax(y_test, axis=1)

        ###########################
        ##Train ROCKET Classifier##
        ###########################
        start_time = time.time()
        rocket = Base_Classifier_ROCKET(self.n_kernels, self.kss, self.verbose)
        rocket.fit(x_train, y_train)
        duration = time.time() - start_time
        train_timings = rocket.train_timings_

        ##########################
        ##Test ROCKET Classifier##
        ##########################
        acc = rocket.score(x_test, y_test)
        test_timings = rocket.test_timings_

        #############################
        ##Predict ROCKET Classifier##
        #############################
        y_pred = rocket.predict(x_test)

        # Save Metrics
        df_metrics = calculate_metrics(y_test, y_pred, duration)
        if self.verbose:
            print(df_metrics)
        df_metrics.to_csv(self.output_dir +
                          'df_metrics.csv', index=False)

        # Save train and test timings
        train_timings.append(train_timings[0]+train_timings[1])
        test_timings.append(test_timings[0]+test_timings[1])
        df_timings = pd.DataFrame([train_timings, test_timings],
                                  index=['train', 'test'],
                                  columns=['x_transform', 'ridge_operation', 'total'])
        if self.verbose:
            print(df_timings)
        df_timings.to_csv(self.output_dir + 'df_timings.csv', index=False)
Example #11
    def fit(self, x_test, y_true):
        # no training since models are pre-trained

        y_pred1 = self.model1.predict(x_test)
        y_pred2 = self.model2.predict(x_test)

        y_pred = (y_pred1 + y_pred2) / 2

        # convert the predicted from binary to integer
        y_pred = np.argmax(y_pred, axis=1)

        df_metrics = calculate_metrics(y_true, y_pred, 0.0)

        df_metrics.to_csv(self.output_directory + 'df_metrics.csv',
                          index=False)

        keras.backend.clear_session()

        gc.collect()
Example #12
    def predict(self,
                x_test,
                y_true,
                x_train,
                y_train,
                y_test,
                return_df_metrics=True):
        best_model_path = self.get_file_path('best_model.hdf5')
        model = keras.models.load_model(best_model_path)

        start_time = time.time()
        y_pred = model.predict(x_test)
        if return_df_metrics:
            y_pred = np.argmax(y_pred, axis=1)
            df_metrics = utils.calculate_metrics(y_true, y_pred, 0.0)
            return df_metrics
        else:
            test_duration = time.time() - start_time
            duration_file = self.get_file_path('test_duration.csv')
            utils.save_test_duration(duration_file, test_duration)
            return y_pred
Example #13
    def predict(self, x_test, y_true, x_train, y_train, y_test):
        batch_size = 256

        # limit the number of augmented time series if series too long or too many
        if x_train.shape[1] > 500 or x_train.shape[0] > 2000 \
                or x_test.shape[0] > 2000:
            self.warping_ratios = [1]
            self.slice_ratio = 0.9
        # increase the slice if series too short
        if x_train.shape[1] * self.slice_ratio < 8:
            self.slice_ratio = 8 / x_train.shape[1]

        new_x_train, new_y_train, new_x_test, new_y_test, tot_increase_num = \
            self.pre_processing(x_train, y_train, x_test, y_test)

        model_path = self.output_directory + 'best_model.hdf5'
        model = keras.models.load_model(model_path)

        y_pred = model.predict(new_x_test, batch_size=batch_size)
        # convert the predicted from binary to integer
        y_pred = np.argmax(y_pred, axis=1)

        # get the true predictions of the test set
        y_predicted = []
        test_num_batch = int(new_x_test.shape[0] / tot_increase_num)
        for i in range(test_num_batch):
            # majority vote over the predictions of the augmented copies of test series i
            curr_y_pred = y_pred[i * tot_increase_num:(i + 1) * tot_increase_num]
            unique_value, count = np.unique(curr_y_pred, return_counts=True)

            idx_max = np.argmax(count)
            predicted_label = unique_value[idx_max]

            y_predicted.append(predicted_label)

        y_pred = np.array(y_predicted)

        df_metrics = calculate_metrics(y_true, y_pred, 0.0)
        return df_metrics
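
The post-processing step is a per-series majority vote: each original test series yields tot_increase_num augmented copies, and the most frequent label among its copies wins. A standalone illustration with dummy predictions:

import numpy as np

tot_increase_num = 3                  # augmented copies per series
preds = np.array([1, 1, 2, 0, 2, 0])  # predictions for 2 series
for i in range(len(preds) // tot_increase_num):
    group = preds[i * tot_increase_num:(i + 1) * tot_increase_num]
    values, counts = np.unique(group, return_counts=True)
    print(values[np.argmax(counts)])  # 1, then 0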
Example #14
    def train(self):
        start_time = time.time()

        ################
        ### Training ###
        ################

        # init the matrices
        self.init_matrices()
        # compute the state matrices which is the new feature space
        state_matrix = self.compute_state_matrix(self.x_train)
        # add the input to form the new feature space and transform to
        # the new feature space to be fed to the classifier
        new_x_train = np.concatenate((self.x_train, state_matrix),
                                     axis=2).reshape(self.N * self.T,
                                                     self.num_dim + self.N_x)
        # memory free
        state_matrix = None
        gc.collect()
        # transform the corresponding labels
        new_labels = np.repeat(self.y_train, self.T, axis=0)
        # new model
        ridge_classifier = Ridge(alpha=self.lamda)
        # fit the new feature space
        ridge_classifier.fit(new_x_train, new_labels)

        ################
        ## Validation ##
        ################
        # compute state matrix for validation set
        state_matrix = self.compute_state_matrix(self.x_val)
        # add the input to form the new feature space and transform to
        # the new feature space to be fed to the classifier
        new_x_val = np.concatenate(
            (self.x_val, state_matrix),
            axis=2).reshape(self.x_val.shape[0] * self.T,
                            self.num_dim + self.N_x)
        # get the predictions on the validation set
        y_pred_val = ridge_classifier.predict(new_x_val)
        # reconstruct the validation predictions
        y_pred_val = self.reshape_prediction(y_pred_val, self.x_val.shape[0],
                                             self.T)
        # get the metrics for the validation set
        df_val_metrics = calculate_metrics(np.argmax(self.y_val, axis=1),
                                           y_pred_val, 0.0)
        # get the validation accuracy (kept under the name train_acc)
        train_acc = df_val_metrics['accuracy'][0]

        ###############
        ### Testing ###
        ###############

        # get the prediction on the test set
        # transform the test set to the new features
        state_matrix = self.compute_state_matrix(self.x_test)
        # add the input to form the new feature space fed to the classifier
        new_x_test = np.concatenate(
            (self.x_test, state_matrix),
            axis=2).reshape(self.x_test.shape[0] * self.T,
                            self.num_dim + self.N_x)
        # memory free
        state_matrix = None
        gc.collect()
        # get the prediction on the test set
        y_pred = ridge_classifier.predict(new_x_test)
        # reconstruct the test predictions
        y_pred = self.reshape_prediction(y_pred, self.x_test.shape[0], self.T)

        duration = time.time() - start_time
        # get the metrics for the test predictions
        df_metrics = calculate_metrics(self.y_true, y_pred, duration)

        # get the output layer weights
        self.W_out = ridge_classifier.coef_
        ridge_classifier = None
        gc.collect()
        # save the model
        np.savetxt(self.output_directory + 'W_in.txt', self.W_in)
        np.savetxt(self.output_directory + 'W.txt', self.W)
        np.savetxt(self.output_directory + 'W_out.txt', self.W_out)

        # save the metrics
        df_metrics.to_csv(self.output_directory + 'df_metrics.csv',
                          index=False)

        # return the test metrics and the validation accuracy
        return df_metrics, train_acc
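
Because the readout is a ridge regression, prediction reduces to one matrix product with the saved W_out. A runnable toy with random stand-ins for the saved weights (the real code would np.loadtxt the files written above and would also need the ridge intercept, which the snippet does not save):

import numpy as np

# dummy shapes for illustration: 4 classes, 10-dim concatenated [input | state] features
W_out = np.random.randn(4, 10)        # stands in for np.loadtxt('W_out.txt')
new_x = np.random.randn(6 * 50, 10)   # 6 series of length T=50, built as in train()
scores = new_x @ W_out.T              # per-time-step class scores
labels = np.argmax(scores.reshape(6, 50, -1).mean(axis=1), axis=1)
print(labels.shape)                   # (6,)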
Example #15
    def fit(self, x_train, y_train, x_test, y_test, y_true):
        # no training since models are pre-trained
        start_time = time.time()

        y_pred = np.zeros(shape=y_test.shape)

        ll = 0

        # loop through all classifiers
        for model_name in self.classifiers:
            # loop through different initialization of classifiers
            for itr in self.iterations_to_take:
                if itr == 0:
                    itr_str = ''
                else:
                    itr_str = '_itr_' + str(itr)

                curr_archive_name = self.archive_name + itr_str

                curr_dir = self.models_dir.replace('classifier',
                                                   model_name).replace(
                                                       self.archive_name,
                                                       curr_archive_name)

                model = self.create_classifier(model_name,
                                               None,
                                               None,
                                               curr_dir,
                                               build=False)

                predictions_file_name = curr_dir + 'y_pred.npy'
                # check if predictions already made
                if check_if_file_exits(predictions_file_name):
                    # then load only the predictions from the file
                    curr_y_pred = np.load(predictions_file_name)
                else:
                    # then compute the predictions
                    curr_y_pred = model.predict(x_test,
                                                y_true,
                                                return_df_metrics=False)

                    keras.backend.clear_session()

                    np.save(predictions_file_name, curr_y_pred)

                y_pred = y_pred + curr_y_pred

                ll += 1

        # average predictions
        y_pred = y_pred / ll

        # save predictions
        np.save(self.output_directory + 'y_pred.npy', y_pred)

        # convert the predicted from binary to integer
        y_pred = np.argmax(y_pred, axis=1)

        duration = time.time() - start_time

        df_metrics = calculate_metrics(y_true, y_pred, duration)

        df_metrics.to_csv(self.output_directory + 'df_metrics.csv',
                          index=False)

        gc.collect()
Example #16
    def fit(self, x_train, y_train, x_val, y_val, y_true):

        self.y_true = y_true

        self.x_test = x_val
        self.y_test = y_val

        self.x_train, self.x_val, self.y_train, self.y_val = \
            train_test_split(x_train, y_train, test_size=0.2)

        # 1. Tune ESN and num_filter
        self.x_train, self.y_train, self.x_val, self.x_test, model_init, hist_init, duration_init, acc_init, num_filter = self.tune_esn()

        current_acc = acc_init
        hist_final = hist_init
        model_final = model_init
        duration_final = duration_init
        ratio_final = [0.1, 0.2]

        for ratio in self.ratio[1:]:

            # 2. Build Model
            input_shape = (self.len_series, self.units, 1)
            model = self.build_model(
                input_shape, self.nb_classes, self.len_series, ratio, num_filter)
            #if(self.verbose == True):
                #model.summary()

            # 3. Train Model
            batch = self.batch
            epoch = self.epoch

            start_time = time.time()

            hist = model.fit(self.x_train, self.y_train, batch_size=batch, epochs=epoch,
                             verbose=False, validation_data=(self.x_val, self.y_val), callbacks=self.callbacks)

            duration = time.time() - start_time

            model_loss, model_acc = model.evaluate(
                self.x_val, self.y_val, verbose=False)

            print('val_loss: {0}, val_acc: {1}'.format(
                model_loss, model_acc))

            y_pred = model.predict(self.x_test)
            # convert the predicted from binary to integer
            y_pred = np.argmax(y_pred, axis=1)
            df_metrics = calculate_metrics(self.y_true, y_pred, duration)

            temp_output_dir = self.output_dir + str(self.it) + '/'
            create_directory(temp_output_dir)

            df_metrics.to_csv(temp_output_dir +
                              'df_metrics.csv', index=False)
            model.save(temp_output_dir + 'model.hdf5')

            params = [self.final_params_selected[0],
                      self.final_params_selected[1], self.final_params_selected[2], ratio]
            param_print = pd.DataFrame(np.array([params], dtype=object), columns=[
                'input_scaling', 'connectivity', 'num_filter', 'ratio'])
            param_print.to_csv(temp_output_dir + 'df_params.csv', index=False)

            if (model_acc > current_acc):
                print('New winner')
                hist_final = hist
                model_final = model
                duration_final = duration
                ratio_final = ratio
                current_acc = model_acc

            keras.backend.clear_session()
            self.it += 1

        print('Final ratio: {0}'.format(ratio_final))
        self.final_params_selected.append(ratio_final)
        self.model = model_final
        self.hist = hist_final

        y_pred = self.model.predict(self.x_test)

        # convert the predicted from binary to integer
        y_pred = np.argmax(y_pred, axis=1)

        param_print = pd.DataFrame(np.array([self.final_params_selected], dtype=object), columns=[
                                   'input_scaling', 'connectivity', 'num_filter', 'ratio'])

        param_print.to_csv(self.output_dir +
                           'df_final_params.csv', index=False)

        save_logs(self.output_dir, self.hist, y_pred, self.y_true,
                  duration_final, self.verbose, lr=False)

        keras.backend.clear_session()
Example #17
    def train(self):
        batch_size = self.batch_size
        nb_epochs = self.nb_epochs

        curr_loss = 1e10
        final_model = None
        final_hist = None
        final_cell = None
        final_dur = None

        input_shape = self.x_train.shape[1:]

        for cell in self.lstm_cells:
            model = self.build_model(input_shape, self.nb_classes, cell)

            if self.verbose:
                model.summary()

            start_time = time.time()

            hist = model.fit(self.x_train,
                             self.y_train,
                             batch_size=batch_size,
                             epochs=nb_epochs,
                             verbose=False,
                             validation_data=(self.x_test, self.y_test),
                             callbacks=self.callbacks)

            model.load_weights(self.output_dir + 'best_curr_weights.hdf5')
            print("Weights loaded from {0}best_curr_weights.hdf5".format(
                self.output_dir))

            # best model loaded, now evaluate on the train set (to avoid overfitting to the test set)
            model_loss, model_acc = model.evaluate(self.x_train,
                                                   self.y_train,
                                                   batch_size=batch_size,
                                                   verbose=False)
            print('Best weights --> train loss: {0}, train acc: {1}'.format(
                model_loss, model_acc))

            duration = time.time() - start_time

            y_pred = model.predict(self.x_test)
            # convert the predicted from binary to integer
            y_pred = np.argmax(y_pred, axis=1)
            df_metrics = calculate_metrics(self.y_true, y_pred, duration)

            temp_output_dir = self.output_dir + 'num_cells_' + str(cell) + '/'
            create_directory(temp_output_dir)

            df_metrics.to_csv(temp_output_dir + 'df_metrics.csv', index=False)
            model.save(temp_output_dir + 'model.hdf5')

            if model_loss < curr_loss:
                curr_loss = model_loss
                final_cell = cell
                final_model = model
                final_hist = hist
                final_dur = duration
                final_model.save(self.output_dir + 'best_model.hdf5')

            keras.backend.clear_session()

        print('Final Cell Selected:', final_cell)
        with open(self.output_dir + 'best_num_cells.txt', 'w') as file_cells:
            file_cells.write(str(final_cell))

        return final_model, final_hist, final_dur
Example #18
    def fit(self, x_train, y_train, x_test, y_test, y_true):
        start_time_tuning = time.time()

        #######################
        ##Grid Search Stage 1##
        #######################
        param_grid_1 = dict(input_scaling=self.input_scaling,
                            connectivity=self.connectivity,
                            num_filter=self.num_filter)

        emn_stage_1 = Base_Classifier_EMN(nb_classes=self.nb_classes,
                                          verbose=False)

        grid_1 = GridSearchCV(estimator=emn_stage_1,
                              param_grid=param_grid_1,
                              cv=3,
                              verbose=3)
        grid_1_result = grid_1.fit(x_train, y_train)

        #######################
        ##Grid Search Stage 2##
        #######################
        param_grid_2 = dict(ratio=self.ratio)

        emn_stage_2 = grid_1_result.best_estimator_

        grid_2 = GridSearchCV(estimator=emn_stage_2,
                              param_grid=param_grid_2,
                              cv=3,
                              verbose=3)
        grid_2_result = grid_2.fit(x_train, y_train)

        duration_tuning = time.time() - start_time_tuning

        # Print Tune Duration
        print('Tune duration: {0}s'.format(duration_tuning))
        with open(self.output_dir + '/grid_search_duration.txt', 'w') as f:
            f.write(str(duration_tuning))

        #####################################
        ##Final Training on whole train set##
        #####################################
        emn_final = grid_2_result.best_estimator_
        if self.verbose:
            emn_final.verbose = True

        # Save Params
        df_final_tuned = pd.DataFrame(
            [[
                emn_final.input_scaling, emn_final.connectivity,
                emn_final.num_filter, emn_final.ratio
            ]],
            columns=['input_scaling', 'connectivity', 'num_filter', 'ratio'])
        print(df_final_tuned)
        df_final_tuned.to_csv(self.output_dir + 'df_final_params.csv',
                              index=False)

        # Run three iterations with the best parameters
        for i in range(3):

            start_time = time.time()

            emn_final.fit(x_train, y_train)
            y_pred = emn_final.predict(x_test)

            duration = time.time() - start_time

            # Save Metrics
            df_metrics = calculate_metrics(y_true, y_pred, duration)
            if self.verbose:
                print(df_metrics)
            df_metrics.to_csv(self.output_dir + 'df_metrics' + str(i) + '.csv',
                              index=False)
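
For GridSearchCV to clone and tune Base_Classifier_EMN, the class must follow the scikit-learn estimator protocol: constructor parameters stored under matching attribute names, plus fit/predict (score comes from ClassifierMixin). A hypothetical skeleton of that contract; the real class is not shown:

from sklearn.base import BaseEstimator, ClassifierMixin

class Base_Classifier_EMN(BaseEstimator, ClassifierMixin):
    # BaseEstimator supplies get_params/set_params from these arguments,
    # which is what lets GridSearchCV clone the estimator per grid point
    def __init__(self, nb_classes=None, input_scaling=1.0,
                 connectivity=0.1, num_filter=32, ratio=0.1, verbose=False):
        self.nb_classes = nb_classes
        self.input_scaling = input_scaling
        self.connectivity = connectivity
        self.num_filter = num_filter
        self.ratio = ratio
        self.verbose = verbose

    def fit(self, X, y):
        ...  # build and train the echo memory network
        return self

    def predict(self, X):
        ...  # return integer class labels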
Example #19
    def fit(self, x_train, y_train, x_val, y_val, y_true):
        ##################################
        ##Train n individual classifiers##
        ##################################
        for it in range(self.num_ensemble_it):
            if it == 0:
                itr_str = 'network'
                verbosity = self.verbose
            else:
                itr_str = 'network'+str(it)
                verbosity = False

            tmp_output_dir = self.output_dir + itr_str + '/'
            create_directory(tmp_output_dir)

            inception = Classifier_INCEPTION(tmp_output_dir, self.input_shape,
                                             self.nb_classes, verbose=verbosity)

            print('Fitting network {0} out of {1}'.format(
                it + 1, self.num_ensemble_it))

            inception.fit(x_train, y_train, x_val, y_val, y_true)

        #######################################
        ##Ensemble the individual classifiers##
        #######################################
        start_time = time.time()

        y_pred = np.zeros(shape=y_val.shape)

        ll = 0

        for it in range(self.num_ensemble_it):
            if it == 0:
                itr_str = 'network'
            else:
                itr_str = 'network'+str(it)

            classifier_dir = self.output_dir + itr_str + '/'

            predictions_file_name = classifier_dir + 'y_pred.npy'

            curr_y_pred = np.load(predictions_file_name)

            y_pred = y_pred + curr_y_pred

            ll += 1

        # average predictions
        y_pred = y_pred / ll

        # save predictions
        np.save(self.output_dir + 'y_pred.npy', y_pred)

        # convert the predicted from binary to integer
        y_pred = np.argmax(y_pred, axis=1)

        duration = time.time() - start_time

        df_metrics = calculate_metrics(y_true, y_pred, duration)
        print(df_metrics)

        df_metrics.to_csv(self.output_dir + 'df_metrics.csv', index=False)

        gc.collect()
Example #20
    def train(self, x_train, y_train, x_test, y_test, y_true, pool_factor=None, filter_size=None, do_train=True):
        window_size = 0.2
        n_train_batch = 10
        n_epochs = 200
        max_train_batch_size = 256

        # print('Original train shape: ', x_train.shape)
        # print('Original test shape: ', x_test.shape)

        # split off a validation set (20% of the train set)
        x_train, y_train, x_val, y_val = self.split_train(x_train, y_train)
        
        ori_len = x_train.shape[1]  # original length of the time series
        slice_ratio = 0.9

        if do_train:
            kernel_size = int(ori_len * filter_size)
        else:
            model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

            # model.summary()

            pool_size = model.get_layer('max_pooling1d_1').get_config()['pool_size'][0]

            conv_shape = model.get_layer('conv1d_1').output_shape[1]


            pool_factor = self.get_pool_factor(conv_shape,pool_size)

        # limit the augmentation when the series is long: keep slice_ratio at least 0.98
        if ori_len > 500:
            slice_ratio = slice_ratio if slice_ratio > 0.98 else 0.98
        elif ori_len < 16:
            slice_ratio = 0.7

        increase_num = ori_len - int(ori_len * slice_ratio) + 1  # number of slices per series; can be used as the batch size

        train_batch_size = int(x_train.shape[0] * increase_num / n_train_batch)
        if train_batch_size > max_train_batch_size:
            # limit the train batch size
            n_train_batch = int(x_train.shape[0] * increase_num / max_train_batch_size)
        
        # data augmentation by slicing the length of the series
        x_train, y_train = self.slice_data(x_train, y_train, slice_ratio)
        x_val, y_val = self.slice_data(x_val, y_val, slice_ratio)
        x_test, y_test = self.slice_data(x_test, y_test, slice_ratio)

        train_set_x, train_set_y = x_train, y_train
        valid_set_x, valid_set_y = x_val, y_val
        test_set_x, _ = x_test, y_test

        valid_num = valid_set_x.shape[0]
        
        # print("increase factor is ", increase_num, ', ori len', ori_len)
        valid_num_batch = int(valid_num / increase_num)

        test_num = test_set_x.shape[0]
        test_num_batch = int(test_num / increase_num)

        length_train = train_set_x.shape[1]  # length after slicing
        
        window_size = int(length_train * window_size) if window_size < 1 else int(window_size)

        # set up the moving average (ma) and downsampling (ds) branches
        ma_base, ma_step, ma_num = 5, 6, 1
        ds_base, ds_step, ds_num = 2, 1, 4

        ds_num_max = length_train / (pool_factor * window_size)
        ds_num = int(min(ds_num, ds_num_max))

        (ma_train, ma_valid, ma_test, ma_lengths) = self.batch_movingavrg(
            train_set_x, valid_set_x, test_set_x, ma_base, ma_step, ma_num)
        (ds_train, ds_valid, ds_test, ds_lengths) = self.batch_downsample(
            train_set_x, valid_set_x, test_set_x, ds_base, ds_step, ds_num)

        # concatenate directly
        data_lengths = [length_train]
        # downsample part:
        if ds_lengths != []:
            data_lengths += ds_lengths
            train_set_x = np.concatenate([train_set_x, ds_train], axis=1)
            valid_set_x = np.concatenate([valid_set_x, ds_valid], axis=1)
            test_set_x = np.concatenate([test_set_x, ds_test], axis=1)

        # moving average part
        if ma_lengths != []:
            data_lengths += ma_lengths
            train_set_x = np.concatenate([train_set_x, ma_train], axis=1)
            valid_set_x = np.concatenate([valid_set_x, ma_valid], axis=1)
            test_set_x = np.concatenate([test_set_x, ma_test], axis=1)
        # print("Data length:", data_lengths)

        n_train_size = train_set_x.shape[0]
        n_valid_size = valid_set_x.shape[0]
        n_test_size = test_set_x.shape[0]
        batch_size = int(n_train_size / n_train_batch)
        n_train_batches = int(n_train_size / batch_size)
        data_dim = train_set_x.shape[1]  
        num_dim = train_set_x.shape[2] # For MTS 
        nb_classes = train_set_y.shape[1] 

        # print('train size', n_train_size, ',valid size', n_valid_size, ' test size', n_test_size)
        # print('batch size ', batch_size)
        # print('n_train_batches is ', n_train_batches)
        # print('data dim is ', data_dim)
        # print('---------------------------')

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        # print('building the model...')

        input_shapes, max_length = self.get_list_of_input_shapes(data_lengths, num_dim)

        start_time = time.time()

        best_validation_loss = np.inf

        if do_train:

            model = self.build_model(input_shapes, nb_classes, pool_factor, kernel_size)

            if self.verbose:
                model.summary()

            # print('Training')


            # early-stopping parameters
            patience = 10000  # look at this many examples regardless
            patience_increase = 2  # wait this much longer when a new best is found
            improvement_threshold = 0.995  # a relative improvement of this much is considered significant
            # go through this many minibatches before checking the network on
            # the validation set; in this case we check every epoch
            validation_frequency = min(n_train_batches, patience // 2)
            max_before_stopping = 500

            best_iter = 0
            valid_loss = 0.

            epoch = 0
            done_looping = False
            num_no_update_epoch = 0
            epoch_avg_cost = float('inf')
            epoch_avg_err = float('inf')

            while (epoch < n_epochs) and (not done_looping):
                epoch = epoch + 1
                epoch_train_err = 0.
                epoch_cost = 0.

                num_no_update_epoch += 1
                if num_no_update_epoch == max_before_stopping:
                    break


                for minibatch_index in range(n_train_batches):

                    iteration = (epoch - 1) * n_train_batches + minibatch_index

                    x = train_set_x[minibatch_index * batch_size:(minibatch_index + 1) * batch_size]
                    y = train_set_y[minibatch_index * batch_size:(minibatch_index + 1) * batch_size]

                    x = self.split_input_for_model(x, input_shapes)

                    cost_ij, accuracy = model.train_on_batch(x, y)

                    train_err = 1 - accuracy

                    epoch_train_err = epoch_train_err + train_err
                    epoch_cost = epoch_cost + cost_ij


                    if (iteration + 1) % validation_frequency == 0:

                        valid_losses = []
                        for i in range(valid_num_batch):
                            x = valid_set_x[i * increase_num:(i + 1) * increase_num]
                            y_pred = model.predict_on_batch(self.split_input_for_model(x, input_shapes))

                            # convert the predicted from binary to integer
                            y_pred = np.argmax(y_pred, axis=1)
                            label = np.argmax(valid_set_y[i * increase_num])

                            unique_value, sub_ind, correspond_ind, count = np.unique(y_pred, True, True, True)
                            unique_value = unique_value.tolist()

                            curr_err = 1.
                            if label in unique_value:
                                target_ind = unique_value.index(label)
                                count = count.tolist()
                                sorted_count = sorted(count)
                                if count[target_ind] == sorted_count[-1]:
                                    if len(sorted_count) > 1 and sorted_count[-1] == sorted_count[-2]:
                                        curr_err = 0.5 #tie
                                    else:
                                        curr_err = 0
                            valid_losses.append(curr_err)
                        valid_loss = sum(valid_losses) / float(len(valid_losses))

                        # print('...epoch%i,valid err: %.5f |' % (epoch,valid_loss))

                        # if we got the best validation score until now
                        if valid_loss <= best_validation_loss:
                            num_no_update_epoch = 0

                            # improve patience if the loss improvement is good enough
                            if valid_loss < best_validation_loss * improvement_threshold:
                                patience = max(patience, iteration * patience_increase)

                            # save best validation score and iteration number
                            best_validation_loss = valid_loss
                            best_iter = iteration

                            # save the model in HDF5 format
                            model.save(self.output_directory + 'best_model.hdf5')

                        model.save(self.output_directory + 'last_model.hdf5')
                    if patience <= iteration:
                        done_looping = True
                        break
                epoch_avg_cost = epoch_cost / n_train_batches
                epoch_avg_err = epoch_train_err / n_train_batches

                # print ('train err %.5f, cost %.4f' %(epoch_avg_err,epoch_avg_cost))
                if epoch_avg_cost == 0:
                    break

            # print('Optimization complete.')

        # test the model
        # print('Testing')
        # load best model
        model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

        # get the true predictions of the test set
        y_predicted = []
        for i in range(test_num_batch):
            x = test_set_x[i * increase_num:(i + 1) * increase_num]
            y_pred = model.predict_on_batch(self.split_input_for_model(x, input_shapes))

            # convert the predicted from binary to integer
            y_pred = np.argmax(y_pred, axis=1)

            unique_value, sub_ind, correspond_ind, count = np.unique(y_pred, True, True, True)

            idx_max = np.argmax(count)
            predicted_label = unique_value[idx_max]

            y_predicted.append(predicted_label)

        y_pred = np.array(y_predicted)

        duration = time.time() - start_time

        df_metrics = calculate_metrics(y_true, y_pred, duration)

        # print(y_true.shape)
        # print(y_pred.shape)

        df_metrics.to_csv(self.output_directory + 'df_metrics.csv', index=False)

        return df_metrics, model, best_validation_loss
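
Example #20's augmentation hinges on slice_data, which is not shown. A minimal sketch under the assumption that each series of length L is cut into every contiguous window of length int(L * slice_ratio), which is exactly what yields increase_num = L - int(L * slice_ratio) + 1 copies per series:

import numpy as np

def slice_data(x, y, slice_ratio):
    # x: (n, length, ...) series; the windows of one series stay consecutive,
    # matching how the snippet later regroups them in blocks of increase_num
    n, length = x.shape[0], x.shape[1]
    win = int(length * slice_ratio)
    increase_num = length - win + 1
    slices = np.stack([x[:, i:i + win] for i in range(increase_num)], axis=1)
    new_x = slices.reshape(n * increase_num, win, *x.shape[2:])
    new_y = np.repeat(y, increase_num, axis=0)
    return new_x, new_y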
Example #21
                # get the synthetic train and labels
                syn_x_train, syn_y_train = syn_train_set
                # concat the synthetic with the reduced random train and labels
                aug_x_train = np.array(x_train.tolist() + syn_x_train.tolist())
                aug_y_train = np.array(y_train.tolist() + syn_y_train.tolist())

                print(np.unique(y_train, return_counts=True))
                print(np.unique(aug_y_train, return_counts=True))

                y_pred = classifier.fit(aug_x_train, aug_y_train, x_test,
                                        y_test)
            else:
                # no data augmentation
                y_pred = classifier.fit(x_train, y_train, x_test, y_test)

            df_metrics = calculate_metrics(y_test, y_pred, 0.0)
            df_metrics.to_csv(output_dir + 'df_metrics.csv', index=False)
            print('DONE')
            create_directory(output_dir + 'DONE')

        else:
            # for the ensemble, both models must have been computed so they can be combined
            from ensemble import Classifier_ENSEMBLE
            classifier_ensemble = Classifier_ENSEMBLE(output_dir,
                                                      x_train.shape[1:],
                                                      nb_classes, False)
            classifier_ensemble.fit(x_test, y_test)

# plot pairwise once all results are computed for resnet and resnet_augment and ensemble
plot_pairwise(root_deep_learning_dir, root_dir_dataset_archive, 'resnet',
              'resnet_augment')
Example #22
    def tune_esn(self):
        input_scaling_final = None
        connect_final = None
        num_filter_final = None
        x_train_final = None
        y_train_final = None
        x_val_final = None
        x_test_final = None
        duration_final = None
        model_final = None
        hist_final = None

        current_acc = 0

        self.it = 0

        for input_scaling in self.input_scaling:
            for connect in self.connectivity:

                x_train, y_train, x_val, x_test = self.ff_esn(input_scaling, connect)

                for num_filter in self.num_filter:

                    ratio = [0.1, 0.2]

                    # 2. Build Model
                    input_shape = (self.len_series, self.units, 1)
                    model = self.build_model(
                        input_shape, self.nb_classes, self.len_series, ratio, num_filter)

                    #if(self.verbose == True):
                        #model.summary()

                    # 3. Train Model
                    batch = self.batch
                    epoch = self.epoch

                    start_time = time.time()

                    hist = model.fit(x_train, y_train, batch_size=batch, epochs=epoch,
                                     verbose=False, validation_data=(x_val, self.y_val), callbacks=self.callbacks)

                    duration = time.time() - start_time

                    model_loss, model_acc = model.evaluate(
                        x_val, self.y_val, verbose=False)
                    print('val_loss: {0}, val_acc: {1}'.format(
                        model_loss, model_acc))

                    y_pred = model.predict(x_test)
                    # convert the predicted from binary to integer
                    y_pred = np.argmax(y_pred, axis=1)
                    df_metrics = calculate_metrics(
                        self.y_true, y_pred, duration)

                    temp_output_dir = self.output_dir + str(self.it) + '/'
                    create_directory(temp_output_dir)

                    df_metrics.to_csv(temp_output_dir +
                                      'df_metrics.csv', index=False)
                    model.save(temp_output_dir + 'model.hdf5')

                    params = [input_scaling, connect, num_filter, ratio]
                    param_print = pd.DataFrame(np.array([params], dtype=object), columns=[
                        'input_scaling', 'connectivity', 'num_filter', 'ratio'])
                    param_print.to_csv(temp_output_dir +
                                       'df_params.csv', index=False)

                    if (model_acc > current_acc):
                        print('New winner')
                        input_scaling_final = input_scaling
                        connect_final = connect
                        num_filter_final = num_filter
                        x_train_final = x_train
                        y_train_final = y_train
                        x_val_final = x_val
                        x_test_final = x_test
                        duration_final = duration
                        model_final = model
                        hist_final = hist
                        current_acc = model_acc

                    self.it += 1
                    keras.backend.clear_session()

        print('Final input_scaling: {0}; Final connectivity: {1}; Final filter: {2}'.format(
            input_scaling_final, connect_final, num_filter_final))
        self.final_params_selected.append(input_scaling_final)
        self.final_params_selected.append(connect_final)
        self.final_params_selected.append(num_filter_final)

        return x_train_final, y_train_final, x_val_final, x_test_final, model_final, hist_final, duration_final, current_acc, num_filter_final