示例#1
0
    def train(self, data, labels, epochs=1, record_epochs=False, validation_set=None):
        """
        Fit the weights with a margin-based ("aggressive") perceptron update rule.

        The weights are re-initialised with small uniform random values on every call. An update is made
        whenever an example's signed score falls below self.margin; the step size is derived from how far
        the example is from the margin and from the squared norm of the example.

        :param data: numpy array with one training example per row. It is passed through ``pad`` here, so the
        caller does not need to add the bias column itself.
        :param labels: numpy array of labels in {-1, 1}, aligned with the rows of data.
        :param epochs: number of passes over the (re-shuffled) training data.
        :param record_epochs: if True, store the validation accuracy and the current weights in
        self.epoch_records after every epoch, keyed by the 1-based epoch number.
        :param validation_set: indexable whose items 0 and 1 are the validation data and labels. Only read when
        record_epochs is True.
        :return: None
        """
        # Add the all-ones column so the bias is learned as an ordinary weight.
        padded = pad(data)

        # Start from small random weights, one per (padded) feature.
        n_features = padded.shape[1]
        self.weights = np.random.uniform(low=-0.01, high=0.01, size=n_features)

        for epoch_idx in range(epochs):

            # Visit the examples in a fresh random order each epoch.
            for x, y in zip(*shuffle(padded, labels)):
                signed_score = np.dot(self.weights, x) * y

                # Only examples that violate the margin trigger an update.
                if signed_score < self.margin:
                    # Aggressive step size: margin shortfall scaled by the example's squared norm (+1).
                    step_size = (self.margin - signed_score) / (np.dot(x, x) + 1)

                    # Build a new array instead of updating in place: earlier epoch records keep a
                    # reference to the weights array they were given.
                    self.weights = self.weights + (step_size * y * x)

                    self.update_count += 1

            if not record_epochs:
                continue

            # Snapshot validation accuracy and weights for this epoch.
            val_x = validation_set[0]
            val_y = validation_set[1]
            snapshot = {'accuracy': accuracy(self.predict(val_x), val_y),
                        'weights': self.weights}
            self.epoch_records[epoch_idx + 1] = snapshot
示例#2
0
def train(train_loader, model, criterion, optimizer, epoch, args):
    """
    Run one training epoch over ``train_loader``.

    For every batch the model output and loss are computed, top-1/top-3 accuracy is recorded,
    and one optimizer step is taken. Progress is printed every ``args.print_freq`` batches.

    :param train_loader: iterable yielding ``(input, target)`` batches.
    :param model: the network being trained; switched to train mode here.
    :param criterion: loss callable invoked as ``criterion(output, target)``.
    :param optimizer: optimizer whose ``zero_grad``/``step`` are called once per batch.
    :param epoch: epoch number, used only in the progress prefix.
    :param args: namespace providing ``gpu``, ``batch_size`` and ``print_freq``.
    :return: None
    """
    batch_time = AverageMeter('Time', ':6.3f', 's')
    data_time = AverageMeter('Data', ':6.3f', 's')
    losses = AverageMeter('Loss', ':.4f')
    top1 = AverageMeter('Acc@1', ':6.2f', '%')
    top3 = AverageMeter('Acc@3', ':6.2f', '%')
    meters = (batch_time, data_time, losses, top1, top3)
    progress = ProgressMeter(len(train_loader),
                             *meters,
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    tick = time.time()
    for step, (images, target) in enumerate(train_loader):
        # time spent waiting for the data loader
        data_time.update(time.time() - tick)

        # the inputs are only moved explicitly when a specific GPU was requested;
        # the targets are always moved
        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)

        # forward pass
        output = model(images)
        loss = criterion(output, target)

        # accuracy and loss bookkeeping
        acc1, acc3 = accuracy(output, target, topk=(1, 3))
        # TODO: the meters are weighted by args.batch_size instead of the actual batch
        # size (input.size(0)) because of the size(0) bug noted by the original author.
        weight = args.batch_size
        losses.update(loss.item(), weight)
        top1.update(acc1[0], weight)
        top3.update(acc3[0], weight)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # total time for this batch, then restart the clock
        batch_time.update(time.time() - tick)
        tick = time.time()

        if step % args.print_freq == 0:
            progress.print(step)
示例#3
0
def validate(val_loader, model, criterion, args):
    """
    Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Loss and top-1/top-3 accuracy are accumulated over all batches, progress is printed every
    ``args.print_freq`` batches, and a one-line summary is printed at the end.

    :param val_loader: iterable yielding ``(input, target)`` batches.
    :param model: the network to evaluate; switched to eval mode here.
    :param criterion: loss callable invoked as ``criterion(output, target)``.
    :param args: namespace providing ``gpu``, ``batch_size`` and ``print_freq``.
    :return: the running average of the top-1 accuracy meter.
    """
    batch_time = AverageMeter('Time', ':6.3f', 's')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top3 = AverageMeter('Acc@3', ':6.2f')
    meters = (batch_time, losses, top1, top3)
    progress = ProgressMeter(len(val_loader), *meters, prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (images, target) in enumerate(val_loader):
            # the inputs are only moved explicitly when a specific GPU was requested;
            # the targets are always moved
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

            # forward pass
            output = model(images)
            loss = criterion(output, target)

            # accuracy and loss bookkeeping
            acc1, acc3 = accuracy(output, target, topk=(1, 3))
            # TODO: the meters are weighted by args.batch_size instead of the actual batch
            # size (input.size(0)) because of the size(0) bug noted by the original author.
            weight = args.batch_size
            losses.update(loss.item(), weight)
            top1.update(acc1[0], weight)
            top3.update(acc3[0], weight)

            # total time for this batch, then restart the clock
            batch_time.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq == 0:
                progress.print(step)

        # TODO: this should also be done with the ProgressMeter
        summary = ' * Acc@1 {top1.avg:.3f} Acc@3 {top3.avg:.3f}'.format(top1=top1,
                                                                    top3=top3)
        print(summary)

    return top1.avg
示例#4
0
    def train(self, data, labels, epochs=1, record_epochs=False, validation_set=None):
        """
        Train an averaged perceptron on the given data.

        A standard perceptron is run with a fixed learning rate while a running sum of the weight vector is
        accumulated after every example. At the end the averaged weights replace self.weights so that
        predictions use them.

        :param data: numpy array with one training example per row. It is passed through ``pad`` here, so the
        caller does not need to add the bias column itself.
        :param labels: numpy array of labels in {-1, 1}, aligned with the rows of data.
        :param epochs: number of passes over the (re-shuffled) training data.
        :param record_epochs: if True, store the validation accuracy of the averaged weights, and those
        weights, in self.epoch_records after every epoch (keyed by the 1-based epoch number).
        :param validation_set: indexable whose items 0 and 1 are the validation data and labels. Only read when
        record_epochs is True.
        :return: None
        """
        # Pad the data with an all ones vector.
        p_data = pad(data)

        # Initialize the weights and the running sum of weights.
        # NOTE(review): the sum starts at the initial weights, so it ends up holding one more term than
        # the divisors below account for -- confirm whether that is intended.
        self.weights = np.random.uniform(low=-0.01, high=0.01, size=p_data.shape[1])
        self.average_weights = self.weights

        for epoch in range(epochs):

            # Go through each data point.
            for x, y in zip(*shuffle(p_data, labels)):

                # If (w^t*x + b)*y < 0 make an update.
                if np.dot(self.weights, x) * y < 0:
                    # Update the weights (new array, never in place: average_weights may alias it).
                    self.weights = self.weights + self.learning_rate * y * x

                    # Record update count.
                    self.update_count += 1

                # Increment the average weights even if no misprediction happens.
                self.average_weights = self.average_weights + self.weights

            # record epoch specific information if specified
            if record_epochs:
                val_x, val_y = validation_set[0], validation_set[1]
                # Temporarily swap in the averaged weights so predict will use them.
                temp_weights = self.weights
                # Examples seen so far: len(data) per completed epoch. The previous expression
                # `len(data) * epoch + 1` bound the +1 to the product instead of to the epoch index.
                examples_seen = len(data) * (epoch + 1)
                self.weights = self.average_weights / examples_seen
                try:
                    self.epoch_records[epoch + 1] = {'accuracy': accuracy(self.predict(val_x), val_y),
                                                     'weights': self.weights}
                finally:
                    # Always restore the raw weights so training resumes from the right state.
                    self.weights = temp_weights

        # Divide by the total number of examples it has seen.
        self.average_weights = self.average_weights / (len(data) * epochs)
        # Finally set the final weights to the average weights so they will be used for predictions.
        self.weights = self.average_weights
示例#5
0
def validate(val_loader, model, criterion, epoch):
    """
    Evaluate ``model`` on ``val_loader`` and print the epoch's accuracy and loss.

    Fixes over the previous version: the loop now iterates the ``val_loader`` argument
    (it used an unrelated ``train_loader`` name), the accuracy meter is actually updated
    (it was always printed at its initial value), the scalar loss is read with ``.item()``
    instead of indexing a 0-dim tensor, and no gradients are tracked during evaluation.

    :param val_loader: iterable yielding ``(images, target)`` batches.
    :param model: the network to evaluate; switched to eval mode here.
    :param criterion: loss callable invoked as ``criterion(y_pred, target)``.
    :param epoch: epoch number, used only in the printed summary.
    :return: None
    """
    losses = AverageMeter()
    acc = AverageMeter()

    model.eval()

    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for images, target in val_loader:
            # NOTE(review): only the targets are moved to the GPU here, exactly as before;
            # presumably the model wrapper places the inputs itself -- confirm.
            target = target.cuda(non_blocking=True)

            y_pred = model(images)
            loss = criterion(y_pred, target)

            # topk=(1, 1) is kept from the original call; only the first value is used.
            # assumes each returned value is a one-element tensor or a plain number -- TODO confirm
            prec1, _ = accuracy(y_pred, target, topk=(1, 1))

            batch_size = images.size(0)
            losses.update(loss.item(), batch_size)
            acc.update(float(prec1), batch_size)

    print('EPOCH {}|Accuracy:{:.3f} |Loss:{:.3f}'.format(epoch, acc.avg, losses.avg))
示例#6
0
    def train(self, data, labels, epochs=1, record_epochs=False, validation_set=None):
        """
        Train a perceptron whose learning rate decays with the number of examples seen.

        The step size used for an update is self.learning_rate / (1 + t), where t counts every example
        processed so far (not only the mispredicted ones).

        :param data: numpy array with one training example per row. It is passed through ``pad`` here, so the
        caller does not need to add the bias column itself.
        :param labels: numpy array of labels in {-1, 1}, aligned with the rows of data.
        :param epochs: number of epochs to run.
        :param record_epochs: if True, record the weights and the validation accuracy after each epoch in
        self.epoch_records (keyed by the 1-based epoch number).
        :param validation_set: indexable whose items 0 and 1 are the validation data and labels. Only read when
        record_epochs is True.
        :return: None
        """
        # Pad the data with an all ones vector.
        p_data = pad(data)

        # Initialize the weights.
        self.weights = np.random.uniform(low=-0.01, high=0.01, size=p_data.shape[1])

        # Number of examples processed so far; drives the learning-rate decay.
        t = 0

        for epoch in range(epochs):

            # Go through each data point.
            for x, y in zip(*shuffle(p_data, labels)):

                # If (w^t*x + b)*y < 0 make an update.
                if np.dot(self.weights, x) * y < 0:
                    # Calculate the decayed learning rate.
                    decayed_learning_rate = self.learning_rate / (1 + t)

                    # Update the weights with the local decayed rate. The previous code read
                    # self.decayed_learning_rate, an attribute this method never sets.
                    self.weights = self.weights + (decayed_learning_rate * y * x)

                    # Record update count.
                    self.update_count += 1

                # Increment t after each example not just mispredictions.
                t += 1

            # record epoch specific information if specified
            if record_epochs:
                val_x, val_y = validation_set[0], validation_set[1]
                self.epoch_records[epoch + 1] = {'accuracy': accuracy(self.predict(val_x), val_y),
                                                 'weights': self.weights}
示例#7
0
def run_train_test_cycle(X,
                         Y,
                         L,
                         LS,
                         S,
                         P,
                         model_class,
                         output_root_dir,
                         data_name,
                         target_name,
                         training_programme=None,
                         do_this_if_model_exists='skip',
                         save_data_in_output_dir=True,
                         force_device_for_training=None,
                         force_device_for_evaluation=None,
                         do_xval=True,
                         decision_tree=False):
    """
    This script trains and evaluates a model using the given data X,Y over all splits as determined in S

    For split index k, split k is the test set, split (k + 1) % len(S) the validation set and all
    remaining splits together form the training set.

    Parameters:
    -----------

    X : np.ndarray - An numpy.ndarray shaped (N, T, C), where N is the number of samples, T is the number
        of time points in the data and C is the number of channels per time point.

    Y : np.ndarray - An numpy.ndarray shaped (N, L), where N is the number of samples and L is the number of classes/labels

    L : list - a list of channel labels of length C, where C is the number of channels in the data.
        L holds textual descriptions of the data's channels

    LS: np.array - An numpy.ndarray shaped (N, S), where N is the number of samples and S is the number of existing subjects.
        Identifies the subject belonging to each datum X.
        Should run in parallel to the training labels Y

    S : list of lists - Contains indices determining the partitioning of the data.
        The outer lists groups the splits (ie len(S) groups of data) and each list element of S contains the indices of those lists.

    P : np.ndarray - An numpy.ndarray shaped (N,) describing the permutation applied to the input data X and the target labels Y.
        This allows referencing LS to Y and X.

    model_class: model_db.Model - a CLASS providing a set of required functions and the model architecture for executing the training and evaluation loop

    output_root_dir: str - a string pointing towards the root folder for writing results into.
        The folder is created if it does not exist yet.

    data_name: str - what is the data/feature type called? e.g. GRF or JA_X_Lower, ...

    target_name: str - what is the prediction target called? e.g. Subject, Gender or Injury, ...

    training_programme: (optional) ModelTraining class - If this parameter is not None, the model's default training regime will be overwritten
        with the passed ModelTraining class' train_model() function

    do_this_if_model_exists: str - variable controlling the training/evaluation behaviour if a trained model already exists
        at the model output location. options:
        retrain (do everything from scratch)
        load (load model and skip training, perform evaluation only)
        skip (completely skip, do nothing)

    save_data_in_output_dir: bool - controls whether to save the experimental data (X, Y, L, LS, S, P) in the output directory

    force_device_for_training: str - values can be either gpu or cpu. force the use of this device during training.

    force_device_for_evaluation: str - values can either gpu or cpu. force the use of this device during evaluation.
        here, the use of the GPU is almost always recommended due to the large batch size to be processed.

    do_xval: bool - controls whether all data splits are run through a cross-evaluation scheme, or only data splits 0-2 are to be treated as dedicated training, validation and test splits

    decision_tree: bool - if True trains a decision tree model as a baseline/comparison option for the target model

    Returns:
    --------

    None. All results are written to files below output_root_dir and printed.
    """

    # some basic sanity checks
    # (the first message used to format L.shape[0]; L is a list, so a failing check raised
    #  AttributeError instead of reporting the mismatching sample counts)
    assert Y.shape[0] == X.shape[0] == LS.shape[
        0], 'Number of samples differ between labels Y (n={}), data X (n={}) and subject labels LS (n={})'.format(
            Y.shape[0], X.shape[0], LS.shape[0])
    assert len(L) == X.shape[
        2], 'Number of provided channel names/labels in L (c={}) differs from number of channels in data X(c={})'.format(
            len(L), X.shape[2])
    assert sum([len(s) for s in S]) == X.shape[
        0], 'Number of samples distributed over splits in S (n={}) differs from number of samples in X ({})'.format(
            sum([len(s) for s in S]), X.shape[0])

    # the log file below lives in the output root, so the directory has to exist
    # even if the experimental data itself is not saved.
    helpers.ensure_dir_exists(output_root_dir)

    # save data, labels and split information in output directory.
    if save_data_in_output_dir:
        print('Saving training and evaluation data to {}'.format(
            output_root_dir))
        scipy.io.savemat('{}/data.mat'.format(output_root_dir), {'X': X})
        scipy.io.savemat('{}/targets.mat'.format(output_root_dir), {'Y': Y})
        scipy.io.savemat('{}/channel_labels.mat'.format(output_root_dir),
                         {'L': L})
        scipy.io.savemat('{}/subject_labels.mat'.format(output_root_dir),
                         {'LS': LS})
        scipy.io.savemat('{}/splits.mat'.format(output_root_dir), {'S': S})
        scipy.io.savemat('{}/permutation.mat'.format(output_root_dir),
                         {'P': P})

    # prepare log to append anything happening in this session. kinda deprecated.
    # the context manager guarantees the handle is closed, also when a split fails.
    with open('{}/log.txt'.format(output_root_dir), 'a') as logfile:

        # start main loop and execute training/evaluation for all the splits defined in S
        for split_index in range(len(S)):
            if split_index > 0 and not do_xval:
                cprint(
                    colored(
                        'Cross-Validation has been disabled. Terminating after first iteration.',
                        'yellow'))
                #terminate here after one iteration, e.g. in case predetermined splits have been given.
                break

            model = model_class(output_root_dir, data_name, target_name,
                                split_index)
            model_dir = model.path_dir()
            helpers.ensure_dir_exists(model_dir)

            # this case: do nothing.
            if model.exists() and do_this_if_model_exists == 'skip':
                print('Model already exists at {}. skipping'.format(model_dir))
                continue  #skip remaining code, there is nothing to be done. please move along.

            # other cases: split data in any case. measure time. set output log
            t_start = time.time()

            # collect data indices from split table
            j_test = split_index
            i_test = S[j_test]
            j_val = (split_index + 1) % len(S)
            i_val = S[j_val]
            j_train = list(set(range(len(S))) - {j_test, j_val})
            i_train = []
            for j in j_train:
                i_train.extend(S[j])

            # collect data from indices
            x_train = X[i_train, ...]
            y_train = Y[i_train, ...]
            x_test = X[i_test, ...]
            y_test = Y[i_test, ...]
            x_val = X[i_val, ...]
            y_val = Y[i_val, ...]

            # model-specific data processing
            x_train, x_val, x_test, y_train, y_val, y_test =\
                model.preprocess_data(x_train, x_val, x_test, y_train, y_val, y_test)

            if not model.exists() or (model.exists()
                                      and do_this_if_model_exists == 'retrain'):
                model.build_model(x_train.shape, y_train.shape)
                if training_programme is not None:
                    #this instance-based monkey-patching is not the best way to do it, but probably the most flexible one.
                    model.train_model = types.MethodType(
                        training_programme.train_model, model)
                model.train_model(x_train,
                                  y_train,
                                  x_val,
                                  y_val,
                                  force_device=force_device_for_training)
                model.save_model()
            else:
                model.load_model()

            # compute test scores and relevance maps for model.
            results = model.evaluate_model(
                x_test,
                y_test,
                force_device=force_device_for_evaluation,
                lower_upper=helpers.get_channel_wise_bounds(x_train)
            )  # compute and give data bounds computed from training data.

            # measure time for training/evaluation cycle
            t_end = time.time()

            # write report for terminal printing
            report = '\n{}\n'.format(model.path_dir().replace('/', ' '))
            report += 'test accuracy : {}\n'.format(results['acc'])
            report += 'test loss (l1): {}\n'.format(results['loss_l1'])
            report += 'train-evaluation-sequence done after {}s\n\n'.format(
                t_end - t_start)
            print(report)

            #dump results to output of this run
            with open('{}/scores.txt'.format(model.path_dir()), 'w') as f:
                f.write(report)

            #also write results to parsable log file for eval_score_logs module
            logfile.write(report)
            logfile.flush()

            #dump evaluation results to mat file
            scipy.io.savemat('{}/outputs.mat'.format(model.path_dir()), results)

            if decision_tree:  # and (not model.exists() or (model.exists() and do_this_if_model_exists == 'retrain')):
                # DTree training and evaluation currently limited to settings where also the target model is trained.
                print('Training and evaluating alternative decision tree model')
                t_start = time.time()

                # make sure all data lives in CPU space for the DT model
                x_train, x_val, x_test, y_train, y_val, y_test =\
                    helpers.arrays_to_numpy(x_train, x_val, x_test, y_train, y_val, y_test)

                random_state = 42
                #prep data for DT models: flatten every sample into one feature vector
                x_train_dt = np.reshape(x_train, [x_train.shape[0], -1])
                x_val_dt = np.reshape(x_val, [x_val.shape[0], -1])
                x_test_dt = np.reshape(x_test, [x_test.shape[0], -1])

                #some models (SVM) flatten the y_train. we need to reinstate the one-hot layout here, in this case
                if len(y_train.shape) == 1:
                    tmp = np.zeros((y_train.shape[0],
                                    y_val.shape[1]))  # n_samples x n_classes
                    tmp[np.arange(y_train.shape[0]), y_train] = 1
                    y_train = tmp

                clf = tree.DecisionTreeClassifier(random_state=random_state)
                clf.fit(x_train_dt, y_train)

                y_pred_train = clf.predict(x_train_dt)
                acc_train = helpers.accuracy(y_pred_train, y_train)

                y_pred_val = clf.predict(x_val_dt)
                acc_val = helpers.accuracy(y_pred_val, y_val)

                y_pred_test = clf.predict(x_test_dt)
                acc_test = helpers.accuracy(y_pred_test, y_test)

                importances = clf.feature_importances_

                #collect results
                dtree_results = {
                    'acc_train': acc_train,
                    'acc_test': acc_test,
                    'acc_val': acc_val,
                    'y_pred_train': y_pred_train,
                    'y_pred_test': y_pred_test,
                    'y_pred_val': y_pred_val,
                    'importances': importances
                }

                t_end = time.time()

                #save results in file, in parallel to outputs.mat for the target model
                scipy.io.savemat('{}/outputs_dtree.mat'.format(model.path_dir()),
                                 dtree_results)

                # write report for terminal printing. only test_accuracy (ie the first line after the header) will be parsed by eval_score_logs
                dtree_report = '\n{}\n'.format(model.path_dir().replace(
                    '/',
                    ' ').replace(model_class.__name__,
                                 'comp.DTree:{}'.format(model_class.__name__)))
                dtree_report += 'test accuracy : {}\n'.format(
                    dtree_results['acc_test'])
                dtree_report += 'val accuracy : {}\n'.format(
                    dtree_results['acc_val'])
                dtree_report += 'train accuracy : {}\n'.format(
                    dtree_results['acc_train'])
                dtree_report += 'train-evaluation-sequence done after {}s\n\n'.format(
                    t_end - t_start)
                print(dtree_report)

                #dump results to output of this run
                #again, in parallel to scores.txt for the target model
                with open('{}/scores_dtree.txt'.format(model.path_dir()),
                          'w') as f:
                    f.write(dtree_report)

                #also write dtree report into logfile
                logfile.write(dtree_report)
                logfile.flush()
示例#8
0
        predicted_sequence_ys = []
        hidden = Variable(torch.zeros(1, 1, HIDDEN_DIM))
        for word_index in range(len(word_sequence)):
            x = helpers.featurize(word_index,
                                  word_sequence,
                                  word_vectorizer,
                                  char_vectorizer,
                                  n_words,
                                  n_chars,
                                  n_tags,
                                  word_vocabulary_size,
                                  char_vocabulary_size,
                                  num_features,
                                  predicted_ys=predicted_sequence_ys)
            x = Variable(
                torch.from_numpy(
                    np.expand_dims(x.todense(), axis=0).astype(np.float32)))
            _, output, hidden = model(x, hidden)
            predicted_y = np.argmax(output.data.numpy())
            predicted_sequence_ys.append(predicted_y)
        predicted_ys.extend(predicted_sequence_ys)

    if TO_CACHE is True:
        cache = 'dev'
    else:
        cache = None
    dev_predicted_y = np.array(predicted_ys)
    dev_accuracy = helpers.accuracy(dev_predicted_y, DEV_Y)
    helpers.evaluateLogisticRegressionModelPrint(DEV_X_RAW, DEV_IDENTIFIERS,
                                                 dev_predicted_y)
示例#9
0
        cache = None
    train_x = helpers.extractMaxEntFeatures(TRAIN_X_RAW,
                                            word_vectorizer,
                                            char_vectorizer,
                                            n_words,
                                            n_chars,
                                            n_tags,
                                            num_samples,
                                            cache=cache,
                                            predicted_ys=TRAIN_Y)

    # Train the logistic model
    lr = LogisticRegression()
    lr.fit(train_x, TRAIN_Y)
    train_predicted_y = lr.predict(train_x)
    train_accurary = helpers.accuracy(train_predicted_y, TRAIN_Y)
    word_feature_names = word_vectorizer.get_feature_names()
    char_feature_names = char_vectorizer.get_feature_names()
    coefs = np.abs(lr.coef_[0]).tolist()
    top10 = sorted(zip(word_feature_names * n_words + char_feature_names,
                       coefs),
                   key=lambda x: x[1],
                   reverse=True)[:10]

    # Evaluate on the dev set
    if ARGS.model_type == 1 or ARGS.model_type == 4:
        if TO_CACHE is True:
            cache = 'dev' + '_n_words_' + str(n_words) + '_n_chars_' + str(
                n_chars) + '_n_tags_' + str(n_tags) + '_ngrams_max_' + str(
                    ngram_max)
        else:
    def evaluate_model(self,
                       x_test,
                       y_test,
                       force_device=None,
                       lower_upper=None):
        """
        test model and computes relevance maps

        The method first runs a forward pass to measure accuracy and L1 loss, then drops the
        model's softmax output layer, runs a second forward pass and computes LRP relevance maps
        for a sequence of rule configurations. The LRP parameters of the model's modules are
        changed in place along the way, so the order of the steps below matters.

        Parameters:
        -----------

        x_test: array - shaped such that it is ready for consumption by the model

        y_test: array - expected test labels (used as a binary matrix when building the initial relevance)

        force_device: str - (optional) force execution of the evaluation either on cpu or gpu.
            accepted values: "cpu", "gpu" respectively. None does nothing.

        lower_upper: (array of float, array of float) - (optional): lower and upper bounds of the inputs, for LRP_zB.
            if not given (falsy), the bounds are inferred from x_test via helpers.get_channel_wise_bounds.
            arrays should match the feature dimensionality of the inputs, including broadcastable axes.
            e.g. if x_test is shaped (N, featuredims), then the bounds should be shaped (1, featuredims)

        Returns:
        --------

        results, packed in dictionary, as numpy arrays.
        keys: 'acc', 'loss_l1', 'y_pred' and one 'R_pred_{act|dom}_<rule>' entry per LRP configuration;
        the composite/flat entries are only present if the model contains a Convolution module.
        """

        assert isinstance(
            self.model, Sequential
        ), "self.model should be modules.sequential.Sequentialm but is {}. ensure correct type by converting model after training.".format(
            type(self.model))
        # remove the softmax output of the model.
        # this does not change the ranking of the outputs but is required for most LRP methods
        # self.model is required to be a modules.Sequential
        results = {}  #prepare results dictionary

        #force model to specific device, if so desired.
        x_test, y_test = helpers.force_device(self, (x_test, y_test),
                                              force_device)

        print('...forward pass for {} test samples for model performance eval'.
              format(x_test.shape[0]))
        y_pred = self.model.forward(x_test)

        #evaluate accuracy and loss on cpu-copies of prediction vectors
        y_pred_c, y_test_c = helpers.arrays_to_numpy(y_pred, y_test)
        results['acc'] = helpers.accuracy(y_test_c, y_pred_c)
        results['loss_l1'] = helpers.l1loss(y_test_c, y_pred_c)
        results['y_pred'] = y_pred_c

        #NOTE: drop softmax layer AFTER forward for performance measures to obtain competitive loss values
        #NOTE(review): this modifies self.model itself; nothing in this method restores the layer afterwards
        self.model.drop_softmax_output_layer()

        #NOTE: second forward pass without softmax for relevance computation
        print('...forward pass for {} test samples (without softmax) for LRP'.
              format(x_test.shape[0]))
        y_pred = self.model.forward(
            x_test)  # this is also a requirement for LRP

        # prepare initial relevance vectors for actual class and dominantly predicted class, on model-device (gpu or cpu)
        R_init_act = y_pred * y_test  #assumes y_test to be binary matrix

        # boolean mask marking the maximal output per sample
        y_dom = (y_pred == y_pred.max(axis=1, keepdims=True))
        R_init_dom = y_pred * y_dom  #assumes prediction maxima are unique per sample

        # compute epsilon-lrp for all model layers
        for m in self.model.modules:
            m.set_lrp_parameters(lrp_var='epsilon', param=1e-5)
        print('...lrp (eps) for actual classes')
        results['R_pred_act_epsilon'] = self.model.lrp(R_init_act)

        print('...lrp (eps) for dominant classes')
        results['R_pred_dom_epsilon'] = self.model.lrp(R_init_dom)

        # eps + zB (lowest convolution/flatten layer) for all models here.

        # infer lower and upper bounds from data, if not given
        if not lower_upper:
            print(
                '    ...inferring per-channel lower and upper bounds for zB from test data. THIS IS PROBABLY NOT OPTIMAL'
            )
            lower_upper = helpers.get_channel_wise_bounds(x_test)
        else:
            print('    ...using input lower and upper bounds for zB')
        # move the bounds to the array type matching the device the model runs on
        if self.use_gpu:
            lower_upper = helpers.arrays_to_cupy(*lower_upper)
        else:
            lower_upper = helpers.arrays_to_numpy(*lower_upper)

        # configure the lowest weighted layer to be decomposed with zB. This should be the one nearest to the input.
        # We are not just taking the first layer, since the MLP models are starting with a Flatten layer for reshaping the data.
        for m in self.model.modules:
            if isinstance(m, (Linear, Convolution)):
                m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
                break

        print('...lrp (eps + zB) for actual classes')
        results['R_pred_act_epsilon_zb'] = self.model.lrp(R_init_act)

        print('...lrp (eps + zB) for dominant classes')
        results['R_pred_dom_epsilon_zb'] = self.model.lrp(R_init_dom)

        # compute CNN composite rules, if model has convolution layers
        has_convolutions = False
        for m in self.model.modules:
            has_convolutions = has_convolutions or isinstance(m, Convolution)

        if has_convolutions:
            # convolution layers found.

            # epsilon-lrp with flat decomposition in the lowest convolution layers
            # process lowest convolution layer with FLAT lrp
            # for "normal" cnns, this should overwrite the previously set zB rule
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='flat')
                    break

            print('...lrp (eps+flat) for actual classes')
            results['R_pred_act_epsilon_flat'] = self.model.lrp(R_init_act)

            print('...lrp (eps+flat) for dominant classes')
            results['R_pred_dom_epsilon_flat'] = self.model.lrp(R_init_dom)

            # preparing alpha2beta-1 for those layers
            # (applies to ALL convolution modules; other module types keep their current rule)
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='alpha', param=2.0)

            print('...lrp (composite:alpha=2) for actual classes')
            results['R_pred_act_composite_alpha2'] = self.model.lrp(R_init_act)

            print('...lrp (composite:alpha=2) for dominant classes')
            results['R_pred_dom_composite_alpha2'] = self.model.lrp(R_init_dom)

            # process lowest convolution layer with FLAT lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='flat')
                    break

            print('...lrp (composite:alpha=2+flat) for actual classes')
            results['R_pred_act_composite_alpha2_flat'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=2+flat) for dominant classes')
            results['R_pred_dom_composite_alpha2_flat'] = self.model.lrp(
                R_init_dom)

            #process lowest convolution layer with zB lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
                    break

            print('...lrp (composite:alpha=2+zB) for actual classes')
            results['R_pred_act_composite_alpha2_zB'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=2+zB) for dominant classes')
            results['R_pred_dom_composite_alpha2_zB'] = self.model.lrp(
                R_init_dom)

            # switching alpha1beta0 for those layers
            # (again applies to ALL convolution modules, including the lowest one)
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='alpha', param=1.0)

            print('...lrp (composite:alpha=1) for actual classes')
            results['R_pred_act_composite_alpha1'] = self.model.lrp(R_init_act)

            print('...lrp (composite:alpha=1) for dominant classes')
            results['R_pred_dom_composite_alpha1'] = self.model.lrp(R_init_dom)

            # process lowest convolution layer with FLAT lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='flat')
                    break

            print('...lrp (composite:alpha=1+flat) for actual classes')
            results['R_pred_act_composite_alpha1_flat'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=1+flat) for dominant classes')
            results['R_pred_dom_composite_alpha1_flat'] = self.model.lrp(
                R_init_dom)

            #process lowest convolution layer with zB lrp
            for m in self.model.modules:
                if isinstance(m, Convolution):
                    m.set_lrp_parameters(lrp_var='zB', param=lower_upper)
                    break

            print('...lrp (composite:alpha=1+zB) for actual classes')
            results['R_pred_act_composite_alpha1_zB'] = self.model.lrp(
                R_init_act)

            print('...lrp (composite:alpha=1+zB) for dominant classes')
            results['R_pred_dom_composite_alpha1_zB'] = self.model.lrp(
                R_init_dom)

        print('...copying collected results to CPU and reshaping if necessary')
        # only values of existing keys are replaced, so the dict does not change size while iterating
        for key in results.keys():
            tmp = helpers.arrays_to_numpy(results[key])[0]
            # relevance maps (keys starting with 'R') additionally go through the model's postprocessing
            if key.startswith('R'):
                tmp = self.postprocess_relevance(tmp)[0]
            results[key] = tmp

        return results