Example #1
    def test_cross_validation_without_self_partitioning_ok(self):

        split_count = 5
        instance_num = 100

        self.__X, self.__y = make_classification(n_samples=instance_num,
                                                 n_features=4,
                                                 n_informative=2,
                                                 n_redundant=2,
                                                 n_repeated=0,
                                                 n_classes=2,
                                                 n_clusters_per_class=2,
                                                 weights=None,
                                                 flip_y=0.01,
                                                 class_sep=1.0,
                                                 hypercube=True,
                                                 shift=0.0,
                                                 scale=1.0,
                                                 shuffle=True,
                                                 random_state=None)

        train_idx, test_idx, label_idx, unlabel_idx = split(
            X=self.__X,
            y=self.__y,
            test_ratio=0.3,
            initial_label_rate=0.05,
            split_count=split_count,
            all_class=True)

        # init the CrossValidationExperiment
        experiment = CrossValidationExperiment(
            self.__X,
            self.__y,
            self_partition=False,
            stopping_criteria=UnlabelSetEmpty(),
            train_idx=train_idx,
            test_idx=test_idx,
            label_idx=label_idx,
            unlabel_idx=unlabel_idx)

        assert len(experiment._train_idx) == split_count
        assert len(experiment._test_idx) == split_count
        assert len(experiment._label_idx) == split_count
        assert len(experiment._unlabel_idx) == split_count

        for i in range(split_count):
            train = set(experiment._train_idx[i])
            test = set(experiment._test_idx[i])
            lab = set(experiment._label_idx[i])
            unl = set(experiment._unlabel_idx[i])

            assert len(test) == round(0.3 * instance_num)
            assert len(lab) == round(0.05 * len(train))
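            # e.g. with instance_num=100 and test_ratio=0.3 the test split holds
            # round(0.3 * 100) = 30 instances, so train holds the remaining 70 and
            # the initial labeled set holds round(0.05 * 70) ≈ 4 instances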

            # validity
            traintest = train.union(test)
            labun = lab.union(unl)
            assert traintest == set(range(instance_num))
            assert labun == train
Example #2
    def test_query_regression_std_batch_size(self):

        # Get the data
        X = np.random.choice(np.linspace(0, 20, 1000), size=100, replace=False).reshape(-1, 1)
        y = np.sin(X) + np.random.normal(scale=0.3, size=X.shape)

        # assembling initial training set
        train_idx, test_idx, label_idx, unlabel_idx = split(
            X=X,
            y=y,
            test_ratio=0.3,
            initial_label_rate=0.05,
            split_count=1,
            all_class=True)

        # defining the kernel for the Gaussian process
        ml_technique = GaussianProcessRegressor(
            kernel=RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3)) \
                   + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1)))

        experiment = HoldOutExperiment(
            client=self.__client,
            X=X,
            Y=y,
            scenario_type=PoolBasedSamplingScenario,
            train_idx=train_idx,
            test_idx=test_idx,
            label_idx=label_idx,
            unlabel_idx=unlabel_idx,
            ml_technique=ml_technique,
            performance_metrics=[Mse(squared=True)],
            query_strategy=QueryRegressionStd(),
            oracle=SimulatedOracle(labels=y),
            stopping_criteria=PercentOfUnlabel(value=70),
            self_partition=False,
            batch_size=self.__batch_size
        )

        result = experiment.evaluate(verbose=True)
        regressor = result[0].ml_technique

        # plot the estimation obtained after the active learning run
        with plt.style.context('seaborn-white'):
            plt.figure(figsize=(14, 7))
            x = np.linspace(0, 20, 1000)
            pred, std = regressor.predict(x.reshape(-1, 1), return_std=True)
            plt.plot(x, pred)
            plt.fill_between(x, pred.reshape(-1, ) - std, pred.reshape(-1, ) + std, alpha=0.2)
            plt.scatter(X, y, c='k')
            plt.title('Initial estimation')
            plt.show()
Example #3
    def test_cross_validation_without_self_partitioning_wrong_kfold_size(self):

        split_count = 5
        instance_num = 100

        self.__X, self.__y = make_classification(n_samples=instance_num,
                                                 n_features=4,
                                                 n_informative=2,
                                                 n_redundant=2,
                                                 n_repeated=0,
                                                 n_classes=2,
                                                 n_clusters_per_class=2,
                                                 weights=None,
                                                 flip_y=0.01,
                                                 class_sep=1.0,
                                                 hypercube=True,
                                                 shift=0.0,
                                                 scale=1.0,
                                                 shuffle=True,
                                                 random_state=None)

        train_idx, test_idx, label_idx, unlabel_idx = split(
            X=self.__X,
            y=self.__y,
            test_ratio=0.3,
            initial_label_rate=0.05,
            split_count=split_count,
            all_class=True)

        train_idx.pop()
        test_idx.pop()
        label_idx.pop()
        unlabel_idx.pop()

        # init the CrossValidationExperiment
        try:
            CrossValidationExperiment(X=self.__X,
                                      Y=self.__y,
                                      self_partition=False,
                                      kfolds=5,
                                      stopping_criteria=UnlabelSetEmpty(),
                                      train_idx=train_idx,
                                      test_idx=test_idx,
                                      label_idx=label_idx,
                                      unlabel_idx=unlabel_idx)
        except ValueError as valExc:
            assert ("Number of folds for inputs" in "{0}".format(valExc))
        else:
            raise Exception("Expected ValueError exception")
Example #4
    def setUp(self):
        df = pd.read_csv('../resources/data/hmeq.csv')  # import the dataset
        df.dropna(axis=0, how='any', inplace=True)
        df = pd.get_dummies(df, columns=['REASON', 'JOB'])

        self.__X = df.drop(['BAD'], axis=1)
        self.__y = df.filter(['BAD'], axis=1)

        self.__train_idx, self.__test_idx, self.__label_idx, self.__unlabel_idx = split(
            X=self.__X.to_numpy(),
            y=self.__y['BAD'].to_numpy(),
            test_ratio=0.3,
            initial_label_rate=0.05,
            split_count=1,
            all_class=True)

        self.__client = Client("tcp://192.168.2.100:8786")
Example #5
    def __init__(self,
                 client: Client,
                 X,
                 Y,
                 ml_technique,
                 scenario_type: AbstractScenario,
                 performance_metrics: [],
                 query_strategy: SingleLabelIndexQuery,
                 oracle: Oracle,
                 stopping_criteria: AbstractStopCriterion,
                 self_partition: bool,
                 kfolds: int = 1,
                 batch_size=1,
                 **kwargs):
        """
        Parameters
        ----------
        :param client: distributed.Client
        :param X: array-like
            Data matrix with [n_samples, n_features]
        :param Y: array-like, optional
            labels of given data [n_samples, n_labels] or [n_samples]
        :param ml_technique
        :param scenario_type: Sub-Type of AbstractScenario
            Type of Active Learning scenario to use
        :param performance_metrics: array-like of BaseMetrics elements
        :param query_strategy: SingleLabelIndexQuery
        :param oracle: Oracle
        :param stopping_criteria: AbstractStopCriterion
        :param self_partition: bool
        :param kfolds: int, optional (default=1)
            If self_partition is True, the data are randomly split into k sets according to the extra parameters
                -> test_ratio: float, optional (default=0.3)
                    Ratio of test set
                -> initial_label_rate: float, optional (default=0.05)
                    Ratio of initial label set
                    e.g. Initial_labelset*(1-test_ratio)*n_samples
                -> all_class: bool, optional (default=True)
                    Whether each split will contain at least one instance for each class.
                    If False, a totally random split will be performed.

            If self_partition is False, the following parameters must be specified
                -> train_idx:
                -> test_idx:
                -> label_idx:
                -> unlabel_idx:
        :param kwargs: optional
            Extra parameters
        """
        self._client = client

        if type(X) is da.core.Array:
            self._X = X.persist()
        else:
            self._X = da.from_array(X, chunks=len(X) // 50).persist()

        if isinstance(Y, da.core.Array):
            self._Y = Y.persist()
        else:
            self._Y = da.from_array(Y, chunks=len(Y) // 50).persist()

        # Optionally rebalance the persisted Dask collections across the workers
        if client is not None and kwargs.pop("rebalance", False):
            client.rebalance(self._X)
            client.rebalance(self._Y)

        check_X_y(self._X,
                  self._Y,
                  accept_sparse='csc',
                  multi_output=True,
                  distributed=False)

        self._scenario_type = scenario_type
        if self._scenario_type is None:
            raise ValueError("required param 'scenario_type' can not be empty")
        if not issubclass(self._scenario_type, AbstractScenario):
            raise ValueError(
                "the 'scenario_type' must be a subclass of 'AbstractScenario'")

        if self_partition:
            self._kfolds = kfolds
            self._train_idx, self._test_idx, self._label_idx, self._unlabel_idx = split(
                X=self._X,
                y=self._Y,
                test_ratio=kwargs.pop("test_ratio", 0.3),
                initial_label_rate=kwargs.pop("initial_label_rate", 0.05),
                split_count=self._kfolds,
                all_class=kwargs.pop("all_class", True))
        else:
            train_idx = kwargs.pop("train_idx", None)
            test_idx = kwargs.pop("test_idx", None)
            label_idx = kwargs.pop("label_idx", None)
            unlabel_idx = kwargs.pop("unlabel_idx", None)

            if train_idx is None:
                raise ValueError(
                    "required param 'train_idx' can not be empty ")
            if test_idx is None:
                raise ValueError("required param 'test_idx' can not be empty ")
            if label_idx is None:
                raise ValueError(
                    "required param 'label_idx' can not be empty ")
            if unlabel_idx is None:
                raise ValueError(
                    "required param 'unlabel_idx' can not be empty ")

            num_inst_x, num_feat = da.shape(self._X)
            num_inst_y, num_labels = da.shape(
                self._Y) if len(da.shape(self._Y)) > 1 else (da.shape(
                    self._Y)[0], 1)
            folds_train, num_inst_train = np.shape(train_idx)
            folds_test, num_inst_test = np.shape(test_idx)
            folds_labeled, num_inst_labeled = np.shape(label_idx)
            folds_unlabeled, num_inst_unlabeled = np.shape(unlabel_idx)

            if num_inst_x != num_inst_y:
                raise ValueError(
                    "Different numbers of instances for inputs (x:%s, y:%s)" %
                    (num_inst_x, num_inst_y))

            if folds_train != folds_test or folds_test != folds_labeled or folds_labeled != folds_unlabeled:
                raise ValueError(
                    "Different numbers of folds for inputs (train_idx:%s, test_idx:%s, "
                    "label_idx:%s, unlabel_idx:%s)" %
                    (folds_train, folds_test, folds_labeled, folds_unlabeled))
            if kfolds != folds_test:
                raise ValueError(
                    "Number of folds for inputs (train_idx:%s, test_idx:%s, "
                    "label_idx:%s, unlabel_idx:%s) must be equal to the kfolds:%s param"
                    % (folds_train, folds_test, folds_labeled, folds_unlabeled,
                       kfolds))

            if num_inst_train + num_inst_test != num_inst_x:
                raise ValueError(
                    "The sum of the number of instances for train_idx and test_idx must be equal to the "
                    "number of instances for x "
                    "(num_inst_x:%s, num_inst_train:%s, num_inst_test:%s)" %
                    (num_inst_x, num_inst_train, num_inst_test))

            if num_inst_labeled + num_inst_unlabeled != num_inst_train:
                raise ValueError(
                    "The sum of the number of instances for label_idx and unlabel_idx must be equal to the "
                    "number of instances for train_idx "
                    "(num_inst_labeled:%s, num_inst_unlabeled:%s, num_inst_train:%s)" %
                    (num_inst_labeled, num_inst_unlabeled, num_inst_train))

            self._kfolds = folds_train
            self._train_idx = train_idx
            self._test_idx = test_idx
            self._label_idx = label_idx
            self._unlabel_idx = unlabel_idx

        self._ml_technique = ml_technique
        if self._ml_technique is None:
            raise ValueError("required param 'ml_technique' can not be empty")

        self._performance_metrics = performance_metrics
        if self._performance_metrics is None or len(
                self._performance_metrics) == 0:
            raise ValueError(
                "required param 'performance_metrics' can not be empty")
        else:
            for metric in self._performance_metrics:
                if not isinstance(metric, BaseMetrics):
                    raise ValueError(
                        "the elements in 'performance_metrics' must be of type BaseMetrics"
                    )

        self._query_strategy = query_strategy
        if self._query_strategy is None:
            raise ValueError(
                "required param 'query_strategy' can not be empty")

        self._oracle = oracle
        if self._oracle is None:
            raise ValueError("required param 'oracle' can not be empty")

        self._stopping_criteria = stopping_criteria
        if self._stopping_criteria is None:
            raise ValueError(
                "required param 'stopping_criteria' can not be empty")

        # Dynamically create the scenario Type given the arguments
        importlib.import_module(self._scenario_type.__module__)

        self._scenario = eval(self._scenario_type.__qualname__)(
            X=self._X,
            y=self._Y,
            train_idx=self._train_idx[0],
            test_idx=self._test_idx[0],
            label_idx=copy.deepcopy(IndexCollection(self._label_idx[0])),
            unlabel_idx=copy.deepcopy(IndexCollection(self._unlabel_idx[0])),
            ml_technique=self._ml_technique,
            performance_metrics=self._performance_metrics,
            query_strategy=self._query_strategy,
            oracle=self._oracle,
            batch_size=batch_size)
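
A minimal usage sketch of the self_partition=True mode described in the constructor docstring above: the experiment calls split() itself, driven by the extra keyword arguments test_ratio, initial_label_rate and all_class. This sketch is an illustrative assumption, not one of the original tests; imports are omitted as elsewhere on this page, and LogisticRegression stands in for any scikit-learn style estimator.

# hypothetical sketch: let the experiment partition the data itself
X, y = make_classification(n_samples=200, n_features=4, n_classes=2)

experiment = HoldOutExperiment(
    client=None,                      # or Client("tcp://<scheduler-address>:8786")
    X=X,
    Y=y,
    scenario_type=PoolBasedSamplingScenario,
    ml_technique=LogisticRegression(),    # assumed estimator, not from the original tests
    performance_metrics=[Accuracy()],
    query_strategy=QueryLeastConfidentSampling(),
    oracle=SimulatedOracle(labels=y),
    stopping_criteria=MaxIteration(5),
    self_partition=True,              # the constructor performs the split internally
    kfolds=1,
    test_ratio=0.3,                   # extra kwargs consumed only when self_partition=True
    initial_label_rate=0.05,
    all_class=True)

result = experiment.evaluate(verbose=False)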
Example #6
class TestRegression(unittest.TestCase):
    # logging.basicConfig(level=logging.DEBUG)

    # Get the data
    __X = np.random.choice(np.linspace(0, 20, 10000), size=200, replace=False).reshape(-1, 1)
    __y = np.sin(__X) + np.random.normal(scale=0.3, size=__X.shape)

    # assembling initial training set
    __train_idx, __test_idx, __label_idx, __unlabel_idx = split(
        X=__X,
        y=__y,
        test_ratio=0.3,
        initial_label_rate=0.05,
        split_count=1,
        all_class=True)

    # defining the kernel for the Gaussian process
    __ml_technique = GaussianProcessRegressor(
        kernel=RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3)) \
               + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1)))

    def test_One_iteration(self):

        experiment = HoldOutExperiment(
            client=None,
            X=self.__X,
            Y=self.__y,
            scenario_type=PoolBasedSamplingScenario,
            train_idx=self.__train_idx,
            test_idx=self.__test_idx,
            label_idx=self.__label_idx,
            unlabel_idx=self.__unlabel_idx,
            ml_technique=self.__ml_technique,
            performance_metrics=[Mse(squared=True)],
            query_strategy=QueryRegressionStd(),
            oracle=SimulatedOracle(labels=self.__y),
            stopping_criteria=MaxIteration(1),
            self_partition=False
        )

        result = experiment.evaluate(verbose=False)
        regressor = result[0].ml_technique

        # plotting the initial estimation
        with plt.style.context('seaborn-white'):
            plt.figure(figsize=(14, 7))
            x = np.linspace(0, 20, 1000)
            pred, std = regressor.predict(x.reshape(-1, 1), return_std=True)
            plt.plot(x, pred)
            plt.fill_between(x, pred.reshape(-1, ) - std, pred.reshape(-1, ) + std, alpha=0.2)
            plt.scatter(self.__X, self.__y, c='k')
            plt.title('Initial estimation')
            plt.show()

    def test_fifteen_iteration(self):

        experiment = HoldOutExperiment(
            client=None,
            X=self.__X,
            Y=self.__y,
            scenario_type=PoolBasedSamplingScenario,
            train_idx=self.__train_idx,
            test_idx=self.__test_idx,
            label_idx=self.__label_idx,
            unlabel_idx=self.__unlabel_idx,
            ml_technique=self.__ml_technique,
            performance_metrics=[Mse(squared=True)],
            query_strategy=QueryRegressionStd(),
            oracle=SimulatedOracle(labels=self.__y),
            stopping_criteria=MaxIteration(15),
            self_partition=False
        )

        result = experiment.evaluate(verbose=False)
        regressor = result[0].ml_technique

        # plot the estimation obtained after the active learning run
        with plt.style.context('seaborn-white'):
            plt.figure(figsize=(14, 7))
            x = np.linspace(0, 20, 1000)
            pred, std = regressor.predict(x.reshape(-1, 1), return_std=True)
            plt.plot(x, pred)
            plt.fill_between(x, pred.reshape(-1, ) - std, pred.reshape(-1, ) + std, alpha=0.2)
            plt.scatter(self.__X, self.__y, c='k')
            plt.title('Initial estimation')
            plt.show()

    def test_five_iteration_batch_size(self):
        experiment = HoldOutExperiment(
            client=None,
            X=self.__X,
            Y=self.__y,
            scenario_type=PoolBasedSamplingScenario,
            train_idx=self.__train_idx,
            test_idx=self.__test_idx,
            label_idx=self.__label_idx,
            unlabel_idx=self.__unlabel_idx,
            ml_technique=self.__ml_technique,
            performance_metrics=[Mse(squared=True)],
            query_strategy=QueryRegressionStd(),
            oracle=SimulatedOracle(labels=self.__y),
            stopping_criteria=MaxIteration(15),
            self_partition=False,
            batch_size=5
        )

        result = experiment.evaluate(verbose=False)
        regressor = result[0].ml_technique

        # plot the estimation obtained after the active learning run
        with plt.style.context('seaborn-white'):
            plt.figure(figsize=(14, 7))
            x = np.linspace(0, 20, 1000)
            pred, std = regressor.predict(x.reshape(-1, 1), return_std=True)
            plt.plot(x, pred)
            plt.fill_between(x, pred.reshape(-1, ) - std, pred.reshape(-1, ) + std, alpha=0.2)
            plt.scatter(self.__X, self.__y, c='k')
            plt.title('Initial estimation')
            plt.show()
Example #7
    def test_keras_digits_recognition_active_learning(self):

        # load the data - it returns 2 tuples of digits & labels - one for training and one for testing
        (x_train, y_train), (x_test, y_test) = mnist.load_data()

        batch_size = 1024
        num_classes = 10
        epochs = 3

        # input image dimensions
        img_rows, img_cols = 28, 28

        # display 14 random images from the training set
        np.random.seed(123)

        rand_14 = np.random.randint(0, x_train.shape[0], 14)
        sample_digits = x_train[rand_14]
        sample_labels = y_train[rand_14]
        num_rows, num_cols = 2, 7
        f, ax = plt.subplots(num_rows,
                             num_cols,
                             figsize=(12, 5),
                             gridspec_kw={
                                 'wspace': 0.03,
                                 'hspace': 0.01
                             },
                             squeeze=True)

        for r in range(num_rows):
            for c in range(num_cols):
                image_index = r * 7 + c
                ax[r, c].axis("off")
                ax[r, c].imshow(sample_digits[image_index], cmap='gray')
                ax[r, c].set_title('No. %d' % sample_labels[image_index])
        plt.show()
        plt.close()

        if K.image_data_format() == 'channels_first':
            x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
            x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
            input_shape = (1, img_rows, img_cols)
        else:
            x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
            x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
            input_shape = (img_rows, img_cols, 1)

        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        x_test /= 255

        ml_technique = Sequential()
        ml_technique.add(
            Conv2D(32,
                   kernel_size=(3, 3),
                   activation='relu',
                   input_shape=input_shape))
        ml_technique.add(Conv2D(64, (3, 3), activation='relu'))
        ml_technique.add(MaxPooling2D(pool_size=(2, 2)))
        ml_technique.add(Dropout(0.25))
        ml_technique.add(Flatten())
        ml_technique.add(Dense(128, activation='relu'))
        ml_technique.add(Dropout(0.5))
        ml_technique.add(Dense(num_classes, activation='softmax'))
        ml_technique.compile(optimizer='Adam',
                             loss='categorical_crossentropy',
                             metrics=['accuracy'])

        # convert class vectors to binary class matrices
        y_train = keras.utils.to_categorical(y_train, num_classes)
        y_test = keras.utils.to_categorical(y_test, num_classes)

        X = np.concatenate((x_train, x_test))
        y = np.concatenate((y_train, y_test))

        train_idx, test_idx, label_idx, unlabel_idx = split(
            X=X,
            y=y,
            test_ratio=0.3,
            initial_label_rate=0.05,
            split_count=1,
            all_class=True)

        # convert to indexed collection
        train_idx = IndexCollection(train_idx[0])
        test_idx = IndexCollection(test_idx[0])
        label_idx = IndexCollection(label_idx[0])
        unlabel_idx = IndexCollection(unlabel_idx[0])

        # Define the active learning components
        stopping_criteria = MaxIteration(10)
        query_strategy = QueryLeastConfidentSampling()
        oracle = SimulatedOracle(labels=y)

        start_time = time.time()
        experimentState = State(
            round=0,
            train_idx=train_idx,
            test_idx=test_idx,
            init_L=label_idx,
            init_U=unlabel_idx,
            performance_metrics=[metric for metric in ["loss", "accuracy"]],
            verbose=True)

        while not stopping_criteria.is_stop() and len(unlabel_idx) > 0:
            label_x = X[label_idx.index, :]
            label_y = y[label_idx.index]
            test_x = X[test_idx, :]
            test_y = y[test_idx]

            # Train and evaluate Model over the labeled instances
            ml_technique.fit(label_x,
                             label_y,
                             batch_size=batch_size,
                             epochs=epochs,
                             verbose=True,
                             validation_data=(test_x, test_y))

            # predict the results over the labeled test instances
            label_pred = ml_technique.predict_classes(test_x)

            # performance calc for all metrics
            label_perf = []
            score = ml_technique.evaluate(x_test, y_test, verbose=1)

            label_perf.append({"name": "loss", "value": score[0]})
            label_perf.append({"name": "accuracy", "value": score[1]})

            # use the query strategy for selecting the indexes
            select_ind = query_strategy.select(X=X,
                                               y=y,
                                               label_index=label_idx,
                                               unlabel_index=unlabel_idx,
                                               batch_size=batch_size,
                                               model=ml_technique,
                                               client=self.__client)

            # query the oracle for the labels of the selected instances
            oracle.query(instances=X[select_ind], indexes=select_ind)

            # update the labeled and unlabeled instances
            label_idx.update(select_ind)
            unlabel_idx.difference_update(select_ind)

            # save intermediate results
            experimentState.add_state(
                StateItem(select_index=select_ind,
                          performance_metrics=[
                              metric['name'] for metric in label_perf
                          ],
                          performance=label_perf))

            # update stopping_criteria
            stopping_criteria.update_information(experimentState)

        end_time = time.time() - start_time
        print(end_time)

        query_analyser = ExperimentAnalyserFactory.experiment_analyser(
            performance_metrics=[metric for metric in ["loss", "accuracy"]],
            method_name=query_strategy.query_function_name,
            method_results=[experimentState],
            type="queries")

        # get a brief description of the experiment
        query_analyser.plot_learning_curves(
            title='Active Learning experiment results')
Example #8
    def test_custom_activeLearning_keras(self):
        batch_size = 5
        epochs = 20

        # partition the data
        train_idx, test_idx, label_idx, unlabel_idx = split(
            X=self.__X,
            y=self.__y,
            test_ratio=0.3,
            initial_label_rate=0.05,
            split_count=1,
            all_class=True)

        # convert to indexed collection
        train_idx = IndexCollection(train_idx[0])
        test_idx = IndexCollection(test_idx[0])
        label_idx = IndexCollection(label_idx[0])
        unlabel_idx = IndexCollection(unlabel_idx[0])

        # Create the model
        ml_technique = Sequential()
        ml_technique.add(Dense(input_dim=30, units=30))
        ml_technique.add(Dense(input_dim=30, units=30))
        ml_technique.add(Dense(input_dim=30, units=2))
        ml_technique.add(Activation('softmax'))
        ml_technique.compile(loss='sparse_categorical_crossentropy',
                             optimizer='adam',
                             metrics=['accuracy'])

        # Define the active learning components
        stopping_criteria = MaxIteration(10)
        query_strategy = QueryLeastConfidentSampling()
        performance_metrics = [
            Accuracy(), F1(average='weighted'),
            HammingLoss()
        ]
        oracle = SimulatedOracle(labels=self.__y)

        start_time = time.time()
        experimentState = State(round=0,
                                train_idx=train_idx,
                                test_idx=test_idx,
                                init_L=label_idx,
                                init_U=unlabel_idx,
                                performance_metrics=[
                                    metric.metric_name
                                    for metric in performance_metrics
                                ],
                                verbose=True)

        while not stopping_criteria.is_stop() and len(unlabel_idx) > 0:

            label_x = self.__X[label_idx.index, :]
            label_y = self.__y[label_idx.index]
            test_x = self.__X[test_idx, :]
            test_y = self.__y[test_idx]

            # Train and evaluate Model over the labeled instances
            ml_technique.fit(label_x,
                             label_y,
                             batch_size=batch_size,
                             epochs=epochs,
                             verbose=True)

            # predict the results over the labeled test instances
            label_pred = ml_technique.predict_classes(test_x)

            # performance calc for all metrics
            label_perf = []
            for metric in performance_metrics:
                value = metric.compute(y_true=test_y, y_pred=label_pred)
                label_perf.append({"name": metric.metric_name, "value": value})

            # use the query strategy for selecting the indexes
            select_ind = query_strategy.select(X=self.__X,
                                               y=self.__y,
                                               label_index=label_idx,
                                               unlabel_index=unlabel_idx,
                                               batch_size=batch_size,
                                               model=ml_technique,
                                               client=self.__client)

            # query the oracle for the labels of the selected instances
            oracle.query(instances=self.__X[select_ind], indexes=select_ind)

            # update the labeled and unlabeled instances
            label_idx.update(select_ind)
            unlabel_idx.difference_update(select_ind)

            # save intermediate results
            experimentState.add_state(
                StateItem(select_index=select_ind,
                          performance_metrics=[
                              metric['name'] for metric in label_perf
                          ],
                          performance=label_perf))

            # update stopping_criteria
            stopping_criteria.update_information(experimentState)

        end_time = time.time() - start_time
        print(end_time)

        query_analyser = ExperimentAnalyserFactory.experiment_analyser(
            performance_metrics=[
                metric.metric_name for metric in performance_metrics
            ],
            method_name=query_strategy.query_function_name,
            method_results=[experimentState],
            type="queries")

        # get a brief description of the experiment
        query_analyser.plot_learning_curves(
            title='Active Learning experiment results')