Пример #1
0
    def test_is_generator(self):
        """Checks that is_generator() is truthy for an iterator object and a generator function."""

        def sample_generator():
            yield ([1, 2], [1, 2])

        # Note: the generator *function* itself is passed, not the result of calling it.
        for candidate in (TestIterator(), sample_generator):
            self.assertTrue(is_generator(candidate))
Пример #2
0
    def test_is_generator(self):
        """is_generator() must report True for a TestIterator and for a generator function."""
        iterator_instance = TestIterator()

        def yields_one_batch():
            yield ([1, 2], [1, 2])

        # The second candidate is the uncalled generator function.
        candidates = [iterator_instance, yields_one_batch]
        for candidate in candidates:
            self.assertTrue(is_generator(candidate))
Пример #3
0
def job_start(job_backend, trainer, keras_callback):
    """
    Starts the training of a job. Needs job_prepare() first.

    Loads the datasets, stores the first training sample of every input dataset on the
    callback as ``insights_x`` (a list when more than one dataset is loaded, otherwise the
    single sample), registers the validation data on the callback and then constructs,
    compiles and trains the model through the model provider.

    :type job_backend: JobBackend
    :type trainer: Trainer
    :param keras_callback: callback that receives ``insights_x`` and the validation data and
                           is appended to ``trainer.callbacks`` before training
    :return: None
    """

    job_backend.set_status('STARTING')
    job_model = job_backend.get_job_model()

    model_provider = job_model.get_model_provider()

    job_backend.set_status('LOAD DATA')
    datasets = job_model.get_datasets(trainer)

    print('trainer.input_shape = %s\n' % (simplejson.dumps(trainer.input_shape, default=invalid_json_values),))
    print('trainer.classes = %s\n' % (simplejson.dumps(trainer.classes, default=invalid_json_values),))

    multiple_inputs = len(datasets) > 1
    insights_x = [] if multiple_inputs else None

    for dataset_name in job_model.get_input_dataset_names():
        x_train = datasets[dataset_name]['X_train']

        if is_generator(x_train):
            # The builtin next() works on Python 2 and 3, unlike the Python 2 only
            # ``.next()`` method. Indexing the batch (instead of unpacking exactly two
            # values) also accepts (x, y, sample_weight) batches.
            sample = next(x_train)[0][0]
        else:
            sample = x_train[0]

        if multiple_inputs:
            insights_x.append(sample)
        else:
            insights_x = sample

    keras_callback.insights_x = insights_x

    # With several inputs insights_x is a plain list, which has no ``.shape``;
    # report the shape of every sample instead of raising AttributeError.
    stored_insights = keras_callback.insights_x
    if isinstance(stored_insights, list):
        insights_shape = [getattr(item, 'shape', None) for item in stored_insights]
    else:
        insights_shape = getattr(stored_insights, 'shape', None)
    print('Insights sample shape', insights_shape)
    keras_callback.write("Possible data keys '%s'\n" % "','".join(list(datasets.keys())))

    data_train = model_provider.get_training_data(trainer, datasets)
    data_validation = model_provider.get_validation_data(trainer, datasets)

    keras_callback.set_validation_data(data_validation, trainer.nb_val_samples)

    trainer.set_status('CONSTRUCT')
    model = model_provider.get_model(trainer)
    trainer.set_model(model)

    trainer.set_status('COMPILING')
    loss = model_provider.get_loss(trainer)
    optimizer = model_provider.get_optimizer(trainer)
    model_provider.compile(trainer, model, loss, optimizer)
    model.summary()

    trainer.callbacks.append(keras_callback)
    model_provider.train(trainer, model, data_train, data_validation)
Пример #4
0
    def set_validation_data(self, validation_data, validation_data_size=None):
        """
        Stores the validation data and determines how many samples it contains.

        :param validation_data: None, a generator, a dict or a tuple. A dict that has an
                                'x' key is treated as the AETROS code generation layout.
        :param validation_data_size: number of samples; only read when the data turns out
                                     to be a generator.
        :raises Exception: when a generator is given without a size, or when no size could
                           be determined at all.
        """
        self.data_validation = validation_data
        self.data_validation_size = None

        data = self.data_validation
        if data is None:
            return

        sample_source = None
        data_is_dict = isinstance(data, dict)

        if data_is_dict and 'x' in data:
            # It's dict of AETROS code generation
            x_part = data['x']
            if is_generator(x_part):
                # single input
                sample_source = x_part
            elif isinstance(x_part, dict):
                # multiple inputs named
                sample_source = next(six.itervalues(x_part))
        # Not from AETROS code generation
        elif is_generator(data):
            sample_source = data
        elif data_is_dict:
            sample_source = next(six.itervalues(data))
        elif isinstance(data, tuple):
            sample_source = data[0]

        if is_generator(sample_source):
            if validation_data_size is None:
                raise Exception(
                    'validation_data_size needs to be set when a generator is given.'
                )
            self.data_validation_size = validation_data_size
        elif sample_source is not None:
            self.data_validation_size = len(sample_source)

        if self.data_validation_size is None:
            raise Exception(
                'data_validation_size could not be determined for given validation_data. Please specify it.'
            )
Пример #5
0
    def set_validation_data(self, validation_data, validation_data_size=None):
        """
        Remembers the validation data and resolves its sample count.

        :param validation_data: None, a generator, a dict or a tuple; a dict containing an
                                'x' key is handled as the AETROS code generation layout.
        :param validation_data_size: sample count, consulted only for generator data.
        :raises Exception: if generator data comes without a size, or if the size cannot be
                           determined for the given data.
        """
        self.data_validation = validation_data
        self.data_validation_size = None

        current = self.data_validation
        if current is None:
            return

        first_input = None

        if isinstance(current, dict) and 'x' in current:
            # It's dict of AETROS code generation
            inputs = current['x']
            if is_generator(inputs):
                # single input
                first_input = inputs
            elif isinstance(inputs, dict):
                # multiple inputs named
                first_input = next(six.itervalues(inputs))
        else:
            # Not from AETROS code generation
            if is_generator(current):
                first_input = current
            elif isinstance(current, dict):
                first_input = next(six.itervalues(current))
            elif isinstance(current, tuple):
                first_input = current[0]

        resolved_size = None
        if is_generator(first_input):
            if validation_data_size is None:
                raise Exception('validation_data_size needs to be set when a generator is given.')
            resolved_size = validation_data_size
        elif first_input is not None:
            resolved_size = len(first_input)

        if resolved_size is None:
            raise Exception('data_validation_size could not be determined for given validation_data. Please specify it.')

        self.data_validation_size = resolved_size
Пример #6
0
    def test_set_validation_data(self):
        """Exercises KerasCallback.set_validation_data with arrays, dicts and generators."""
        backend = JobBackend('test')
        backend.job = {'id': 'test', 'index': 1, 'modelId': 'test/model'}

        callback = KerasCallback(backend, sys.stdout)

        # A tuple of arrays: the size is the length of the first element.
        callback.set_validation_data(([1, 2, 3], [1, 2, 3]))
        self.assertEqual(callback.data_validation_size, 3)

        # A plain list is expected to be rejected.
        with pytest.raises(Exception):
            callback.set_validation_data([[1, 2], [1, 2]])

        # Array-like inputs always report 3 samples, even when an explicit size is passed.
        array_cases = [
            # aetros format
            ({'x': {'input': [1, 2, 3]}, 'y': {'output': [1, 2, 3]}},),
            ({'input': [1, 2, 3], 'output': [1, 2, 3]},),
            (([1, 2, 3], [1, 2, 3]), 5),
        ]
        for call_args in array_cases:
            callback.set_validation_data(*call_args)
            self.assertEqual(callback.data_validation_size, 3)

        def generator():
            yield ([1, 2], [1, 2])

        self.assertTrue(is_generator(generator))

        # Generators without a size, and lists of generators, must raise.
        rejected_cases = [
            (generator,),
            ((generator, generator),),
            ([generator, generator], 6),
        ]
        for call_args in rejected_cases:
            with pytest.raises(Exception):
                callback.set_validation_data(*call_args)

        # Generators with an explicit size report exactly that size.
        for data, expected_size in ((generator, 5), ((generator, generator), 4)):
            callback.set_validation_data(data, expected_size)
            self.assertEqual(callback.data_validation_size, expected_size)
Пример #7
0
    def build_confusion_matrix(self):
        """
        Builds a confusion matrix for the first output layer from the validation data.

        An empty dict is returned when ``self.data_validation_size`` is None, when the model
        has more than one output layer, when 'Softmax' does not appear in the string form of
        the first output layer's output or its output shape does not have exactly two
        entries, or when no input data could be extracted from ``self.data_validation``.

        :return: dict mapping the first output layer's name to the matrix as nested lists
                 (row index = expected class, column index = predicted class), or ``{}``.
        :raises Exception: when a validation generator yields something other than
                           ``(x, y)`` or ``(x, y, sample_weight)``.
        """
        confusion_matrix = {}

        if self.data_validation_size is None:
            return confusion_matrix

        if len(self.model.output_layers) > 1:
            return confusion_matrix

        first_input_layer = self.model.input_layers[0]
        first_output_layer = self.model.output_layers[0]

        if 'Softmax' not in str(first_output_layer.output) or len(
                first_output_layer.output_shape) != 2:
            return confusion_matrix

        input_data_x = None
        input_data_y = []

        # It's dict of AETROS code generation
        if isinstance(self.data_validation,
                      dict) and 'x' in self.data_validation:

            if is_generator(self.data_validation['x']):
                # single input
                input_data_x = self.data_validation['x']

            elif isinstance(self.data_validation['x'], dict):
                # multiple inputs named
                input_data_x = self.data_validation['x'][
                    first_input_layer.name]
                input_data_y = self.data_validation['y'][
                    first_output_layer.name]

        # Not from AETROS code generation
        else:
            if is_generator(self.data_validation):
                input_data_x = self.data_validation

            elif isinstance(self.data_validation, dict):
                if len(self.model.input_layers) > 1:
                    input_data_x = [
                        self.data_validation[layer.name]
                        for layer in self.model.input_layers
                    ]
                else:
                    # A single-input model previously never got its input data here, so
                    # the matrix was silently skipped. ``.get`` keeps the old "return
                    # empty" outcome when the dict is not keyed by the layer name.
                    input_data_x = self.data_validation.get(
                        first_input_layer.name)

                input_data_y = self.data_validation[first_output_layer.name]

            elif isinstance(self.data_validation, tuple):
                input_data_x = self.data_validation[0]
                input_data_y = self.data_validation[1]

        if input_data_x is None:
            return confusion_matrix

        matrix = np.zeros((first_output_layer.output_shape[1],
                           first_output_layer.output_shape[1]))

        def tally(predicted_classes, expected_classes):
            # Best effort: a mismatch between predictions and labels stops the
            # counting but must not abort the caller. Only Exception is caught so
            # KeyboardInterrupt/SystemExit still propagate.
            try:
                for sample_idx, predicted_class in enumerate(
                        predicted_classes):
                    expected_class = expected_classes[sample_idx]
                    matrix[expected_class, predicted_class] += 1
            except Exception:
                pass

        if is_generator(input_data_x):
            processed_samples = 0

            while processed_samples < self.data_validation_size:
                generator_output = next(input_data_x)
                if len(generator_output) == 2:
                    x, y = generator_output
                elif len(generator_output) == 3:
                    # The sample weight is not needed for the matrix.
                    x, y, _ = generator_output
                else:
                    self.model._stop.set()
                    raise Exception('output of generator should be a tuple '
                                    '(x, y, sample_weight) '
                                    'or (x, y). Found: ' +
                                    str(generator_output))

                # isinstance also covers list/dict subclasses such as OrderedDict.
                if isinstance(x, list):
                    nb_samples = len(x[0])
                elif isinstance(x, dict):
                    nb_samples = len(list(x.values())[0])
                else:
                    nb_samples = len(x)

                processed_samples += nb_samples

                prediction = self.model.predict_on_batch(x)
                tally(prediction.argmax(axis=-1), y.argmax(axis=-1))

        else:
            batch_size = self.current[
                'batch_size'] if 'batch_size' in self.current else 16
            prediction = self.model.predict(input_data_x,
                                            batch_size=batch_size)
            tally(prediction.argmax(axis=-1),
                  np.array(input_data_y).argmax(axis=-1))

        confusion_matrix[first_output_layer.name] = matrix.tolist()

        return confusion_matrix
Пример #8
0
def job_start(job_backend, trainer, keras_callback):
    """
    Starts the training of a job. Needs job_prepare() first.

    Loads the datasets, stores the first training sample of every input dataset on the
    callback as ``insights_x`` (a list when more than one dataset is loaded, otherwise the
    single sample), registers the validation data on the callback and then constructs,
    compiles and trains the model through the model provider.

    :type job_backend: JobBackend
    :type trainer: Trainer
    :param keras_callback: callback that receives ``insights_x`` and the validation data and
                           is appended to ``trainer.callbacks`` before training
    :return: None
    """

    job_backend.set_status('STARTING')
    job_model = job_backend.get_job_model()

    model_provider = job_model.get_model_provider()

    job_backend.set_status('LOAD DATA')
    datasets = job_model.get_datasets(trainer)

    print(
        'trainer.input_shape = %s\n' %
        (simplejson.dumps(trainer.input_shape, default=invalid_json_values), ))
    print('trainer.classes = %s\n' %
          (simplejson.dumps(trainer.classes, default=invalid_json_values), ))

    multiple_inputs = len(datasets) > 1
    insights_x = [] if multiple_inputs else None

    for dataset_name in job_model.get_input_dataset_names():
        x_train = datasets[dataset_name]['X_train']

        if is_generator(x_train):
            # The builtin next() works on Python 2 and 3, unlike the Python 2 only
            # ``.next()`` method. Indexing the batch (instead of unpacking exactly two
            # values) also accepts (x, y, sample_weight) batches.
            sample = next(x_train)[0][0]
        else:
            sample = x_train[0]

        if multiple_inputs:
            insights_x.append(sample)
        else:
            insights_x = sample

    keras_callback.insights_x = insights_x

    # With several inputs insights_x is a plain list, which has no ``.shape``;
    # report the shape of every sample instead of raising AttributeError.
    stored_insights = keras_callback.insights_x
    if isinstance(stored_insights, list):
        insights_shape = [
            getattr(item, 'shape', None) for item in stored_insights
        ]
    else:
        insights_shape = getattr(stored_insights, 'shape', None)
    print('Insights sample shape', insights_shape)
    keras_callback.write("Possible data keys '%s'\n" %
                         "','".join(list(datasets.keys())))

    data_train = model_provider.get_training_data(trainer, datasets)
    data_validation = model_provider.get_validation_data(trainer, datasets)

    keras_callback.set_validation_data(data_validation, trainer.nb_val_samples)

    trainer.set_status('CONSTRUCT')
    model = model_provider.get_model(trainer)
    trainer.set_model(model)

    trainer.set_status('COMPILING')
    loss = model_provider.get_loss(trainer)
    optimizer = model_provider.get_optimizer(trainer)
    model_provider.compile(trainer, model, loss, optimizer)
    model.summary()

    trainer.callbacks.append(keras_callback)
    model_provider.train(trainer, model, data_train, data_validation)
Пример #9
0
    def build_confusion_matrix(self):
        """
        Builds a confusion matrix for the first output layer from the validation data.

        An empty dict is returned when ``self.data_validation_size`` is None, when the model
        has more than one output layer, when 'Softmax' does not appear in the string form of
        the first output layer's output or its output shape does not have exactly two
        entries, or when no input data could be extracted from ``self.data_validation``.

        :return: dict mapping the first output layer's name to the matrix as nested lists
                 (row index = expected class, column index = predicted class), or ``{}``.
        :raises Exception: when a validation generator yields something other than
                           ``(x, y)`` or ``(x, y, sample_weight)``.
        """
        confusion_matrix = {}

        if self.data_validation_size is None:
            return confusion_matrix

        if len(self.model.output_layers) > 1:
            return confusion_matrix

        first_input_layer = self.model.input_layers[0]
        first_output_layer = self.model.output_layers[0]

        if 'Softmax' not in str(first_output_layer.output) or len(first_output_layer.output_shape) != 2:
            return confusion_matrix

        input_data_x = None
        input_data_y = []

        # It's dict of AETROS code generation
        if isinstance(self.data_validation, dict) and 'x' in self.data_validation:

            if is_generator(self.data_validation['x']):
                # single input
                input_data_x = self.data_validation['x']

            elif isinstance(self.data_validation['x'], dict):
                # multiple inputs named
                input_data_x = self.data_validation['x'][first_input_layer.name]
                input_data_y = self.data_validation['y'][first_output_layer.name]

        # Not from AETROS code generation
        else:
            if is_generator(self.data_validation):
                input_data_x = self.data_validation

            elif isinstance(self.data_validation, dict):
                if len(self.model.input_layers) > 1:
                    input_data_x = [self.data_validation[layer.name] for layer in self.model.input_layers]
                else:
                    # A single-input model previously never got its input data here, so
                    # the matrix was silently skipped. ``.get`` keeps the old "return
                    # empty" outcome when the dict is not keyed by the layer name.
                    input_data_x = self.data_validation.get(first_input_layer.name)

                input_data_y = self.data_validation[first_output_layer.name]

            elif isinstance(self.data_validation, tuple):
                input_data_x = self.data_validation[0]
                input_data_y = self.data_validation[1]

        if input_data_x is None:
            return confusion_matrix

        matrix = np.zeros((first_output_layer.output_shape[1], first_output_layer.output_shape[1]))

        def tally(predicted_classes, expected_classes):
            # Best effort: a mismatch between predictions and labels stops the
            # counting but must not abort the caller.
            try:
                for sample_idx, predicted_class in enumerate(predicted_classes):
                    expected_class = expected_classes[sample_idx]
                    matrix[expected_class, predicted_class] += 1
            except Exception:
                pass

        if is_generator(input_data_x):
            processed_samples = 0

            while processed_samples < self.data_validation_size:
                generator_output = next(input_data_x)
                if len(generator_output) == 2:
                    x, y = generator_output
                elif len(generator_output) == 3:
                    # The sample weight is not needed for the matrix.
                    x, y, _ = generator_output
                else:
                    self.model._stop.set()
                    raise Exception('output of generator should be a tuple '
                                    '(x, y, sample_weight) '
                                    'or (x, y). Found: ' + str(generator_output))

                # isinstance also covers list/dict subclasses such as OrderedDict.
                if isinstance(x, list):
                    nb_samples = len(x[0])
                elif isinstance(x, dict):
                    nb_samples = len(list(x.values())[0])
                else:
                    nb_samples = len(x)

                processed_samples += nb_samples

                prediction = self.model.predict_on_batch(x)
                tally(prediction.argmax(axis=-1), y.argmax(axis=-1))

        else:
            batch_size = self.current['batch_size'] if 'batch_size' in self.current else 16
            prediction = self.model.predict(input_data_x, batch_size=batch_size)
            tally(prediction.argmax(axis=-1), np.array(input_data_y).argmax(axis=-1))

        confusion_matrix[first_output_layer.name] = matrix.tolist()

        return confusion_matrix