示例#1
0
 def test_update_statistics_progress_within_statistics(self, m):
     """A progress percentage plus a custom stage are sent as one JSON payload."""
     stats = Statistics(progress_percentage=0.5)
     stats.add_stage(name='other_stage', key1='value1')

     self.client.update_statistics(stats)

     # Exactly one request must have gone out through the mocked transport.
     self.assertEqual(m.call_count, 1)
     endpoint = '{}/organizations/{}/training/definitions/{}/jobs/{}/statistics'.format(
         ABEJA_API_URL, ORGANIZATION_ID, TRAINING_JON_DEFINITION_NAME, TRAINING_JOB_ID)
     user_agent = {'User-Agent': 'abeja-platform-sdk/{}'.format(VERSION)}
     payload = {
         'statistics': {
             'progress_percentage': 0.5,
             'stages': {'other_stage': {'key1': 'value1'}},
         }
     }
     m.assert_called_with(
         'POST', endpoint,
         params=None, headers=user_agent, timeout=30, data=None, json=payload)
示例#2
0
def handler(context):
    """Train a CatBoost classifier on the iris dataset and export it as Core ML.

    The data is split with the default ``train_test_split`` settings, the
    classifier is fitted, and the train/validation accuracy is printed and
    reported to ABEJA Platform as a single finished epoch (no loss value is
    reported). The model is then written to
    ``ABEJA_TRAINING_RESULT_DIR/iris.mlmodel``.

    Params:
        - **context**: not used by this handler.

    Returns:
        None
    """
    import logging

    iris = datasets.load_iris()
    classifier = catboost.CatBoostClassifier(loss_function='MultiClass')

    X = iris.data
    y = iris.target

    data_train, data_test, label_train, label_test = model_selection.train_test_split(
        X, y)

    classifier.fit(data_train, label_train)

    train_acc = classifier.score(data_train, label_train)
    test_acc = classifier.score(data_test, label_test)

    statistics = ABEJAStatistics(num_epochs=1, epoch=1)
    statistics.add_stage(ABEJAStatistics.STAGE_TRAIN, train_acc, None)
    statistics.add_stage(ABEJAStatistics.STAGE_VALIDATION, test_acc, None)
    print(train_acc, test_acc)

    try:
        client.update_statistics(statistics)
    except Exception:
        # Reporting is best-effort and must never abort the job, but a
        # failure should still leave a trace instead of vanishing silently.
        logging.getLogger(__name__).warning(
            'failed to update statistics.', exc_info=True)

    classifier.save_model(os.path.join(ABEJA_TRAINING_RESULT_DIR, 'iris.mlmodel'),
                          format='coreml',
                          export_parameters={'prediction_type': 'probability'})
示例#3
0
def handler(context):
    """Train an SVM classifier on the iris dataset and persist it with joblib.

    The data is split with the default ``train_test_split`` settings, an
    ``svm.SVC`` with default parameters is fitted, and the train/validation
    accuracy is printed and reported to ABEJA Platform as a single finished
    epoch (no loss value is reported). The fitted model is dumped to
    ``ABEJA_TRAINING_RESULT_DIR/model.pkl``.

    Params:
        - **context**: not used by this handler.

    Returns:
        None
    """
    import logging

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    data_train, data_test, label_train, label_test = model_selection.train_test_split(
        X, y)

    clf = svm.SVC()
    clf.fit(data_train, label_train)

    train_acc = clf.score(data_train, label_train)
    test_acc = clf.score(data_test, label_test)

    statistics = ABEJAStatistics(num_epochs=1, epoch=1)
    statistics.add_stage(ABEJAStatistics.STAGE_TRAIN, train_acc, None)
    statistics.add_stage(ABEJAStatistics.STAGE_VALIDATION, test_acc, None)
    print(train_acc, test_acc)

    try:
        client.update_statistics(statistics)
    except Exception:
        # Reporting is best-effort and must never abort the job, but a
        # failure should still leave a trace instead of vanishing silently.
        logging.getLogger(__name__).warning(
            'failed to update statistics.', exc_info=True)

    joblib.dump(clf, os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.pkl'))
示例#4
0
def handler(context):
    """Train a LightFM (WARP loss) recommender on MovieLens for 50 epochs.

    After every epoch the mean precision@5 on the train and test interactions
    is printed and reported to ABEJA Platform as the "accuracy" of the train
    and validation stages (no loss value is reported). When training is done
    the model's attribute dictionary is written with ``np.save`` to
    ``ABEJA_TRAINING_RESULT_DIR/model.npy``.

    Params:
        - **context**: not used by this handler.

    Returns:
        None
    """
    import logging

    log = logging.getLogger(__name__)

    data = fetch_movielens(min_rating=5.0)

    model = LightFM(loss='warp')

    epochs = 50

    for epoch in range(1, epochs + 1):
        print('Epoch: {}'.format(epoch))
        # fit_partial continues from the current model state, one epoch at a time.
        model.fit_partial(data['train'], epochs=1, num_threads=1)

        train_acc = precision_at_k(model, data['train'], k=5).mean()
        test_acc = precision_at_k(model, data['test'], k=5).mean()
        print("Train precision: {}".format(train_acc))
        print("Test precision: {}".format(test_acc))

        statistics = ABEJAStatistics(num_epochs=epochs, epoch=epoch)
        # float() converts the result of .mean() into a plain Python float.
        statistics.add_stage(ABEJAStatistics.STAGE_TRAIN, float(train_acc),
                             None)
        statistics.add_stage(ABEJAStatistics.STAGE_VALIDATION, float(test_acc),
                             None)

        try:
            client.update_statistics(statistics)
        except Exception:
            # Reporting is best-effort and must never abort training, but a
            # failure should still leave a trace instead of vanishing silently.
            log.warning('failed to update statistics.', exc_info=True)

    np.save(os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.npy'),
            model.__dict__)
示例#5
0
 def on_epoch_end(self, epoch, logs=None):
     """Report the metrics of the epoch that just ended to ABEJA Platform.

     Params:
         - **epoch**: epoch index; it is reported as ``epoch + 1``
           (presumably because the caller counts from zero).
         - **logs**: mapping of metric names to values, or ``None``.
           Metrics that are absent are reported as ``None``.

     Returns:
         None
     """
     # ``logs`` defaults to None and a run may lack some metrics (for example
     # the ``val_*`` entries); neither case should crash the training loop.
     logs = logs or {}
     epochs = self.params['epochs']
     statistics = ABEJAStatistics(num_epochs=epochs, epoch=epoch + 1)
     # NOTE(review): the 'accuracy'/'val_accuracy' fallbacks cover setups that
     # name the metric in full instead of 'acc' -- confirm against the Keras
     # version in use.
     statistics.add_stage(ABEJAStatistics.STAGE_TRAIN,
                          logs.get('acc', logs.get('accuracy')),
                          logs.get('loss'))
     statistics.add_stage(ABEJAStatistics.STAGE_VALIDATION,
                          logs.get('val_acc', logs.get('val_accuracy')),
                          logs.get('val_loss'))
     try:
         self.client.update_statistics(statistics)
     except Exception:
         logger.warning('failed to update statistics.')
示例#6
0
    def __call__(self, epoch, train_loss, train_acc, val_loss, val_acc):
        """Send the train/validation accuracy and loss of ``epoch`` to ABEJA Platform.

        A failure while sending is logged as a warning and otherwise ignored.
        """
        report = ABEJAStatistics(num_epochs=self._total_epochs, epoch=epoch)

        # (stage name, accuracy, loss) for each stage, in reporting order.
        per_stage = (
            (ABEJAStatistics.STAGE_TRAIN, train_acc, train_loss),
            (ABEJAStatistics.STAGE_VALIDATION, val_acc, val_loss),
        )
        for stage_name, accuracy, loss in per_stage:
            report.add_stage(stage_name, accuracy, loss)

        try:
            self.client.update_statistics(report)
        except Exception:
            logger.warning('failed to update statistics.')
示例#7
0
def handler(context):
    """Train a multinomial logistic regression on iris data read from a datalake channel.

    The latest file of the channel ``channel_id`` must be a comma-separated
    CSV with a header row. It has to contain one column per entry of
    ``datasets.load_iris().feature_names`` (read as float64) and a ``target``
    column (read as int64).

    The data is split 70/30, the model is fitted with the module-level
    hyper-parameters ``c`` and ``epochs``, accuracy and log-loss of both
    splits are reported to ABEJA Platform and printed, and the fitted model is
    dumped to ``ABEJA_TRAINING_RESULT_DIR/model.pkl``.

    Params:
        - **context**: not used by this handler.

    Returns:
        None
    """
    iris = datasets.load_iris()
    file_path = load_latest_file_from_datalake(channel_id)
    data = pd.read_csv(file_path, sep=',')
    X = data[iris.feature_names].values.astype('float64')
    Y = data['target'].values.astype('int64')
    print('successfully load datalake channel file.')

    # train test split
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.7)

    # define model
    model = LogisticRegression(solver='lbfgs',
                               C=c,
                               multi_class='multinomial',
                               max_iter=epochs)

    # train model
    model.fit(X_train, Y_train)

    # evaluate model
    # The split is not stratified, so a split may miss a class. Passing the
    # model's own class list keeps the columns of predict_proba aligned with
    # the labels log_loss expects instead of raising on such a split.
    train_acc = accuracy_score(Y_train, model.predict(X_train))
    train_loss = log_loss(Y_train, model.predict_proba(X_train),
                          labels=model.classes_)
    valid_acc = accuracy_score(Y_test, model.predict(X_test))
    valid_loss = log_loss(Y_test, model.predict_proba(X_test),
                          labels=model.classes_)

    # update ABEJA statistics
    # The whole fit is reported as the final epoch (epoch == num_epochs).
    train_client = TrainClient()
    statistics = ABEJAStatistics(num_epochs=epochs, epoch=epochs)
    statistics.add_stage(name=ABEJAStatistics.STAGE_TRAIN,
                         accuracy=train_acc,
                         loss=train_loss)
    statistics.add_stage(name=ABEJAStatistics.STAGE_VALIDATION,
                         accuracy=valid_acc,
                         loss=valid_loss)
    train_client.update_statistics(statistics)
    print('Train accuracy is {:.3f}.'.format(train_acc))
    print('Train loss is {:.3f}.'.format(train_loss))
    print('Valid accuracy is {:.3f}.'.format(valid_acc))
    print('Valid loss is {:.3f}.'.format(valid_loss))

    # save model
    joblib.dump(model, os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.pkl'))
示例#8
0
    def update_statistics(self, statistics: Statistics) -> None:
        """ Notify a job statistics for ABEJA Platform.

        API reference: POST /organizations/<organization_id>/training/definitions/<job_definition_name>/jobs/<training_job_id>/statistics

        This method does not propagate errors of the API call: an empty
        statistics object is skipped with a warning, and any exception raised
        while calling the API is logged instead of being re-raised.

        Request Syntax:
            .. code-block:: python

                from abeja.train import Client
                from abeja.train.statistics import Statistics as ABEJAStatistics

                client = Client()

                statistics = ABEJAStatistics(num_epochs=10, epoch=1)
                statistics.add_stage(name=ABEJAStatistics.STAGE_TRAIN, accuracy=90.0, loss=0.10)
                statistics.add_stage(name=ABEJAStatistics.STAGE_VALIDATION, accuracy=75.0, loss=0.07)

                client.update_statistics(statistics)

        Params:
            - **statistics** (:class:`abeja.train.statistics.Statistics`): job statistics to notify

        Returns:
            None
        """
        # To keep backward compatibility with the older SDK (<= 1.0.10),
        # we have to allow a case which either job definition or job id is
        # `None`.
        training_job_id = self.training_job_id or 'None'
        job_definition_name = self.job_definition_name or 'None'

        # Build the payload once so the value that is checked for emptiness is
        # exactly the value that gets sent.
        payload = statistics.get_statistics() if statistics else None
        if not payload:
            self.logger.warning('empty statistics found.')
            return

        try:
            response = self.api.update_statistics(
                organization_id=self.organization_id,
                job_definition_name=job_definition_name,
                training_job_id=training_job_id,
                statistics=payload)
            self.logger.info('update_statistics result: %s', response)
        except (BadRequest, Unauthorized, Forbidden, NotFound,
                MethodNotAllowed) as e:
            # These error types are reported at warning level without a
            # traceback.
            self.logger.warning('update_statistics result was %s.', e)
        except Exception:
            # Anything else is unexpected: keep the traceback in the log.
            self.logger.exception(
                'update_statistics result was unexpected error:')
示例#9
0
 def test_update_statistics_raise_ConnectionError(self, m):
     # check: don't raise Exception when model-api returns 500
     # Internal-Server-Error
     logger_mock = mock.MagicMock()
     self.client.logger = logger_mock
     statistics = Statistics(progress_percentage=0.5, key1='value1')

     # Only the call under test is guarded. The assertions stay outside the
     # try block so that a failing assertion reports its own message instead
     # of being swallowed and replaced by a bare self.fail().
     try:
         self.client.update_statistics(statistics)
     except Exception as e:
         self.fail('update_statistics raised unexpectedly: {!r}'.format(e))

     self.assertEqual(m.call_count, 1)
     url = '{}/organizations/{}/training/definitions/{}/jobs/{}/statistics'.format(
         ABEJA_API_URL, ORGANIZATION_ID, TRAINING_JON_DEFINITION_NAME, TRAINING_JOB_ID)
     m.assert_called_with(
         'POST',
         url,
         params=None,
         headers={
             'User-Agent': 'abeja-platform-sdk/{}'.format(VERSION)},
         data=None,
         json={
             'statistics': {
                 'progress_percentage': 0.5,
                 'key1': 'value1'}})
     # The error must be logged through logger.exception, not logger.warning.
     self.assertEqual(logger_mock.warning.call_count, 0)
     self.assertEqual(logger_mock.exception.call_count, 1)
    def _print(self, observation):
        """Convert one observation into ABEJA statistics and send them.

        Keys of the form ``main/<metric>`` feed the train stage and keys of
        the form ``validation/main/<metric>`` feed the validation stage. The
        ``loss`` and ``accuracy`` metrics are passed as the stage's loss and
        accuracy; every other metric is forwarded as an extra keyword
        argument. All other keys are ignored. A failure while sending is
        logged as a warning and otherwise ignored.
        """
        epoch = observation[self._obs_key]
        statistics = ABEJAStatistics(num_epochs=self._total_epochs,
                                     epoch=epoch)

        # Headline metrics stay None when the observation does not carry them.
        train_main = {'loss': None, 'accuracy': None}
        train_extra = {}
        val_main = {'loss': None, 'accuracy': None}
        val_extra = {}

        for key, value in observation.items():
            parts = key.split('/')
            if len(parts) > 1 and parts[0] == 'main':
                metric = '/'.join(parts[1:])
                main, extra = train_main, train_extra
            elif len(parts) > 2 and parts[:2] == ['validation', 'main']:
                metric = '/'.join(parts[2:])
                main, extra = val_main, val_extra
            else:
                continue

            if metric in main:
                main[metric] = value
            else:
                extra[metric] = value

        statistics.add_stage(ABEJAStatistics.STAGE_TRAIN,
                             train_main['accuracy'], train_main['loss'],
                             **train_extra)
        statistics.add_stage(ABEJAStatistics.STAGE_VALIDATION,
                             val_main['accuracy'], val_main['loss'],
                             **val_extra)

        try:
            self.client.update_statistics(statistics)
        except Exception:
            logger.warning('failed to update statistics.')
示例#11
0
 def test_update_statistics_with_empty_statistics(self, m):
     # check: don't raise Exception
     logger_mock = mock.MagicMock()
     self.client.logger = logger_mock

     # Only the call under test is guarded. The assertions stay outside the
     # try block so that a failing assertion reports its own message instead
     # of being swallowed and replaced by a bare self.fail().
     try:
         self.client.update_statistics(Statistics())
     except Exception as e:
         self.fail('update_statistics raised unexpectedly: {!r}'.format(e))

     # Empty statistics must be skipped: no request, exactly one warning.
     m.assert_not_called()
     self.assertEqual(logger_mock.warning.call_count, 1)
     self.assertEqual(logger_mock.exception.call_count, 0)
示例#12
0
 def test_update_statistics_override_organization_id(self, m):
     # check: an organization id given to Client() is the one used in the
     # request URL
     custom_org_id = '2222222222222'
     custom_client = Client(organization_id=custom_org_id)
     custom_client.update_statistics(
         Statistics(progress_percentage=0.5, key1='value1'))

     self.assertEqual(m.call_count, 1)
     endpoint = '{}/organizations/{}/training/definitions/{}/jobs/{}/statistics'.format(
         ABEJA_API_URL, custom_org_id, TRAINING_JON_DEFINITION_NAME, TRAINING_JOB_ID)
     payload = {'statistics': {'progress_percentage': 0.5, 'key1': 'value1'}}
     m.assert_called_with(
         'POST', endpoint,
         params=None,
         headers={'User-Agent': 'abeja-platform-sdk/{}'.format(VERSION)},
         timeout=30,
         data=None,
         json=payload)
示例#13
0
 def test_update_statistics(self, m):
     """Top-level fields, both stages and extra keys are POSTed as one JSON payload."""
     stats = Statistics(progress_percentage=0.5, epoch=1,
                        num_epochs=5, key1='value1')
     stats.add_stage(name=Statistics.STAGE_TRAIN, accuracy=0.9, loss=0.05)
     stats.add_stage(name=Statistics.STAGE_VALIDATION,
                     accuracy=0.8, loss=0.1, key2=2)

     self.client.update_statistics(stats)

     self.assertEqual(m.call_count, 1)
     endpoint = '{}/organizations/{}/training/definitions/{}/jobs/{}/statistics'.format(
         ABEJA_API_URL, ORGANIZATION_ID, TRAINING_JON_DEFINITION_NAME, TRAINING_JOB_ID)
     # Extra keyword arguments of a stage (key2) stay inside that stage, while
     # extra keyword arguments of Statistics (key1) sit at the top level.
     stages = {
         'train': {'accuracy': 0.9, 'loss': 0.05},
         'validation': {'accuracy': 0.8, 'loss': 0.1, 'key2': 2},
     }
     payload = {
         'statistics': {
             'num_epochs': 5,
             'epoch': 1,
             'progress_percentage': 0.5,
             'stages': stages,
             'key1': 'value1',
         }
     }
     user_agent = {'User-Agent': 'abeja-platform-sdk/{}'.format(VERSION)}
     m.assert_called_with(
         'POST', endpoint,
         params=None, headers=user_agent, timeout=30, data=None, json=payload)
示例#14
0
def train_model(trainloader, validloader, model, optimizer, criterion):
    """Train ``model`` and return it.

    If a checkpoint already exists at ``save_path`` its weights are loaded
    first. Each epoch then runs one optimisation pass over ``trainloader`` and
    one evaluation pass over ``validloader``, reports loss and accuracy of
    both stages to ABEJA Platform, prints them, and saves the model's state
    dict to ``save_path`` whenever the validation loss is at or below the best
    value seen so far.

    Relies on the module-level names ``n_epochs``, ``save_path`` and
    ``device``.

    Params:
        - **trainloader**: iterable of ``(data, target)`` training batches
          that also exposes ``.dataset``.
        - **validloader**: iterable of ``(data, target)`` validation batches
          that also exposes ``.dataset``.
        - **model**: the network to train (updated in place).
        - **optimizer**: optimizer stepping the model's parameters.
        - **criterion**: loss function called as ``criterion(output, target)``.

    Returns:
        The same ``model`` object in its state after the last epoch (not
        necessarily the best checkpoint written to ``save_path``).
    """
    # initialize tracker for minimum validation loss
    # NOTE(review): hard-coded starting threshold; presumably the best loss of
    # an earlier run (the trailing comment hints at np.Inf) -- confirm.
    valid_loss_min = 3.877533  # np.Inf

    if os.path.exists(save_path):
        # map_location lets a checkpoint saved on one device (e.g. GPU) be
        # restored when running on another (e.g. CPU).
        model.load_state_dict(torch.load(save_path, map_location=device))

    # One client is enough for the whole run; no need to rebuild it per epoch.
    train_client = TrainClient()

    for epoch in range(1, n_epochs + 1):
        # initialize variables to monitor training and validation loss and accuracy
        train_loss = 0.0
        train_total = 0
        train_correct = 0
        valid_loss = 0.0
        valid_total = 0
        valid_correct = 0

        # train the model
        model.train()
        for data, target in trainloader:
            data, target = data.to(device), target.to(device)

            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # update training loss (weighted by batch size so the later
            # division by the dataset size yields a per-sample average)
            train_loss += loss.item() * data.size(0)
            # count number of correct labels
            _, preds_tensor = torch.max(output, 1)
            train_total += target.size(0)
            train_correct += (preds_tensor == target).sum().item()

        # validate the model
        model.eval()
        # No backward pass happens here, so skip building the autograd graph;
        # the computed values are unchanged, only memory use drops.
        with torch.no_grad():
            for data, target in validloader:
                data, target = data.to(device), target.to(device)

                # forward pass: compute predicted outputs by passing inputs to the model
                output = model(data)
                # calculate the batch loss
                loss = criterion(output, target)
                # update average validation loss
                valid_loss += loss.item() * data.size(0)
                # count number of correct labels
                _, preds_tensor = torch.max(output, 1)
                valid_total += target.size(0)
                valid_correct += (preds_tensor == target).sum().item()

        # calculate average losses
        train_loss = train_loss / len(trainloader.dataset)
        valid_loss = valid_loss / len(validloader.dataset)
        # calculate accuracy
        train_acc = train_correct / train_total
        valid_acc = valid_correct / valid_total

        # update ABEJA statistics
        statistics = ABEJAStatistics(num_epochs=n_epochs, epoch=epoch)
        statistics.add_stage(name=ABEJAStatistics.STAGE_TRAIN,
                             accuracy=train_acc,
                             loss=train_loss)
        statistics.add_stage(name=ABEJAStatistics.STAGE_VALIDATION,
                             accuracy=valid_acc,
                             loss=valid_loss)
        train_client.update_statistics(statistics)

        # print training/validation statistics
        print(
            'Epoch: {} \tTrain loss: {:.6f} \tTrain acc: {:.6f} \tValid loss: {:.6f} \tValid acc: {:.6f}'.format(
                epoch,
                train_loss,
                train_acc,
                valid_loss,
                valid_acc
            ))

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model.'.format(
                valid_loss_min,
                valid_loss))
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss
    # return trained model
    return model