def test_update_statistics_progress_within_statistics(self, m):
    """Statistics carrying only a progress percentage plus a custom
    stage should be POSTed to the job statistics endpoint unchanged."""
    stats = Statistics(progress_percentage=0.5)
    stats.add_stage(name='other_stage', key1='value1')

    self.client.update_statistics(stats)

    self.assertEqual(m.call_count, 1)
    endpoint = '{}/organizations/{}/training/definitions/{}/jobs/{}/statistics'.format(
        ABEJA_API_URL, ORGANIZATION_ID, TRAINING_JON_DEFINITION_NAME,
        TRAINING_JOB_ID)
    payload = {
        'statistics': {
            'progress_percentage': 0.5,
            'stages': {
                'other_stage': {
                    'key1': 'value1'
                }
            }
        }
    }
    m.assert_called_with(
        'POST',
        endpoint,
        params=None,
        headers={'User-Agent': 'abeja-platform-sdk/{}'.format(VERSION)},
        timeout=30,
        data=None,
        json=payload)
def handler(context):
    """Train a CatBoost multiclass classifier on the iris dataset,
    report train/test accuracy to ABEJA Platform, and export the
    fitted model in CoreML format."""
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        iris.data, iris.target)

    classifier = catboost.CatBoostClassifier(loss_function='MultiClass')
    classifier.fit(x_train, y_train)

    train_acc = classifier.score(x_train, y_train)
    test_acc = classifier.score(x_test, y_test)

    statistics = ABEJAStatistics(num_epochs=1, epoch=1)
    statistics.add_stage(ABEJAStatistics.STAGE_TRAIN, train_acc, None)
    statistics.add_stage(ABEJAStatistics.STAGE_VALIDATION, test_acc, None)
    print(train_acc, test_acc)
    # Best effort: statistics reporting must never break training.
    try:
        client.update_statistics(statistics)
    except Exception:
        pass

    classifier.save_model(
        os.path.join(ABEJA_TRAINING_RESULT_DIR, 'iris.mlmodel'),
        format='coreml',
        export_parameters={'prediction_type': 'probability'})
def handler(context):
    """Fit an SVM classifier on the iris dataset, push train/test
    accuracy statistics to ABEJA Platform, and persist the model
    with joblib."""
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        iris.data, iris.target)

    model = svm.SVC()
    model.fit(x_train, y_train)
    train_acc = model.score(x_train, y_train)
    test_acc = model.score(x_test, y_test)

    statistics = ABEJAStatistics(num_epochs=1, epoch=1)
    statistics.add_stage(ABEJAStatistics.STAGE_TRAIN, train_acc, None)
    statistics.add_stage(ABEJAStatistics.STAGE_VALIDATION, test_acc, None)
    print(train_acc, test_acc)
    # Statistics reporting is best effort; swallow any failure.
    try:
        client.update_statistics(statistics)
    except Exception:
        pass

    joblib.dump(model, os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.pkl'))
def handler(context):
    """Train a LightFM WARP model on MovieLens, reporting
    precision@5 on train/test to ABEJA Platform after every epoch,
    then save the model state as a numpy file."""
    data = fetch_movielens(min_rating=5.0)
    model = LightFM(loss='warp')

    total_epochs = 50
    for current_epoch in range(1, total_epochs + 1):
        print('Epoch: {}'.format(current_epoch))
        model.fit_partial(data['train'], epochs=1, num_threads=1)

        train_acc = precision_at_k(model, data['train'], k=5).mean()
        test_acc = precision_at_k(model, data['test'], k=5).mean()
        print("Train precision: {}".format(train_acc))
        print("Test precision: {}".format(test_acc))

        statistics = ABEJAStatistics(num_epochs=total_epochs,
                                     epoch=current_epoch)
        statistics.add_stage(ABEJAStatistics.STAGE_TRAIN,
                             float(train_acc), None)
        statistics.add_stage(ABEJAStatistics.STAGE_VALIDATION,
                             float(test_acc), None)
        # Best effort: ignore reporting failures and keep training.
        try:
            client.update_statistics(statistics)
        except Exception:
            pass

    np.save(os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.npy'),
            model.__dict__)
def on_epoch_end(self, epoch, logs=None):
    """Keras callback hook: forward this epoch's train/validation
    accuracy and loss to ABEJA Platform."""
    statistics = ABEJAStatistics(
        num_epochs=self.params['epochs'], epoch=epoch + 1)
    statistics.add_stage(
        ABEJAStatistics.STAGE_TRAIN, logs['acc'], logs['loss'])
    statistics.add_stage(
        ABEJAStatistics.STAGE_VALIDATION, logs['val_acc'], logs['val_loss'])
    try:
        self.client.update_statistics(statistics)
    except Exception:
        # Reporting must never interrupt training.
        logger.warning('failed to update statistics.')
def __call__(self, epoch, train_loss, train_acc, val_loss, val_acc):
    """Report one epoch's train/validation metrics to ABEJA Platform."""
    stats = ABEJAStatistics(num_epochs=self._total_epochs, epoch=epoch)
    stats.add_stage(ABEJAStatistics.STAGE_TRAIN, train_acc, train_loss)
    stats.add_stage(ABEJAStatistics.STAGE_VALIDATION, val_acc, val_loss)
    try:
        self.client.update_statistics(stats)
    except Exception:
        # Never let reporting break the training loop.
        logger.warning('failed to update statistics.')
def handler(context):
    """Train a multinomial logistic regression on iris data read from
    a datalake channel, report metrics to ABEJA Platform, and save
    the fitted model.

    The latest file in the datalake channel is expected to be a CSV
    shaped like::

        sepal length (cm), sepal width (cm), petal length (cm), petal width (cm), target
        float, float, float, float, int
    """
    iris = datasets.load_iris()
    file_path = load_latest_file_from_datalake(channel_id)
    frame = pd.read_csv(file_path, sep=',')
    X = frame[iris.feature_names].values.astype('float64')
    Y = frame['target'].values.astype('int64')
    print('successfully load datalake channel file.')

    # train/test split
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.7)

    # define and fit the model
    model = LogisticRegression(solver='lbfgs', C=c,
                               multi_class='multinomial', max_iter=epochs)
    model.fit(X_train, Y_train)

    # evaluate on both splits
    train_acc = accuracy_score(Y_train, model.predict(X_train))
    train_loss = log_loss(Y_train, model.predict_proba(X_train))
    valid_acc = accuracy_score(Y_test, model.predict(X_test))
    valid_loss = log_loss(Y_test, model.predict_proba(X_test))

    # push final statistics to ABEJA Platform
    train_client = TrainClient()
    statistics = ABEJAStatistics(num_epochs=epochs, epoch=epochs)
    statistics.add_stage(name=ABEJAStatistics.STAGE_TRAIN,
                         accuracy=train_acc, loss=train_loss)
    statistics.add_stage(name=ABEJAStatistics.STAGE_VALIDATION,
                         accuracy=valid_acc, loss=valid_loss)
    train_client.update_statistics(statistics)

    print('Train accuracy is {:.3f}.'.format(train_acc))
    print('Train loss is {:.3f}.'.format(train_loss))
    print('Valid accuracy is {:.3f}.'.format(valid_acc))
    print('Valid loss is {:.3f}.'.format(valid_loss))

    # persist the trained model
    joblib.dump(model, os.path.join(ABEJA_TRAINING_RESULT_DIR, 'model.pkl'))
def update_statistics(self, statistics: Statistics) -> None:
    """Notify a job statistics for ABEJA Platform.

    API reference: POST /organizations/<organization_id>/training/definitions/<job_definition_name>/jobs/<training_job_id>/statistics

    Request Syntax:
        .. code-block:: python

            from abeja.train import Client
            from abeja.train.statistics import Statistics as ABEJAStatistics

            client = Client()

            statistics = ABEJAStatistics(num_epochs=10, epoch=1)
            statistics.add_stage(name=ABEJAStatistics.STAGE_TRAIN,
                                 accuracy=90.0, loss=0.10)
            statistics.add_stage(name=ABEJAStatistics.STAGE_VALIDATION,
                                 accuracy=75.0, loss=0.07)

            client.update_statistics(statistics)

    Params:
        - **statistics** (:class:`abeja.train.statistics.Statistics`): job statistics to notify

    Returns:
        None
    """
    if not statistics or not statistics.get_statistics():
        self.logger.warning('empty statistics found.')
        return

    # Backward compatibility with older SDKs (<= 1.0.10): the job
    # definition or job id may be unset, so fall back to the literal
    # string 'None' to keep the endpoint path well-formed.
    job_definition_name = self.job_definition_name or 'None'
    training_job_id = self.training_job_id or 'None'

    try:
        response = self.api.update_statistics(
            organization_id=self.organization_id,
            job_definition_name=job_definition_name,
            training_job_id=training_job_id,
            statistics=statistics.get_statistics())
        self.logger.info('update_statistics result: %s', response)
    except (BadRequest, Unauthorized, Forbidden, NotFound,
            MethodNotAllowed) as e:
        # Client-side API errors are logged, never raised to callers.
        self.logger.warning('update_statistics result was {}.'.format(
            str(e)))
    except Exception:
        # Anything unexpected is logged with a traceback and swallowed.
        self.logger.exception(
            'update_statistics result was unexpected error:')
def test_update_statistics_raise_ConnectionError(self, m):
    """A 500 Internal-Server-Error from the API must be logged via
    ``logger.exception`` and never propagate to the caller."""
    logger_mock = mock.MagicMock()
    self.client.logger = logger_mock
    try:
        stats = Statistics(progress_percentage=0.5, key1='value1')
        self.client.update_statistics(stats)

        self.assertEqual(m.call_count, 1)
        endpoint = '{}/organizations/{}/training/definitions/{}/jobs/{}/statistics'.format(
            ABEJA_API_URL, ORGANIZATION_ID, TRAINING_JON_DEFINITION_NAME,
            TRAINING_JOB_ID)
        m.assert_called_with(
            'POST',
            endpoint,
            params=None,
            headers={'User-Agent': 'abeja-platform-sdk/{}'.format(VERSION)},
            data=None,
            json={'statistics': {'progress_percentage': 0.5,
                                 'key1': 'value1'}})
        # The error path must use logger.exception, not logger.warning.
        self.assertEqual(logger_mock.warning.call_count, 0)
        self.assertEqual(logger_mock.exception.call_count, 1)
    except Exception:
        self.fail()
def _print(self, observation):
    """Translate a Chainer observation dict into ABEJA statistics.

    Keys shaped ``main/<name>`` feed the training stage and
    ``validation/main/<name>`` the validation stage; ``loss`` and
    ``accuracy`` map to the stage's dedicated fields, any other metric
    is forwarded as an extra keyword argument.
    """
    epoch = observation[self._obs_key]
    statistics = ABEJAStatistics(num_epochs=self._total_epochs, epoch=epoch)

    train_loss = train_acc = None
    val_loss = val_acc = None
    train_extras = {}
    val_extras = {}

    for key, value in observation.items():
        parts = key.split('/')
        if len(parts) > 1 and parts[0] == 'main':
            metric = '/'.join(parts[1:])
            if metric == 'loss':
                train_loss = value
            elif metric == 'accuracy':
                train_acc = value
            else:
                train_extras[metric] = value
        elif len(parts) > 2 and parts[0] == 'validation' and parts[1] == 'main':
            metric = '/'.join(parts[2:])
            if metric == 'loss':
                val_loss = value
            elif metric == 'accuracy':
                val_acc = value
            else:
                val_extras[metric] = value

    statistics.add_stage(ABEJAStatistics.STAGE_TRAIN,
                         train_acc, train_loss, **train_extras)
    statistics.add_stage(ABEJAStatistics.STAGE_VALIDATION,
                         val_acc, val_loss, **val_extras)
    try:
        self.client.update_statistics(statistics)
    except Exception:
        # Reporting must never interrupt the training extension.
        logger.warning('failed to update statistics.')
def test_update_statistics_with_empty_statistics(self, m):
    """Empty statistics must not hit the API: one warning is logged
    and no exception escapes."""
    logger_mock = mock.MagicMock()
    self.client.logger = logger_mock
    try:
        self.client.update_statistics(Statistics())
        m.assert_not_called()
        self.assertEqual(logger_mock.warning.call_count, 1)
        self.assertEqual(logger_mock.exception.call_count, 0)
    except Exception:
        self.fail()
def test_update_statistics_override_organization_id(self, m):
    """An organization id passed to the Client constructor overrides
    the default one in the statistics endpoint URL."""
    org_id = '2222222222222'
    client = Client(organization_id=org_id)
    stats = Statistics(progress_percentage=0.5, key1='value1')

    client.update_statistics(stats)

    self.assertEqual(m.call_count, 1)
    endpoint = '{}/organizations/{}/training/definitions/{}/jobs/{}/statistics'.format(
        ABEJA_API_URL, org_id, TRAINING_JON_DEFINITION_NAME, TRAINING_JOB_ID)
    m.assert_called_with(
        'POST',
        endpoint,
        params=None,
        headers={'User-Agent': 'abeja-platform-sdk/{}'.format(VERSION)},
        timeout=30,
        data=None,
        json={'statistics': {'progress_percentage': 0.5,
                             'key1': 'value1'}})
def test_update_statistics(self, m):
    """A full statistics payload (epochs, progress, stages, extra
    keys) is serialized and POSTed to the job statistics endpoint."""
    stats = Statistics(progress_percentage=0.5, epoch=1,
                       num_epochs=5, key1='value1')
    stats.add_stage(name=Statistics.STAGE_TRAIN, accuracy=0.9, loss=0.05)
    stats.add_stage(name=Statistics.STAGE_VALIDATION,
                    accuracy=0.8, loss=0.1, key2=2)

    self.client.update_statistics(stats)

    self.assertEqual(m.call_count, 1)
    endpoint = '{}/organizations/{}/training/definitions/{}/jobs/{}/statistics'.format(
        ABEJA_API_URL, ORGANIZATION_ID, TRAINING_JON_DEFINITION_NAME,
        TRAINING_JOB_ID)
    payload = {
        'statistics': {
            'num_epochs': 5,
            'epoch': 1,
            'progress_percentage': 0.5,
            'stages': {
                'train': {
                    'accuracy': 0.9,
                    'loss': 0.05
                },
                'validation': {
                    'accuracy': 0.8,
                    'loss': 0.1,
                    'key2': 2
                }
            },
            'key1': 'value1'
        }
    }
    m.assert_called_with(
        'POST',
        endpoint,
        params=None,
        headers={'User-Agent': 'abeja-platform-sdk/{}'.format(VERSION)},
        timeout=30,
        data=None,
        json=payload)
def train_model(trainloader, validloader, model, optimizer, criterion):
    """Train *model* for ``n_epochs`` epochs and return it.

    Each epoch runs one optimization pass over *trainloader* and one
    scoring pass over *validloader*; accuracy/loss for both splits are
    pushed to ABEJA Platform, and the weights are checkpointed to
    ``save_path`` whenever the validation loss improves.

    Relies on module-level names: ``n_epochs``, ``save_path``,
    ``device``, ``ABEJAStatistics``, ``TrainClient``.

    Fix: the validation pass now runs under ``torch.no_grad()`` so no
    autograd graph is built — same numeric results, less memory/time.
    """
    # Tracker for the best validation loss seen so far. The magic
    # starting value appears to resume from a previous run's best
    # instead of np.Inf — NOTE(review): confirm this is intentional.
    valid_loss_min = 3.877533  # np.Inf

    # Resume from an existing checkpoint when one is present.
    if os.path.exists(save_path):
        model.load_state_dict(torch.load(save_path))

    for epoch in range(1, n_epochs + 1):
        # Running sums for loss and accuracy over this epoch.
        train_loss = 0.0
        train_total = 0
        train_correct = 0
        valid_loss = 0.0
        valid_total = 0
        valid_correct = 0

        # --- training pass ---
        model.train()
        for data, target in trainloader:
            data, target = data.to(device), target.to(device)
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass and parameter update
            loss.backward()
            optimizer.step()
            # weight batch loss by batch size so the epoch average is exact
            train_loss += loss.item() * data.size(0)
            # count correctly predicted labels
            _, preds_tensor = torch.max(output, 1)
            train_total += target.size(0)
            train_correct += (preds_tensor == target).sum().item()

        # --- validation pass ---
        model.eval()
        # no_grad: validation needs no gradients; skipping autograd
        # bookkeeping saves memory and time without changing results.
        with torch.no_grad():
            for data, target in validloader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                loss = criterion(output, target)
                valid_loss += loss.item() * data.size(0)
                _, preds_tensor = torch.max(output, 1)
                valid_total += target.size(0)
                valid_correct += (preds_tensor == target).sum().item()

        # average losses over dataset size; accuracies over sample counts
        train_loss = train_loss / len(trainloader.dataset)
        valid_loss = valid_loss / len(validloader.dataset)
        train_acc = train_correct / train_total
        valid_acc = valid_correct / valid_total

        # report this epoch's metrics to ABEJA Platform
        train_client = TrainClient()
        statistics = ABEJAStatistics(num_epochs=n_epochs, epoch=epoch)
        statistics.add_stage(name=ABEJAStatistics.STAGE_TRAIN,
                             accuracy=train_acc, loss=train_loss)
        statistics.add_stage(name=ABEJAStatistics.STAGE_VALIDATION,
                             accuracy=valid_acc, loss=valid_loss)
        train_client.update_statistics(statistics)

        # print training/validation statistics
        print(
            'Epoch: {} \tTrain loss: {:.6f} \tTrain acc: {:.6f} \tValid loss: {:.6f} \tValid acc: {:.6f}'.format(
                epoch, train_loss, train_acc, valid_loss, valid_acc
            ))

        # checkpoint whenever validation loss improves
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model.'.format(
                valid_loss_min, valid_loss))
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss

    # return trained model
    return model