Примеры использования Classifier.save на Python

Язык программирования: Python

Пространство имен/Пакет: finetune

Класс/Тип: Classifier

Метод/Функция: save

Примеров на hotexamples.com: 13

Python Classifier.save — найдено 13 примеров. Это лучшие примеры кода на Python для finetune.Classifier.save, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

Classifier(30)

predict(28)

load(14)

fit(13)

save(13)

predict_proba(9)

generate_text(8)

cached_predict(3)

get_estimator(3)

explain(2)

featurize(2)

_init_from_pretrained(1)

_load_base_model(1)

featurize_sequence(1)

finetune(1)

finetune_grid_search_cv(1)

Пример #1

Показать файл

    def test_save_load(self):
        """
        Round-trip a fitted classifier through save and load.

        Checks that both saved files stay under their size limits and that
        the model reloaded from the float16 file reproduces the predictions
        made before saving.
        """
        full_precision_path = "tests/saved-models/test-save-load"
        half_precision_path = "tests/saved-models/test-save-load_fp16"

        classifier = Classifier(**self.default_config(save_adam_vars=False))
        fit_rows = self.dataset.sample(n=self.n_sample)
        eval_rows = self.dataset.sample(n=self.n_sample)
        classifier.fit(fit_rows.Text, fit_rows.Target)
        expected = classifier.predict(eval_rows.Text)

        # Size check for the save made with save_adam_vars=False.
        classifier.save(full_precision_path)
        self.assertLess(os.path.getsize(full_precision_path), 500000000)

        # Switching the saver to float16 must shrink the file further.
        classifier.saver.save_dtype = np.float16
        classifier.save(half_precision_path)
        self.assertLess(os.path.getsize(half_precision_path), 260000000)

        # Rebind the same name so the fitted model is not kept alive.
        classifier = Classifier.load(half_precision_path)
        actual = classifier.predict(eval_rows.Text)
        for position, value in enumerate(expected):
            self.assertEqual(value, actual[position])

Пример #2

Показать файл

    def test_fit_lm_only(self):
        """
        Ensure language-model-only training does not error out.

        Fits on text without targets, round-trips the model through
        save/load, then fits again with targets and checks the types of the
        values returned by ``predict`` and ``predict_proba``.
        """
        model = Classifier()
        train_sample = self.dataset.sample(n=self.n_sample)
        valid_sample = self.dataset.sample(n=self.n_sample)

        # Ensure model can still be fit with only text
        model.fit(train_sample.Text)

        # Save and reload check
        save_file = 'tests/saved-models/test-save-load'
        model.save(save_file)
        model = Classifier.load(save_file)

        # Ensure model can still be fit with text + targets
        model.fit(train_sample.Text, train_sample.Target)
        predictions = model.predict(valid_sample.Text)
        for prediction in predictions:
            # `np.int` was only an alias of the builtin `int`; it was
            # deprecated in NumPy 1.20 and removed in 1.24, so referencing it
            # raises AttributeError there. The builtin is the same check.
            self.assertIsInstance(prediction, (int, np.int64))

        probabilities = model.predict_proba(valid_sample.Text)
        for proba in probabilities:
            self.assertIsInstance(proba, dict)

Пример #3

Показать файл

Файл: test_classifier.py Проект: tc-wolf/finetune

    def test_save_load_language_model(self):
        """
        Exercise text generation before fitting, after fitting, and after a
        save/load round trip.

        The encoder's start token must be absent from the output of the
        un-finetuned model and present in the output of the finetuned and
        reloaded models.
        """
        checkpoint_path = "tests/saved-models/test-save-load"
        classifier = Classifier()

        # Non finetuned models do not use extra tokens
        generated = classifier.generate_text("The quick brown fox", 6)
        encoder = classifier.input_pipeline.text_encoder
        start_token = encoder.decoder[encoder.start_token]
        self.assertNotIn(start_token, generated)

        rows = self.dataset.sample(n=self.n_sample)
        classifier.fit(rows.Text, rows.Target)
        generated = classifier.generate_text("", 5)
        self.assertIn(start_token, generated.lower())
        self.assertEqual(type(generated), str)
        classifier.save(checkpoint_path)

        classifier = Classifier.load(checkpoint_path)
        regenerated = classifier.generate_text("Indico RULE")
        self.assertEqual(type(regenerated), str)

        # Both of these models use extra toks
        expected_prefix = "{}Indico RULE".format(start_token).lower()
        self.assertIn(expected_prefix, regenerated.lower())

Пример #4

Показать файл

 def test_save_load_language_model(self):
     """
     Ensure text generation works before and after a save/load round trip.

     Fits a classifier, generates text, saves and reloads the model, then
     checks that text generated from a seed is a ``str`` that contains the
     seed prefixed by the ``_start_`` marker (compared case-insensitively).
     """
     save_file = 'tests/saved-models/test-save-load'
     model = Classifier(verbose=False)
     train_sample = self.dataset.sample(n=self.n_sample)
     model.fit(train_sample.Text, train_sample.Target)
     lm_out = model.generate_text("", 5)
     self.assertEqual(type(lm_out), str)
     model.save(save_file)
     model = Classifier.load(save_file)
     lm_out_2 = model.generate_text("Indico RULE")
     self.assertEqual(type(lm_out_2), str)
     # Lowercase both sides: the needle is already lowercased, so the
     # comparison is only case-insensitive if the haystack is lowercased too.
     self.assertIn('_start_Indico RULE'.lower(), lm_out_2.lower())

Пример #5

Показать файл

 def test_save_load(self):
     """
     Saving and reloading a fitted classifier must neither raise nor
     change the predictions it makes on the same validation sample.
     """
     checkpoint_path = 'tests/saved-models/test-save-load'
     classifier = Classifier(config=self.default_config())
     fit_rows = self.dataset.sample(n=self.n_sample)
     eval_rows = self.dataset.sample(n=self.n_sample)
     classifier.fit(fit_rows.Text, fit_rows.Target)
     expected = classifier.predict(eval_rows.Text)

     # Round trip through disk, reusing the name so the old model is released.
     classifier.save(checkpoint_path)
     classifier = Classifier.load(checkpoint_path)

     actual = classifier.predict(eval_rows.Text)
     for position, value in enumerate(expected):
         self.assertEqual(value, actual[position])

Пример #6

Показать файл

 def test_save_load_language_model(self):
     """
     Generate text with a fitted model, round-trip it through save/load,
     and check that the reloaded model's output is a ``str`` containing
     the seed text prefixed by the encoder's start token.
     """
     checkpoint_path = 'tests/saved-models/test-save-load'
     classifier = Classifier()
     rows = self.dataset.sample(n=self.n_sample)
     classifier.fit(rows.Text, rows.Target)

     generated = classifier.generate_text("", 5)
     self.assertEqual(type(generated), str)

     classifier.save(checkpoint_path)
     classifier = Classifier.load(checkpoint_path)

     regenerated = classifier.generate_text("Indico RULE")
     self.assertEqual(type(regenerated), str)

     # Look the start token up on the reloaded model's encoder.
     encoder = classifier.input_pipeline.text_encoder
     start_token = encoder.decoder[encoder.start]
     expected_prefix = '{}Indico RULE'.format(start_token).lower()
     self.assertIn(expected_prefix, regenerated.lower())

Пример #7

Показать файл

    def test_save_load(self):
        """
        Saving and reloading a classifier fitted with context must neither
        raise nor change its predictions on the training inputs.
        """
        checkpoint_path = "tests/saved-models/test-save-load"
        settings = self.default_config(save_adam_vars=False, n_epochs=1)
        classifier = Classifier(**settings)

        classifier.fit(self.trainX, self.trainY, context=self.train_context)
        expected = classifier.predict(self.trainX, context=self.train_context)
        classifier.save(checkpoint_path)

        # Reuse the name so the fitted model is released before predicting.
        classifier = Classifier.load(checkpoint_path)
        actual = classifier.predict(self.trainX, context=self.train_context)
        for position, value in enumerate(expected):
            self.assertEqual(value, actual[position])

Пример #8

Показать файл

    def test_save_load(self):
        """
        Fit on randomly assigned binary labels, then confirm that a
        save/load round trip neither raises nor changes the predictions
        made on the test inputs.
        """
        checkpoint_path = "tests/saved-models/test-save-load"
        settings = self.default_config(save_adam_vars=False, n_epochs=1)
        classifier = Classifier(**settings)

        train_inputs, test_inputs, train_labels, _ = self.dataset
        # Replace the real labels with one random 0/1 draw per training row.
        label_count = len(train_labels)
        train_labels = [random.randint(0, 1) for _ in range(label_count)]

        classifier.fit(train_inputs, train_labels)
        expected = classifier.predict(test_inputs)
        classifier.save(checkpoint_path)

        classifier = Classifier.load(checkpoint_path)
        actual = classifier.predict(test_inputs)
        for position, value in enumerate(expected):
            self.assertEqual(value, actual[position])

Пример #9

Показать файл

Файл: combined-strat-v2.py Проект: pdurkin84/W210_Gov_Complaints_Portal

    l2_reg=0.0,
    lr=6.25E-05,
    lm_loss_coef=0.25,
    #                     eval_acc = True, # doesn't work
    #                     oversample = True, # oversamples too much, so I am doing it separately
    params_device=0,
    autosave_path="/W210_Gov_Complaints_Portal/models/",
    verbose=True,
)
# Train, save, and evaluate: this part of the script finetunes `model`, reports
# the elapsed time, writes the model to disk, and then times prediction on
# the held-out inputs.
# NOTE(review): `start`, `model`, `trainX_res_list`, `trainY_res_list` and
# `testX` are defined before this excerpt — `start` is presumably taken just
# before training begins; confirm.
model.fit(trainX_res_list,
          trainY_res_list)  # Finetune base model on custom data
duration = time.time() - start
print("Training Done")
print("It took :" + str(duration) + " seconds")

model.save("/W210_Gov_Complaints_Portal/models/combined_model_strat_20181117"
           )  # Serialize the model to disk
print("Model Saved")

print("Starting testing")
# Uncomment to evaluate the saved model instead of the in-memory one:
# model = Classifier.load("/W210_Gov_Complaints_Portal/models/combined_model_strat_20181117")
print(testX.shape)
print(model)
# Restart the timer so `duration` below covers prediction only.
start = time.time()
predictions = model.predict(testX.tolist())
duration = time.time() - start
print("Predictions done")
print("It took :" + str(duration) + " seconds")

print("Evaluating accuracy")
# Filled by the loop over `predictions` that follows.
mainPredictions = []
for pred in predictions:

Пример #10

Показать файл

Файл: finetune_gpt.py Проект: derekhoward/Reachout_triage

# Input data and saved models live in directories relative to the current
# working directory; the models directory is created on import if missing.
DATA_PATH = Path('./data')
MODELS_PATH = Path('./models')
MODELS_PATH.mkdir(exist_ok=True)


if __name__ == '__main__':
    # Finetune the language model on post texts (no targets are passed to
    # `fit`) and save the result under MODELS_PATH / <name>.
    parser = argparse.ArgumentParser()
    parser.add_argument('--nrows', default=147618, type=int,
                        help='Define number of posts to be used to perform unsupervised finetuning of language model, defaults to all posts available (147618)')
    # Required: without a name, `MODELS_PATH / args.name` would raise a
    # TypeError only after the whole finetuning run had completed.
    parser.add_argument('--name', type=str, required=True,
                        help='Name of model to be saved in ./models directory')
    parser.add_argument('--labeled', action='store_true',
                        help='Use only labeled posts for finetuning')
    args = parser.parse_args()

    # read in data and select sample based on CLI args
    posts_df = pd.read_csv(DATA_PATH/'processed'/'all_posts_data.csv', usecols=['post_id', 'cleaned_body', 'label', 'predict_me'])

    if args.labeled:
        # Keep posts that have a label or are flagged in `predict_me`.
        posts_sample = posts_df[(posts_df.label.notnull()) | posts_df.predict_me]
    else:
        # Fixed seed so the same sample is drawn on every run.
        posts_sample = posts_df.sample(n=args.nrows, random_state=42)

    texts = list(posts_sample.cleaned_body.astype(str))
    print(f'{len(texts)} posts will be used to finetune the GPT language model')

    model = Classifier(batch_size=8)
    model.fit(texts)

    model.save(MODELS_PATH / args.name)

Пример #11

Показать файл

# Filter the complaints data by description length, split it 80/20 into
# train and test sets, finetune a Classifier on the training part and save it.
# NOTE(review): `data3` is built before this excerpt.
print(data3.shape)
print(data3.loc[82480])

# Keep rows whose description is between 20 and 512 characters, inclusive.
# NOTE(review): the 512 bound mirrors max_length=512 passed to Classifier
# below, but this filter counts characters — whether max_length uses the
# same unit is not visible here; confirm.
mask = (data3['description'].str.len() >=
        20) & (data3['description'].str.len() <= 512)
dataFiltered = data3.loc[mask]
print(dataFiltered.shape)

# NOTE(review): the result of this expression is discarded when run as a
# script — presumably a notebook leftover used to list columns with NaNs.
dataFiltered.columns[dataFiltered.isna().any()].tolist()
# ourLabel doesn't have NaN values, so that is good.

trainingData = dataFiltered[["description", "OurLabel"]]
print(type(trainingData))
print(trainingData.shape)
# Split into train and test 80/20; the fixed random_state makes the split
# reproducible.
trainX, testX, trainY, testY = train_test_split(trainingData.description,
                                                trainingData.OurLabel,
                                                test_size=0.2,
                                                random_state=42)
# bigMask = (trainingData["description"].str.len() >=1000)
# print(trainingData.loc[bigMask].shape)
print(trainX.shape)
print(type(trainX))
print(trainY.shape)

model = Classifier(max_length=512, val_interval=3000,
                   verbose=True)  # Load base model
model.fit(trainX, trainY)  # Finetune base model on custom data

model.save("newModel")  # Serialize the model to disk

Пример #12

Показать файл

Файл: combined.py Проект: pdurkin84/W210_Gov_Complaints_Portal

                                                stratify=sampleY)
# Train, save, and evaluate: finetune a Classifier on the training split,
# save it, predict on the test split, and map predictions through `labelsMap`.
# NOTE(review): `trainX`, `trainY`, `testX` and `labelsMap` are defined
# before this excerpt.
print(trainX.shape)
print("Split into train and test")

print("Starting training")
print(trainX.shape)
# The timer starts before the Classifier is constructed, so the reported
# training time includes model construction as well as fitting.
start = time.time()
model = Classifier(max_length=512, val_interval=3000,
                   verbose=True)  # Load base model
model.fit(trainX.tolist(),
          trainY.tolist())  # Finetune base model on custom data
duration = time.time() - start
print("Training Done")
print("It took :" + str(duration) + " seconds")

model.save("combined_model_20181018")  # Serialize the model to disk
print("Model Saved")

# Uncomment to evaluate a saved model instead of the in-memory one.
# NOTE(review): this path ("../models/...") differs from the relative path
# used by model.save above — confirm which location is intended.
# model = Classifier.load("../models/combined_model_20181018")
print(testX.shape)
print(model)
# Restart the timer so `duration` below covers prediction only.
start = time.time()
predictions = model.predict(testX.tolist())
duration = time.time() - start
print("Predictions done")
print("It took :" + str(duration) + " seconds")

# Translate each prediction through `labelsMap`; a prediction missing from
# the mapping raises at the lookup.
mainPredictions = []
for pred in predictions:
    mainPredictions.append(labelsMap[pred])

Пример #13

Показать файл

    lr=6.25E-05,
    lm_loss_coef=0.25,
    #                     eval_acc = True, # doesn't work
    #                     oversample = True, # oversamples too much, so I am doing it separately
    params_device=0,
    autosave_path="/W210_Gov_Complaints_Portal/models/",
    verbose=True,
)
# Train, save, and evaluate: this part of the script finetunes `model`, reports
# the elapsed time, writes the model to disk, and then times prediction on
# the held-out inputs.
# NOTE(review): `start`, `model`, `trainX`, `trainY` and `testX` are defined
# before this excerpt — `start` is presumably taken just before training
# begins; confirm.
model.fit(trainX.tolist(),
          trainY.tolist())  # Finetune base model on custom data
duration = time.time() - start
print("Training Done")
print("It took :" + str(duration) + " seconds")

model.save(
    "/W210_Gov_Complaints_Portal/models/combined_model_full_no_oversample_20181123"
)  # Serialize the model to disk
print("Model Saved")

print("Starting testing")
# Uncomment to evaluate the saved model instead of the in-memory one:
# model = Classifier.load("/W210_Gov_Complaints_Portal/models/combined_model_full_no_oversample_20181123")
print(testX.shape)
print(model)
# Restart the timer so `duration` below covers prediction only.
start = time.time()
predictions = model.predict(testX.tolist())
duration = time.time() - start
print("Predictions done")
print("It took :" + str(duration) + " seconds")

print("Evaluating accuracy")
# Starts empty here; the code that fills it lies beyond this excerpt.
mainPredictions = []