def test_pickle_unfitted():
    """Check that an unfitted classifier survives a pickle round trip.

    The unfitted classifier is pickled to disk and loaded back. Both the
    original and the loaded copy must raise ``NotFittedError`` on
    ``predict`` until they are fitted, and must predict the expected
    labels afterwards.
    """
    # Local import keeps this test self-contained.
    import tempfile

    ftdf = pd.DataFrame(
        data=[['woof woof', 0], ['meow meow', 1]],
        columns=['txt', 'lbl'],
    )
    ft_clf = FirstColFtClassifier()

    # Use a throwaway directory instead of a fixed path under the user's
    # home: the directory always exists, never collides between runs and
    # is removed even if pickling fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pic_fpath = os.path.join(tmp_dir, 'ttemp_ft_model.ft')
        with open(pic_fpath, 'wb+') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

    # The original classifier is still unfitted at this point.
    with pytest.raises(NotFittedError):
        ft_clf.predict([['woof woof']])

    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])
    assert ft_clf.predict([['woof woof']])[0] == 0
    assert ft_clf.predict([['meow meow']])[0] == 1
    assert ft_clf.predict([['meow']])[0] == 1
    assert ft_clf.predict([['woof lol']])[0] == 0
    assert ft_clf.predict([['meow lolz']])[0] == 1

    # The unpickled copy is a distinct object and is unfitted as well.
    assert ft_clf2 != ft_clf
    with pytest.raises(NotFittedError):
        ft_clf2.predict([['woof woof']])

    ft_clf2.fit(ftdf[['txt']], ftdf['lbl'])
    assert ft_clf2.predict([['woof woof']])[0] == 0
    assert ft_clf2.predict([['meow meow']])[0] == 1
    assert ft_clf2.predict([['meow']])[0] == 1
    assert ft_clf2.predict([['woof lol']])[0] == 0
    assert ft_clf2.predict([['meow lolz']])[0] == 1
def test_pickle():
    """Check that a fitted classifier predicts the same after pickling.

    The classifier is fitted on a two-row frame, pickled to a temporary
    file and loaded back; the loaded copy must be a different object that
    yields the same predictions.
    """
    ftdf = pd.DataFrame(
        data=[['woof woof', 0], ['meow meow', 1]],
        columns=['txt', 'lbl']
    )
    ft_clf = FirstColFtClassifier()
    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])

    assert ft_clf.predict([['woof woof']])[0] == 0
    assert ft_clf.predict([['meow meow']])[0] == 1
    assert ft_clf.predict([['meow']])[0] == 1
    assert ft_clf.predict([['woof lol']])[0] == 0
    assert ft_clf.predict([['meow lolz']])[0] == 1

    fd, pic_fpath = tempfile.mkstemp()
    # Only the path is used below, so release the descriptor right away
    # to prevent a file-handle leak.
    os.close(fd)
    try:
        with open(pic_fpath, 'wb+') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        assert ft_clf2 != ft_clf
        assert ft_clf2.predict([['woof woof']])[0] == 0
        assert ft_clf2.predict([['meow meow']])[0] == 1
        assert ft_clf2.predict([['meow']])[0] == 1
        assert ft_clf2.predict([['woof lol']])[0] == 0
        assert ft_clf2.predict([['meow lolz']])[0] == 1
    finally:
        # Clean up even when an assertion above fails.
        os.unlink(pic_fpath)
def test_pickle():
    """Check that pickling a fitted classifier preserves its predictions.

    The same inputs are predicted before and after a pickle round trip
    through a temporary file, and the loaded object must not compare
    equal to the original.
    """
    ftdf = pd.DataFrame(
        data=[['woof woof', 0], ['meow meow', 1]],
        columns=['txt', 'lbl'],
    )
    # (input text, expected label) pairs checked on both classifiers.
    expectations = (
        ('woof woof', 0),
        ('meow meow', 1),
        ('meow', 1),
        ('woof lol', 0),
        ('meow lolz', 1),
    )

    ft_clf = FirstColFtClassifier()
    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])
    for text, label in expectations:
        assert ft_clf.predict([[text]])[0] == label

    fd, pic_fpath = tempfile.mkstemp()
    try:
        # Write through the descriptor mkstemp returned; closing the file
        # object also closes the descriptor, so no handle is leaked.
        with os.fdopen(fd, 'wb') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)
    finally:
        # Remove the temporary file whether or not the round trip worked.
        os.unlink(pic_fpath)

    assert ft_clf2 != ft_clf
    for text, label in expectations:
        assert ft_clf2.predict([[text]])[0] == label
def test_predict_proba():
    """Check that predict_proba ranks the matching class highest.

    After fitting on the frame returned by ``_ftdf()``, index 0 must score
    higher for 'woof woof' and index 1 must score higher for 'meow meow'.
    """
    frame = _ftdf()
    classifier = FirstColFtClassifier()
    classifier.fit(frame[['txt']], frame['lbl'])

    woof_scores = classifier.predict_proba([['woof woof']])[0]
    assert woof_scores[0] > woof_scores[1]

    meow_scores = classifier.predict_proba([['meow meow']])[0]
    assert meow_scores[1] > meow_scores[0]
def test_predict():
    """Check predicted labels for a handful of single-row inputs.

    The classifier is fitted on the frame returned by ``_ftdf()`` and each
    text below must be assigned the label paired with it.
    """
    frame = _ftdf()
    classifier = FirstColFtClassifier()
    classifier.fit(frame[['txt']], frame['lbl'])

    # (input text, expected label), checked in this order.
    cases = (
        ('woof woof', 0),
        ('meow meow', 1),
        ('meow', 1),
        ('woof lol', 0),
        ('meow lolz', 1),
    )
    for text, expected_label in cases:
        assert classifier.predict([[text]])[0] == expected_label
def test_pickle(quantize):
    """Check pickling of a fitted classifier, or quantization rejection.

    Args:
        quantize: When truthy, the test only verifies that
            ``quantize(cutoff=1)`` raises ``ValueError`` on this two-row
            model and leaves it unquantized, then returns. When falsy,
            the classifier is pickled to a temporary file and the loaded
            copy must reproduce the original predictions.
    """
    ftdf = pd.DataFrame(
        data=[['woof woof', 0], ['meow meow', 1]],
        columns=['txt', 'lbl'],
    )
    ft_clf = FirstColFtClassifier()
    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])

    if quantize:
        with pytest.raises(ValueError):
            ft_clf.quantize(cutoff=1)
        assert not ft_clf.is_quantized()
        # Nothing below applies to the quantize case; the former trailing
        # ``if quantize:`` check was unreachable and has been removed.
        return

    assert ft_clf.predict([['woof woof']])[0] == 0
    assert ft_clf.predict([['meow meow']])[0] == 1
    assert ft_clf.predict([['meow']])[0] == 1
    assert ft_clf.predict([['woof lol']])[0] == 0
    assert ft_clf.predict([['meow lolz']])[0] == 1

    fd, pic_fpath = tempfile.mkstemp()
    # Only the path is used below; close the descriptor immediately to
    # prevent a file-handle leak.
    os.close(fd)
    try:
        with open(pic_fpath, 'wb+') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        assert ft_clf2 != ft_clf
        assert ft_clf2.predict([['woof woof']])[0] == 0
        assert ft_clf2.predict([['meow meow']])[0] == 1
        assert ft_clf2.predict([['meow']])[0] == 1
        assert ft_clf2.predict([['woof lol']])[0] == 0
        assert ft_clf2.predict([['meow lolz']])[0] == 1
    finally:
        # Clean up even when an assertion above fails.
        os.unlink(pic_fpath)
def test_bad_shape():
    """Check that fit raises ValueError for two badly shaped inputs.

    The first call passes flat lists for both features and labels; the
    second passes nested lists for both.
    """
    classifier = FirstColFtClassifier()
    rejected_inputs = (
        ([7], [0]),
        ([[7]], [[0]]),
    )
    for features, labels in rejected_inputs:
        with pytest.raises(ValueError):
            classifier.fit(features, labels)
columns = train_data_df.columns.values.tolist()

# Train one classifier per column, keyed by column name.
logger.info("start train model")
classifier_dict = {}
# NOTE(review): the first two columns are skipped — presumably they are
# not label columns; confirm against the code that builds train_data_df.
for column in columns[2:]:
    train_label = train_data_df[column]
    # Pass the argument to the logger instead of pre-formatting with %,
    # so the message is only built when the record is actually emitted.
    logger.info("start train %s model", column)
    sk_clf = FirstColFtClassifier(
        lr=learning_rate,
        epoch=epoch,
        wordNgrams=word_ngrams,
        minCount=min_count,
        verbose=2,
    )
    sk_clf.fit(train_data_format, train_label)
    logger.info("complete train %s model", column)
    classifier_dict[column] = sk_clf
logger.info("complete train model")

# Persist all trained classifiers in a single joblib file.
logger.info("start save model")
model_path = config.model_path
# exist_ok avoids the race between checking for the directory and
# creating it.
os.makedirs(model_path, exist_ok=True)
# NOTE(review): plain concatenation assumes model_path ends with a path
# separator — confirm before switching to os.path.join.
joblib.dump(classifier_dict, model_path + model_name)
# NOTE(review): "svae" is a typo for "save"; the message is left as-is in
# case log consumers match on it.
logger.info("complete svae model")

# validate model