def test_compare_cross_validation(
    training_configuration: _MentorTrainAndTestConfiguration,
    compare_configuration: _MentorTrainAndTestConfiguration,
    tmpdir,
    shared_root: str,
):
    """Train both configured architectures and require each to meet its own
    expected training accuracy (graphql is mocked with the fixture CSV data).
    """
    mentor = load_mentor_csv(
        fixture_mentor_data(training_configuration.mentor_id, "data.csv"))
    # Mock the graphql endpoint so training pulls the fixture mentor data.
    responses.add(
        responses.POST,
        "http://graphql/graphql",
        json={"data": {"mentor": mentor.to_dict()}},
        status=200,
    )
    # Same train-then-assert sequence for each configuration, in order.
    for config in (training_configuration, compare_configuration):
        train_result = ClassifierFactory().new_training(
            mentor=config.mentor_id,
            shared_root=shared_root,
            data_path=tmpdir,
            arch=config.arch,
        ).train(shared_root)
        assert train_result.accuracy >= config.expected_training_accuracy
# Example #2
# 0
def _test_gets_off_topic(
    monkeypatch,
    data_root: str,
    shared_root: str,
    mentor_id: str,
    question: str,
    expected_answer_id: str,
    expected_answer: str,
    expected_media: List[Media],
):
    """With the off-topic threshold maxed out, any question must score below
    the threshold while still returning the expected answer payload."""
    # A threshold of 1.0 means no prediction can reach it: everything is offtopic.
    monkeypatch.setenv("OFF_TOPIC_THRESHOLD", "1.0")  # everything is offtopic
    graphql_fixture = fixture_path("graphql/{}.json".format(mentor_id))
    with open(graphql_fixture) as f:
        mentor_data = json.load(f)
    responses.add(
        responses.POST,
        "http://graphql/graphql",
        json=mentor_data,
        status=200,
    )
    _ensure_trained(mentor_id, shared_root, data_root)
    classifier = ClassifierFactory().new_prediction(
        mentor=mentor_id,
        shared_root=shared_root,
        data_path=data_root,
    )
    result = classifier.evaluate(question, shared_root)
    assert result.highest_confidence < get_off_topic_threshold()
    assert result.answer_id == expected_answer_id
    assert result.answer_text == expected_answer
    assert result.answer_media == expected_media
    assert result.feedback_id is not None
# Example #3
# 0
def _ensure_trained(mentor_id: str, shared_root: str, output_dir: str) -> None:
    """Train a model for *mentor_id* only when no trained model exists yet.

    NOTE: these tests are not meant to exercise training, but when no
    pre-trained model is present it is more convenient to just train one
    here.  Once trained and committed, subsequent runs reuse the
    fixture/trained model and skip this step.
    """
    already_trained = path.isdir(path.join(output_dir, mentor_id))
    if already_trained:
        return
    trainer = ClassifierFactory().new_training(mentor_id, shared_root, output_dir)
    trainer.train(shared_root)
def test_compare_test_accuracy(
    training_configuration: _MentorTrainAndTestConfiguration,
    compare_configuration: _MentorTrainAndTestConfiguration,
    tmpdir,
    shared_root: str,
    example: str,
    test_set_file: str,
):
    """Train both architectures on the same mentor data and assert the
    compare arch matches or beats the baseline on training accuracy,
    test-set accuracy, and single-example confidence."""
    mentor = load_mentor_csv(
        fixture_mentor_data(training_configuration.mentor_id, "data.csv"))
    test_set = load_test_csv(
        fixture_mentor_data(training_configuration.mentor_id,
                            test_set_file or "test.csv"))
    responses.add(
        responses.POST,
        "http://graphql/graphql",
        json={"data": {"mentor": mentor.to_dict()}},
        status=200,
    )

    def _train(config):
        # Train a classifier for the given configuration and return the result.
        return ClassifierFactory().new_training(
            mentor=config.mentor_id,
            shared_root=shared_root,
            data_path=tmpdir,
            arch=config.arch,
        ).train(shared_root)

    def _predictor(config):
        # Build a prediction classifier for the given configuration.
        return ClassifierFactory().new_prediction(
            mentor=config.mentor_id,
            shared_root=shared_root,
            data_path=tmpdir,
            arch=config.arch,
        )

    def _test_accuracy(classifier):
        # Fraction of the test set the classifier answers correctly.
        results = run_model_against_testset_ignore_confidence(
            classifier, test_set, shared_root)
        return results.passing_tests / len(results.results)

    lr_train = _train(training_configuration)
    hf_train = _train(compare_configuration)
    assert hf_train.accuracy >= lr_train.accuracy

    hf_classifier = _predictor(compare_configuration)
    hf_test_accuracy = _test_accuracy(hf_classifier)
    lr_classifier = _predictor(training_configuration)
    lr_test_accuracy = _test_accuracy(lr_classifier)
    assert lr_test_accuracy <= hf_test_accuracy

    # The compare arch should also be at least as confident on one example.
    hf_result = hf_classifier.evaluate(example, shared_root)
    lr_result = lr_classifier.evaluate(example, shared_root)
    assert hf_result.highest_confidence >= lr_result.highest_confidence
# Example #5
# 0
def train_task(mentor: str, arch: str = "") -> float:
    """Train a classifier for *mentor* and return its training accuracy.

    Args:
        mentor: id of the mentor to train.
        arch: classifier architecture; "" lets the factory pick its default.

    Returns:
        The training accuracy reported by the trained model.

    Raises:
        Exception: any training error is logged and re-raised unchanged.
    """
    try:
        result = ClassifierFactory().new_training(
            mentor=mentor,
            shared_root=SHARED_ROOT,
            data_path=OUTPUT_ROOT,
            arch=arch,
        ).train(SHARED_ROOT)
        return result.accuracy
    except Exception as err:
        logging.exception(err)
        # Bare `raise` preserves the original traceback; the previous
        # `raise (err)` re-raised via `raise err`, appending a spurious frame.
        raise
# Example #6
# 0
def test_trains_and_outputs_models(data_root: str, shared_root: str,
                                   mentor_id: str):
    """Training should report the default-arch model path and write the
    model artifacts (pickled model and word2vec file) into it."""
    with open(fixture_path("graphql/{}.json".format(mentor_id))) as f:
        graphql_data = json.load(f)
    responses.add(
        responses.POST,
        "http://graphql/graphql",
        json=graphql_data,
        status=200,
    )
    training = ClassifierFactory().new_training(mentor_id, shared_root,
                                                data_root)
    result = training.train(shared_root)
    assert result.model_path == path.join(data_root, mentor_id, ARCH_DEFAULT)
    # Both expected artifacts must exist under the reported model path.
    for artifact in ("model.pkl", "w2v.txt"):
        assert path.exists(path.join(result.model_path, artifact))
# Example #7
# 0
 def find_classifier(
         self,
         mentor_id: str,
         arch: str = ARCH_DEFAULT) -> QuestionClassifierPrediction:
     """Return a prediction classifier for *mentor_id*, reusing a cached one
     while it is at least as fresh as the latest trained model.

     NOTE(review): the cache is keyed by mentor_id only, so entries are
     shared across arch values — confirm that is intended.
     """
     cached = self.cache.get(mentor_id)
     if cached and cached.last_trained_at >= cached.classifier.get_last_trained_at():
         return cached.classifier
     # Fix: forward `arch` — it was previously accepted but silently ignored,
     # so callers could never request a non-default architecture here.
     c = ClassifierFactory().new_prediction(mentor=mentor_id,
                                            shared_root=self.shared_root,
                                            data_path=self.data_root,
                                            arch=arch)
     self.cache[mentor_id] = Entry(c)
     return c
def test_train_and_predict_transformers(
    training_configuration: _MentorTrainAndTestConfiguration,
    tmpdir,
    shared_root: str,
):
    """Train the configured arch, then run the fixture test set through the
    resulting classifier and require zero failing tests."""
    mentor_id = training_configuration.mentor_id
    mentor = load_mentor_csv(fixture_mentor_data(mentor_id, "data.csv"))
    test_set = load_test_csv(fixture_mentor_data(mentor_id, "test.csv"))
    responses.add(
        responses.POST,
        "http://graphql/graphql",
        json={"data": {"mentor": mentor.to_dict()}},
        status=200,
    )
    train_result = ClassifierFactory().new_training(
        mentor=mentor_id,
        shared_root=shared_root,
        data_path=tmpdir,
        arch=training_configuration.arch,
    ).train(shared_root)
    assert train_result.accuracy >= training_configuration.expected_training_accuracy

    classifier = ClassifierFactory().new_prediction(
        mentor=mentor_id,
        shared_root=shared_root,
        data_path=tmpdir,
        arch=training_configuration.arch,
    )
    test_results = run_model_against_testset(classifier, test_set, shared_root)
    # Surface failures in the log even when the assertion below passes.
    logging.warning(test_results.errors)
    logging.warning(
        f"percentage passed = {test_results.passing_tests}/{len(test_results.results)}"
    )
    assert len(test_results.errors) == 0
# Example #9
# 0
def test_gets_answer_for_exact_match_and_paraphrases(
    data_root: str,
    shared_root: str,
    mentor_id: str,
    question: str,
    expected_answer_id: str,
    expected_answer: str,
    expected_media: List[Media],
):
    """An exact-match (or paraphrased) question should return the expected
    answer payload with maximum confidence."""
    with open(fixture_path("graphql/{}.json".format(mentor_id))) as f:
        graphql_data = json.load(f)
    responses.add(
        responses.POST,
        "http://graphql/graphql",
        json=graphql_data,
        status=200,
    )
    _ensure_trained(mentor_id, shared_root, data_root)
    classifier = ClassifierFactory().new_prediction(
        mentor_id, shared_root, data_root)
    result = classifier.evaluate(question, shared_root)
    assert result.answer_id == expected_answer_id
    assert result.answer_text == expected_answer
    assert result.answer_media == expected_media
    # Exact matches / paraphrases are expected to score full confidence.
    assert result.highest_confidence == 1
    assert result.feedback_id is not None