Exemplo n.º 1
0
    def process_question(self):
        """Run the stored question through classification, feature extraction and query building.

        Each stage's result is saved on the instance (``question_class``,
        ``question_keywords``, ``query``) and logged at INFO level.
        """
        doc = self.question_doc

        # Stage 1: classify the question.
        question_class = classify_question(doc)
        self.question_class = question_class
        _logger.info("Question Class: {}".format(question_class))

        # Stage 2: extract features, using the class found in stage 1.
        keywords = extract_features(question_class, doc)
        self.question_keywords = keywords
        _logger.info("Question Features: {}".format(keywords))

        # Stage 3: build the query from the extracted features.
        query = construct_query(keywords, doc)
        self.query = query
        _logger.info("Query: {}".format(query))
Exemplo n.º 2
0
    def test_classify_question(self):
        """Check that the classifier's mean cross-validation score beats the threshold.

        Loads the pipe-separated training corpus, splits it 80/20 with a fixed
        seed, runs ``classify_question`` on the split, cross-validates the
        returned classifier on the returned training data, and asserts the mean
        score is above ``self.classification_score``.
        """
        csv_path = os.path.join(CORPUS_DIR, QUESTION_CLASSIFICATION_TRAINING_DATA)
        questions = pandas.read_csv(csv_path, sep='|', header=0)
        train_split, test_split = train_test_split(questions, test_size=0.2, random_state=42)

        outcome = classify_question(df_question_train=train_split, df_question_test=test_split)
        # The first element (the predicted class) is not needed for scoring.
        _, clf, train_labels, train_features = outcome

        scores = cross_val_score(clf, train_features, train_labels)
        mean_score = scores.mean()
        std_score = scores.std()

        print("Accuracy: %0.2f (+/- %0.2f)" % (mean_score, std_score * 2))
        print("SD:", std_score)

        assert mean_score > self.classification_score
Exemplo n.º 3
0
    def process_question(self, dfOut):
        """Classify the question, extract features, build the query, and record all three.

        Each stage's result is saved on the instance and logged at INFO level.
        The results are also written into slots 0, 1 and 2 of ``dfentry``, which
        is finally passed to ``insert`` together with ``dfOut``.

        Args:
            dfOut: Destination object handed unchanged to ``insert``.
        """
        # NOTE(review): ``dfentry`` is not defined inside this method; it must
        # come from an enclosing/module scope and be indexable at 0..2 - confirm.
        self.question_class = classify_question(self.question_doc)
        _logger.info("Question Class: {}".format(self.question_class))
        # NOTE(review): the ``"" +`` concatenation presumably expects a
        # string-like class label - confirm what classify_question returns.
        dfentry[0] = "" + self.question_class

        self.question_keywords = extract_features(self.question_class,
                                                  self.question_doc)
        _logger.info("Question Features: {}".format(self.question_keywords))
        # Keywords are stored as one comma-separated string.
        dfentry[1] = ','.join(self.question_keywords)

        self.query = construct_query(self.question_keywords, self.question_doc)
        _logger.info("Query: {}".format(self.query))
        dfentry[2] = "{}".format(self.query)
        insert(dfOut, dfentry)