Y_val_pred = self.clf.predict(X_features_val)

        self.evaluatorInstance = Evaluator()
        a = self.evaluatorInstance.getAccuracy(Y_val_true, Y_val_pred)
        p, r, f = self.evaluatorInstance.getPRF(Y_val_true, Y_val_pred)
        print("Accuracy: " + str(a))
        print("Precision: " + str(p))
        print("Recall: " + str(r))
        print("F-measure: " + str(f))


if __name__ == '__main__':
    # Command-line arguments:
    #   argv[1] -- path to the reformatted quasar-s json train file
    #   argv[2] -- path to the val file
    trainFilePath, valFilePath = sys.argv[1], sys.argv[2]

    # A single retrieval object is handed to every pipeline built below.
    retrievalInstance = Retrieval()
    featurizerInstance = [TfIdfFeaturizer(), CountFeaturizer(), HashVectorizer()]
    classifierInstance = [NNClassifier(), SVMClassifier(), MultinomialNaiveBayes()]

    # Enumerate every (featurizer, classifier) pairing, featurizer-major, so
    # the construction order matches a nested featurizer/classifier loop.
    combos = [(f, c) for f in featurizerInstance for c in classifierInstance]
    for feature, classifier in combos:
        trainInstance = Pipeline(
            trainFilePath, valFilePath, retrievalInstance, feature, classifier)
示例#2
0
 def get(self, query_string):
     """Create a new ``Retrieval`` and return its ``query(query_string)`` result."""
     return Retrieval().query(query_string)
示例#3
0
#if __name__ == '__main__':
# from quasar_pipeline import *
#trainFilePath = sys.argv[1] #please give the path to your reformatted quasar-s json train file
#valFilePath = sys.argv[2] # provide the path to val file
#a = json.load(open('data/msmarco_train_formatted.json'))
#a['questions'][0].keys()

# Paths of the formatted quasar-s train and dev JSON files passed to Pipeline
# below.
train_path = 'data/quasar-s_train_formatted.json'
val_path = 'data/quasar-s_dev_formatted.json'

# NOTE(review): N is only printed inside this loop; it is not passed to
# Pipeline or to any other call visible here -- confirm how it is supposed to
# take effect.
for N in [5000, 7000, 10000]:
    print ('N = ' + str(N))
    print()

    # Build a pipeline with CountFeaturizer + MultinomialNaiveBayes and run qa().
    p = Pipeline(train_path, val_path, Retrieval(), CountFeaturizer(), MultinomialNaiveBayes())
    print('Count MNB')
    p.qa()

# NOTE(review): the statements below are at top level, so they run once after
# the loop has finished, on the last Pipeline bound to `p` -- confirm whether
# they were meant to be part of the loop body.

# Replace the classifier on the existing pipeline with SVM() and run qa() again.
p.classifierInstance = SVM()
print()
print('Count SVM')
p.qa()

# Same again with Perceptron() as the classifier.
p.classifierInstance = Perceptron()
print()
print('Count perceptron')
p.qa()


# Switch the pipeline's classifier to MultinomialNaiveBayes().
p.classifierInstance = MultinomialNaiveBayes()