# NOTE(review): the statements down to the "F-measure" print reference `self`
# and look like the tail of a method whose `def` line is not visible in this
# excerpt. They are reproduced unchanged (only split onto separate lines) and
# need that method's indentation restored when merged back.
# Predict on the validation features, then print the accuracy and the
# precision / recall / F-measure triple reported by a fresh Evaluator.
Y_val_pred = self.clf.predict(X_features_val)
self.evaluatorInstance = Evaluator()
a = self.evaluatorInstance.getAccuracy(Y_val_true, Y_val_pred)
p, r, f = self.evaluatorInstance.getPRF(Y_val_true, Y_val_pred)
print("Accuracy: " + str(a))
print("Precision: " + str(p))
print("Recall: " + str(r))
print("F-measure: " + str(f))


if __name__ == '__main__':
    # Command line: <train file> <val file>.
    # The first argument is the path to your reformatted quasar-s json train
    # file; the second is the path to the val file.
    trainFilePath, valFilePath = sys.argv[1], sys.argv[2]

    # A single Retrieval instance is shared by every pipeline built below.
    retrievalInstance = Retrieval()
    featurizerInstance = [
        TfIdfFeaturizer(),
        CountFeaturizer(),
        HashVectorizer(),
    ]
    classifierInstance = [
        NNClassifier(),
        SVMClassifier(),
        MultinomialNaiveBayes(),
    ]

    # Construct a Pipeline for every featurizer/classifier pairing, iterating
    # featurizers in the outer position and classifiers in the inner one.
    combinations = [
        (feature, classifier)
        for feature in featurizerInstance
        for classifier in classifierInstance
    ]
    for feature, classifier in combinations:
        trainInstance = Pipeline(
            trainFilePath,
            valFilePath,
            retrievalInstance,
            feature,
            classifier,
        )
def get(self, query_string):
    """Run *query_string* through a newly created Retrieval and return its result.

    A fresh ``Retrieval()`` is instantiated on every call; the return value
    is whatever its ``query`` method returns for ``query_string``.
    """
    return Retrieval().query(query_string)
#if __name__ == '__main__':
#    from quasar_pipeline import *
#trainFilePath = sys.argv[1] #please give the path to your reformatted quasar-s json train file
#valFilePath = sys.argv[2] # provide the path to val file
#a = json.load(open('data/msmarco_train_formatted.json'))
#a['questions'][0].keys()

# Experiment script: for each N, build a count-featurizer pipeline and call
# qa() once per classifier, printing a heading before each run.
train_path = 'data/quasar-s_train_formatted.json'
val_path = 'data/quasar-s_dev_formatted.json'

# Classifiers swapped in after the initial MultinomialNaiveBayes run. The
# lambdas defer instantiation until just before the corresponding qa() call.
follow_up_runs = (
    ('Count SVM', lambda: SVM()),
    ('Count perceptron', lambda: Perceptron()),
)

# NOTE(review): in the visible code N is only printed and never handed to
# Pipeline or qa() -- confirm whether it is meant to configure the run.
for N in (5000, 7000, 10000):
    print('N = ' + str(N))
    print()

    p = Pipeline(
        train_path,
        val_path,
        Retrieval(),
        CountFeaturizer(),
        MultinomialNaiveBayes(),
    )
    print('Count MNB')
    p.qa()

    for heading, make_classifier in follow_up_runs:
        p.classifierInstance = make_classifier()
        print()
        print(heading)
        p.qa()

    # NOTE(review): the excerpt ends here and the original indentation is
    # lost; this reset is assumed to sit inside the loop, presumably ahead of
    # further runs that are not shown -- confirm.
    p.classifierInstance = MultinomialNaiveBayes()