def evaluateScore(pairs, features, labels):
    vsm_models = dict(models.vsm_models)
    # For every ML algorithm we trained...
    for algorithm, name in algorithms:
        # For every question (each question has 5 candidate answers)
        num_eval = len(pairs) / 5
        guesses = []
        for i in range(num_eval):
            correct = []
            # Go through each (answer, question) pair for this question
            for (answer, question), phi, label in zip(pairs[i * 5:i * 5 + 5],
                                                      features[i * 5:i * 5 + 5],
                                                      labels[i * 5:i * 5 + 5]):
                # Get the prediction for this candidate answer
                prediction = algorithm.predict(phi)[0]
                # If we predict the answer is right, keep it
                if prediction == 1:
                    correct.append(answer)
            # If we think no answer is right, leave the question blank
            if len(correct) == 0:
                guesses.append((-1, 0))
            # Otherwise, choose the first answer predicted to be right
            else:
                guesses.append((correct[0], question.getCorrectWord()))
        # How did this ML algorithm do? (Evaluation)
        scoring.score_model(guesses, verbose=True, modelname=name)
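# Every evaluation routine in this file hands score_model a list of
# (guess, gold_answer) pairs, using (-1, 0) or (None, ...) for questions that
# were left blank. The helper below is only a minimal sketch of that interface
# under SAT-style scoring (+1 for a correct answer, 0 for an omission, -1/4 for
# a wrong answer); the project's real scoring.score_model may compute more.
def score_model_sketch(answer_guess_pairs, verbose=False, modelname=""):
    right = sum(1 for guess, gold in answer_guess_pairs if guess == gold)
    omitted = sum(1 for guess, gold in answer_guess_pairs if guess in (-1, None))
    wrong = len(answer_guess_pairs) - right - omitted
    sat_score = right - 0.25 * wrong
    if verbose:
        print "%s: %d right, %d wrong, %d omitted, SAT score %.2f" % (
            modelname, right, wrong, omitted, sat_score)
    return sat_score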
def main(questions, glove):
    #####################################################################
    ############################### MODELS ##############################
    #####################################################################

    # Returns an answer word chosen uniformly at random from the question's answers
    def randomModel(question, distfunc=cosine, threshold=2, rev=False):
        return question.answers[random.randint(0, len(question.answers) - 1)]

    #####################################################################
    ############################ EVAL MODELS #############################
    #####################################################################
    distances = [
        (kldist, "kldist"),
        (jsd, "jsd"),
        (cosine, "cosine"),
        (L2, "L2"),
        (jaccard, "jaccard")
    ]

    param_models = [
        ("Sentence", sentenceModel),
        ("Distance Model", distanceModel),
        ("Weighted VSM", weightedSentenceModel),
        ("Double Blank Combo VSM", doubleSentenceModel),
        ("Double Blank Max VSM", doubleSentenceMaxModel),
        ("Adjective Model", adjectiveModel),
        ("Noun Model", nounModel),
        ("Verb Model", verbModel)
    ]

    for name, model in param_models:
        # Weighted VSM takes the unigram counts as an extra argument
        if name == "Weighted VSM":
            scoring.score_model(
                [(model(glove, unigrams, q, threshold=.9)[0], q.getCorrectWord()) for q in questions],
                verbose=True, modelname=name)
        else:
            scoring.score_model(
                [(model(glove, q, threshold=.9)[0], q.getCorrectWord()) for q in questions],
                verbose=True, modelname=name)

    os.system("say Finished")
def distanceTrainDataDevTest(dev=True):
    train_data = None
    eval_data = None
    if dev:
        train_data, eval_data = get_distance_ml_training.getEvaluatingTrainingData()
    else:
        train_data, eval_data = get_distance_ml_training.getTestingTrainingData()
    train = train_data[0]
    train_labels = train_data[1]
    evals = eval_data[0]
    eval_labels = eval_data[1]
    num_dev = len(evals) / 5

    print "Training the models..."
    for model, name in distance_models:
        model.fit(train, train_labels)

    print "Get Training Error..."
    # TODO: actually evaluate the training set and report a SAT score plus the number
    # answered correctly/incorrectly, the same way the dev loop below does
    # (see the hedged sketch after this function).
    for model, name in distance_models:
        print "\nML Algorithm Training: ", name
        print "Scored: ", model.score(train, train_labels)

    print "Evaluating Models On Dev..."
    for model, name in distance_models:
        num_right = 0
        num_not_answer = 0
        num_wrong = 0
        for i in range(num_dev):
            vals = model.predict(evals[i * 5:i * 5 + 5])
            if 1 in vals:
                pred_index = numpy.where(vals == 1)[0][0]
                answer_index = eval_labels[i * 5:i * 5 + 5].index(1)
                if pred_index == answer_index:
                    num_right += 1
                else:
                    num_wrong += 1
            else:
                num_not_answer += 1
        print "\nML Algorithm Dev: ", name
        print "Answered Correctly: %d  Did Not Answer: %d" % (num_right, num_not_answer)
        print "Percent Right: ", model.score(evals, eval_labels)
        print "SAT Score: ", scoring.score_model(
            [(1, 1)] * num_right + [(None, 1)] * num_not_answer + [(0, 1)] * num_wrong)
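# Hedged sketch for the TODO inside distanceTrainDataDevTest above: report the same
# per-question breakdown and SAT-style score on the training questions that the dev
# loop already reports. It is meant to be called with the function's local
# distance_models / train / train_labels; an illustration, not the author's code.
def reportTrainingError(distance_models, train, train_labels):
    num_train = len(train) / 5
    for model, name in distance_models:
        num_right, num_not_answer, num_wrong = 0, 0, 0
        for i in range(num_train):
            vals = model.predict(train[i * 5:i * 5 + 5])
            if 1 in vals:
                pred_index = numpy.where(vals == 1)[0][0]
                answer_index = train_labels[i * 5:i * 5 + 5].index(1)
                if pred_index == answer_index:
                    num_right += 1
                else:
                    num_wrong += 1
            else:
                num_not_answer += 1
        print "\nML Algorithm Training: ", name
        print "Answered Correctly: %d  Did Not Answer: %d" % (num_right, num_not_answer)
        print "SAT Score: ", scoring.score_model(
            [(1, 1)] * num_right + [(None, 1)] * num_not_answer + [(0, 1)] * num_wrong)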
def evaluateScore(questions, features, labels):
    # Note: relies on the module-level `glove` vectors and the trained `algorithms` list
    vsm_models = dict(models.vsm_models)
    # For every ML algorithm we trained...
    for algorithm, name in algorithms:
        # Use the VSM model the algorithm picks to answer each question
        guesses = []
        for question, phi, label in zip(questions, features, labels):
            # The model the algorithm thinks we should use
            prediction = algorithm.predict(phi)[0]
            # If we predict no model can get this question right, leave it blank
            if prediction == "No model":
                guesses.append((-1, 0))
            else:
                # Look up the chosen VSM model
                model = vsm_models[prediction]
                # Use that model to answer the question
                guesses.append((model(glove, question)[0], question.getCorrectWord()))
        # How did this ML algorithm do?
        scoring.score_model(guesses, verbose=True, modelname=name)
def scoring():
    #check the score of the deployed model
    score = score_model()
    return str(score)
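# The scoring() function above reads like a web-endpoint body that returns the
# deployed model's score as text. Below is a minimal sketch of how it might be
# exposed over HTTP with Flask; the route name, port, and the
# `from scoring import score_model` import are assumptions for illustration,
# not taken from the source.
from flask import Flask

from scoring import score_model  # assumed location of the project's score_model helper

app = Flask(__name__)

@app.route("/scoring", methods=["GET"])
def scoring():
    # Check the score of the deployed model and return it as plain text
    return str(score_model())

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)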
new_files = False
for filename in os.listdir(input_folder_path):
    if input_folder_path + "/" + filename not in ingested_files:
        new_files = True

##################Deciding whether to proceed, part 1
#if you found new data, you should proceed; otherwise, end the process here
if not new_files:
    print("No new ingested data, exiting")
    exit(0)

##################Checking for model drift
#check whether the score from the deployed model differs from the score of the model
#that uses the newest ingested data
ingestion.merge_multiple_dataframe()
scoring.score_model(production=True)

with open(os.path.join(prod_deployment_path, "latestscore.txt"), "r") as report_file:
    old_f1 = float(report_file.read())

with open(os.path.join(model_path, "latestscore.txt"), "r") as report_file:
    new_f1 = float(report_file.read())

##################Deciding whether to proceed, part 2
#if you found model drift, you should proceed; otherwise, end the process here
if new_f1 >= old_f1:
    print(
        "New F1 (%s) is better than or equal to the deployed F1 (%s), no drift detected -> exiting"
        % (new_f1, old_f1))
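##################Re-training / re-deployment (hedged sketch)
# The fragment above prints and (as its message says) presumably exits when no drift
# is detected, so the steps below would run only when drift is found (new_f1 < old_f1).
# The module and function names used here (training.train_model,
# deployment.store_model_into_pickle, reporting.score_model,
# diagnostics.model_predictions) are assumptions about how the rest of the pipeline
# might be organized, not taken from the source.
print("Model drift detected: new F1 (%s) is worse than deployed F1 (%s), re-training" % (new_f1, old_f1))

#re-train on the newly ingested data
training.train_model()

#re-deploy the new model, score file, and ingestion record
deployment.store_model_into_pickle()

#run reporting and diagnostics against the re-deployed model
reporting.score_model()
diagnostics.model_predictions()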
def main(): if(v): print "Loading passages..."; passages = loadPassages(f); # Initialize all the external data if(v): print "Loading all external data..."; tfidf_array, allWords = computeTFIDFArray(passages); unigrams, bigrams, trigrams = getGrams(); glove = Glove(g, delimiter=" ", header=False, quoting=csv.QUOTE_NONE); cooccurrences = cooccurrence() if(v): print "Running models..." # Initialize arrays to keep answers rand, nn, sent, tfidf, gram, syn, wdn, cc, an = [], [], [], [], [], [], [], [], []; # Loop through all the questions for passage in passages: for question in passage.questions: # Find relevant word targetword = re.findall("[\xe2\x80\x9c\u2019\"\']([A-Za-z\s]+)[\xe2\x80\x9c\u2019\"\']", question.text)[0].lower(); # Tokenize relevant sentence sentence = passage.text.split("\n")[int(re.findall("[0-9]+", question.text)[0]) - 1]; sentence = re.split("[^A-Za-z0-9]", sentence); sentence = filter(lambda x: len(x) > 0, sentence); sentence = map(lambda x: x.strip().lower(), sentence); # Get correct answer correctAnswer = question.answers[question.correctAnswer]; # Get answers randAnswer = randomModel(question.answers); nnAnswer = nearestNeighborModel(targetword, question.answers, glove, threshold=.48); sentAnswer = sentenceModel(sentence, question.answers, glove, threshold=.44); tfidfAnswer = tfidfModel(sentence, question.answers, tfidf_array, allWords, glove, threshold=.44); gramAnswer = gramModel(sentence, question.answers, targetword, unigrams, bigrams, trigrams, glove, threshold=.42); wdnvec, wdnAnswer = wordnetModel(targetword, sentence, question.answers, glove, threshold=.46) synAnswer = synonymModel(targetword, wdnvec, sentence, question.answers, bigrams, trigrams, glove, threshold=.34) ccAnswer = cooccurrenceModel(targetword, sentence, question.answers,cooccurrences, glove) anAnswer = analogyModel(targetword, sentence, question.answers, cooccurrences, glove) # Guess the word if we can answer it rand.append( (randAnswer, correctAnswer) ); nn.append( (nnAnswer, correctAnswer) ); sent.append( (sentAnswer, correctAnswer) ); tfidf.append( (tfidfAnswer, correctAnswer) ); gram.append( (gramAnswer, correctAnswer) ); wdn.append( (wdnAnswer, correctAnswer) ) syn.append( (synAnswer, correctAnswer) ) cc.append( (ccAnswer, correctAnswer) ) an.append( (anAnswer, correctAnswer) ) score_model(rand, verbose=True, modelname="Random Model"); score_model(nn, verbose=True, modelname="Nearest Neighbor Model"); score_model(sent, verbose=True, modelname="Sentence-Based Model"); score_model(tfidf, verbose=True, modelname="TFIDF Model"); score_model(gram, verbose=True, modelname="Gram Model"); score_model(wdn, verbose=True, modelname="WordNet Model") score_model(syn, verbose=True, modelname="Synonym Model") score_model(cc, verbose=True, modelname="Cooccurrence Model") score_model(an, verbose=True, modelname="Analogy Model")
def score(question_dir="../data/cayman_all_training.txt", glove_file="../data/glove_vectors/glove.6B.300d.txt", ngram_path="../data/Holmes_Training_Data/norvig.txt", dev=True): print "Training N-Grams" # Load/Generate N-grams unigrams, bigrams, cgrams = getGrams(path=ngram_path) print "Loading Questions" # Load questions questions = loadQuestions(question_dir) # Holds questions to be evaluated eval_qs = None if dev: # Split into train/dev split = len(questions) - len(questions) / 10 inform("Splitting Data: " + str(split) + " questions in training and " + str(len(questions) - split) + " in dev...") train_questions, eval_qs = questions[:split], questions[split:] else: eval_qs = questions print "Loading Glove" # Loads Glove vectors glove = Glove(glove_file, delimiter=" ", header=False, quoting=csv.QUOTE_NONE, v=False) # For every VSM model for name, model in vsm_models: # We get the model's score print "Scoring ", name answer_guess_pairs = [] for question in eval_qs: guess = None # Weighted VSM has an extra parameter if name == "Weighted VSM": guess = model(glove, question, unigrams)[0] else: guess = model(glove, question)[0] # Get the correct answer answer = question.getCorrectWord() # Add to tuple GOLD and guessed answers answer_guess_pairs.append((guess, answer)) print "\n\n" scoring.score_model(answer_guess_pairs, verbose=True, modelname=name) # Now score Language models # For every Language model for name, model in language_models: # Do the same thing as before print "Scoring ", name answer_guess_pairs = [] # For every question for question in eval_qs: # Generate guess from model guess = model(unigrams, bigrams, question)[0] # Find GOLD answer (correct answer) answer = question.getCorrectWord() # Add tuple for scoring answer_guess_pairs.append((guess, answer)) print "\n\n" scoring.score_model(answer_guess_pairs, verbose=True, modelname=name)
def main():
    if (v): print "Loading passages..."
    passages = loadPassages(f)

    # Initialize all the external data
    if (v): print "Loading all external data..."
    tfidf_array, allWords = computeTFIDFArray(passages)
    unigrams, bigrams, trigrams = getGrams()
    glove = Glove(g, delimiter=" ", header=False, quoting=csv.QUOTE_NONE)
    cooccurrences = cooccurrence()

    if (v): print "Running models..."
    # Initialize arrays to keep answers
    rand, nn, sent, tfidf, gram, syn, wdn, cc, an = [], [], [], [], [], [], [], [], []

    # Loop through all the questions
    for passage in passages:
        for question in passage.questions:
            # Find relevant word
            targetword = re.findall(
                "[\xe2\x80\x9c\u2019\"\']([A-Za-z\s]+)[\xe2\x80\x9c\u2019\"\']",
                question.text)[0].lower()

            # Tokenize relevant sentence
            sentence = passage.text.split("\n")[
                int(re.findall("[0-9]+", question.text)[0]) - 1]
            sentence = re.split("[^A-Za-z0-9]", sentence)
            sentence = filter(lambda x: len(x) > 0, sentence)
            sentence = map(lambda x: x.strip().lower(), sentence)

            # Get correct answer
            correctAnswer = question.answers[question.correctAnswer]

            # Get answers
            randAnswer = randomModel(question.answers)
            nnAnswer = nearestNeighborModel(targetword, question.answers, glove, threshold=.48)
            sentAnswer = sentenceModel(sentence, question.answers, glove, threshold=.44)
            tfidfAnswer = tfidfModel(sentence, question.answers, tfidf_array, allWords, glove, threshold=.44)
            gramAnswer = gramModel(sentence, question.answers, targetword, unigrams, bigrams, trigrams, glove, threshold=.42)
            wdnvec, wdnAnswer = wordnetModel(targetword, sentence, question.answers, glove, threshold=.46)
            synAnswer = synonymModel(targetword, wdnvec, sentence, question.answers, bigrams, trigrams, glove, threshold=.34)
            ccAnswer = cooccurrenceModel(targetword, sentence, question.answers, cooccurrences, glove)
            anAnswer = analogyModel(targetword, sentence, question.answers, cooccurrences, glove)

            # Record each model's (guess, correct answer) pair
            rand.append((randAnswer, correctAnswer))
            nn.append((nnAnswer, correctAnswer))
            sent.append((sentAnswer, correctAnswer))
            tfidf.append((tfidfAnswer, correctAnswer))
            gram.append((gramAnswer, correctAnswer))
            wdn.append((wdnAnswer, correctAnswer))
            syn.append((synAnswer, correctAnswer))
            cc.append((ccAnswer, correctAnswer))
            an.append((anAnswer, correctAnswer))

    score_model(rand, verbose=True, modelname="Random Model")
    score_model(nn, verbose=True, modelname="Nearest Neighbor Model")
    score_model(sent, verbose=True, modelname="Sentence-Based Model")
    score_model(tfidf, verbose=True, modelname="TFIDF Model")
    score_model(gram, verbose=True, modelname="Gram Model")
    score_model(wdn, verbose=True, modelname="WordNet Model")
    score_model(syn, verbose=True, modelname="Synonym Model")
    score_model(cc, verbose=True, modelname="Cooccurrence Model")
    score_model(an, verbose=True, modelname="Analogy Model")
def score(question_dir = "../data/cayman_all_training.txt", glove_file="../data/glove_vectors/glove.6B.300d.txt", ngram_path="../data/Holmes_Training_Data/norvig.txt", dev=True): print "Training N-Grams" # Load/Generate N-grams unigrams, bigrams, cgrams = getGrams(path=ngram_path) print "Loading Questions" # Load questions questions = loadQuestions(question_dir) # Holds questions to be evaluated eval_qs = None if dev: # Split into train/dev split = len(questions) - len(questions)/10; inform("Splitting Data: " + str(split) + " questions in training and " + str(len(questions) - split) + " in dev..."); train_questions, eval_qs = questions[:split], questions[split:]; else: eval_qs = questions print "Loading Glove" # Loads Glove vectors glove = Glove(glove_file, delimiter=" ", header=False, quoting=csv.QUOTE_NONE, v=False) # For every VSM model for name, model in vsm_models: # We get the model's score print "Scoring ", name answer_guess_pairs = [] for question in eval_qs: guess = None # Weighted VSM has an extra parameter if name == "Weighted VSM": guess = model(glove, question, unigrams)[0] else: guess = model(glove, question)[0] # Get the correct answer answer = question.getCorrectWord() # Add to tuple GOLD and guessed answers answer_guess_pairs.append((guess, answer)) print "\n\n" scoring.score_model(answer_guess_pairs, verbose=True, modelname=name) # Now score Language models # For every Language model for name, model in language_models: # Do the same thing as before print "Scoring ", name answer_guess_pairs = [] # For every question for question in eval_qs: # Generate guess from model guess = model(unigrams, bigrams, question)[0] # Find GOLD answer (correct answer) answer = question.getCorrectWord() # Add tuple for scoring answer_guess_pairs.append((guess, answer)) print "\n\n" scoring.score_model(answer_guess_pairs, verbose=True, modelname=name)
def main():
    if (v): print "Loading passages..."
    passages = loadPassages(f)

    # Initialize all the external data
    if (v): print "Loading all external data..."
    tfidf_array, allWords = computeTFIDFArray(passages)
    unigrams, bigrams, trigrams = getGrams()
    glove = Glove(g, delimiter=" ", header=False, quoting=csv.QUOTE_NONE)
    cooccurrences = cooccurrence()

    if (v): print "Running models..."
    # Initialize arrays to keep answers
    rand, nn, sent, tfidf, gram, syn, wdn, cc, an = [], [], [], [], [], [], [], [], []

    # Loop through all the questions
    for passage in passages:
        for question in passage.questions:
            # Find relevant word
            targetword = re.findall(
                "[\xe2\x80\x9c\u2019\"\']([A-Za-z\s]+)[\xe2\x80\x9c\u2019\"\']",
                question.text)[0].lower()

            # Tokenize relevant sentence
            sentence = passage.text.split("\n")[
                int(re.findall("[0-9]+", question.text)[0]) - 1]
            sentence = re.split("[^A-Za-z0-9]", sentence)
            sentence = filter(lambda x: len(x) > 0, sentence)
            sentence = map(lambda x: x.strip().lower(), sentence)

            # Get correct answer
            correctAnswer = question.answers[question.correctAnswer]

            # Get answers
            randAnswer = randomModel(question.answers)
            nnAnswer = nearestNeighborModel(targetword, question.answers, glove)
            sentAnswer = sentenceModel(sentence, question.answers, glove)
            tfidfAnswer = tfidfModel(sentence, question.answers, tfidf_array, allWords, glove)
            gramAnswer = gramModel(sentence, question.answers, targetword, unigrams, bigrams, trigrams, glove)
            synAnswer = synonymModel(targetword, sentence, question.answers, bigrams, trigrams, glove)
            wdnAnswer = wordnetModel(targetword, sentence, question.answers, glove, threshold=0.3)
            ccAnswer = cooccurrenceModel(targetword, sentence, question.answers, cooccurrences, glove)
            anAnswer = analogyModel(targetword, sentence, question.answers, cooccurrences, glove)

            # Record each model's (guess, correct answer) pair
            rand.append((randAnswer, correctAnswer))
            nn.append((nnAnswer, correctAnswer))
            sent.append((sentAnswer, correctAnswer))
            tfidf.append((tfidfAnswer, correctAnswer))
            gram.append((gramAnswer, correctAnswer))
            syn.append((synAnswer, correctAnswer))
            wdn.append((wdnAnswer, correctAnswer))
            cc.append((ccAnswer, correctAnswer))
            an.append((anAnswer, correctAnswer))

    print "NN: ", percentWrong(nn)
    print "Sent: ", percentWrong(sent)
    print "gram: ", percentWrong(gram)
    print "tfidf: ", percentWrong(tfidf)
    print "syn: ", percentWrong(syn)
    print "wdn: ", percentWrong(wdn)
    print "cc: ", percentWrong(cc)
    print "an: ", percentWrong(an)

    # names = ["NN", "sent", "gram", "tfidf", "syn", "wdn", "cc", "an"]
    # for i, m1 in enumerate(zip(names, [nn, sent, gram, tfidf, syn, wdn, cc, an])):
    #     for j, m2 in enumerate(zip(names, [nn, sent, gram, tfidf, syn, wdn, cc, an])):
    #         if i > j:
    #             print m1[0], m2[0], percentWrong(combineModels(m1[1], m2[1])), len(combineModels(m1[1], m2[1]))

    score_model(rand, verbose=True, modelname="Random Model")
    score_model(nn, verbose=True, modelname="Nearest Neighbor Model")
    score_model(sent, verbose=True, modelname="Sentence-Based Model")
    score_model(tfidf, verbose=True, modelname="TFIDF Model")
    score_model(gram, verbose=True, modelname="Gram Model")
    score_model(syn, verbose=True, modelname="Synonym Model")
    score_model(wdn, verbose=True, modelname="WordNet Model")
    score_model(cc, verbose=True, modelname="Cooccurrence Model")
    score_model(an, verbose=True, modelname="Analogy Model")