def runTagAffinity(numQuestions):
    """Average the recall scores of the tag-term affinity model on fold 0.

    Loads users, tags and questions through ``ld``, builds the tag-term
    affinity scores from the first element of the first cross-validation
    fold (``folds[0][0]``) and scores every question in the second element
    (``folds[0][1]``) with ``getRecallScores``.

    Args:
        numQuestions: forwarded unchanged to ``ld.loadQuestions``.

    Returns:
        A ``(avg5, avg10)`` tuple holding the means, over all scored
        questions, of the two values returned by ``getRecallScores``
        (called ``recall5`` and ``recall10`` here). ``(0.0, 0.0)`` is
        returned when there are no questions to score, instead of raising
        ``ZeroDivisionError``.
    """
    ld.loadUsers()
    ld.loadTags()
    ld.loadQuestions(numQuestions, True)
    folds = ld.getCVFolds()
    train_questions, test_questions = folds[0][0], folds[0][1]

    # NOTE: earlier revisions could load precomputed results from the JSON
    # files 'ttas-50000.txt' and 'tcount-50000.txt' instead of recomputing.
    (ttas, finalTagCounts) = getTagTermAffinityScores(train_questions)

    (sum5, sum10) = (0.0, 0.0)
    counter = 0
    # The context manager guarantees the bodies file is closed even if
    # scoring raises part-way through the fold.
    with codecs.open(posts_body_file, 'r', 'utf-8') as posts_bodies:
        for q in test_questions.values():
            # Each question records where its body line starts in the file
            # (presumably a byte offset -- confirm against the loader).
            posts_bodies.seek(q.bodyByte)
            body = posts_bodies.readline()
            (recall5, recall10) = getRecallScores(
                body, q.tags, ttas, finalTagCounts)
            sum5 += recall5
            sum10 += recall10
            counter += 1
            if counter % 100 == 0:
                # Parenthesised single argument prints identically under
                # Python 2's print statement and Python 3's print function.
                print('Done %d' % counter)

    if counter == 0:
        # Nothing was scored; avoid dividing by zero.
        return (0.0, 0.0)
    return (sum5 / counter, sum10 / counter)
r10synpop_avg) print '(%f, %f, %f, %f): r5 = %s, r10 = %s' % ( alpha, beta, gamma, delta, r5_tuple, r10_tuple) if outfile: outfile.write( '%f,%f,%f,%f,%s,%s\n' % (alpha, beta, gamma, delta, r5_tuple, r10_tuple)) return bestParams5, best_r5_avg, bestParams10, best_r10_avg ## Comment out this entire block if not running from Python shell ld.loadData(True) # This function must be run. Be careful if this is commented out. setQuestionModelModifications(ld.questions) folds = ld.getCVFolds() print 'Generating word vectors' frequentWords, wordToIndex = wordvectors.getFrequentWords(ld.questions) wordVecs = wordvectors.getWordVectors(ld.questions, wordToIndex) ## End block counter = 0 recall_test_scores = [0.0, 0.0] for fold in folds[0:5]: resetModels() counter += 1 print 'Starting Fold %d' % counter trainQuestions = fold[0] print 'Fold size %d' % len(fold[0]) comTagCombineModelTrain(trainQuestions) print 'Train complete. Beginning test.'
if r10_avg > best_r10_avg: best_r10_avg = r10_avg updateParameters(alpha, beta, gamma, delta, bestParams10) r5_tuple = (r5_avg, r5pop_avg, r5syn_avg, r5synpop_avg) r10_tuple = (r10_avg, r10pop_avg, r10syn_avg, r10synpop_avg) print '(%f, %f, %f, %f): r5 = %s, r10 = %s' % (alpha, beta, gamma, delta, r5_tuple, r10_tuple) if outfile: outfile.write('%f,%f,%f,%f,%s,%s\n' % (alpha, beta, gamma, delta, r5_tuple, r10_tuple)) return bestParams5, best_r5_avg, bestParams10, best_r10_avg ## Comment out this entire block if not running from Python shell ld.loadData(True) # This function must be run. Be careful if this is commented out. setQuestionModelModifications(ld.questions) folds = ld.getCVFolds() print 'Generating word vectors' frequentWords, wordToIndex = wordvectors.getFrequentWords(ld.questions) wordVecs = wordvectors.getWordVectors(ld.questions, wordToIndex) ## End block counter = 0 recall_test_scores = [0.0, 0.0] for fold in folds[0:5]: resetModels() counter += 1 print 'Starting Fold %d' % counter trainQuestions = fold[0] print 'Fold size %d' % len(fold[0]) comTagCombineModelTrain(trainQuestions) print 'Train complete. Beginning test.'