Пример #1
0
def runTagAffinity(numQuestions):
  """Evaluate tag-term affinity scoring on the first cross-validation fold.

  Loads users, tags and questions through ``ld``, builds the affinity scores
  from ``folds[0][0]`` and then scores every question in ``folds[0][1]`` with
  ``getRecallScores``.

  Args:
    numQuestions: forwarded unchanged to ``ld.loadQuestions``.

  Returns:
    A 2-tuple with the mean of the first and of the second value returned by
    ``getRecallScores`` over the questions in ``folds[0][1]`` (presumably
    recall@5 and recall@10, going by the names -- confirm against
    ``getRecallScores``). ``(0.0, 0.0)`` is returned when that fold holds no
    questions, instead of raising ZeroDivisionError.
  """
  ld.loadUsers()
  ld.loadTags()
  ld.loadQuestions(numQuestions, True)
  folds = ld.getCVFolds()
  (ttas, finalTagCounts) = getTagTermAffinityScores(folds[0][0])
  # Alternative kept for reference: read ttas / finalTagCounts from JSON files
  # instead of recomputing them.
  # ttas_file = codecs.open('ttas-50000.txt', 'r', 'utf-8')
  # tc_file = codecs.open('tcount-50000.txt', 'r', 'utf-8')
  # ttas = json.load(ttas_file)
  # finalTagCounts = json.load(tc_file)
  # ttas_file.close()
  # tc_file.close()

  sum5 = sum10 = 0.0
  counter = 0
  # The `with` block closes the bodies file on normal exit and when scoring
  # raises; the handle used to be left open.
  with codecs.open(posts_body_file, 'r', 'utf-8') as posts_bodies:
    for qid, q in folds[0][1].items():
      # q.bodyByte is used as the offset of this question's body line.
      posts_bodies.seek(q.bodyByte)
      body = posts_bodies.readline()
      (recall5, recall10) = getRecallScores(body, q.tags, ttas, finalTagCounts)
      sum5 += recall5
      sum10 += recall10
      counter += 1
      if counter % 100 == 0:
        # Parenthesised single-argument print: same output as the Python 2
        # print statement.
        print('Done %d' % counter)
      # print 'Q #%d: %f %f' % (qid, recall5, recall10)

  if counter == 0:
    # Nothing was scored; avoid dividing by zero.
    return (0.0, 0.0)
  return (sum5 / counter, sum10 / counter)
Пример #2
0
def runTagAffinity(numQuestions):
    """Evaluate tag-term affinity scoring on the first cross-validation fold.

    Loads users, tags and questions through ``ld``, builds the affinity
    scores from ``folds[0][0]`` and then scores every question in
    ``folds[0][1]`` with ``getRecallScores``.

    Args:
        numQuestions: forwarded unchanged to ``ld.loadQuestions``.

    Returns:
        A 2-tuple with the mean of the first and of the second value
        returned by ``getRecallScores`` over the questions in
        ``folds[0][1]`` (presumably recall@5 and recall@10, going by the
        names -- confirm against ``getRecallScores``). ``(0.0, 0.0)`` is
        returned when that fold holds no questions, instead of raising
        ZeroDivisionError.
    """
    ld.loadUsers()
    ld.loadTags()
    ld.loadQuestions(numQuestions, True)
    folds = ld.getCVFolds()
    (ttas, finalTagCounts) = getTagTermAffinityScores(folds[0][0])
    # Alternative kept for reference: read ttas / finalTagCounts from JSON
    # files instead of recomputing them.
    # ttas_file = codecs.open('ttas-50000.txt', 'r', 'utf-8')
    # tc_file = codecs.open('tcount-50000.txt', 'r', 'utf-8')
    # ttas = json.load(ttas_file)
    # finalTagCounts = json.load(tc_file)
    # ttas_file.close()
    # tc_file.close()

    sum5 = sum10 = 0.0
    counter = 0
    # The `with` block closes the bodies file on normal exit and when
    # scoring raises; the handle used to be left open.
    with codecs.open(posts_body_file, 'r', 'utf-8') as posts_bodies:
        for qid, q in folds[0][1].items():
            # q.bodyByte is used as the offset of this question's body line.
            posts_bodies.seek(q.bodyByte)
            body = posts_bodies.readline()
            (recall5, recall10) = getRecallScores(body, q.tags, ttas,
                                                  finalTagCounts)
            sum5 += recall5
            sum10 += recall10
            counter += 1
            if counter % 100 == 0:
                # Parenthesised single-argument print: same output as the
                # Python 2 print statement.
                print('Done %d' % counter)
            # print 'Q #%d: %f %f' % (qid, recall5, recall10)

    if counter == 0:
        # Nothing was scored; avoid dividing by zero.
        return (0.0, 0.0)
    return (sum5 / counter, sum10 / counter)
Пример #3
0
                                 r10synpop_avg)
                    print '(%f, %f, %f, %f): r5 = %s, r10 = %s' % (
                        alpha, beta, gamma, delta, r5_tuple, r10_tuple)
                    if outfile:
                        outfile.write(
                            '%f,%f,%f,%f,%s,%s\n' %
                            (alpha, beta, gamma, delta, r5_tuple, r10_tuple))

    return bestParams5, best_r5_avg, bestParams10, best_r10_avg


# Module-level setup: these statements execute as soon as the file is run,
# and the fold loop below relies on the names they bind (`folds`).
## Comment out this entire block if not running from Python shell
ld.loadData(True)
# This function must be run. Be careful if this is commented out.
setQuestionModelModifications(ld.questions)
folds = ld.getCVFolds()
print 'Generating word vectors'
# wordToIndex from the first call is fed into the second call that builds
# the per-question word vectors.
frequentWords, wordToIndex = wordvectors.getFrequentWords(ld.questions)
wordVecs = wordvectors.getWordVectors(ld.questions, wordToIndex)
## End block

# `counter` is the 1-based fold number used in the progress messages.
counter = 0
# NOTE(review): initialised here but not updated in the part of the loop
# visible in this excerpt -- presumably filled in by the test phase.
recall_test_scores = [0.0, 0.0]
# Only the first five folds (at most) are processed.
for fold in folds[0:5]:
    # Models are reset before each fold is trained.
    resetModels()
    counter += 1
    print 'Starting Fold %d' % counter
    # fold[0] is used as the training questions for this fold.
    trainQuestions = fold[0]
    print 'Fold size %d' % len(fold[0])
    comTagCombineModelTrain(trainQuestions)
    print 'Train complete. Beginning test.'
Пример #4
0
          if r10_avg > best_r10_avg:
            best_r10_avg = r10_avg
            updateParameters(alpha, beta, gamma, delta, bestParams10)
          r5_tuple = (r5_avg, r5pop_avg, r5syn_avg, r5synpop_avg)
          r10_tuple = (r10_avg, r10pop_avg, r10syn_avg, r10synpop_avg)
          print '(%f, %f, %f, %f): r5 = %s, r10 = %s' % (alpha, beta, gamma, delta, r5_tuple, r10_tuple)
          if outfile:
            outfile.write('%f,%f,%f,%f,%s,%s\n' % (alpha, beta, gamma, delta, r5_tuple, r10_tuple))

  return bestParams5, best_r5_avg, bestParams10, best_r10_avg

# Module-level setup: these statements execute as soon as the file is run,
# and the fold loop below relies on the names they bind (`folds`).
## Comment out this entire block if not running from Python shell
ld.loadData(True)
# This function must be run. Be careful if this is commented out.
setQuestionModelModifications(ld.questions)
folds = ld.getCVFolds()
print 'Generating word vectors'
# wordToIndex from the first call is fed into the second call that builds
# the per-question word vectors.
frequentWords, wordToIndex = wordvectors.getFrequentWords(ld.questions)
wordVecs = wordvectors.getWordVectors(ld.questions, wordToIndex)
## End block

# `counter` is the 1-based fold number used in the progress messages.
counter = 0
# NOTE(review): initialised here but not updated in the part of the loop
# visible in this excerpt -- presumably filled in by the test phase.
recall_test_scores = [0.0, 0.0]
# Only the first five folds (at most) are processed.
for fold in folds[0:5]:
  # Models are reset before each fold is trained.
  resetModels()
  counter += 1
  print 'Starting Fold %d' % counter
  # fold[0] is used as the training questions for this fold.
  trainQuestions = fold[0]
  print 'Fold size %d' % len(fold[0])
  comTagCombineModelTrain(trainQuestions)
  print 'Train complete. Beginning test.'