Exemplo n.º 1
0
def main():
    """Train and validate one model per evaluator combination.

    The entries under the 'Where' key of
    ``question_type.classify_questions(1)`` are converted to ints; all but
    the last three become the training IDs and the last three the
    validation IDs.  For each entry of ``evaluator_combinations``,
    ``train`` is called with ``mlpy.Srda`` on the training data, the
    result is run on the validation IDs, and the answers are written to
    ``results/combination<N>.txt`` with ``N`` counting up from 1.
    """
    align.load_cache()

    # Materialise a real list: the IDs are sliced below, and slicing the
    # result of map() only works where map() returns a list (Python 2).
    where_ids = [int(q) for q in question_type.classify_questions(1)['Where']]
    trainIDs = where_ids[:-3]
    validationIDs = where_ids[-3:]

    # Only punc_loc is enabled at the moment.  Other evaluators that were
    # tried here and then disabled: seq_length, bag_of_words, novelty_bool,
    # pos_test, question_apposition, rewrite_apposition, vector_bag and
    # novelty_count.
    evaluator_combinations = [
        [punc_loc],
    ]

    for combination_id, evaluators in enumerate(evaluator_combinations, 1):
        y_train, x_train = question_learning_data(evaluators, trainIDs)
        trained = train(mlpy.Srda, y_train, x_train)
        results = run_question_predictions(evaluators, trained, validationIDs)
        writeAnswers(
            answerFile(results),
            'results/combination' + str(combination_id) + '.txt')

    # NOTE(review): the cache is loaded again right before it is saved;
    # kept as in the original -- confirm whether the reload is intentional.
    align.load_cache()
    align.save_cache()
Exemplo n.º 2
0
def main():
    """Train on a fixed set of question IDs and score two held-out IDs.

    Every evaluator combination listed below is trained (``train`` with
    ``mlpy.Srda``) on the hard-coded ID list, run on the validation IDs
    10025 and 10026, and its answers are written to
    ``results/combination<N>.txt`` where ``N`` starts at 9000 and grows by
    one per combination.
    """
    align.load_cache()

    # 'Where' questions, hard-coded here instead of being looked up through
    # question_type.classify_questions(1)['Where'].
    where_ids = [
        202, 211, 223, 226, 227, 243, 245, 249, 258, 266, 272, 283,
        304, 306, 317, 318, 356, 359, 368, 369, 373, 385, 393,
    ]
    # The whole list is used for training (an earlier variant held out the
    # last six IDs for validation instead).
    trainIDs = where_ids
    validationIDs = [10025, 10026]

    # vector_bag and seq_length are deliberately left out: the original
    # author's note says not to use them "because they're weird".
    evaluator_combinations = [
        [punc_loc, novelty_bool],
        [punc_loc],
        [novelty_bool],
    ]

    first_combination_id = 9000
    for offset, evaluators in enumerate(evaluator_combinations):
        y_train, x_train = question_learning_data(evaluators, trainIDs)
        model = train(mlpy.Srda, y_train, x_train)
        predictions = run_question_predictions(evaluators, model, validationIDs)
        out_path = 'results/combination%d.txt' % (first_combination_id + offset)
        writeAnswers(answerFile(predictions), out_path)

    # NOTE(review): the cache is loaded again right before it is saved;
    # kept exactly as before -- confirm whether the reload is intentional.
    align.load_cache()
    align.save_cache()
Exemplo n.º 3
0
def main():
    """Run the train/validate cycle for each enabled evaluator combination.

    Question IDs come from the 'Where' entry of
    ``question_type.classify_questions(1)``, converted to ints.  The last
    three IDs are held out for validation and the rest are used for
    training.  Each combination's answers are written to
    ``results/combination<N>.txt``, numbered from 1.
    """
    align.load_cache()

    # list(...) keeps the IDs sliceable even where map() returns an
    # iterator rather than a list (Python 3); on Python 2 it is a plain copy.
    question_ids = list(map(int, question_type.classify_questions(1)['Where']))
    held_out = 3
    trainIDs = question_ids[:-held_out]
    validationIDs = question_ids[-held_out:]

    # punc_loc is the only evaluator currently switched on.  Previously
    # tried and disabled: seq_length, bag_of_words, novelty_bool, pos_test,
    # question_apposition, rewrite_apposition, vector_bag, novelty_count.
    evaluator_combinations = [
        [punc_loc],
    ]

    evaluatorCombinationID = 1
    for evaluators in evaluator_combinations:
        y_train, x_train = question_learning_data(evaluators, trainIDs)
        trained = train(mlpy.Srda, y_train, x_train)
        results = run_question_predictions(evaluators, trained, validationIDs)
        output_path = 'results/combination' + str(evaluatorCombinationID) + '.txt'
        writeAnswers(answerFile(results), output_path)
        evaluatorCombinationID += 1

    # NOTE(review): the cache is loaded again right before it is saved;
    # kept as in the original -- confirm whether the reload is intentional.
    align.load_cache()
    align.save_cache()
Exemplo n.º 4
0
def question_candidates(q_id):
    '''Select some useful subset of the candidates for a particular question.

    The candidates cached for q_id are read from the file named by
    cache_file(q_id).  A candidate is kept when its element at index 3 is
    "NP" and align_question_distance(question, candidate) yields a first
    element below DIST_CUTOFF and a second element above SCORE_CUTOFF.

    The number of kept candidates is printed, and they are returned
    in a list.
    '''
    init.get_corpus(qNum=q_id)

    # Use a context manager so the cache file is closed as soon as the
    # chunks have been read, instead of leaking the handle.
    with open(cache_file(q_id)) as chunk_file:
        candidates = cache_chunkers.uncache_chunks(chunk_file)[q_id]

    selected = []
    for cand in candidates:
        if cand[3] != "NP":
            continue
        dist = align_question_distance(get_question(q_id), cand)
        if dist[0] < DIST_CUTOFF and dist[1] > SCORE_CUTOFF:
            selected.append(cand)

    align.save_cache()
    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print(len(selected))
    return selected
Exemplo n.º 5
0
def question_candidates(q_id):
    '''Select some useful subset of the candidates for a particular question.

    Reads the cached chunks for q_id, keeps the "NP" entries (element at
    index 3) whose align_question_distance result has its first element
    under DIST_CUTOFF and its second element over SCORE_CUTOFF, prints how
    many were kept, and returns them in a list.
    '''
    init.get_corpus(qNum=q_id)
    cache_path = cache_file(q_id)

    # The handle used to be opened inline and never closed; the with-block
    # releases it once the chunks are loaded.
    with open(cache_path) as handle:
        cached_chunks = cache_chunkers.uncache_chunks(handle)
        candidate = cached_chunks[q_id]

    new_l = []
    for chunk in candidate:
        if chunk[3] == "NP":
            first, second = align_question_distance(get_question(q_id), chunk)[:2]
            if first < DIST_CUTOFF and second > SCORE_CUTOFF:
                new_l.append(chunk)

    align.save_cache()
    # Parenthesised single argument: same output on Python 2 and Python 3.
    print(len(new_l))
    return new_l