Exemplo n.º 1
0
def eval():
    """Score the stored association matrix on the 100-game set.

    Reads a ``Matrix`` from ``MATRIX_FILE`` and passes it to
    ``scorer.evaluate_kbest_MeanReciprocalRank`` together with
    ``corpora.GAME_SET_100_FILE`` and ``EVAL_WORD_GAME100_FILE``.

    NOTE(review): the function name shadows the ``eval`` builtin inside this
    module; it is left unchanged because it is the block's public name.
    """
    print('Loading association matrix')
    association_matrix = Matrix()
    association_matrix.read_matrix_from_file(MATRIX_FILE)

    print('Evaluating')
    scorer.evaluate_kbest_MeanReciprocalRank(
        association_matrix,
        corpora.GAME_SET_100_FILE,
        EVAL_WORD_GAME100_FILE,
    )
def eval():
    """Evaluate the stored association matrix on the NLP4FUN gold set.

    Loads a ``Matrix_Dict`` from ``MATRIX_FILE``, prints its size, and runs
    ``scorer.evaluate_kbest_MeanReciprocalRank`` with
    ``corpora.NLP4FUN_GOLD_TSV_v2_ALL_FILE`` and
    ``EVAL_NLP4FUN_TEST_ALL_FILE``. The other evaluation runs listed in the
    comments below are disabled.

    NOTE(review): the function name shadows the ``eval`` builtin inside this
    module; it is left unchanged because it is the block's public name.
    """
    # Only the disabled blind gold-set run below references output_results;
    # the import is kept so that run can be re-enabled unchanged.
    from convert_xml import output_results

    print('Loading association matrix')
    # Alternative matrix backend / input file (disabled):
    #   assoc = Matrix_Split(MATRIX_FILE_SPLIT_DIR, LOWEST_SCORE)
    #   assoc.read_matrix_from_file(MATRIX_REVERSED_FILE)
    assoc = Matrix_Dict()
    assoc.read_matrix_from_file(MATRIX_FILE)
    row_count = assoc.size()
    print('Number of rows: {}'.format(row_count))
    print('Evaluating')

    # --- Disabled runs, kept for reference -------------------------------
    # 100 GAMES
    #   scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.GAME_SET_100_FILE, EVAL_WORD_GAME100_FILE)
    # DEV SET
    #   scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.NLP4FUN_DEV_TSV_v2_ALL_FILE, EVAL_NLP4FUN_DEV_ALL_FILE)
    #   scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.NLP4FUN_DEV_TSV_v2_tv_FILE, EVAL_NLP4FUN_DEV_TV_FILE)
    #   scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.NLP4FUN_DEV_TSV_v2_bg_FILE, EVAL_NLP4FUN_DEV_BG_FILE)
    # GOLD SET (BLIND)
    #   scorer.batch_solver(matrix, corpora.NLP4FUN_TEST_TSV_v2_ALL_FILE, TEST_RESULTS_TSV_1, extra_search=False)
    #   scorer.batch_solver(matrix, corpora.NLP4FUN_TEST_TSV_v2_ALL_FILE, TEST_RESULTS_TSV_2, extra_search=True)
    #   output_results(corpora.NLP4FUN_TEST_XML_v2_FILE, TEST_RESULTS_TSV_1, TEST_RESULTS_SUBMIT_1)
    #   output_results(corpora.NLP4FUN_TEST_XML_v2_FILE, TEST_RESULTS_TSV_2, TEST_RESULTS_SUBMIT_2)
    # GOLD SET (per-category)
    #   scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.NLP4FUN_GOLD_TSV_v2_tv_FILE, EVAL_NLP4FUN_TEST_TV_FILE)
    #   scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.NLP4FUN_GOLD_TSV_v2_bg_FILE, EVAL_NLP4FUN_TEST_BG_FILE)
    # ----------------------------------------------------------------------

    # GOLD SET (all games): the only run currently enabled.
    scorer.evaluate_kbest_MeanReciprocalRank(
        assoc,
        corpora.NLP4FUN_GOLD_TSV_v2_ALL_FILE,
        EVAL_NLP4FUN_TEST_ALL_FILE,
    )
def correlation_score_match():
    """Run the score/match correlation over the dev+gold game file.

    Loads a ``Matrix_Dict`` from ``MATRIX_FILE`` and calls
    ``scorer.compute_correlation_score_match`` with
    ``corpora.NLP4FUN_DEV_GOLD_TSV_ALL_FILE`` as input and two file paths
    under ``OUTPUT_DIR`` (one named for matched clues, one for guessed
    solutions).

    The previous function-local ``from convert_xml import output_results``
    was never used here and has been dropped.
    """
    print('Loading association matrix')
    matrix = Matrix_Dict()
    matrix.read_matrix_from_file(MATRIX_FILE)

    print('Computing Correlation')
    input_file = corpora.NLP4FUN_DEV_GOLD_TSV_ALL_FILE
    # OUTPUT_DIR is joined by plain concatenation, as elsewhere in this file.
    output_file_clues_matched = (
        OUTPUT_DIR + 'nlp4fun_dev+test_all_scores_clues_matched.txt')
    output_file_solutions_matched = (
        OUTPUT_DIR + 'nlp4fun_dev+test_all_scores_solutions_guessed.txt')
    scorer.compute_correlation_score_match(
        matrix,
        input_file,
        output_file_clues_matched,
        output_file_solutions_matched,
    )
def split_matrix():
    """Load the association matrix and split it into ``MATRIX_FILE_SPLIT_DIR``.

    Reads a ``Matrix_Dict`` from ``MATRIX_FILE``, prints its minimum
    association score and its size, then calls ``split_matrix_dict`` with the
    target directory.
    """
    print('Loading association matrix')
    full_matrix = Matrix_Dict()
    full_matrix.read_matrix_from_file(MATRIX_FILE)

    lowest_score = full_matrix.get_min_association_score()
    print('Lowest score: {}'.format(lowest_score))

    # The progress message reports matrix.size() as the number of files.
    file_count = full_matrix.size()
    print('Splitting matrix in {} files in {}'.format(
        file_count, MATRIX_FILE_SPLIT_DIR))
    full_matrix.split_matrix_dict(MATRIX_FILE_SPLIT_DIR)
Exemplo n.º 5
0
def build_and_eval():
    """Build the association matrix, save it, and evaluate it.

    Steps, in order:
      1. create ``OUTPUT_DIR`` via ``utility.make_dir``;
      2. load the lexicon from ``corpora.DIZ_POLI_WORD_SORTED_FILE`` (the same
         object is used as both the word lexicon and the solution lexicon);
      3. compute lexicon coverage on ``corpora.GAME_SET_100_FILE``;
      4. build a ``Matrix`` from ``corpora.DE_MAURO_POLIREMATICHE_INFO`` plus
         the polirematiche bigrams, compute association scores, and write the
         result to ``MATRIX_FILE``;
      5. run ``scorer.evaluate_kbest_MeanReciprocalRank`` on the 100-game set.
    """
    utility.make_dir(OUTPUT_DIR)

    print('\nBuilding lexicon')
    # One lexicon object serves as both the word set and the solution set.
    solution_words = word_set = lexicon.loadLexiconFromFile(
        corpora.DIZ_POLI_WORD_SORTED_FILE)

    # Disabled alternative: lexicon extended with Paisa nouns and adjectives.
    #   poli_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    #   sost_lexicon = list(corpora.getSostantiviSetFromPaisa(min_freq=100, inflected=True))
    #   print('\nSize of sostantivi lex: {}'.format(len(sost_lexicon)))
    #   agg_lexicon = list(corpora.getAggettiviSetFromPaisa(min_freq=100, inflected=True))
    #   print('\nSize of agg lex: {}'.format(len(agg_lexicon)))
    #   lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)
    #   lex_solution_set =  set(sost_lexicon+agg_lexicon)
    #   #lex_solution_set = lex_set

    print('\nComputing lex coverage')
    scorer.computeCoverageOfGameWordLex(
        word_set,
        solution_words,
        corpora.GAME_SET_100_FILE,
        COVERAGE_WORD_GAME100_FILE,
    )

    print('\nBuilding association matrix')
    assoc = Matrix(word_set, solution_words)
    assoc.add_patterns_from_corpus(corpora.DE_MAURO_POLIREMATICHE_INFO)
    corpora.addBigramFromPolirematicheInMatrix(assoc, weight=1)
    # Disabled: corpora.addBigramFromCompunds(matrix, lex_set, min_len=4, weight=10)
    assoc.compute_association_scores()
    assoc.write_matrix_to_file(MATRIX_FILE)

    print('\nEval')
    scorer.evaluate_kbest_MeanReciprocalRank(
        assoc,
        corpora.GAME_SET_100_FILE,
        EVAL_WORD_GAME100_FILE,
    )
def print_row_column_sets():
    """Write the matrix's row and column sets to two files in ``OUTPUT_DIR``.

    Loads a ``Matrix_Dict`` from ``MATRIX_FILE`` and calls its
    ``print_row_column_sets`` method with ``set_row.txt`` and ``set_col.txt``
    paths under ``OUTPUT_DIR``.
    """
    print('Loading association matrix')
    rows_path = OUTPUT_DIR + 'set_row.txt'
    cols_path = OUTPUT_DIR + 'set_col.txt'

    loaded = Matrix_Dict()
    loaded.read_matrix_from_file(MATRIX_FILE)
    loaded.print_row_column_sets(rows_path, cols_path)
Exemplo n.º 7
0
def interactive_solver():
    """Load the association matrix and start ``scorer.interactive_solver``.

    The matrix is a ``Matrix`` read from ``MATRIX_FILE``.
    """
    print('Loading association matrix')
    association_matrix = Matrix()
    association_matrix.read_matrix_from_file(MATRIX_FILE)

    scorer.interactive_solver(association_matrix)
Exemplo n.º 8
0
        print('\n'.join(clues))
        text = input('\nProva a indovinare --> ')
        guess = text.strip().lower()
        if not guess:
            print('exitig')
            return
        if guess == solution:
            print('Indovinato!')
        else:
            print('Sbagliato. La soluzione era {}'.format(solution))
        scores_str = ', '.join(['{0:.1f}'.format(s) for s in scores])
        print('Scores: {}'.format(scores_str))
        print('Sum: {0:.1f}'.format(sum(scores)))
        text = input('\nVuoi continuare? (y/n)')
        if not text or text != 'y':
            print('exitig')
            return


if __name__ == '__main__':
    # Script entry point: load the reversed association matrix from disk,
    # report its size, and hand it to interactive_generator.
    from matrix_dict import Matrix_Dict
    import path
    # Directory for this model's files; the trailing slash lets file names be
    # appended by plain string concatenation.
    OUTPUT_DIR = path.GHIGLIOTTINA_BASE_FILE_PATH + "model_06_evalita_split/"
    MATRIX_REVERSED_FILE = OUTPUT_DIR + 'matrix_reversed.pkl'  #solution in keys
    print('Loading matrix')
    matrix = Matrix_Dict()
    matrix.read_matrix_from_file(MATRIX_REVERSED_FILE)
    print('Number of rows: {}'.format(matrix.size()))
    # Disabled diagnostic: print the minimum association score of the matrix.
    #unfound_pair_score = matrix.get_min_association_score()
    #print("Min association score: {0:.1f}".format(unfound_pair_score))
    # NOTE(review): interactive_generator is not imported in this block;
    # presumably it is defined earlier in this module -- confirm.
    interactive_generator(matrix)
def build_and_eval():
    """Build the multi-corpus association matrix, save it, and evaluate it.

    Steps, in order:
      1. create ``OUTPUT_DIR`` via ``utility.make_dir``;
      2. load three lexicon files (polirematiche words, sostantivi,
         aggettivi); the full lexicon is their union, the solution lexicon is
         the union of the last two;
      3. write both lexicons to ``LEX_FREQ_FILE`` and
         ``SOLUTION_LEX_FREQ_FILE``;
      4. compute lexicon coverage on ``corpora.GAME_SET_100_FILE``;
      5. build a ``Matrix_Dict`` from the corpora listed below, add the
         polirematiche and compound bigrams, compute association scores, and
         write the matrix to ``MATRIX_FILE``;
      6. run ``scorer.evaluate_kbest_MeanReciprocalRank`` on the 100-game set.
    """
    utility.make_dir(OUTPUT_DIR)

    print('Building lexicon')

    load_lexicon = lexicon.loadLexiconFromFile
    # Materialised as lists because two of them are reused for both unions.
    poli_words = list(load_lexicon(corpora.DIZ_POLI_WORD_SORTED_FILE))
    sost_words = list(
        load_lexicon(corpora.DIZIONARIO_SOSTANTIVI_AUGMENTED_PAISA_FILE))
    agg_words = list(
        load_lexicon(corpora.DIZIONARIO_AGGETTIVI_AUGMENTED_PAISA_FILE))
    all_words = set(poli_words + sost_words + agg_words)
    solution_words = set(sost_words + agg_words)

    # Disabled alternative: derive the noun/adjective lexicons from Paisa.
    #   poli_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    #   sost_lexicon = list(corpora.getSostantiviSetFromPaisa(min_freq=1000, inflected=True))
    #   print('\nSize of sostantivi lex: {}'.format(len(sost_lexicon)))
    #   agg_lexicon = list(corpora.getAggettiviSetFromPaisa(min_freq=1000, inflected=True))
    #   print('\nSize of agg lex: {}'.format(len(agg_lexicon)))
    #   lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)

    lexicon.printLexiconToFile(all_words, LEX_FREQ_FILE)
    lexicon.printLexiconToFile(solution_words, SOLUTION_LEX_FREQ_FILE)

    print('Computing lex coverage')
    scorer.computeCoverageOfGameWordLex(
        all_words,
        solution_words,
        corpora.GAME_SET_100_FILE,
        COVERAGE_WORD_GAME100_FILE,
    )

    print('Building association matrix')
    assoc = Matrix_Dict(all_words, solution_words)
    add_patterns = assoc.add_patterns_from_corpus
    add_patterns(corpora.PAISA_RAW_INFO)
    add_patterns(corpora.DE_MAURO_POLIREMATICHE_INFO, weight=DE_MAURO_WEIGHT)
    add_patterns(corpora.PROVERBI_INFO, weight=PROVERBI_WEIGHT)
    add_patterns(corpora.ITWAC_RAW_INFO, weight=1)
    add_patterns(corpora.WIKI_IT_TITLES_INFO, weight=WIKI_IT_WEIGHT)
    # Disabled: matrix.add_patterns_from_corpus(corpora.WIKI_IT_TEXT_INFO, weight=1)
    corpora.addBigramFromPolirematicheInMatrix(assoc, DE_MAURO_WEIGHT)
    corpora.addBigramFromCompunds(
        assoc, all_words, min_len=4, weight=COMPOUNDS_WEIGHT)
    assoc.compute_association_scores()
    assoc.write_matrix_to_file(MATRIX_FILE)

    print('Eval')
    scorer.evaluate_kbest_MeanReciprocalRank(
        assoc,
        corpora.GAME_SET_100_FILE,
        EVAL_WORD_GAME100_FILE,
    )
def reverse_matrix():
    """Write the reversed form of the stored matrix to ``MATRIX_REVERSED_FILE``.

    Loads a ``Matrix_Dict`` from ``MATRIX_FILE``, calls its ``reverse_matrix``
    method, and writes the returned matrix to disk.
    """
    source_matrix = Matrix_Dict()
    source_matrix.read_matrix_from_file(MATRIX_FILE)

    flipped = source_matrix.reverse_matrix()
    flipped.write_matrix_to_file(MATRIX_REVERSED_FILE)
Exemplo n.º 11
0
def generate_games():
    """Load the reversed matrix and start the interactive game generator.

    Reads a ``Matrix_Dict`` from ``MATRIX_REVERSED_FILE``, prints its size,
    and passes it to ``game_generator.interactive_generator``.
    """
    # Imported first, before the matrix is loaded, as in the original order.
    import game_generator

    reversed_matrix = Matrix_Dict()
    reversed_matrix.read_matrix_from_file(MATRIX_REVERSED_FILE)

    row_count = reversed_matrix.size()
    print('Number of rows: {}'.format(row_count))
    game_generator.interactive_generator(reversed_matrix)