Exemplo n.º 1
0
def main(pos_tagged, document_key, pos_tag_key, language, dump_verbs, dump_tf_idf,
         dump_stdev, dump_popularity, dump_final, processes):
    """ Rank verbs by popularity, average TF-IDF and TF-IDF standard deviation.

        The three rankings are merged by ``harmonic_ranking`` into a final
        ranking. All four rankings and the lemma-to-token map are then
        serialized as indented JSON.

        :param pos_tagged: corpus handed to the lemma extraction, the TF-IDF
            computation and the popularity ranking
        :param document_key: forwarded to ``compute_tf_idf_matrix``
        :param pos_tag_key: forwarded to ``produce_lemma_tokens`` and
            ``PopularityRanking``
        :param language: forwarded to ``produce_lemma_tokens``
        :param dump_verbs: writable file object (with a ``name`` attribute)
            receiving the lemma-to-token map
        :param dump_tf_idf: writable file object receiving the TF-IDF ranking
        :param dump_stdev: writable file object receiving the standard
            deviation ranking
        :param dump_popularity: writable file object receiving the popularity
            ranking
        :param dump_final: writable file object receiving the combined ranking
        :param processes: forwarded to both ``find_ranking`` calls
            (presumably the number of worker processes; confirm in the
            ranking classes)
    """
    logger.info('Computing lemma-to-token map and TF-IDF matrix ...')
    # The two preparation steps are handed to parallel.execute as
    # consecutive (function, arguments) pairs.
    lemma_tokens, tf_idf_result = parallel.execute(
        2,
        produce_lemma_tokens, (pos_tagged, pos_tag_key, language),
        compute_tf_idf_matrix, (pos_tagged, document_key)
    )
    vectorizer, tf_idf_matrix = tf_idf_result

    logger.info('Scoring verbs by popularity ...')
    popularity_scorer = PopularityRanking(pos_tagged, pos_tag_key)
    pop_ranking = popularity_scorer.find_ranking(processes)

    logger.info('Scoring verbs by TF-IDF based metrics (average and standard deviation) ...')
    tf_idf_scorer = TFIDFRanking(vectorizer, lemma_tokens, tf_idf_matrix)
    tfidf_ranking, stdev_ranking = tf_idf_scorer.find_ranking(processes)

    logger.info('Producing combined final ranking ...')
    final_ranking = harmonic_ranking(pop_ranking, tfidf_ranking, stdev_ranking)

    # Each ranking is paired with its destination; the order here is the
    # order in which the files are written and listed in the log message.
    outputs = (
        (tfidf_ranking, dump_tf_idf),
        (stdev_ranking, dump_stdev),
        (pop_ranking, dump_popularity),
        (final_ranking, dump_final),
    )
    for ranking, handle in outputs:
        json.dump(ranking, handle, indent=2)
    ranking_names = [handle.name for _, handle in outputs]
    logger.info('Dumped all the rankings to %s' % ranking_names)

    # Values that JSON cannot encode natively are converted to lists
    # (presumably the token collections are sets; confirm in
    # produce_lemma_tokens).
    json.dump(lemma_tokens, dump_verbs, default=list, indent=2)
    logger.info("Dumped lemma-to-token map to '%s'" % dump_verbs.name)
Exemplo n.º 2
0
 def test_execute(self):
     """ Check that ``parallel.execute`` returns one result per
         (function, arguments) pair, in the order the pairs were given.

         ``self.function`` is scheduled once for each integer in 0..9 and
         the outcome is compared with calling it directly on the same
         inputs.
     """
     # Flat argument list: function, [arg], function, [arg], ...
     # Built with a plain loop so it needs neither the Python 2-only
     # builtin ``reduce`` nor repeated tuple concatenation.
     funcs = []
     for x in range(10):
         funcs.extend((self.function, [x]))

     actual = parallel.execute(0, *funcs)
     # An explicit list keeps the comparison valid on Python 3, where
     # ``map`` returns an iterator rather than a list.
     expected = [self.function(x) for x in range(10)]
     self.assertEqual(actual, expected)