            self.directory_results[resources] = {
                'mean_reciprocal_rank': mean_reciprocal_rank(directory_results_mrr),
                'mean_average_precision': mean_average_precision(directory_results_mrr),
            }
            all_results_mrr += directory_results_mrr

        return self.inner_results, self.directory_results, {
            "mean_reciprocal_rank": mean_reciprocal_rank(all_results_mrr),
            "mean_average_precision": mean_average_precision(all_results_mrr)
        }
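

# For reference, a minimal sketch of the metrics used above, assuming each
# entry of the rank list is the 1-based rank of the single relevant article
# for a query (0 when it was not retrieved at all). The project's real
# mean_reciprocal_rank / mean_average_precision are defined elsewhere. With
# exactly one relevant document per query, average precision reduces to the
# reciprocal rank, which is why the same rank list can feed both metrics.
def _mean_reciprocal_rank_sketch(ranks):
    # MRR = (1 / |Q|) * sum over queries of 1 / rank, counting misses as 0.
    return sum(1.0 / rank for rank in ranks if rank > 0) / len(ranks)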


final_evaluator = FinalEvaluator("output/results.data")
inner_results, directory_results, outer_results = final_evaluator.final_evaluate()
print_model_into_file(inner_results, "output/basic_model/0_inner_results.json")
print_model_into_file(directory_results,
                      "output/basic_model/1_directory_results.json")
print_model_into_file(outer_results, "output/basic_model/2_outer_results.json")

final_evaluator = FinalEvaluator("output/results_bm25.data")
inner_results, directory_results, outer_results = final_evaluator.final_evaluate()
print_model_into_file(inner_results, "output/bm25_model/0_inner_results.json")
print_model_into_file(directory_results,
                      "output/bm25_model/1_directory_results.json")
print_model_into_file(outer_results, "output/bm25_model/2_outer_results.json")
import json
import os
from datetime import datetime

from bson import json_util  # json_util assumed to come from pymongo's bson

# Assumed module path for ModelQueryBM25; adjust to where the class lives.
from model_query_bm25 import ModelQueryBM25
from utils import print_model_into_pickle_file, print_model_into_file


class EvaluatorBM25:
    def __init__(self):
        self.modelQuery = ModelQueryBM25(modelfile="model_files/output_bm25.data")
        self.results = {}

    def evaluate(self, path):
        # Walk the news corpus and run a BM25 query for every article title.
        for root, dirs, files in os.walk(path):
            self.results[root] = {}

            # Skip the top-level directory itself (note: hardcoded to the
            # default input path).
            if root != 'news_resources/':
                print('files in ' + root + ': ' + ', '.join(files))

                for file in files:
                    if file.endswith('.json'):
                        fullpath = os.path.join(root, file)
                        # Load the article; json_util's hook restores
                        # BSON-specific types such as dates and ObjectIds.
                        with open(fullpath) as f:
                            article = json.load(
                                f, object_hook=json_util.object_hook)
                        # Rank the corpus against the article's own title and
                        # keep the ranking next to the source article.
                        results = self.modelQuery.search_for_query(
                            article['title'])
                        self.results[root][article['article_id']] = {
                            'results': results,
                            'article': article,
                        }
        return self.results
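
# Shape of the structure returned above (hypothetical illustration; the exact
# payload of 'results' depends on ModelQueryBM25.search_for_query):
#
# {
#     'news_resources/<topic>': {
#         '<article_id>': {
#             'results': <ranked matches for the article title>,
#             'article': <the original article JSON>,
#         },
#         ...
#     },
#     ...
# }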


start = datetime.now()
evaluator = EvaluatorBM25()
results = evaluator.evaluate("news_resources/")
print_model_into_pickle_file(results, "output/results_bm25.data")
end = datetime.now()
print_model_into_file(
    {
        "time_start": start,
        "time_end": end,
        "diff": str(end - start)
    }, "output/timings/evaluation_bm25.json")
                        self.json_files[root])

                self.total_statistics['total_number_of_documents'] += len(
                    self.json_files[root])
                self.total_statistics[
                    'total_avarage_query_length'] += self.statistics[root][
                        'avarage_query_length']
                self.total_statistics[
                    'total_avarage_text_length'] += self.statistics[root][
                        'avarage_text_length']

        # Macro-average: the mean of the per-directory averages, not a
        # per-document average over the whole corpus.
        self.total_statistics['total_avarage_query_length'] /= len(
            self.json_files)
        self.total_statistics['total_avarage_text_length'] /= len(
            self.json_files)

        return self.json_files, self.statistics, self.total_statistics
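

# analyze() returns, in order (shape inferred from the accumulation above and
# possibly incomplete):
#   json_files        -- directory -> the JSON documents found there
#   statistics        -- directory -> per-directory stats such as
#                        'avarage_query_length' / 'avarage_text_length'
#   total_statistics  -- corpus-wide totals plus the macro-averages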


INPUT_DIRECTORY = "news_resources/"
OUTPUT_FILEPATH = "output/"

datasetAnalyzer = DatasetAnalyzer()
json_files, statistics, total_statistics = datasetAnalyzer.analyze(
    INPUT_DIRECTORY)
print_model_into_file(statistics,
                      OUTPUT_FILEPATH + "dataset_statistics/statistics.json")
print_model_into_file(
    total_statistics,
    OUTPUT_FILEPATH + "dataset_statistics/total_statistics.json")
    def append_inverse_document_frequency(self):
        # Re-weight raw term counts by inverse document frequency:
        # idf(t) = log(N / n_t), where N is the number of documents and n_t
        # the number of documents containing term t. A term occurring in
        # every document gets idf = log(1) = 0 and is effectively zeroed.
        docs, words = self.bag_of_words.shape
        for i in range(words):
            # n_t: documents with a non-zero count for term i.
            nw = np.count_nonzero(self.bag_of_words[:, i])
            self.bag_of_words[:, i] *= np.log(docs / nw)
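
    def append_inverse_document_frequency_vectorized(self):
        # Equivalent one-shot sketch of the same weighting, assuming
        # self.bag_of_words is a dense NumPy count matrix (as above) and
        # every vocabulary term occurs in at least one document. Shown only
        # for illustration; the driver below does not call it.
        docs = self.bag_of_words.shape[0]
        document_frequency = np.count_nonzero(self.bag_of_words, axis=0)
        self.bag_of_words *= np.log(docs / document_frequency)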


start = datetime.now()
indexer = Indexer()
indexer.fill_terms("news_resources/")
indexer.fill_bags_of_words()
indexer.append_inverse_document_frequency()
print_model_into_pickle_file(
    {
        'matrix': scipy.sparse.csr_matrix(indexer.bag_of_words),
        'articles': indexer.article_list,
        'terms': indexer.terms
    }, "model_files/output.data")
end = datetime.now()
print_model_into_file(
    {
        "time_start": start,
        "time_end": end,
        "diff": str(end - start)
    }, "output/timings/index_creation.json")