def get_all_test_documents(self):
     """Collect every document stored one directory level below ``self.data_path``.

     Each subdirectory name that ``dataset_handler`` reports for
     ``self.data_path`` is visited in turn, and every file name found in it
     is loaded through ``dataset_handler.get_document_as_string`` using
     ``self.encoding``.

     Returns:
         dict: file name -> loaded document. Keys are the bare file names
         (no subdirectory component), so when two subdirectories contain a
         file with the same name, the one visited later replaces the
         earlier entry.
     """
     root = self.data_path
     handler = dataset_handler
     return {
         name: handler.get_document_as_string(
             os.path.join(folder, name), self.encoding
         )
         # Folders are produced lazily so each one is listed and read
         # before the next folder path is built.
         for folder in (
             os.path.join(root, child)
             for child in handler.get_all_subdirectory_names(root)
         )
         for name in handler.get_names_of_files_in_directory(folder)
     }
 def get_training_data_file_string(self, file_name):
     """Load ``file_name`` from ``self.dataset_files_directory``.

     Args:
         file_name: Name of the file, joined onto
             ``self.dataset_files_directory`` to form the full path.

     Returns:
         Whatever ``dataset_handler.get_document_as_string`` produces for
         that path when called with ``encoding=self.encoding`` (presumably
         the file's text; confirm against the helper).
     """
     return dataset_handler.get_document_as_string(
         os.path.join(self.dataset_files_directory, file_name),
         encoding=self.encoding,
     )
 def get_document_in_test_set(self, category, document_id):
     """Load a single document identified by category and document id.

     Args:
         category: Passed to the private ``__get_category_path`` helper to
             resolve the directory to look in.
         document_id: Joined onto the category path to form the document's
             full path (presumably the document's file name; confirm
             against callers).

     Returns:
         Whatever ``dataset_handler.get_document_as_string`` produces for
         the resolved path, read with ``self.encoding``.
     """
     target = os.path.join(self.__get_category_path(category), document_id)
     return dataset_handler.get_document_as_string(target, self.encoding)