def get_all_test_documents(self):
    """Load every document found one level below ``self.data_path``.

    Each subdirectory reported by
    ``dataset_handler.get_all_subdirectory_names`` is scanned with
    ``dataset_handler.get_names_of_files_in_directory``, and every file
    found is read through ``dataset_handler.get_document_as_string``
    using ``self.encoding``.

    Returns:
        A dict mapping each file name to whatever
        ``get_document_as_string`` returned for that file.

    Note:
        Keys are the bare file names, not paths. If two subdirectories
        contain a file with the same name, the one visited later
        replaces the earlier entry.
    """
    root = self.data_path
    documents = {}
    for folder_name in dataset_handler.get_all_subdirectory_names(root):
        folder = os.path.join(root, folder_name)
        # The generator is consumed in listing order, so later duplicates
        # overwrite earlier ones exactly as plain item assignment would.
        documents.update(
            (
                name,
                dataset_handler.get_document_as_string(
                    os.path.join(folder, name), self.encoding
                ),
            )
            for name in dataset_handler.get_names_of_files_in_directory(folder)
        )
    return documents
def get_training_data_file_string(self, file_name):
    """Read a file from ``self.dataset_files_directory``.

    Args:
        file_name: Name of the file, joined onto
            ``self.dataset_files_directory`` to form the path.

    Returns:
        Whatever ``dataset_handler.get_document_as_string`` returns for
        that path when called with ``encoding=self.encoding``.
    """
    target = os.path.join(self.dataset_files_directory, file_name)
    return dataset_handler.get_document_as_string(
        target,
        encoding=self.encoding,
    )
def get_document_in_test_set(self, category, document_id):
    """Read one document identified by category and document id.

    Args:
        category: Passed to the private ``__get_category_path`` helper
            to obtain the category's directory path.
        document_id: Joined onto the category path to form the
            document's path.

    Returns:
        Whatever ``dataset_handler.get_document_as_string`` returns for
        that path when called with ``self.encoding``.
    """
    location = os.path.join(self.__get_category_path(category), document_id)
    return dataset_handler.get_document_as_string(location, self.encoding)