def _tokenize_and_preprocess(self, file_path):
    """Tokenize and preprocess *file_path*; return the resulting file representation.

    When a preprocessed-token output directory is configured, the printable
    token string is also written there under a prefixed filename.
    """
    log.debug(f"Tokenizing {file_path}")
    representation = self._tokenizer.tokenize(file_path)
    log.debug(f"preprocessing {file_path}")
    representation.preprocess(self._preprocessor)
    out_dir = self._preprocessed_token_output_directory
    if out_dir:
        # Dump the preprocessed tokens for inspection, keyed by the source filename.
        dump_name = PREPROCESSED_TOKEN_FILENAME_PREFIX + FileUtil.get_filename_from_path(file_path)
        FileUtil.write_file(out_dir / dump_name, representation.get_printable_string())
    return representation
def __init__(self, file_path):
    """Remember *file_path* and cache its bare filename as ``file_name``."""
    # The two attributes are independent; derive the name straight from the argument.
    self.file_name = FileUtil.get_filename_from_path(file_path)
    self.file_path = file_path
def __init__(self, file_path, file_vector=None):
    """Hold a file path, an optional vector representation, and the derived filename."""
    self.file_path = file_path
    self.file_vector = file_vector
    # Equivalent to reading self.file_path — the attribute was just assigned from the argument.
    self.file_name = FileUtil.get_filename_from_path(file_path)