def test_givenAFasttextEmbeddingsNotLocal_whenDownloadFasttextEmbeddings_thenDownloadIt(
    self, isfile_mock
):
    """Check the path returned by ``download_fasttext_embeddings``.

    ``isfile_mock`` is forced to return ``False`` and
    ``deepparse.fasttext_tools.download_gz_model`` is patched while the
    function under test runs. The returned value must equal
    ``self.a_fasttext_file_name_path``.

    NOTE(review): ``isfile_mock`` is presumably injected by a ``patch``
    decorator that is not visible in this chunk -- confirm.
    """
    # The fake file created below really exists on disk, so the mock is told
    # to report that the file does not exist.
    isfile_mock.return_value = False
    expected = self.a_fasttext_file_name_path
    create_file(expected, content="Fake fasttext embedding content")

    # Build a fake fastText archive whose payload is the encoded file path.
    archive_payload = self.a_fasttext_file_name_path.encode("utf-8")
    with gzip.open(self.a_fasttext_gz_file_name_path, "wb") as archive:
        archive.write(archive_payload)

    # The gz download helper is replaced by a mock for the duration of the call.
    with patch("deepparse.fasttext_tools.download_gz_model"):
        actual = download_fasttext_embeddings(self.a_directory_path)

    self.assertEqual(actual, expected)
def test_givenAFasttextLightEmbeddingsNotLocal_whenDownloadFasttextEmbeddingsNoVerbose_thenNoVerbose(
    self, isfile_mock
):
    """Check that a non-verbose magnitude download leaves ``self.test_out`` empty.

    ``download_fasttext_magnitude_embeddings`` is called with
    ``verbose=False`` while ``deepparse.fasttext_tools.download_from_url``
    is patched. The stripped content of ``self.test_out`` must then be the
    empty string.

    NOTE(review): ``self._capture_output()`` presumably redirects output into
    ``self.test_out``, and ``isfile_mock`` presumably comes from a ``patch``
    decorator outside this chunk -- confirm both.
    """
    self._capture_output()

    # The fake file created below really exists on disk, so the mock is told
    # to report that the file does not exist.
    isfile_mock.return_value = False
    light_embeddings_path = self.a_fasttext_light_name_path
    create_file(light_embeddings_path, content="Fake fasttext embedding content")

    # Build a fake fastText archive whose payload is the encoded file path.
    archive_payload = self.a_fasttext_light_name_path.encode("utf-8")
    with gzip.open(self.a_fasttext_light_gz_file_name_path, "wb") as archive:
        archive.write(archive_payload)

    # The URL download helper is replaced by a mock for the duration of the call.
    with patch("deepparse.fasttext_tools.download_from_url"):
        download_fasttext_magnitude_embeddings(self.a_directory_path, verbose=False)

    expected = ""
    actual = self.test_out.getvalue().strip()
    self.assertEqual(expected, actual)
def populate_directory(self, with_retrain_parser: bool = False):
    """Fill ``self.fake_directory`` with a fixed set of files and folders.

    Creates an ``a_directory`` sub-folder, three text files at the root, and
    a ``checkpoints_dir`` sub-folder holding ``random_file.txt``. Every file
    is written with the content ``"a content"``.

    Args:
        with_retrain_parser: When true, also create
            ``retrained_fasttext_address_parser.ckpt`` inside
            ``checkpoints_dir``.
    """
    root = self.fake_directory
    file_content = "a content"

    os.makedirs(os.path.join(root, "a_directory"), exist_ok=True)

    # Root-level files, created in the same order as before.
    for file_name in ("afile.txt", "another_file.txt", "random_file.txt"):
        create_file(os.path.join(root, file_name), file_content)

    checkpoints_dir_path = os.path.join(root, "checkpoints_dir")
    os.makedirs(checkpoints_dir_path, exist_ok=True)
    create_file(os.path.join(checkpoints_dir_path, "random_file.txt"), file_content)

    if not with_retrain_parser:
        return

    retrained_checkpoint_path = os.path.join(
        checkpoints_dir_path, "retrained_fasttext_address_parser.ckpt"
    )
    create_file(retrained_checkpoint_path, file_content)
def create_cache_version(self, model_name, content):
    """Write a ``<model_name>.version`` file under ``self.fake_cache_path``.

    Args:
        model_name: Base name of the version file; ``".version"`` is appended.
        content: Content handed to ``create_file`` for the new file.
    """
    version_file_name = model_name + ".version"
    create_file(os.path.join(self.fake_cache_path, version_file_name), content)
def populate_directory(self):
    """Create ``retrained_fasttext_address_parser.ckpt`` in ``self.a_logging_path``.

    The file is written through ``create_file`` with the content
    ``"a content"``.
    """
    checkpoint_path = os.path.join(
        self.a_logging_path, "retrained_fasttext_address_parser.ckpt"
    )
    create_file(checkpoint_path, "a content")