Exemplo n.º 1
0
    def test_givenAPickleDatasetContainer_whenGetOneItem_thenReturnTheCorrectItem(
            self):
        """Check that integer indexing returns the expected (address, tags) pair."""
        create_pickle_file(self.a_pickle_data_container_path)
        container = PickleDatasetContainer(self.a_pickle_data_container_path)

        # Same check for the first, second and third data points, in order.
        for position in range(3):
            wanted_address = base_string.format(position)
            wanted_tags = a_tags_sequence

            got_address, got_tags = container[position]
            self.assertEqual(wanted_address, got_address)
            self.assertListEqual(wanted_tags, got_tags)
Exemplo n.º 2
0
    def test_given_list_of_tuple_data_when_predict_container_raise_data_error(
            self):
        """Check that ``DataError`` is raised for a mismatched container kind.

        The file is created with ``predict_container=False`` and then loaded
        with ``is_training_container=False``.
        """
        pickle_path = self.a_pickle_data_container_path
        create_pickle_file(pickle_path,
                           number_of_data_points=4,
                           predict_container=False)

        # Callable form of assertRaises: the constructor call itself must raise.
        self.assertRaises(DataError,
                          PickleDatasetContainer,
                          pickle_path,
                          is_training_container=False)
Exemplo n.º 3
0
    def test_integration_cpu(self):
        """Run ``parse.main`` with ``self.cpu_device`` and check the export file exists."""
        dataset_path = self.fake_data_path_pickle
        create_pickle_file(dataset_path, predict_container=True)

        export_filename = self.pickle_p_export_filename
        cli_arguments = [
            self.a_fasttext_model_type,
            dataset_path,
            export_filename,
            "--device",
            self.cpu_device,
        ]
        parse.main(cli_arguments)

        # The export is expected at the path derived from the dataset path
        # and the export filename.
        exported_file = generate_export_path(dataset_path, export_filename)
        self.assertTrue(os.path.isfile(exported_file))
Exemplo n.º 4
0
 def test_integration_no_logging(self):
     """Check that no log record is captured when ``--log False`` is passed."""
     with self._caplog.at_level(logging.INFO):
         dataset_path = self.fake_data_path_pickle
         create_pickle_file(dataset_path, predict_container=True)

         cli_arguments = [
             self.a_fasttext_model_type,
             dataset_path,
             self.pickle_p_export_filename,
             "--device",
             self.cpu_device,
         ]
         # Logging is switched off through the CLI flag.
         cli_arguments += ["--log", "False"]
         parse.main(cli_arguments)

     captured_records = self._caplog.records
     self.assertEqual(0, len(captured_records))
Exemplo n.º 5
0
    def test_integration_predict_container(self):
        """Check the length of a non-training container for two dataset sizes."""
        pickle_path = self.a_pickle_data_container_path

        # The same file path is rewritten for each size before being reloaded.
        for number_of_data_points in (4, 5):
            create_pickle_file(pickle_path,
                               number_of_data_points=number_of_data_points,
                               predict_container=True)

            container = PickleDatasetContainer(pickle_path,
                                               is_training_container=False)
            self.assertEqual(number_of_data_points, len(container))
Exemplo n.º 6
0
    def test_ifPathToFastTextRetrainModel_thenUseFastTextRetrainModel(self):
        """Check the first log message when ``--path_to_retrained_model`` is given.

        The retrained-model path comes from ``self.path_to_retrain_fasttext``.
        """
        with self._caplog.at_level(logging.INFO):
            retrained_checkpoint = self.path_to_retrain_fasttext
            create_pickle_file(self.fake_data_path_pickle,
                               predict_container=True)

            cli_arguments = [
                self.a_fasttext_model_type,
                self.fake_data_path_pickle,
                self.pickle_p_export_filename,
                "--device",
                self.cpu_device,
                "--path_to_retrained_model",
                retrained_checkpoint,
            ]
            parse.main(cli_arguments)

        wanted_message = (
            f"Parsing dataset file {self.fake_data_path_pickle} using the parser "
            f"FastTextAddressParser")
        first_record = self._caplog.records[0]
        self.assertEqual(wanted_message, first_record.message)
Exemplo n.º 7
0
    def test_integration(self):
        """Check the length of a default container for two dataset sizes."""
        pickle_path = self.a_pickle_data_container_path

        # The same file path is rewritten for each size before being reloaded.
        for number_of_data_points in (4, 5):
            create_pickle_file(pickle_path,
                               number_of_data_points=number_of_data_points)

            container = PickleDatasetContainer(pickle_path)
            self.assertEqual(number_of_data_points, len(container))
Exemplo n.º 8
0
    def test_givenAPickleDatasetContainer_whenGetSlice_thenReturnTheCorrectItems(
            self):
        """Check that slicing returns a list of the expected (address, tags) items.

        Two consecutive windows are checked: ``[0:2]`` and ``[2:4]``.
        """
        create_pickle_file(self.a_pickle_data_container_path)
        container = PickleDatasetContainer(self.a_pickle_data_container_path)

        for start_idx, end_idx in ((0, 2), (2, 4)):
            wanted_addresses = list(
                map(base_string.format, range(start_idx, end_idx)))
            wanted_tags = [a_tags_sequence] * (end_idx - start_idx)

            window = container[start_idx:end_idx]
            self.assertIsInstance(window, list)

            for item, wanted_address, wanted_tag_sequence in zip(
                    window, wanted_addresses, wanted_tags):
                # Index explicitly rather than unpack, so items longer than
                # two elements are handled exactly as before.
                got_address, got_tag_sequence = item[0], item[1]
                self.assertEqual(wanted_address, got_address)
                self.assertListEqual(wanted_tag_sequence, got_tag_sequence)
Exemplo n.º 9
0
    def test_ifPathToFakeRetrainModel_thenUseFakeRetrainModel(self):
        """Check the first log message when a stand-in retrained model path is given."""
        with self._caplog.at_level(logging.INFO):
            # The default fasttext checkpoint location under the user's cache
            # directory stands in for a "retrained model" path.
            home_directory = os.path.expanduser("~")
            retrained_checkpoint = os.path.join(home_directory, ".cache",
                                                "deepparse", "fasttext.ckpt")
            create_pickle_file(self.fake_data_path_pickle,
                               predict_container=True)

            cli_arguments = [
                self.a_fasttext_model_type,
                self.fake_data_path_pickle,
                self.pickle_p_export_filename,
                "--device",
                self.cpu_device,
                "--path_to_retrained_model",
                retrained_checkpoint,
            ]
            parse.main(cli_arguments)

        wanted_message = (
            f"Parsing dataset file {self.fake_data_path_pickle} using the parser "
            f"FastTextAddressParser")
        first_record = self._caplog.records[0]
        self.assertEqual(wanted_message, first_record.message)
Exemplo n.º 10
0
    def test_integration_logging(self):
        """Check the first two INFO messages captured while ``parse.main`` runs.

        The first record must announce the dataset file and the parser used.
        The second must report the number of parsed addresses and the export
        path.
        """
        with self._caplog.at_level(logging.INFO):
            create_pickle_file(self.fake_data_path_pickle,
                               predict_container=True)
            parse.main([
                self.a_fasttext_model_type,
                self.fake_data_path_pickle,
                self.pickle_p_export_filename,
                "--device",
                self.cpu_device,
            ])
        expected_first_message = (
            f"Parsing dataset file {self.fake_data_path_pickle} using the parser "
            f"FastTextAddressParser")
        actual_first_message = self._caplog.records[0].message
        self.assertEqual(expected_first_message, actual_first_message)

        # Build the expected export path from the same filename that was handed
        # to parse.main, rather than repeating it as a separate literal, so the
        # expectation cannot drift from the CLI arguments.
        export_path = generate_export_path(self.fake_data_path_pickle,
                                           self.pickle_p_export_filename)
        # NOTE(review): the count of 4 presumably matches the default number of
        # data points written by create_pickle_file -- confirm.
        expected_second_message = (
            f"4 addresses have been parsed.\n"
            f"The parsed addresses are outputted here: {export_path}")
        actual_second_message = self._caplog.records[1].message
        self.assertEqual(expected_second_message, actual_second_message)