Пример #1
0
class TestApplicator(unittest.TestCase):
    """Tests for ``EADDir`` run against a fixture directory of EAD XML files.

    The assertions expect the fixture directory to hold exactly three EAD
    files, each of which yields a single ``//text`` match whose text is
    ``"text"``.
    """

    def setUp(self):
        # NOTE(review): absolute, machine-specific fixture path; the tests can
        # only run where this directory exists.
        self.a = EADDir(input_dir=r'C:\Users\wboyle\PycharmProjects\bentley_code\utilities')

    def _method_for_testing_write(self, ead):
        """Set the text of every ``//text`` match in ``ead.tree`` to ``"yo"``."""
        for text in ead.tree.xpath("//text"):
            text.text = "yo"

    def _method_for_testing_characterize(self, ead):
        """Return the text of every ``//text`` match in ``ead.tree`` as a list."""
        return [text.text for text in ead.tree.xpath("//text")]

    def test_ead_list(self):
        # The directory listing must contain exactly the three fixture files.
        self.assertEqual(self.a.ead_files, ['ead_appended.xml', 'ead_messy.xml', 'ead_pretty.xml'])

    def test_characterize_directory(self):
        # One result list per file, each holding that file's single text value.
        intended_results = [['text'], ['text'], ['text']]
        self.assertEqual(self.a.characterize_dir(function=self._method_for_testing_characterize), intended_results)

    def test_apply_to_directory(self):
        # Write modified copies into an "output" subdirectory, then re-read
        # them through a second EADDir to confirm the modification was saved.
        output_dir = os.path.join(self.a.input_dir, "output")

        try:
            self.a.apply_function_to_dir(function=self._method_for_testing_write, output_dir=output_dir)

            b = EADDir(output_dir)
            intended_results = [['yo'], ['yo'], ['yo']]
            self.assertEqual(b.characterize_dir(self._method_for_testing_characterize), intended_results)
        finally:
            # Remove generated files even when the checks above fail, so a
            # failed run does not leave stale output behind for the next one.
            if os.path.isdir(output_dir):
                for ead in os.listdir(output_dir):
                    os.remove(os.path.join(output_dir, ead))
Пример #2
0
class TestApplicator(unittest.TestCase):
    """Tests for ``EADDir`` run against a fixture directory of EAD XML files.

    The assertions expect the fixture directory to hold exactly three EAD
    files, each of which yields a single ``//text`` match whose text is
    ``"text"``.
    """

    def setUp(self):
        # NOTE(review): absolute, machine-specific fixture path; the tests can
        # only run where this directory exists.
        self.a = EADDir(
            input_dir=r'C:\Users\wboyle\PycharmProjects\bentley_code\utilities'
        )

    def _method_for_testing_write(self, ead):
        """Set the text of every ``//text`` match in ``ead.tree`` to "yo"."""
        for text in ead.tree.xpath("//text"):
            text.text = "yo"

    def _method_for_testing_characterize(self, ead):
        """Return the text of every ``//text`` match in ``ead.tree``."""
        return [text.text for text in ead.tree.xpath("//text")]

    def test_ead_list(self):
        # The directory listing must contain exactly the three fixture files.
        self.assertEqual(
            self.a.ead_files,
            ['ead_appended.xml', 'ead_messy.xml', 'ead_pretty.xml'])

    def test_characterize_directory(self):
        # One result list per file, each holding that file's single text value.
        intended_results = [['text'], ['text'], ['text']]
        self.assertEqual(
            self.a.characterize_dir(
                function=self._method_for_testing_characterize),
            intended_results)

    def test_apply_to_directory(self):
        # Write modified copies into an "output" subdirectory, then re-read
        # them through a second EADDir to confirm the modification was saved.
        output_dir = os.path.join(self.a.input_dir, "output")

        try:
            self.a.apply_function_to_dir(
                function=self._method_for_testing_write,
                output_dir=output_dir)

            b = EADDir(output_dir)
            intended_results = [['yo'], ['yo'], ['yo']]
            self.assertEqual(
                b.characterize_dir(self._method_for_testing_characterize),
                intended_results)
        finally:
            # Remove generated files even when the checks above fail, so a
            # failed run does not leave stale output behind for the next one.
            # A missing directory simply yields nothing to remove.
            leftovers = (os.listdir(output_dir)
                         if os.path.isdir(output_dir) else [])
            for ead in leftovers:
                os.remove(os.path.join(output_dir, ead))
            # NOTE(review): these lines look like the tail of a different
            # function (presumably the `replace_subjects` callable referenced
            # in the `__main__` block) whose header is missing here. `texts`,
            # `tag` and `dct` are not defined anywhere in the visible code;
            # confirm the origin and restore the enclosing definition.
            for text in texts:
                # Act only when this entry's first two items equal the tag's
                # name and text, and the tag has a truthy "authfilenumber".
                if text[0] == tag.tag and text[1] == tag.text and tag.get("authfilenumber"):
                    index = dct["normalize to"]

                    # deleting authfilenumbers if the normalization index is "x"
                    if index == "x":
                        # This one heading is exempt: its attribute is kept.
                        if tag.text == "University of Michigan.":
                            continue

                        del tag.attrib["authfilenumber"]
                        continue

                    # normalizing terms
                    # `index` is treated as a 1-based position into `texts`.
                    normal_form = texts[int(index) - 1]
                    normal_tag = normal_form[0]
                    normal_text = normal_form[1]

                    # Rewrite the element in place with the normal form's
                    # tag name and text.
                    tag.tag = normal_tag
                    tag.text = normal_text




# Script entry point: load the normalization data, then apply
# `replace_subjects` to the files of one EAD directory.
if __name__ == "__main__":
    # NOTE(review): absolute, machine-specific path; adjust before running
    # on another machine.
    directory = r'C:\Users\wboyle\PycharmProjects\without-reservations\Real_Masters_all'
    ead_dir = EADDir(input_dir=directory)
    # `data` is not referenced again in this block; presumably it is read as
    # a module-level global by `replace_subjects` -- confirm before removing.
    data = load_data("normalization data.csv")

    # The output directory is the same as the input directory, so results are
    # presumably written back over the source files -- verify this is intended.
    ead_dir.apply_function_to_dir(replace_subjects, output_dir=directory)