def test_read_invalid_xml_01(self, filename, expected_error_msg):
     """
     Check that loading an invalid file raises ParseError carrying the expected message.

     :param filename: File name, resolved under input_files_root / self.local_path.
     :param expected_error_msg: Text checked against the raised error's msg via test_string_segment.
     """
     opml_path = os.path.join(input_files_root, self.local_path, filename)
     try:
         Outline.from_opml(opml_path)
     except ParseError as parse_err:
         # Expected route: the load was rejected, so only the message needs checking.
         message_matches = test_string_segment(expected_error_msg, parse_err.msg)
         self.assertTrue(message_matches)
         return
     except Exception as other_err:
         self.fail(f"Unexpected exception raised {other_err}")

     # Only reached when the load completed without raising anything.
     self.fail(
         f'Exception expected but wasn\'t raised. Msg {expected_error_msg}'
     )
Exemplo n.º 2
0
    def test_opml_creation_nodes(self, opml_file_name, node_sequence_number, exp_text_value, exp_note_value):
        """
        Round-trip an OPML file through write_opml and check one node of the re-read outline.

        :param opml_file_name: Input file name under input_files_root / relative_folder.
        :param node_sequence_number: Index into list_nodes() of the re-read outline.
        :param exp_text_value: Expected text of the selected node.
        :param exp_note_value: Expected note of the selected node.
        """
        source_path = os.path.join(input_files_root, relative_folder, opml_file_name)
        round_trip_path = os.path.join(output_files_root, relative_folder, test_opml_output_file_01)

        # Read, write, then read back so the assertions run against what was written.
        Outline.from_opml(source_path).write_opml(round_trip_path)
        reloaded_outline = Outline.from_opml(round_trip_path)

        node_under_test = reloaded_outline.list_nodes()[node_sequence_number].node()

        self.assertEqual(exp_text_value, node_under_test.text)
        self.assertEqual(exp_note_value, node_under_test.note)
    def test_output_generator(self, filename, record_num, record_name,
                              expected_level, expected_text):
        """
        Generate a PowerPoint file from the first data node of an outline and check one record of it.

        :param filename: OPML file name under input_files_root / test_file_folder_relative.
        :param record_num: Index of the record (as yielded by get_slide_data) to check.
        :param record_name: Label included in the failure message of the level assertion.
        :param expected_level: Expected first element of the selected record.
        :param expected_text: Expected second element of the selected record.
        """
        opml_input = os.path.join(input_files_root, test_file_folder_relative, filename)
        template_path = os.path.join(input_files_root, test_file_folder_relative, "ppt_template_02.pptx")
        generated_ppt = os.path.join(output_files_root, test_file_folder_relative, "ppt_output_02.pptx")

        unleashed = UnleashedOutline(Outline.from_opml(opml_input),
                                     default_text_tag_delimiter=['', ':'])

        # Locate the first data node via the index recorded by extract_data_nodes().
        data_nodes = unleashed.extract_data_nodes()
        unleashed_nodes = unleashed.list_unleashed_nodes()
        data_node = unleashed_nodes[data_nodes[0]['data_node_list_index']].node()

        specifier = DataNodeSpecifier(dns)
        extracted_table = specifier.extract_data_node_dispatch(data_node)
        PptOutputGeneratorSimple.generate_ppt(extracted_table, generated_ppt, template_path)

        # Read the generated file back and pick out the record under test.
        slide_records = list(get_slide_data(generated_ppt))
        actual_level, actual_text = slide_records[record_num]

        self.assertEqual(expected_level, actual_level, f"Failed on {record_name}")
        self.assertEqual(expected_text, actual_text)
Exemplo n.º 4
0
 def __init__(self, outline_path, default_tag_delimiters):
     """
     Load the outline from an opml file and keep it on the instance.

     :param outline_path: Full path to opml file for outline.
     :param default_tag_delimiters: Tag delimiter to use if one isn't supplied for a given descriptor.
         NOTE(review): accepted here but neither stored nor used by this constructor.
     """
     parsed_outline = Outline.from_opml(outline_path)
     self.outline = parsed_outline
Exemplo n.º 5
0
    def test_opml_creation_header_fields(self, opml_file_name, field_name, expected_field_value):
        """
        Round-trip an OPML file (read, write, read again) and check that one attribute of the
        re-read outline has the expected value.

        :param opml_file_name: Input file name under input_files_root / relative_folder.
        :param field_name: Name of the outline attribute to read with getattr.
        :param expected_field_value: Value the attribute is expected to hold after the round trip.
        :return:
        """
        source_path = os.path.join(input_files_root, relative_folder, opml_file_name)
        round_trip_path = os.path.join(output_files_root, relative_folder, test_opml_output_file_01)

        # Write out what was read, then load the written file for checking.
        Outline.from_opml(source_path).write_opml(round_trip_path)
        reloaded_outline = Outline.from_opml(round_trip_path)

        self.assertEqual(expected_field_value, getattr(reloaded_outline, field_name))
Exemplo n.º 6
0
    def setUp(self) -> None:
        """Load the test outline from self.test_root and cache its unleashed node list."""
        opml_path = os.path.join(self.test_root, 'opml_data_extraction_test_02.opml')
        source_outline = Outline.from_opml(opml_path)

        self.unleashed_outline = UnleashedOutline(source_outline)
        self.outline_node_list = self.unleashed_outline.list_unleashed_nodes()
    def test_json_desc_version_number(self):
        """
        Tests ability to parse a file with heading levels mapped directly to outline level.

        So Outline Level 1 is Heading Level 1 etc.

        Also uses JSON syntax which doesn't specify all matching criteria - these should be assigned None
        in the JSON translation.

        The specifier is built from the JSON text and applied to the node at index 1 of the unleashed
        node list; the test passes when exactly 32 rows are extracted.
        :return:
        """
        json_specifier_generic_levels = os.path.join(
            self.test_root, "custom_json_test_descriptors_generic_levels.json")
        opml_file_name = os.path.join(self.test_root,
                                      "custom_json_test_descriptors.opml")
        root_node_index = 1
        expected_number_of_rows = 32

        # Read json file into a string (later this will be done within the outline engine).
        # Decode explicitly as UTF-8 (the standard JSON encoding) so the result does not
        # depend on the platform's locale default.
        with open(json_specifier_generic_levels, 'r', encoding='utf-8') as f:
            json_specifier_string = f.read()

        # Read opml file into an outline
        outline = Outline.from_opml(opml_file_name)

        unleashed_outline = UnleashedOutline(outline)

        node_list = list(unleashed_outline.iter_unleashed_nodes())
        root_node = node_list[root_node_index].node()

        specifier = DataNodeSpecifier.from_json_string(json_specifier_string)

        extracted_data_nodes = specifier.extract_data_node_dispatch(root_node)

        self.assertEqual(expected_number_of_rows, len(extracted_data_nodes))
Exemplo n.º 8
0
    def test_iter_nodes(self):
        """
        Smoke test: load the outline with full validation and print every node from iter_nodes().

        Nothing is verified beyond the load and the iteration completing without raising.
        """
        outline = Outline.from_opml(opml_path=self.input_file_full_path, full_validate=True)

        position = 0
        for node_record in outline.iter_nodes():
            print(f'Node {position}: {node_record.node()}')
            position += 1

        # Always passes; the value of the test is that nothing above raised.
        self.assertTrue(True)
    def test_identify_data_node_01(self, file_path, expected_node_index,
                                   field_name, expected_field_value):
        """
        Check one field of one record returned by extract_data_nodes().

        :param file_path: Path of the OPML file to load.
        :param expected_node_index: Index of the record within the extract_data_nodes() result.
        :param field_name: Key to look up in the selected record.
        :param expected_field_value: Expected value stored under that key.
        """
        unleashed = UnleashedOutline(Outline.from_opml(file_path))
        node_record = unleashed.extract_data_nodes()[expected_node_index]

        self.assertEqual(expected_field_value, node_record[field_name])
    def setUp(self) -> None:
        """Build the unleashed outline under test, using the module's text and note tag delimiters."""
        opml_path = os.path.join(tcfg.input_files_root, 'outline', 'opml',
                                 'opml-test-valid-opml-01.opml')
        source_outline = Outline.from_opml(opml_path)

        self.unleashed_outline = UnleashedOutline(
            source_outline,
            default_text_tag_delimiter=text_tag_regex,
            default_note_tag_delimiter=note_tag_regex)
Exemplo n.º 11
0
    def setUp(self) -> None:
        """Load the data-extraction test outline and cache all of its unleashed nodes as a list."""
        tag_delimiters_text = ('[*', '*]')
        opml_path = os.path.join(tcfg.input_files_root, 'data_node_descriptor',
                                 'opml_data_extraction_test_01.opml')

        # NOTE(review): the delimiters are also passed as the second positional argument of
        # from_opml; confirm that this matches that method's signature.
        source_outline = Outline.from_opml(opml_path, tag_delimiters_text)

        unleashed = UnleashedOutline(
            source_outline, default_text_tag_delimiter=tag_delimiters_text)

        # Materialise the iterator so tests can access nodes by index.
        self.node_list = list(unleashed.iter_unleashed_nodes())
Exemplo n.º 12
0
    def test_head_data_from_opml(self, field_name, expected_value):
        """
        Load the class's test data file and check one attribute of the resulting outline.

        :param field_name: Name of the outline attribute to read.
        :param expected_value: Value that attribute is expected to hold.
        """
        opml_path = os.path.join(tcfg.input_files_root, self.local_path,
                                 self.test_data_file)
        outline = Outline.from_opml(opml_path)

        # Attribute is looked up by name so the same test body serves every field.
        self.assertEqual(expected_value, getattr(outline, field_name))
Exemplo n.º 13
0
    def test_outline_create_01(self, node_sequence_number, parent_node_number,
                               level, text, note):
        """
        Regenerate the test outline file, load it and check depth, text and note of one node.

        :param node_sequence_number: Value passed to Outline.get_node to select the node.
        :param parent_node_number: Not used by this test body.
        :param level: Expected depth of the node's ancestry record.
        :param text: Expected node text.
        :param note: Expected node note.
        """
        generate_outline_from_test_data()
        outline = Outline.from_opml(test_opml_path)

        ancestry = outline.get_node(node_sequence_number)
        # The last item of the ancestry record is the one holding the node under test.
        node = ancestry[-1].node

        self.assertEqual(level, ancestry.depth)
        self.assertEqual(text, node.text)
        self.assertEqual(note, node.note)
Exemplo n.º 14
0
    def test_unleashed_outline(self, node_number, text, note, text_tag,
                               note_tag):
        """
        Check text, note and both tags of one node of an unleashed outline.

        :param node_number: Value passed to UnleashedOutline.get_node to select the node.
        :param text: Expected node text.
        :param note: Expected node note.
        :param text_tag: Expected text tag.
        :param note_tag: Expected note tag.
        """
        source_outline = Outline.from_opml(input_file_path)
        # The same '(-' ... '-)' delimiter pair is supplied for both positional delimiter arguments.
        unleashed = UnleashedOutline(source_outline, ['(-', '-)'], ['(-', '-)'])

        ancestry = unleashed.get_node(node_number)
        node = ancestry[-1].node

        self.assertEqual(text, node.text)
        self.assertEqual(note, node.note)
        self.assertEqual(text_tag, node.text_tag)
        self.assertEqual(note_tag, node.note_tag)
Exemplo n.º 15
0
    def test_child_access(self):
        """
        Check that the top level node reports three children through len() and through iteration,
        and that both access routes yield the same OutlineNode objects.
        """
        root = Outline.from_opml(self.test_outline).top_outline_node

        self.assertEqual(3, len(root))

        children_as_list = list(root)
        self.assertEqual(3, len(children_as_list))

        # Iterate a second time and compare each child with the list built above.
        for position, child in enumerate(root):
            listed_child = children_as_list[position]
            self.assertIsInstance(child, OutlineNode)
            self.assertIsInstance(listed_child, OutlineNode)
            self.assertEqual(child, listed_child)
    def test_unleashed_outline_node(self):
        """
        Smoke test: read depth and child_number from the second unleashed node's ancestry record.

        No assertions are made; the test only fails if one of the accesses raises.
        """
        opml_path = os.path.join(tcfg.input_files_root, 'data_node_descriptor',
                                 'opml_data_extraction_test_02.opml')
        unleashed = UnleashedOutline(Outline.from_opml(opml_path))

        ancestry_records = list(unleashed.iter_unleashed_nodes())
        second_record = ancestry_records[1]

        # Values are read only to exercise the attribute access; they are not checked.
        depth = second_record.depth
        child_number = second_record[1].child_number
Exemplo n.º 17
0
    def test_opml_from_json(self,
                            index,
                            key1,
                            key2,
                            non_key1,
                            non_key2,
                            non_key3):
        """
        Data driven check that a data node specifier built from a JSON string extracts the
        expected records from a data node.  This is not a full test of data node extraction; it
        uses sufficiently complex data to give confidence that from_json_string works.

        :param index:     Index of the extracted record to check.
        :param key1:      Expected value of 'key_field_1'
        :param key2:      Expected value of 'key_field_2'
        :param non_key1:  Expected value of 'data_field_1'
        :param non_key2:  Expected value of 'data_field_2'
        :param non_key3:  Expected value of 'data_field_3'
        :return:
        """
        specifier = DataNodeSpecifier.from_json_string(serialized_json_specifier_03x)

        # Position of the data node within the unleashed node list of the test outline.
        data_node_index = 31

        opml_path = os.path.join(test_root, 'opml_data_extraction_test_02.opml')
        unleashed = UnleashedOutline(Outline.from_opml(opml_path))

        data_node = unleashed.list_unleashed_nodes()[data_node_index].node()

        # Run the specifier over the node and pick out the record under test.
        record = specifier.extract_data_node_dispatch(data_node)[index]

        self.assertEqual(key1, record['key_field_1'])
        self.assertEqual(key2, record['key_field_2'])
        self.assertEqual(non_key1, record['data_field_1'])
        self.assertEqual(non_key2, record['data_field_2'])
        self.assertEqual(non_key3, record['data_field_3'])
    def test_output_generator_csv_02(self, opml_filename, output_filename,
                                     json_dns_file, row, col, expected_result):
        """
        Creates a csv file from an outline using a data node specifier loaded from a JSON file,
        then checks one cell of the result.

        :param opml_filename: OPML file name under input_files_root / "output_generator".
        :param output_filename: csv file name under output_files_root / "output_generator".
        :param json_dns_file: Specifier file name under input_files_root / "data_node_descriptor".
        :param row: Row passed to CsvTestChecker.check.
        :param col: Column passed to CsvTestChecker.check.
        :param expected_result: Expected value passed to CsvTestChecker.check.
        :return:
        """
        opml_input = os.path.join(input_files_root, "output_generator", opml_filename)
        csv_output = os.path.join(output_files_root, "output_generator", output_filename)
        specifier_path = os.path.join(input_files_root, "data_node_descriptor", json_dns_file)

        unleashed = UnleashedOutline(Outline.from_opml(opml_input),
                                     default_text_tag_delimiter=['[*', '*]'])

        # The first data node found must be the one the fixture was written for.
        first_data_node = unleashed.extract_data_nodes()[0]
        self.assertEqual('data_node_01', first_data_node['data_node_name'])

        node_position = first_data_node['data_node_list_index']
        data_node = unleashed.list_unleashed_nodes()[node_position].node()

        specifier = DataNodeSpecifier.from_json_file(specifier_path)
        extracted_table = specifier.extract_data_node_dispatch(data_node)

        CsvOutputGenerator.create_csv_file(extracted_table, csv_output)

        # Verify the requested cell of the file just written.
        checker = CsvTestChecker(csv_output)
        self.assertTrue(checker.check(row, col, expected_result))
    def test_output_generator_csv_01(self, opml_filename, output_filename,
                                     ignored_json_file, row, col,
                                     expected_result):
        """
        Creates a csv file from an outline using the in-memory data node specifier
        data_node_specifier_csv_test_01, then checks one cell of the result.

        NOTE: A JSON file name is passed in as the generator is used across both test methods, but
        in this method is ignored.

        :param opml_filename: OPML file name under input_files_root / "output_generator".
        :param output_filename: csv file name under output_files_root / "output_generator".
        :param ignored_json_file: Not used by this test body.
        :param row: Row passed to CsvTestChecker.check.
        :param col: Column passed to CsvTestChecker.check.
        :param expected_result: Expected value passed to CsvTestChecker.check.
        :return:
        """
        opml_input = os.path.join(input_files_root, "output_generator", opml_filename)
        csv_output = os.path.join(output_files_root, "output_generator", output_filename)

        unleashed = UnleashedOutline(Outline.from_opml(opml_input),
                                     default_text_tag_delimiter=['[*', '*]'])

        # The first data node found must be the one the fixture was written for.
        first_data_node = unleashed.extract_data_nodes()[0]
        self.assertEqual('data_node_01', first_data_node['data_node_name'])

        node_position = first_data_node['data_node_list_index']
        data_node = unleashed.list_unleashed_nodes()[node_position].node()

        specifier = DataNodeSpecifier(data_node_specifier_csv_test_01)
        extracted_table = specifier.extract_data_node_dispatch(data_node)

        CsvOutputGenerator.create_csv_file(extracted_table, csv_output)

        # Verify the requested cell of the file just written.
        checker = CsvTestChecker(csv_output)
        self.assertTrue(checker.check(row, col, expected_result))
Exemplo n.º 20
0
    def test_get_node_01(self, file_name, node_to_get, item_to_test, expected_value):
        """
        Fetch one node's ancestry record from an outline and check a single property of it.

        :param file_name: OPML file name under input_files_root / relative_folder.
        :param node_to_get: Value passed to Outline.get_node to select the node.
        :param item_to_test: Property to check: "level", "child_number", "text" or "note".
            Any other value fails the test.
        :param expected_value: Expected value of the selected property.
        """
        input_file_path = os.path.join(input_files_root, relative_folder, file_name)

        outline = Outline.from_opml(input_file_path)

        # The last item of the ancestry record is the one holding the requested node.
        ancestry_record_to_test: NodeAncestryRecord = outline.get_node(node_to_get)
        ancestry_item_to_test: NodeAncestryItem = ancestry_record_to_test[-1]
        node_to_test: OutlineNode = ancestry_item_to_test.node

        if item_to_test == "level":
            self.assertEqual(expected_value, ancestry_record_to_test.depth)
        elif item_to_test == "child_number":
            self.assertEqual(expected_value, ancestry_item_to_test.child_number)
        elif item_to_test == "text":
            self.assertEqual(expected_value, node_to_test.text)
        elif item_to_test == "note":
            self.assertEqual(expected_value, node_to_test.note)
        else:
            # Bad test data: fail loudly rather than pass silently.
            self.fail(f"Unrecognised item to test {item_to_test}")
Exemplo n.º 21
0
def main():
    """
    Command line entry point: extract the first data node of an OPML outline to a csv file.

    Expects three command line arguments: the OPML input path, the JSON data node specifier
    path and the csv output path.  With no arguments at all, hard coded test files are used
    (debugging aid).  With any other argument count, or when the outline contains no data
    nodes, a message is written to stderr and the process exits with status 1.
    """
    expected_num_arguments = 3
    supplied_arguments = sys.argv[1:]  # sys.argv[0] is the script name

    if not supplied_arguments:
        # Temporary hack to allow debugging. No parameters supplied --> use test files.
        print("Debug mode - hard coded arguments for command line")

        opml_path = "tests/test_resources/opml_data_extraction_test_01.opml"
        json_path = "tests/test_resources/custom_json_test_descriptors_risk_01.json"
        csv_path = "tests/test_resources/output_files/opml_data_extraction_test_01.csv"
    elif len(supplied_arguments) != expected_num_arguments:
        # Stop here: continuing with empty paths would only fail later with a confusing error.
        print(f"Wrong number of arguments ({len(supplied_arguments)}) "
              f"(should be {expected_num_arguments})",
              file=sys.stderr)
        sys.exit(1)
    else:
        opml_path, json_path, csv_path = supplied_arguments

    outline = Outline.from_opml(opml_path)
    print("Successfully read outline, unleashing...")

    unleashed_outline = UnleashedOutline(outline)
    data_nodes = unleashed_outline.extract_data_nodes()
    print(f"Outline is unleashed, there are {len(data_nodes)} data nodes in this outline")
    for index, node in enumerate(data_nodes):
        print(f"{index}: {node['data_node_name']}")

    if not data_nodes:
        # Nothing to export; avoid an IndexError on data_nodes[0] below.
        print("No data nodes found in outline, nothing to process", file=sys.stderr)
        sys.exit(1)

    print("Processing first node")

    data_node_list_index = data_nodes[0]['data_node_list_index']
    data_node = unleashed_outline.list_unleashed_nodes()[data_node_list_index].node()

    data_node_specifier = DataNodeSpecifier.from_json_file(json_path)
    extracted_data_table = data_node_specifier.extract_data_node_dispatch(data_node)

    CsvOutputGenerator.create_csv_file(extracted_data_table, csv_path)
Exemplo n.º 22
0
    def test_field_access(self):
        """
        Check text and note of nodes reached by indexing from the top level node, and that two
        indexing routes to the same node compare equal.
        """
        root = Outline.from_opml(self.test_outline).top_outline_node  # top level OutlineNode object

        # First child of the root.
        first_child = root[0]
        self.assertEqual('H1:Heading A', first_child.text)
        self.assertEqual('Notes for Heading A', first_child.note)

        # First grandchild, reached by chained indexing from the root.
        # Note that (unleashed) tags are in the text but (correctly) not recognised by outline node.
        # Also note that white space is NOT ignored in the tag text as it isn't recognised as a tag.
        first_grandchild = root[0][0]
        self.assertEqual(' (-TAG-TEXT-H2B-)H2: Heading B', first_grandchild.text)
        self.assertEqual('Notes for Heading C', first_grandchild.note)

        # The same node reached step by step and by chained indexing must compare equal.
        via_grandchild = first_grandchild[0]
        via_root = root[0][0][0]

        self.assertEqual(via_grandchild, via_root)
Exemplo n.º 23
0
    def setUp(self) -> None:
        """
        Extract a data table from the node at index 1 of the test outline and keep it on
        self.extracted_data_table for the tests to inspect.
        """
        data_node_index = 1
        tag_delimiters_text = ('[*', '*]')

        # The first entry of the module's test driver supplies the specifier definition.
        specifier = DataNodeSpecifier(data_node_specifier_test_driver[0])

        opml_path = os.path.join(tcfg.input_files_root,
                                 'data_node_descriptor',
                                 'opml_data_extraction_test_01.opml')

        unleashed = UnleashedOutline(
            Outline.from_opml(opml_path),
            default_text_tag_delimiter=tag_delimiters_text)

        data_node = unleashed.list_unleashed_nodes()[data_node_index].node()

        self.extracted_data_table = specifier.extract_data_node_dispatch(data_node)
Exemplo n.º 24
0
    def test_output_generator_ppt_01(self, filename, record_num, record_name,
                                     expected_level, expected_text):
        """
        Create a PowerPoint skeleton from the first data node of an outline and check one record
        read back from the generated file.

        :param filename: OPML file name under tcfg.input_files_root / test_file_folder_relative.
        :param record_num: Index of the record (as yielded by get_slide_data) to check.
        :param record_name: Label included in the failure message of the level assertion.
        :param expected_level: Expected first element of the selected record.
        :param expected_text: Expected second element of the selected record.
        """
        opml_input = os.path.join(tcfg.input_files_root, test_file_folder_relative, filename)
        template_path = os.path.join(tcfg.input_files_root, test_file_folder_relative,
                                     "ppt_template_01.pptx")
        generated_ppt = os.path.join(tcfg.output_files_root, test_file_folder_relative,
                                     "ppt_output_01.pptx")

        unleashed = UnleashedOutline(Outline.from_opml(opml_input),
                                     default_text_tag_delimiter=['', ':'])

        # The first data node found must be the one the fixture was written for.
        first_data_node = unleashed.extract_data_nodes()[0]
        self.assertEqual('data_node_01', first_data_node['data_node_name'])

        node_position = first_data_node['data_node_list_index']
        data_node = unleashed.list_unleashed_nodes()[node_position].node()

        PowerPointGenerator().create_power_point_skeleton(data_node, template_path,
                                                          generated_ppt)

        # Read the generated file back and pick out the record under test.
        slide_records = list(get_slide_data(generated_ppt))
        actual_level, actual_text = slide_records[record_num]

        self.assertEqual(expected_level, actual_level, f"Failed on {record_name}")
        self.assertEqual(expected_text, actual_text)
Exemplo n.º 25
0
 def setUp(self) -> None:
     """Load the outline-node test file and keep the outline on self.outline."""
     opml_path = os.path.join(tcfg.input_files_root, 'outline', 'outline_node',
                              'outline-test-valid-01.opml')
     self.outline = Outline.from_opml(opml_path)
 def test_valid_opml_01(self, filename):
     """
     Check that the given file loads without raising any exception.

     :param filename: File name, resolved under input_files_root / self.local_path.
     """
     opml_path = os.path.join(input_files_root, self.local_path, filename)
     try:
         Outline.from_opml(opml_path)
     except Exception as load_error:
         # Any exception is reported as a test failure carrying the error text.
         self.fail(f"Unexpected exception raised {load_error}")
Exemplo n.º 27
0
 def setUp(self) -> None:
     """Load the valid OPML test file from self.folder_from_resources_root into self.outline."""
     opml_path = os.path.join(self.folder_from_resources_root,
                              'opml-test-valid-opml-01.opml')
     self.outline = Outline.from_opml(opml_path)