def test_read_invalid_xml_01(self, filename, expected_error_msg):
    """
    Checks that loading an invalid file raises ParseError carrying the expected message.

    :param filename: Name of the file, resolved under input_files_root/self.local_path.
    :param expected_error_msg: Text checked against the error's msg via test_string_segment.
    :return:
    """
    opml_path = os.path.join(input_files_root, self.local_path, filename)
    try:
        Outline.from_opml(opml_path)
    except ParseError as err:
        message_matches = test_string_segment(expected_error_msg, err.msg)
        self.assertTrue(message_matches)
    except Exception as gen_err:
        # Any other exception type counts as a test failure.
        self.fail(f"Unexpected exception raised {gen_err}")
    else:
        # Loading succeeded although a ParseError was expected.
        self.fail(
            f'Exception expected but wasn\'t raised. Msg {expected_error_msg}'
        )
Пример #2
0
    def test_opml_creation_nodes(self, opml_file_name, node_sequence_number, exp_text_value, exp_note_value):
        """
        Round-trips an OPML file (read, write, read again) and checks one node of the re-read outline.

        :param opml_file_name: Name of the input file, resolved under input_files_root/relative_folder.
        :param node_sequence_number: Index into the re-read outline's node list of the node to check.
        :param exp_text_value: Expected text of that node.
        :param exp_note_value: Expected note of that node.
        :return:
        """
        source_path = os.path.join(input_files_root, relative_folder, opml_file_name)
        round_trip_path = os.path.join(output_files_root, relative_folder, test_opml_output_file_01)

        # Write the freshly read outline straight back out, then load the written copy.
        Outline.from_opml(source_path).write_opml(round_trip_path)
        reloaded_outline = Outline.from_opml(round_trip_path)

        checked_node = reloaded_outline.list_nodes()[node_sequence_number].node()

        self.assertEqual(exp_text_value, checked_node.text)
        self.assertEqual(exp_note_value, checked_node.note)
Пример #3
0
 def __init__(self, outline_path, default_tag_delimiters):
     """
     Loads the outline stored at outline_path and keeps it on the instance.

     :param outline_path: Full path to opml file for outline.
     :param default_tag_delimiters: Tag delimiter to use if one isn't supplied for a given descriptor.
         NOTE(review): this value is not referenced in the constructor body - confirm whether it
         should be stored.
     """
     loaded_outline = Outline.from_opml(outline_path)
     self.outline = loaded_outline
    def test_json_desc_version_number(self):
        """
        Parses a file whose heading levels map directly onto outline levels (Outline Level 1 is
        Heading Level 1 and so on) and checks how many rows are extracted.

        The specifier is supplied as JSON which doesn't spell out every matching criterion; the
        missing ones should be assigned None in the JSON translation.
        :return:
        """
        expected_number_of_rows = 32
        root_node_index = 1

        specifier_path = os.path.join(
            self.test_root, "custom_json_test_descriptors_generic_levels.json")
        outline_path = os.path.join(self.test_root,
                                    "custom_json_test_descriptors.opml")

        # Load the JSON specifier as text (later this will be done within the outline engine).
        with open(specifier_path, 'r') as specifier_file:
            specifier_text = specifier_file.read()

        # Load the outline and wrap it so its unleashed nodes can be listed.
        unleashed_outline = UnleashedOutline(Outline.from_opml(outline_path))
        unleashed_nodes = list(unleashed_outline.iter_unleashed_nodes())
        root_node = unleashed_nodes[root_node_index].node()

        specifier = DataNodeSpecifier.from_json_string(specifier_text)
        extracted_rows = specifier.extract_data_node_dispatch(root_node)

        self.assertEqual(expected_number_of_rows, len(extracted_rows))
Пример #5
0
    def test_opml_creation_header_fields(self, opml_file_name, field_name, expected_field_value):
        """
        Round-trips an OPML file (read, write, read again) and checks that one header field of the
        re-read outline still has the expected value.

        :param opml_file_name: Name of the input file, resolved under input_files_root/relative_folder.
        :param field_name: Name of the outline attribute to check.
        :param expected_field_value: Value the attribute must have after the round trip.
        :return:
        """
        source_path = os.path.join(input_files_root, relative_folder, opml_file_name)
        round_trip_path = os.path.join(output_files_root, relative_folder, test_opml_output_file_01)

        # Write the freshly read outline straight back out, then load the written copy.
        Outline.from_opml(source_path).write_opml(round_trip_path)
        reloaded_outline = Outline.from_opml(round_trip_path)

        self.assertEqual(expected_field_value, getattr(reloaded_outline, field_name))
    def test_output_generator(self, filename, record_num, record_name,
                              expected_level, expected_text):
        """
        Generates a PowerPoint file from the first data node of an outline and checks one slide record.

        :param filename: Name of the OPML input file under input_files_root/test_file_folder_relative.
        :param record_num: Index of the record read back from the generated file to check.
        :param record_name: Label used in the failure message of the level assertion.
        :param expected_level: Expected level of the checked record.
        :param expected_text: Expected text of the checked record.
        :return:
        """
        input_folder = os.path.join(input_files_root, test_file_folder_relative)
        opml_path = os.path.join(input_folder, filename)
        template_path = os.path.join(input_folder, "ppt_template_02.pptx")
        output_path = os.path.join(output_files_root,
                                   test_file_folder_relative,
                                   "ppt_output_02.pptx")

        unleashed_outline = UnleashedOutline(
            Outline.from_opml(opml_path),
            default_text_tag_delimiter=['', ':'])

        # Locate the first data node via the index recorded for it.
        first_data_node_record = unleashed_outline.extract_data_nodes()[0]
        node_index = first_data_node_record['data_node_list_index']
        data_node = unleashed_outline.list_unleashed_nodes()[node_index].node()

        data_node_table = DataNodeSpecifier(dns).extract_data_node_dispatch(
            data_node)
        PptOutputGeneratorSimple.generate_ppt(data_node_table, output_path,
                                              template_path)

        # Read the generated file back and pick out the record under test.
        actual_level, actual_text = list(get_slide_data(output_path))[record_num]

        self.assertEqual(expected_level, actual_level,
                         f"Failed on {record_name}")
        self.assertEqual(expected_text, actual_text)
Пример #7
0
    def setUp(self) -> None:
        """Loads the test outline, wraps it as an UnleashedOutline and caches its unleashed node list."""
        opml_path = os.path.join(self.test_root,
                                 'opml_data_extraction_test_02.opml')
        outline = Outline.from_opml(opml_path)

        self.unleashed_outline = UnleashedOutline(outline)
        self.outline_node_list = self.unleashed_outline.list_unleashed_nodes()
Пример #8
0
    def test_iter_nodes(self):
        """
        Smoke test: loads the outline with full validation and prints every node yielded by iter_nodes().

        The closing assertion always passes, so the test only fails if loading or iterating raises.
        """
        validated_outline = Outline.from_opml(opml_path=self.input_file_full_path, full_validate=True)
        node_records = validated_outline.iter_nodes()

        for index, unleashed_node in enumerate(node_records):
            print(f'Node {index}: {unleashed_node.node()}')

        self.assertTrue(True)
    def setUp(self) -> None:
        """Loads the valid OPML test file and wraps it with the module's text and note tag delimiters."""
        opml_path = os.path.join(tcfg.input_files_root, 'outline', 'opml',
                                 'opml-test-valid-opml-01.opml')
        source_outline = Outline.from_opml(opml_path)

        self.unleashed_outline = UnleashedOutline(
            source_outline,
            default_text_tag_delimiter=text_tag_regex,
            default_note_tag_delimiter=note_tag_regex)
    def test_identify_data_node_01(self, file_path, expected_node_index,
                                   field_name, expected_field_value):
        """
        Checks one field of one data node record extracted from an outline.

        :param file_path: Path of the OPML file to load.
        :param expected_node_index: Index of the record within the extracted data nodes.
        :param field_name: Key of the record entry to check.
        :param expected_field_value: Expected value of that entry.
        :return:
        """
        unleashed_outline = UnleashedOutline(Outline.from_opml(file_path))
        node_record = unleashed_outline.extract_data_nodes()[expected_node_index]
        actual_field_value = node_record[field_name]

        self.assertEqual(expected_field_value, actual_field_value)
Пример #11
0
    def setUp(self) -> None:
        """Loads the data extraction test outline and stores the list of its unleashed nodes."""
        tag_delimiters_text = ('[*', '*]')
        opml_path = os.path.join(tcfg.input_files_root, 'data_node_descriptor',
                                 'opml_data_extraction_test_01.opml')

        # NOTE(review): the delimiters are also passed as the second positional argument of
        # from_opml - confirm that this is the parameter intended to receive them.
        source_outline = Outline.from_opml(opml_path, tag_delimiters_text)

        unleashed_outline = UnleashedOutline(
            source_outline, default_text_tag_delimiter=tag_delimiters_text)

        # Materialise every node (plus ancestry) so tests can reach nodes by index.
        self.node_list = list(unleashed_outline.iter_unleashed_nodes())
Пример #12
0
    def test_outline_create_01(self, node_sequence_number, parent_node_number,
                               level, text, note):
        """
        Regenerates the test outline file, reloads it and checks one node's depth, text and note.

        :param node_sequence_number: Number of the node to fetch from the reloaded outline.
        :param parent_node_number: Part of the test data record; not used by this check.
        :param level: Expected depth of the node's ancestry record.
        :param text: Expected text of the node.
        :param note: Expected note of the node.
        :return:
        """
        generate_outline_from_test_data()
        reloaded_outline = Outline.from_opml(test_opml_path)

        ancestry_record = reloaded_outline.get_node(node_sequence_number)
        # The last ancestry item is the one holding the requested node.
        created_node = ancestry_record[-1].node

        self.assertEqual(level, ancestry_record.depth)
        self.assertEqual(text, created_node.text)
        self.assertEqual(note, created_node.note)
    def test_head_data_from_opml(self, field_name, expected_value):
        """
        Checks one attribute of the outline loaded from the test data file.

        Args:
            field_name: Name of the outline attribute to read.
            expected_value: Value the attribute is expected to have.
        """
        opml_path = os.path.join(tcfg.input_files_root, self.local_path,
                                 self.test_data_file)
        loaded_outline = Outline.from_opml(opml_path)

        # Look the attribute up by name so the test can be driven by data.
        self.assertEqual(expected_value, getattr(loaded_outline, field_name))
Пример #14
0
    def test_unleashed_outline(self, node_number, text, note, text_tag,
                               note_tag):
        """
        Checks text, note and both tags of one node of an outline unleashed with '(-' / '-)' delimiters.

        :param node_number: Number of the node to fetch from the unleashed outline.
        :param text: Expected node text.
        :param note: Expected node note.
        :param text_tag: Expected text tag.
        :param note_tag: Expected note tag.
        :return:
        """
        source_outline = Outline.from_opml(input_file_path)
        unleashed_outline = UnleashedOutline(source_outline,
                                             ['(-', '-)'],
                                             ['(-', '-)'])

        # The last item of the ancestry record holds the requested node.
        checked_node = unleashed_outline.get_node(node_number)[-1].node

        expected_by_attribute = (
            (text, checked_node.text),
            (note, checked_node.note),
            (text_tag, checked_node.text_tag),
            (note_tag, checked_node.note_tag),
        )
        for expected, actual in expected_by_attribute:
            self.assertEqual(expected, actual)
Пример #15
0
    def test_child_access(self):
        """Checks that the top outline node reports three children by len() and by iteration, consistently."""
        top_level_node = Outline.from_opml(self.test_outline).top_outline_node

        self.assertEqual(3, len(top_level_node))

        child_outline_nodes = list(top_level_node)
        self.assertEqual(3, len(child_outline_nodes))

        # A second iteration must yield OutlineNode objects equal to those listed above.
        for index, child in enumerate(top_level_node):
            listed_child = child_outline_nodes[index]
            self.assertIsInstance(child, OutlineNode)
            self.assertIsInstance(listed_child, OutlineNode)
            self.assertEqual(child, listed_child)
    def test_unleashed_outline_node(self):
        """
        Smoke test: reads depth and child_number from the second unleashed node record.

        There are no assertions; the test only fails if one of the accesses raises.
        """
        opml_path = os.path.join(tcfg.input_files_root, 'data_node_descriptor',
                                 'opml_data_extraction_test_02.opml')
        unleashed_outline = UnleashedOutline(Outline.from_opml(opml_path))

        node_ancestry_record = list(unleashed_outline.iter_unleashed_nodes())[1]

        # The values are deliberately unused; only the accesses themselves are exercised.
        _depth = node_ancestry_record.depth
        _child_number = node_ancestry_record[1].child_number
    def test_output_generator_csv_02(self, opml_filename, output_filename,
                                     json_dns_file, row, col, expected_result):
        """
        Creates csv file from outline using JSON data node specifier, then checks one cell.

        :param opml_filename: OPML input file name under input_files_root/output_generator.
        :param output_filename: CSV output file name under output_files_root/output_generator.
        :param json_dns_file: JSON specifier file name under input_files_root/data_node_descriptor.
        :param row: Row passed to the result checker.
        :param col: Column passed to the result checker.
        :param expected_result: Value the checker is asked to confirm at (row, col).
        :return:
        """
        opml_path = os.path.join(input_files_root, "output_generator",
                                 opml_filename)
        csv_path = os.path.join(output_files_root, "output_generator",
                                output_filename)
        specifier_path = os.path.join(input_files_root, "data_node_descriptor",
                                      json_dns_file)

        unleashed_outline = UnleashedOutline(
            Outline.from_opml(opml_path),
            default_text_tag_delimiter=['[*', '*]'])

        first_data_node_record = unleashed_outline.extract_data_nodes()[0]
        self.assertEqual('data_node_01',
                         first_data_node_record['data_node_name'])

        # Fetch the node itself through the list index stored in the record.
        node_index = first_data_node_record['data_node_list_index']
        data_node = unleashed_outline.list_unleashed_nodes()[node_index].node()

        specifier = DataNodeSpecifier.from_json_file(specifier_path)
        data_table = specifier.extract_data_node_dispatch(data_node)

        CsvOutputGenerator.create_csv_file(data_table, csv_path)

        cell_matches = CsvTestChecker(csv_path).check(row, col, expected_result)
        self.assertTrue(cell_matches)
Пример #18
0
    def test_opml_from_json(self,
                            index,
                            key1,
                            key2,
                            non_key1,
                            non_key2,
                            non_key3):
        """
        Data driven check that a data node specifier built from JSON parses a data node correctly.

        This is not meant as a full test of data node extraction; it uses data complex enough to give
        confidence that the from_json functionality works.

        :param index:     Index of the extracted record to check.
        :param key1:      Expected value of 'key_field_1'
        :param key2:      Expected value of 'key_field_2'
        :param non_key1:  Expected value of 'data_field_1'
        :param non_key2:  Expected value of 'data_field_2'
        :param non_key3:  Expected value of 'data_field_3'
        :return:
        """
        data_node_index = 31
        descriptor = DataNodeSpecifier.from_json_string(serialized_json_specifier_03x)

        # Run the descriptor over a node of the test outline and inspect the result.
        opml_path = os.path.join(test_root, 'opml_data_extraction_test_02.opml')
        unleashed_outline = UnleashedOutline(Outline.from_opml(opml_path))
        data_node = unleashed_outline.list_unleashed_nodes()[data_node_index].node()

        test_record = descriptor.extract_data_node_dispatch(data_node)[index]

        # Checked in this order; the first mismatch fails the test.
        expected_by_field = (
            ('key_field_1', key1),
            ('key_field_2', key2),
            ('data_field_1', non_key1),
            ('data_field_2', non_key2),
            ('data_field_3', non_key3),
        )
        for field, expected in expected_by_field:
            self.assertEqual(expected, test_record[field])
    def test_output_generator_csv_01(self, opml_filename, output_filename,
                                     ignored_json_file, row, col,
                                     expected_result):
        """
        Creates csv file from outline using in memory data node specifier, then checks one cell.

        NOTE: A JSON file name is passed in because the same test data drives both csv test
        methods; this method ignores it.

        :param opml_filename: OPML input file name under input_files_root/output_generator.
        :param output_filename: CSV output file name under output_files_root/output_generator.
        :param ignored_json_file: Unused here (see note above).
        :param row: Row passed to the result checker.
        :param col: Column passed to the result checker.
        :param expected_result: Value the checker is asked to confirm at (row, col).
        :return:
        """
        opml_path = os.path.join(input_files_root, "output_generator",
                                 opml_filename)
        csv_path = os.path.join(output_files_root, "output_generator",
                                output_filename)

        unleashed_outline = UnleashedOutline(
            Outline.from_opml(opml_path),
            default_text_tag_delimiter=['[*', '*]'])

        first_data_node_record = unleashed_outline.extract_data_nodes()[0]
        self.assertEqual('data_node_01',
                         first_data_node_record['data_node_name'])

        # Fetch the node itself through the list index stored in the record.
        node_index = first_data_node_record['data_node_list_index']
        data_node = unleashed_outline.list_unleashed_nodes()[node_index].node()

        specifier = DataNodeSpecifier(data_node_specifier_csv_test_01)
        data_table = specifier.extract_data_node_dispatch(data_node)

        CsvOutputGenerator.create_csv_file(data_table, csv_path)

        cell_matches = CsvTestChecker(csv_path).check(row, col, expected_result)
        self.assertTrue(cell_matches)
Пример #20
0
    def test_get_node_01(self, file_name, node_to_get, item_to_test, expected_value):
        """
        Fetches one node from an outline and checks a single named property of it.

        :param file_name: Name of the OPML file under input_files_root/relative_folder.
        :param node_to_get: Number of the node passed to Outline.get_node.
        :param item_to_test: Which property to check: "level", "child_number", "text" or "note".
        :param expected_value: Expected value of the selected property.
        :return:
        """
        input_file_path = os.path.join(input_files_root, relative_folder, file_name)

        outline = Outline.from_opml(input_file_path)

        ancestry_record_to_test: NodeAncestryRecord = outline.get_node(node_to_get)
        # The last item of the ancestry record is the one holding the requested node.
        ancestry_item_to_test: NodeAncestryItem = ancestry_record_to_test[-1]
        node_to_test: OutlineNode = ancestry_item_to_test.node

        if item_to_test == "level":
            self.assertEqual(expected_value, ancestry_record_to_test.depth)
        elif item_to_test == "child_number":
            self.assertEqual(expected_value, ancestry_item_to_test.child_number)
        elif item_to_test == "text":
            self.assertEqual(expected_value, node_to_test.text)
        elif item_to_test == "note":
            self.assertEqual(expected_value, node_to_test.note)
        else:
            # Bad test data: fail loudly rather than silently passing.
            self.fail(f"Unrecognised item to test {item_to_test}")
    def create_outline(self, outline_spec):
        """
        Builds the outline once the text file has been parsed into indent levels and line text.

        In an opml file the top outline nodes hang off the body element.  To keep the recursive tree
        building simple, the tree is first grown under a temporary outline element; its children are
        then handed to Outline.from_scratch to produce the well-formed outline.

        :param outline_spec: Parsed specification passed on to add_child_nodes.
        :return: The Outline created from the generated top level nodes.
        """
        temporary_root = self.create_outline_element(None)
        self.add_child_nodes(temporary_root, 0, outline_spec, 0)

        # Only the children of the temporary root become part of the outline.
        return Outline.from_scratch(list(temporary_root))
Пример #22
0
def main():
    """
    Command line entry point: extracts the first data node of an OPML outline into a CSV file.

    Expects three command line arguments: the OPML file path, the JSON data node specifier path
    and the CSV output path.  When run without arguments, hard coded test file paths are used as
    a debugging aid.

    Exits with status 1 if a wrong number of arguments is supplied.  Returns without writing a
    CSV file if the outline contains no data nodes.
    """
    num_arguments = len(sys.argv)
    expected_num_arguments = 3  # Note command line arguments will be one more as filename is first

    if num_arguments == 1:
        # Temporary hack to allow debugging. No parameters supplied --> use test files.
        print("Debug mode - hard coded arguments for command line")

        opml_path = "tests/test_resources/opml_data_extraction_test_01.opml"
        json_path = "tests/test_resources/custom_json_test_descriptors_risk_01.json"
        csv_path = "tests/test_resources/output_files/opml_data_extraction_test_01.csv"
    elif num_arguments != expected_num_arguments + 1:
        # Stop here: carrying on would try to open empty paths.
        print(f"Wrong number of arguments ({num_arguments - 1}, should be {expected_num_arguments})",
              file=sys.stderr)
        sys.exit(1)
    else:
        opml_path, json_path, csv_path = sys.argv[1:]

    outline = Outline.from_opml(opml_path)
    print("Successfully read outline, unleashing...")

    unleashed_outline = UnleashedOutline(outline)
    data_nodes = unleashed_outline.extract_data_nodes()
    print(f"Outline is unleashed, there are {len(data_nodes)} data nodes in this outline")
    for index, node in enumerate(data_nodes):
        print(f"{index}: {node['data_node_name']}")

    if not data_nodes:
        # Nothing to extract; avoid an IndexError on data_nodes[0] below.
        print("No data nodes found, nothing to process")
        return

    print("Processing first node")

    data_node_list_index = data_nodes[0]['data_node_list_index']
    data_node = unleashed_outline.list_unleashed_nodes()[data_node_list_index].node()

    data_node_specifier = DataNodeSpecifier.from_json_file(json_path)
    extracted_data_table = data_node_specifier.extract_data_node_dispatch(data_node)

    CsvOutputGenerator.create_csv_file(extracted_data_table, csv_path)
Пример #23
0
    def setUp(self) -> None:
        """Extracts the data table for node index 1 of the test outline and stores it on the test case."""
        data_node_index = 1
        tag_delimiters_text = ('[*', '*]')

        specifier = DataNodeSpecifier(data_node_specifier_test_driver[0])

        opml_path = os.path.join(tcfg.input_files_root,
                                 'data_node_descriptor',
                                 'opml_data_extraction_test_01.opml')
        unleashed_outline = UnleashedOutline(
            Outline.from_opml(opml_path),
            default_text_tag_delimiter=tag_delimiters_text)

        data_node = unleashed_outline.list_unleashed_nodes()[data_node_index].node()

        self.extracted_data_table = specifier.extract_data_node_dispatch(
            data_node)
Пример #24
0
    def test_field_access(self):
        """Checks text and note of nodes reached by indexing from the top outline node."""
        # The top level OutlineNode object is the starting point for all indexing below.
        top_level_node = Outline.from_opml(self.test_outline).top_outline_node

        # Indexing the top node must return the right child.
        first_child = top_level_node[0]
        self.assertEqual('H1:Heading A', first_child.text)
        self.assertEqual('Notes for Heading A', first_child.note)

        # Chained indexing from the top level must reach sub-nodes.
        # The (unleashed) tags stay in the text because a plain outline node (correctly) does not
        # recognise them; for the same reason white space in the tag text is NOT stripped.
        first_grandchild = top_level_node[0][0]
        self.assertEqual(' (-TAG-TEXT-H2B-)H2: Heading B', first_grandchild.text)
        self.assertEqual('Notes for Heading C', first_grandchild.note)

        # Two different routes to the same node must give equal results.
        via_grandchild = first_grandchild[0]
        via_top_level = top_level_node[0][0][0]

        self.assertEqual(via_grandchild, via_top_level)
Пример #25
0
    def test_output_generator_ppt_01(self, filename, record_num, record_name,
                                     expected_level, expected_text):
        """
        Creates a PowerPoint skeleton from the first data node of an outline and checks one slide record.

        :param filename: Name of the OPML input file under tcfg.input_files_root/test_file_folder_relative.
        :param record_num: Index of the record read back from the generated file to check.
        :param record_name: Label used in the failure message of the level assertion.
        :param expected_level: Expected level of the checked record.
        :param expected_text: Expected text of the checked record.
        :return:
        """
        input_folder = os.path.join(tcfg.input_files_root,
                                    test_file_folder_relative)
        opml_path = os.path.join(input_folder, filename)
        template_path = os.path.join(input_folder, "ppt_template_01.pptx")
        output_path = os.path.join(tcfg.output_files_root,
                                   test_file_folder_relative,
                                   "ppt_output_01.pptx")

        unleashed_outline = UnleashedOutline(
            Outline.from_opml(opml_path),
            default_text_tag_delimiter=['', ':'])

        first_data_node_record = unleashed_outline.extract_data_nodes()[0]
        self.assertEqual('data_node_01',
                         first_data_node_record['data_node_name'])

        # Fetch the node itself through the list index stored in the record.
        node_index = first_data_node_record['data_node_list_index']
        data_node = unleashed_outline.list_unleashed_nodes()[node_index].node()

        PowerPointGenerator().create_power_point_skeleton(
            data_node, template_path, output_path)

        # Read the generated file back and pick out the record under test.
        actual_level, actual_text = list(get_slide_data(output_path))[record_num]

        self.assertEqual(expected_level, actual_level,
                         f"Failed on {record_name}")
        self.assertEqual(expected_text, actual_text)
Пример #26
0
def generate_outline_from_test_data():
    """
    Builds an outline from outline_expected_node_results and writes it to test_opml_path.

    Records whose second field is 0 become the top level nodes the outline is created with;
    every other record is then added under the node looked up by its second field.
    """
    # Top level nodes must exist up front because they are supplied when creating the outline.
    top_level_records = (
        item for item in outline_expected_node_results if item[1] == 0
    )
    top_level_nodes = []
    for _, _, _, text, note in top_level_records:
        created_node = OutlineNode.create_outline_node(text, note)
        top_level_nodes.append(created_node._node)

    # The outline has to exist before the remaining nodes can be attached to it.
    new_outline = Outline.from_scratch(top_level_nodes)

    # Attach the remaining nodes, skipping the top level records handled above.
    child_records = (
        item for item in outline_expected_node_results if item[1] != 0
    )
    for _, parent_node_number, _, text, note in child_records:
        parent_node = new_outline.get_node(parent_node_number).node()
        parent_node.add_node(text, note)

    new_outline.write_opml(test_opml_path)
Пример #27
0
 def setUp(self) -> None:
     """Loads the valid OPML test file into self.outline."""
     opml_path = os.path.join(self.folder_from_resources_root,
                              'opml-test-valid-opml-01.opml')
     self.outline = Outline.from_opml(opml_path)
 def test_valid_opml_01(self, filename):
     """
     Checks that loading the given file raises no exception.

     :param filename: Name of the file, resolved under input_files_root/self.local_path.
     :return:
     """
     opml_path = os.path.join(input_files_root, self.local_path, filename)
     try:
         Outline.from_opml(opml_path)
     except Exception as gen_err:
         # Report any exception as a test failure rather than an error.
         self.fail(f"Unexpected exception raised {gen_err}")
Пример #29
0
 def setUp(self) -> None:
     """Loads the outline node test file into self.outline."""
     opml_path = os.path.join(tcfg.input_files_root, 'outline', 'outline_node',
                              'outline-test-valid-01.opml')
     self.outline = Outline.from_opml(opml_path)
Пример #30
0
 def pre_process_outline(self):
     """Placeholder implementation: returns the result of Outline.from_scratch() called without arguments."""
     # Replace with actual code
     placeholder_outline = Outline.from_scratch()
     return placeholder_outline