def test_read_invalid_xml_01(self, filename, expected_error_msg):
    """
    Check that loading an invalid file raises ParseError carrying the expected message.

    :param filename: File name, resolved under input_files_root / self.local_path.
    :param expected_error_msg: Text compared against the raised error's ``msg``
        using test_string_segment.
    """
    invalid_file_path = os.path.join(input_files_root, self.local_path, filename)
    try:
        Outline.from_opml(invalid_file_path)
    except ParseError as parse_failure:
        message_matches = test_string_segment(expected_error_msg, parse_failure.msg)
        self.assertTrue(message_matches)
    except Exception as unexpected:
        # Any other exception type is reported as a test failure rather than an error.
        self.fail(f"Unexpected exception raised {unexpected}")
    else:
        # Reaching here means the file was accepted, which is itself a failure.
        self.fail(f'Exception expected but wasn\'t raised. Msg {expected_error_msg}')
def test_opml_creation_nodes(self, opml_file_name, node_sequence_number,
                             exp_text_value, exp_note_value):
    """
    Round-trip an OPML file through write_opml and check one node's text and note.

    :param opml_file_name: Input file name under input_files_root / relative_folder.
    :param node_sequence_number: Index into the re-read outline's list_nodes() result.
    :param exp_text_value: Expected ``text`` of the selected node.
    :param exp_note_value: Expected ``note`` of the selected node.
    """
    source_path = os.path.join(input_files_root, relative_folder, opml_file_name)
    round_trip_path = os.path.join(output_files_root, relative_folder,
                                   test_opml_output_file_01)

    # Read the original and immediately write it back out.
    Outline.from_opml(source_path).write_opml(round_trip_path)

    # Re-read what was written and inspect the requested node.
    reloaded_outline = Outline.from_opml(round_trip_path)
    node_under_test = reloaded_outline.list_nodes()[node_sequence_number].node()

    self.assertEqual(exp_text_value, node_under_test.text)
    self.assertEqual(exp_note_value, node_under_test.note)
def __init__(self, outline_path, default_tag_delimiters):
    """
    Load the outline stored at the given OPML path and keep it on the instance.

    :param outline_path: Full path to opml file for outline.
    :param default_tag_delimiters: Tag delimiter to use if one isn't supplied for a
        given descriptor. Not referenced by this constructor body.
    """
    loaded_outline = Outline.from_opml(outline_path)
    self.outline = loaded_outline
def test_json_desc_version_number(self):
    """
    Tests ability to parse a file with heading levels mapped directly to outline
    level, so Outline Level 1 is Heading Level 1 etc.

    Also uses JSON syntax which doesn't specify all matching criteria - these
    should be assigned None in the JSON translation.

    :return:
    """
    expected_row_count = 32
    root_index = 1
    specifier_path = os.path.join(
        self.test_root, "custom_json_test_descriptors_generic_levels.json")
    opml_path = os.path.join(self.test_root, "custom_json_test_descriptors.opml")

    # Read json file into a string (later this will be done within the outline engine).
    with open(specifier_path, 'r') as specifier_file:
        specifier_json = specifier_file.read()

    # Read opml file into an outline and pick the node to extract from.
    unleashed = UnleashedOutline(Outline.from_opml(opml_path))
    all_nodes = list(unleashed.iter_unleashed_nodes())
    root_node = all_nodes[root_index].node()

    specifier = DataNodeSpecifier.from_json_string(specifier_json)
    extracted_rows = specifier.extract_data_node_dispatch(root_node)

    self.assertEqual(expected_row_count, len(extracted_rows))
def test_opml_creation_header_fields(self, opml_file_name, field_name,
                                     expected_field_value):
    """
    Reads an OPML file, writes it out, then reads it in again to check that
    nothing was changed in writing it.

    :param opml_file_name: Input file name under input_files_root / relative_folder.
    :param field_name: Attribute name looked up on the re-read outline.
    :param expected_field_value: Value that attribute is expected to hold.
    :return:
    """
    source_path = os.path.join(input_files_root, relative_folder, opml_file_name)
    round_trip_path = os.path.join(output_files_root, relative_folder,
                                   test_opml_output_file_01)

    Outline.from_opml(source_path).write_opml(round_trip_path)
    reloaded_outline = Outline.from_opml(round_trip_path)

    # Attribute is fetched by name so one test body serves every header field.
    self.assertEqual(expected_field_value, getattr(reloaded_outline, field_name))
def test_output_generator(self, filename, record_num, record_name,
                          expected_level, expected_text):
    """
    Generate a PowerPoint file with PptOutputGeneratorSimple and check one slide record.

    :param filename: OPML input file name under the test input folder.
    :param record_num: Index into the records yielded by get_slide_data.
    :param record_name: Label included in the failure message for the level check.
    :param expected_level: Expected first element of the selected record.
    :param expected_text: Expected second element of the selected record.
    """
    opml_path = os.path.join(input_files_root, test_file_folder_relative, filename)
    template_path = os.path.join(input_files_root, test_file_folder_relative,
                                 "ppt_template_02.pptx")
    output_path = os.path.join(output_files_root, test_file_folder_relative,
                               "ppt_output_02.pptx")

    unleashed_outline = UnleashedOutline(Outline.from_opml(opml_path),
                                         default_text_tag_delimiter=['', ':'])

    # Locate the first data node via the index recorded by extract_data_nodes().
    data_nodes = unleashed_outline.extract_data_nodes()
    all_nodes = unleashed_outline.list_unleashed_nodes()
    first_data_node = all_nodes[data_nodes[0]['data_node_list_index']].node()

    specifier = DataNodeSpecifier(dns)
    data_table = specifier.extract_data_node_dispatch(first_data_node)
    PptOutputGeneratorSimple.generate_ppt(data_table, output_path, template_path)

    slide_records = list(get_slide_data(output_path))
    actual_level, actual_text = slide_records[record_num]
    self.assertEqual(expected_level, actual_level, f"Failed on {record_name}")
    self.assertEqual(expected_text, actual_text)
def setUp(self) -> None:
    """Load the test outline, wrap it in UnleashedOutline and cache its node list."""
    opml_path = os.path.join(self.test_root, 'opml_data_extraction_test_02.opml')
    self.unleashed_outline = UnleashedOutline(Outline.from_opml(opml_path))
    self.outline_node_list = self.unleashed_outline.list_unleashed_nodes()
def test_iter_nodes(self):
    """
    Walk every node of the outline and print it.

    The only assertion is trivially true, so the test passes as long as loading
    and iteration complete without raising.
    """
    loaded_outline = Outline.from_opml(opml_path=self.input_file_full_path,
                                       full_validate=True)
    for position, ancestry_record in enumerate(loaded_outline.iter_nodes()):
        print(f'Node {position}: {ancestry_record.node()}')

    self.assertTrue(True)
def setUp(self) -> None:
    """Build an UnleashedOutline from the valid test OPML file using the regex tag delimiters."""
    opml_path = os.path.join(tcfg.input_files_root, 'outline', 'opml',
                             'opml-test-valid-opml-01.opml')
    source_outline = Outline.from_opml(opml_path)
    self.unleashed_outline = UnleashedOutline(
        source_outline,
        default_text_tag_delimiter=text_tag_regex,
        default_note_tag_delimiter=note_tag_regex,
    )
def test_identify_data_node_01(self, file_path, expected_node_index, field_name,
                               expected_field_value):
    """
    Check one field of one record returned by extract_data_nodes().

    :param file_path: Path of the OPML file to load.
    :param expected_node_index: Index of the record within the extract_data_nodes() result.
    :param field_name: Key looked up in that record.
    :param expected_field_value: Value expected under that key.
    """
    unleashed = UnleashedOutline(Outline.from_opml(file_path))
    node_record = unleashed.extract_data_nodes()[expected_node_index]
    self.assertEqual(expected_field_value, node_record[field_name])
def setUp(self) -> None:
    """Load the extraction test outline and cache all of its unleashed nodes in a list."""
    text_delimiters = ('[*', '*]')
    opml_path = os.path.join(tcfg.input_files_root, 'data_node_descriptor',
                             'opml_data_extraction_test_01.opml')

    # NOTE(review): the delimiters are also passed as the second positional
    # argument to from_opml - confirm that parameter is meant to receive them.
    source_outline = Outline.from_opml(opml_path, text_delimiters)
    unleashed = UnleashedOutline(source_outline,
                                 default_text_tag_delimiter=text_delimiters)

    # Create list of all nodes (plus ancestry) to allow access to nodes by index.
    self.node_list = list(unleashed.iter_unleashed_nodes())
def test_outline_create_01(self, node_sequence_number, parent_node_number, level,
                           text, note):
    """
    Regenerate the test outline file, read it back and check one node.

    :param node_sequence_number: Index passed to Outline.get_node.
    :param parent_node_number: Not used by this test body.
    :param level: Expected depth of the retrieved ancestry record.
    :param text: Expected node text.
    :param note: Expected node note.
    """
    generate_outline_from_test_data()
    ancestry_record = Outline.from_opml(test_opml_path).get_node(node_sequence_number)

    # The last item of the ancestry record holds the node itself.
    node_under_test = ancestry_record[-1].node

    self.assertEqual(level, ancestry_record.depth)
    self.assertEqual(text, node_under_test.text)
    self.assertEqual(note, node_under_test.note)
def test_head_data_from_opml(self, field_name, expected_value):
    """
    Check a single attribute of an outline loaded from the test data file.

    :param field_name: Name of the attribute to read from the outline.
    :param expected_value: Value the attribute is expected to hold.
    """
    data_file_path = os.path.join(tcfg.input_files_root, self.local_path,
                                  self.test_data_file)
    loaded_outline = Outline.from_opml(data_file_path)

    # Access field by attribute name to allow data driven approach for test
    self.assertEqual(expected_value, getattr(loaded_outline, field_name))
def test_unleashed_outline(self, node_number, text, note, text_tag, note_tag):
    """
    Check text, note and both tags of one node of an unleashed outline.

    :param node_number: Index passed to UnleashedOutline.get_node.
    :param text: Expected node text.
    :param note: Expected node note.
    :param text_tag: Expected ``text_tag`` of the node.
    :param note_tag: Expected ``note_tag`` of the node.
    """
    # Two separate list objects, one per delimiter argument.
    text_delimiters = ['(-', '-)']
    note_delimiters = ['(-', '-)']
    unleashed = UnleashedOutline(Outline.from_opml(input_file_path),
                                 text_delimiters, note_delimiters)

    # The last item of the ancestry record holds the node itself.
    node_under_test = unleashed.get_node(node_number)[-1].node

    self.assertEqual(text, node_under_test.text)
    self.assertEqual(note, node_under_test.note)
    self.assertEqual(text_tag, node_under_test.text_tag)
    self.assertEqual(note_tag, node_under_test.note_tag)
def test_child_access(self):
    """
    Check that the top outline node reports three children through len(), list()
    and direct iteration, and that iteration and list() yield equal OutlineNode objects.
    """
    top_node = Outline.from_opml(self.test_outline).top_outline_node

    self.assertEqual(3, len(top_node))

    children_as_list = list(top_node)
    self.assertEqual(3, len(children_as_list))

    for position, iterated_child in enumerate(top_node):
        listed_child = children_as_list[position]
        self.assertIsInstance(iterated_child, OutlineNode)
        self.assertIsInstance(listed_child, OutlineNode)
        self.assertEqual(iterated_child, listed_child)
def test_unleashed_outline_node(self):
    """
    Smoke test: read ``depth`` and an item's ``child_number`` from the second
    unleashed node record. There are no assertions; the test passes if nothing raises.
    """
    opml_path = os.path.join(tcfg.input_files_root, 'data_node_descriptor',
                             'opml_data_extraction_test_02.opml')
    unleashed = UnleashedOutline(Outline.from_opml(opml_path))
    ancestry_record = list(unleashed.iter_unleashed_nodes())[1]

    # The reads below are the whole test; the values themselves are not checked.
    _depth = ancestry_record.depth
    _child_number = ancestry_record[1].child_number
def test_output_generator_csv_02(self, opml_filename, output_filename, json_dns_file,
                                 row, col, expected_result):
    """
    Creates csv file from outline using JSON data node specifier.

    :param opml_filename: OPML input file name under the "output_generator" input folder.
    :param output_filename: CSV file name under the "output_generator" output folder.
    :param json_dns_file: Specifier file name under the "data_node_descriptor" input folder.
    :param row: Row passed to CsvTestChecker.check.
    :param col: Column passed to CsvTestChecker.check.
    :param expected_result: Expected value passed to CsvTestChecker.check.
    :return:
    """
    opml_path = os.path.join(input_files_root, "output_generator", opml_filename)
    csv_path = os.path.join(output_files_root, "output_generator", output_filename)
    specifier_path = os.path.join(input_files_root, "data_node_descriptor",
                                  json_dns_file)

    unleashed_outline = UnleashedOutline(Outline.from_opml(opml_path),
                                         default_text_tag_delimiter=['[*', '*]'])

    # The first data node found must be the one this test is written against.
    first_entry = unleashed_outline.extract_data_nodes()[0]
    self.assertEqual('data_node_01', first_entry['data_node_name'])

    node_index = first_entry['data_node_list_index']
    data_node = unleashed_outline.list_unleashed_nodes()[node_index].node()

    specifier = DataNodeSpecifier.from_json_file(specifier_path)
    data_table = specifier.extract_data_node_dispatch(data_node)
    CsvOutputGenerator.create_csv_file(data_table, csv_path)

    checker = CsvTestChecker(csv_path)
    self.assertTrue(checker.check(row, col, expected_result))
def test_opml_from_json(self, index, key1, key2, non_key1, non_key2, non_key3):
    """
    Data driven test to check that a data node specifier record imported from JSON
    can be used correctly to parse a data node and get correct results.

    The intention isn't to do a full test of data node extract functionality but to
    use sufficiently complex data to provide confidence that the from_json
    functionality is working.

    :param index: Index of the node under the data node where the data is to be checked.
    :param key1: First key field expected to be in the extracted data
    :param key2: Second key field expected to be in the extracted data
    :param non_key1: Expected data
    :param non_key2: Expected data
    :param non_key3: Expected data
    :return:
    """
    specifier = DataNodeSpecifier.from_json_string(serialized_json_specifier_03x)

    # Use the specifier to process a node and check that output results are correct.
    data_node_position = 31
    opml_path = os.path.join(test_root, 'opml_data_extraction_test_02.opml')
    unleashed = UnleashedOutline(Outline.from_opml(opml_path))
    data_node = unleashed.list_unleashed_nodes()[data_node_position].node()

    record_under_test = specifier.extract_data_node_dispatch(data_node)[index]

    self.assertEqual(key1, record_under_test['key_field_1'])
    self.assertEqual(key2, record_under_test['key_field_2'])
    self.assertEqual(non_key1, record_under_test['data_field_1'])
    self.assertEqual(non_key2, record_under_test['data_field_2'])
    self.assertEqual(non_key3, record_under_test['data_field_3'])
def test_output_generator_csv_01(self, opml_filename, output_filename,
                                 ignored_json_file, row, col, expected_result):
    """
    Creates csv file from outline using in memory data node specifier.

    NOTE: A JSON file name is passed in as the generator is used across both test
    methods, but in this method is ignored.

    :param opml_filename: OPML input file name under the "output_generator" input folder.
    :param output_filename: CSV file name under the "output_generator" output folder.
    :param ignored_json_file: Unused here (see note above).
    :param row: Row passed to CsvTestChecker.check.
    :param col: Column passed to CsvTestChecker.check.
    :param expected_result: Expected value passed to CsvTestChecker.check.
    :return:
    """
    opml_path = os.path.join(input_files_root, "output_generator", opml_filename)
    csv_path = os.path.join(output_files_root, "output_generator", output_filename)

    unleashed_outline = UnleashedOutline(Outline.from_opml(opml_path),
                                         default_text_tag_delimiter=['[*', '*]'])

    # The first data node found must be the one this test is written against.
    first_entry = unleashed_outline.extract_data_nodes()[0]
    self.assertEqual('data_node_01', first_entry['data_node_name'])

    node_index = first_entry['data_node_list_index']
    data_node = unleashed_outline.list_unleashed_nodes()[node_index].node()

    specifier = DataNodeSpecifier(data_node_specifier_csv_test_01)
    data_table = specifier.extract_data_node_dispatch(data_node)
    CsvOutputGenerator.create_csv_file(data_table, csv_path)

    checker = CsvTestChecker(csv_path)
    self.assertTrue(checker.check(row, col, expected_result))
def test_get_node_01(self, file_name, node_to_get, item_to_test, expected_value):
    """
    Check one aspect of the ancestry record returned by Outline.get_node.

    :param file_name: OPML file name under input_files_root / relative_folder.
    :param node_to_get: Index passed to Outline.get_node.
    :param item_to_test: One of "level", "child_number", "text" or "note"; any
        other value fails the test.
    :param expected_value: Value expected for the selected item.
    """
    opml_path = os.path.join(input_files_root, relative_folder, file_name)
    record: NodeAncestryRecord = Outline.from_opml(opml_path).get_node(node_to_get)
    last_item: NodeAncestryItem = record[-1]
    node: OutlineNode = last_item.node

    # Select the actual value first, then make a single comparison.
    if item_to_test == "level":
        actual_value = record.depth
    elif item_to_test == "child_number":
        actual_value = last_item.child_number
    elif item_to_test == "text":
        actual_value = node.text
    elif item_to_test == "note":
        actual_value = node.note
    else:
        # self.fail raises, so actual_value is always bound below.
        self.fail(f"Unrecognised item to tes {item_to_test}")

    self.assertEqual(expected_value, actual_value)
def create_outline(self, outline_spec):
    """
    Build an Outline from an already-parsed outline specification.

    In an opml file the top-level outline nodes hang off the body element. To keep
    the recursive tree construction simple, the tree is first grown under a single
    temporary outline element; that element's children are then handed to
    Outline.from_scratch to produce the final outline.

    :param outline_spec: Specification passed through to add_child_nodes.
    :return: The result of Outline.from_scratch for the generated child elements.
    """
    temporary_root = self.create_outline_element(None)
    self.add_child_nodes(temporary_root, 0, outline_spec, 0)

    # Only the children are passed on; the temporary root itself is discarded.
    child_elements = list(temporary_root)
    return Outline.from_scratch(child_elements)
def main():
    """
    Extract the first data node of an OPML outline into a CSV file.

    Command line arguments (after the script name): opml path, JSON data node
    specifier path, CSV output path. With no arguments, hard coded test resource
    paths are used as a debugging aid. Any other argument count prints an error
    and returns without processing anything.

    :return: None
    """
    expected_num_arguments = 3
    # sys.argv[0] is the script name, so user supplied arguments start at index 1.
    supplied_arguments = sys.argv[1:]

    if not supplied_arguments:
        # Temporary hack to allow debugging. No parameters supplied --> use test files.
        print("Debug mode - hard coded arguments for command line")
        opml_path = "tests/test_resources/opml_data_extraction_test_01.opml"
        json_path = "tests/test_resources/custom_json_test_descriptors_risk_01.json"
        csv_path = "tests/test_resources/output_files/opml_data_extraction_test_01.csv"
    elif len(supplied_arguments) != expected_num_arguments:
        print(f"Wrong number of arguments ({len(supplied_arguments)}) "
              f"(should be {expected_num_arguments})")
        # Stop here: continuing would try to read an outline from an empty path.
        return
    else:
        opml_path, json_path, csv_path = supplied_arguments

    outline = Outline.from_opml(opml_path)
    print("Successfully read outline, unleashing...")

    unleashed_outline = UnleashedOutline(outline)
    data_nodes = unleashed_outline.extract_data_nodes()
    print(f"Outline is unleashed, there are {len(data_nodes)} data nodes in this outline")
    for index, node in enumerate(data_nodes):
        print(f"{index}: {node['data_node_name']}")

    if not data_nodes:
        # Nothing to index into; avoid an IndexError on data_nodes[0].
        print("No data nodes found, nothing to extract")
        return

    print("Processing first node")
    data_node_list_index = data_nodes[0]['data_node_list_index']
    data_node = unleashed_outline.list_unleashed_nodes()[data_node_list_index].node()

    data_node_specifier = DataNodeSpecifier.from_json_file(json_path)
    extracted_data_table = data_node_specifier.extract_data_node_dispatch(data_node)

    CsvOutputGenerator.create_csv_file(extracted_data_table, csv_path)
def setUp(self) -> None:
    """Extract the data table for the node at index 1 of the test outline and store it."""
    node_position = 1
    specifier = DataNodeSpecifier(data_node_specifier_test_driver[0])
    text_delimiters = ('[*', '*]')

    opml_path = os.path.join(tcfg.input_files_root, 'data_node_descriptor',
                             'opml_data_extraction_test_01.opml')
    unleashed = UnleashedOutline(Outline.from_opml(opml_path),
                                 default_text_tag_delimiter=text_delimiters)

    data_node = unleashed.list_unleashed_nodes()[node_position].node()
    self.extracted_data_table = specifier.extract_data_node_dispatch(data_node)
def test_field_access(self):
    """Check text/note access on child and grandchild nodes reached by indexing."""
    # Access the top level OutlineNode object
    top_node = Outline.from_opml(self.test_outline).top_outline_node

    # Check that accessing child node gets the right one
    first_child = top_node[0]
    self.assertEqual('H1:Heading A', first_child.text)
    self.assertEqual('Notes for Heading A', first_child.note)

    # Check that accessing sub-nodes from top level works ok.
    # Note that (unleashed) tags are in the text but (correctly) not recognised
    # by outline node. Also note that white space is NOT ignored in the tag text
    # as it isn't recognised as a tag.
    first_grandchild = top_node[0][0]
    self.assertEqual(' (-TAG-TEXT-H2B-)H2: Heading B', first_grandchild.text)
    self.assertEqual('Notes for Heading C', first_grandchild.note)

    # Check that two ways of getting to the same node reveal the same one.
    via_grandchild = first_grandchild[0]
    via_top_node = top_node[0][0][0]
    self.assertEqual(via_grandchild, via_top_node)
def test_output_generator_ppt_01(self, filename, record_num, record_name,
                                 expected_level, expected_text):
    """
    Generate a PowerPoint skeleton with PowerPointGenerator and check one slide record.

    :param filename: OPML input file name under the test input folder.
    :param record_num: Index into the records yielded by get_slide_data.
    :param record_name: Label included in the failure message for the level check.
    :param expected_level: Expected first element of the selected record.
    :param expected_text: Expected second element of the selected record.
    """
    opml_path = os.path.join(tcfg.input_files_root, test_file_folder_relative,
                             filename)
    template_path = os.path.join(tcfg.input_files_root, test_file_folder_relative,
                                 "ppt_template_01.pptx")
    output_path = os.path.join(tcfg.output_files_root, test_file_folder_relative,
                               "ppt_output_01.pptx")

    unleashed_outline = UnleashedOutline(Outline.from_opml(opml_path),
                                         default_text_tag_delimiter=['', ':'])

    # The first data node found must be the one this test is written against.
    first_entry = unleashed_outline.extract_data_nodes()[0]
    self.assertEqual('data_node_01', first_entry['data_node_name'])

    node_index = first_entry['data_node_list_index']
    data_node = unleashed_outline.list_unleashed_nodes()[node_index].node()

    ppt_generator = PowerPointGenerator()
    ppt_generator.create_power_point_skeleton(data_node, template_path, output_path)

    slide_records = list(get_slide_data(output_path))
    actual_level, actual_text = slide_records[record_num]
    self.assertEqual(expected_level, actual_level, f"Failed on {record_name}")
    self.assertEqual(expected_text, actual_text)
def generate_outline_from_test_data():
    """
    Build an outline from outline_expected_node_results and write it to test_opml_path.

    Records whose second field is 0 become the top level nodes supplied to
    Outline.from_scratch; every other record is added under the node returned by
    get_node for its parent node number.
    """
    # First collect all the top level nodes as they are supplied when creating the outline.
    top_level_records = [rec for rec in outline_expected_node_results if rec[1] == 0]
    top_level_nodes = [
        OutlineNode.create_outline_node(text, note)._node
        for _, _, _, text, note in top_level_records
    ]

    # Now create the outline before adding other nodes.
    new_outline = Outline.from_scratch(top_level_nodes)

    # Now add other nodes, skipping the records for top level ones.
    child_records = [rec for rec in outline_expected_node_results if rec[1] != 0]
    for _, parent_node_number, _, text, note in child_records:
        new_outline.get_node(parent_node_number).node().add_node(text, note)

    new_outline.write_opml(test_opml_path)
def setUp(self) -> None:
    """Load the valid test OPML file from self.folder_from_resources_root into self.outline."""
    opml_path = os.path.join(self.folder_from_resources_root,
                             'opml-test-valid-opml-01.opml')
    self.outline = Outline.from_opml(opml_path)
def test_valid_opml_01(self, filename):
    """
    Check that a valid OPML file loads without raising.

    :param filename: File name, resolved under input_files_root / self.local_path.
    """
    valid_file_path = os.path.join(input_files_root, self.local_path, filename)
    try:
        Outline.from_opml(valid_file_path)
    except Exception as unexpected:
        # Report any exception as a test failure rather than an error.
        self.fail(f"Unexpected exception raised {unexpected}")
def setUp(self) -> None:
    """Load the outline node test OPML file into self.outline."""
    opml_path = os.path.join(tcfg.input_files_root, 'outline', 'outline_node',
                             'outline-test-valid-01.opml')
    self.outline = Outline.from_opml(opml_path)
def pre_process_outline(self):
    """
    Placeholder implementation: returns the result of Outline.from_scratch() called
    with no arguments.

    :return: Whatever Outline.from_scratch() produces.
    """
    # Replace with actual code
    placeholder_outline = Outline.from_scratch()
    return placeholder_outline