def test_get_formatted_content_from_tags(self):
    """Check the formatted text extracted from the ``formatting_test`` div.

    Reads the ``test/test.html`` fixture, asks
    ``htmlparser.get_formatted_text_from_tags`` for the ``div`` whose ``id``
    is ``formatting_test``, and compares the first returned item with the
    expected bulleted text.
    """
    tag_filter = {'name': 'div', 'attrs': {'id': 'formatting_test'}}

    # Read through a context manager so the fixture file handle is closed
    # deterministically instead of being left for the garbage collector.
    with open('test/test.html') as fixture:
        html = fixture.read()

    content = htmlparser.get_formatted_text_from_tags(html, tag_filter)

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(content[0], ' * one\n * two\n\n')
def apply_filter(html_string, filter):
    """Extract the first match described by ``filter`` from ``html_string``.

    ``filter`` must provide the keys ``'tag'``, ``'attribute'`` and
    ``'value'``; they are combined into the tag filter handed to
    ``htmlparser``. The optional ``'type'`` key selects the extractor:

    * ``'a_href'`` -> ``htmlparser.get_href_from_tags``
    * ``'text'``   -> ``htmlparser.get_formatted_text_from_tags``
    * ``'list'``   -> ``htmlparser.get_content_list_from_tags``
    * any other value -> ``htmlparser.get_attr_from_tags`` (the value is
      forwarded in the tag filter under ``'type'``)
    * key absent   -> ``htmlparser.get_content_from_tags``

    Returns element ``0`` of whatever the chosen extractor returns.
    """
    # Every extractor receives the same base tag filter.
    tag_filter = {
        'name': filter['tag'],
        'attrs': {filter['attribute']: filter['value']},
    }

    if 'type' not in filter:
        extractor = htmlparser.get_content_from_tags
    else:
        kind = filter['type']
        if kind == 'a_href':
            extractor = htmlparser.get_href_from_tags
        elif kind == 'text':
            extractor = htmlparser.get_formatted_text_from_tags
        elif kind == 'list':
            extractor = htmlparser.get_content_list_from_tags
        else:
            # Only the generic attribute extractor is told which type to read.
            tag_filter['type'] = kind
            extractor = htmlparser.get_attr_from_tags

    matches = extractor(html_string, tag_filter)
    return matches[0]