Exemplo n.º 1
0
 def test_get_href_from_tags(self):
     """Check the hrefs returned for the tags selected by a name/attrs filter."""
     tag_filter = {
         'name': 'div',
         'attrs': {
             'id': 'href_test'
         }
     }
     # Read the fixture through a context manager so the file handle is
     # closed deterministically instead of being left to the garbage collector.
     with open('test/test.html') as fixture:
         html_string = fixture.read()
     links = htmlparser.get_href_from_tags(html_string, tag_filter)
     # assertEqual shows both values on failure; assertTrue(a == b) only
     # reports "False is not true".
     self.assertEqual(links, ['test1.com', 'test2.com'])
Exemplo n.º 2
0
def apply_filter(html_string, filter):
    """Run the htmlparser extractor selected by ``filter`` and return its first result.

    ``filter`` must provide the keys ``'tag'``, ``'attribute'`` and ``'value'``;
    they are assembled into a ``{'name': ..., 'attrs': {attribute: value}}``
    tag specification. The optional ``'type'`` key picks the extractor:

    * ``'a_href'`` -> ``htmlparser.get_href_from_tags``
    * ``'text'``   -> ``htmlparser.get_formatted_text_from_tags``
    * ``'list'``   -> ``htmlparser.get_content_list_from_tags``
    * any other value -> ``htmlparser.get_attr_from_tags``, with the value
      forwarded under the ``'type'`` key of the tag specification
    * key absent   -> ``htmlparser.get_content_from_tags``

    The extractor's return value is indexed with ``[0]``, so an extractor that
    returns an empty sequence makes this function raise (presumably
    ``IndexError`` -- depends on what the htmlparser helpers return).
    """
    # (type value, htmlparser function name) pairs for the dedicated extractors.
    dedicated = (
        ('a_href', 'get_href_from_tags'),
        ('text', 'get_formatted_text_from_tags'),
        ('list', 'get_content_list_from_tags'),
    )

    if 'type' not in filter:
        extractor_name = 'get_content_from_tags'
    else:
        requested = filter['type']
        # Unrecognised type values fall through to the generic attribute extractor.
        extractor_name = next(
            (method for label, method in dedicated if requested == label),
            'get_attr_from_tags',
        )

    # Resolve the helper lazily so only the one actually needed is looked up.
    extractor = getattr(htmlparser, extractor_name)

    tag_spec = {
        'name': filter['tag'],
        'attrs': {filter['attribute']: filter['value']},
    }
    if extractor_name == 'get_attr_from_tags':
        # Only the generic attribute extractor receives the requested type.
        tag_spec['type'] = filter['type']

    matches = extractor(html_string, tag_spec)
    return matches[0]