Exemplo n.º 1
0
            A('price', "Product price, including any discounts and tax or vat", 
                contains_any_numbers, True),    
            A('image_urls', "URLs for one or more images", image_url, True),
            A('description', "The full description of the product", html),
            ]
        )

# Test descriptor with two attributes: 'description', handled by the
# `notags` extractor, and 'price', handled by `contains_any_numbers`.
SAMPLE_DESCRIPTOR2 = ItemDescriptor(
    'test',
    'item test',
    [
        A('description', 'description field without tags', notags),
        A(
            'price',
            "Product price, including any discounts and tax or vat",
            contains_any_numbers,
        ),
    ],
)

# Test descriptor with a single 'phone' attribute. Its extractor feeds the
# region's `text_content` to `contains_any_numbers`.
SAMPLE_DESCRIPTOR3 = ItemDescriptor(
    'test',
    'item test',
    [
        A(
            'phone',
            'phone number',
            lambda x: contains_any_numbers(x.text_content),
        ),
    ],
)

# Test descriptor with a single 'description' attribute whose extractor
# returns the region's `text_content` unchanged (per the descriptor's own
# description string, this is what strips the tags).
SAMPLE_DESCRIPTOR4 = ItemDescriptor(
    'test',
    'item test, removes tags from description attribute',
    [
        A(
            'description',
            'description field without tags',
            lambda x: x.text_content,
        ),
    ],
)

# A list of (test name, [templates], page, extractors, expected_result)
TEST_DATA = [
    # extract from a similar page
    ('similar page extraction', [ANNOTATED_PAGE1], EXTRACT_PAGE1, DEFAULT_DESCRIPTOR,
        {u'title': [u'Nice Product'], u'description': [u'wonderful product'], 
            u'image_url': [u'nice_product.jpg']}
    ),
    # strip the first 5 characters from the title
    ('extractor test', [ANNOTATED_PAGE1], EXTRACT_PAGE1,
        ItemDescriptor('test', 'product test', 
Exemplo n.º 2
0
 def extract(self, htmlregion):
     """Match and extract only text that has at least one number in it.

     The region's ``text_content`` is handed to ``contains_any_numbers``
     and that call's result is returned as-is.
     """
     number_matcher = contains_any_numbers
     return number_matcher(htmlregion.text_content)
Exemplo n.º 3
0
 def extract(self, htmlregion):
     """Delegate to ``extractors.contains_any_numbers``.

     Passes the region's ``text_content`` to that helper and returns its
     result unchanged. Presumably the helper only yields a value when the
     text holds a number -- confirm against the extractors module.
     """
     number_matcher = extractors.contains_any_numbers
     return number_matcher(htmlregion.text_content)
Exemplo n.º 4
0
 def extract(self, htmlregion):
     """Extract strings containing at least one number; match nothing else.

     Returns the result of ``contains_any_numbers`` applied to the
     region's ``text_content``.
     """
     find_numbers = contains_any_numbers
     return find_numbers(htmlregion.text_content)
Exemplo n.º 5
0
Arquivo: price.py Projeto: 01-/portia
 def extract(self, htmlregion):
     """Return ``extractors.contains_any_numbers`` of the region's text.

     The helper receives ``htmlregion.text_content``; its return value is
     passed back to the caller untouched.
     """
     find_numbers = extractors.contains_any_numbers
     return find_numbers(htmlregion.text_content)