示例#1
0
    def test_returns_constant_value(self):
        # A ConstantExtractor built with 42 and bound to a field is
        # expected to hand back 42 from extract_value().
        extractor = ConstantExtractor(42)
        field = Field('example', extractor)
        extractor.bind(field)
        resource_info = ResourceInfo()

        extracted_value = extractor.extract_value(resource_info)

        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(42, extracted_value)
示例#2
0
    def test_returns_constant_value(self):
        # Binding a ConstantExtractor(42) to a field and extracting must
        # produce the very same constant, 42.
        extractor = ConstantExtractor(42)
        field = Field('example', extractor)
        extractor.bind(field)
        resource_info = ResourceInfo()

        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(42, extractor.extract_value(resource_info))
示例#3
0
    def test_returns_unicode_for_string_constant(self):
        # The extractor is created from the literal 'foo'; the extracted
        # value must compare equal to u'foo' and be a ``unicode`` instance.
        extractor = ConstantExtractor('foo')
        field = Field('example', extractor)
        extractor.bind(field)
        resource_info = ResourceInfo()
        extracted_value = extractor.extract_value(resource_info)

        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(u'foo', extracted_value)
        self.assertIsInstance(extracted_value, unicode)
示例#4
0
    def test_returns_unicode_for_string_constant(self):
        # Extracting from ConstantExtractor('foo') has to give back a
        # value equal to u'foo' whose type is ``unicode``.
        extractor = ConstantExtractor('foo')
        field = Field('example', extractor)
        extractor.bind(field)
        resource_info = ResourceInfo()
        extracted_value = extractor.extract_value(resource_info)

        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(u'foo', extracted_value)
        self.assertIsInstance(extracted_value, unicode)
示例#5
0
    def test_returns_unicode_for_multivalued_string_constant(self):
        # With a multivalued field, the list constant ['foo', 'bar'] must
        # come back as [u'foo', u'bar'] with every item being ``unicode``.
        extractor = ConstantExtractor(['foo', 'bar'])
        field = Field('example', extractor, multivalued=True)
        extractor.bind(field)
        resource_info = ResourceInfo()
        extracted_value = extractor.extract_value(resource_info)

        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual([u'foo', u'bar'], extracted_value)
        # Equality alone does not pin the item type, so check each one.
        for item in extracted_value:
            self.assertIsInstance(item, unicode)
示例#6
0
    def test_returns_unicode_for_multivalued_string_constant(self):
        # A ConstantExtractor holding ['foo', 'bar'], bound to a
        # multivalued field, must yield [u'foo', u'bar'] made of
        # ``unicode`` items only.
        extractor = ConstantExtractor(['foo', 'bar'])
        field = Field('example', extractor, multivalued=True)
        extractor.bind(field)
        resource_info = ResourceInfo()
        extracted_value = extractor.extract_value(resource_info)

        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual([u'foo', u'bar'], extracted_value)
        # The per-item type is verified separately from list equality.
        for item in extracted_value:
            self.assertIsInstance(item, unicode)
示例#7
0
    def test_raises_if_no_default_and_field_value_not_mapped(self):
        # The 'subcategory' field gets a fresh ConstantExtractor('physics');
        # extracting 'category' afterwards must raise NoValueExtracted.
        category_field = self.config.get_field('category')
        subcategory_field = self.config.get_field('subcategory')

        subcategory_field.extractor = ConstantExtractor('physics')
        subcategory_field.extractor.bind(subcategory_field)

        with self.assertRaises(NoValueExtracted):
            category_field.extractor.extract_value(self.resource_info)
示例#8
0
    def test_raises_if_no_default_and_field_doesnt_return_value(self):
        # The 'subcategory' field gets a ConstantExtractor(None);
        # extracting 'category' afterwards must raise NoValueExtracted.
        category_field = self.config.get_field('category')
        subcategory_field = self.config.get_field('subcategory')

        subcategory_field.extractor = ConstantExtractor(None)
        subcategory_field.extractor.bind(subcategory_field)

        with self.assertRaises(NoValueExtracted):
            category_field.extractor.extract_value(self.resource_info)
示例#9
0
    def test_asserts_proper_type_for_multivalued_extractors(self):
        # A multivalued field declared with type_=int and fed the constant
        # list [42] is expected to be reported by the engine as [42].
        field = Field('int_field',
                      extractor=ConstantExtractor([42]),
                      type_=int,
                      multivalued=True)
        engine = self._create_engine(fields=[field])

        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual({'int_field': [42]}, engine.extract_field_values())
示例#10
0
    def test_asserts_proper_type_for_extractors(self):
        # A field declared with type_=int but given ConstantExtractor('foo')
        # must make extract_field_values() raise ExtractionError.
        string_constant = ConstantExtractor('foo')
        int_field = Field('int_field', extractor=string_constant, type_=int)
        extraction_engine = self._create_engine(fields=[int_field])

        with self.assertRaises(ExtractionError):
            extraction_engine.extract_field_values()
示例#11
0
    def test_uses_default_if_field_value_not_mapped(self):
        # The category extractor's default is set to 'DEFAULT' and the
        # 'subcategory' field is given ConstantExtractor('physics');
        # extracting 'category' must then return u'DEFAULT' as ``unicode``.
        category = self.config.get_field('category')
        category.extractor.default = 'DEFAULT'
        subcategory = self.config.get_field('subcategory')
        subcategory.extractor = ConstantExtractor('physics')
        subcategory.extractor.bind(subcategory)
        extracted_value = category.extractor.extract_value(self.resource_info)

        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(u'DEFAULT', extracted_value)
        self.assertIsInstance(extracted_value, unicode)
示例#12
0
    def setUp(self):
        CrawlerTestCase.setUp(self)
        # TODO: Refactor this testcase
        example_site = Site('http://example.org')
        self.resource_info = ResourceInfo()
        self.mapping = {'travel': 'TRAVEL', 'music': 'MUSIC'}

        # 'subcategory' carries the constant 'travel'; 'category' gets a
        # FieldMappingExtractor pointed at 'subcategory' and self.mapping.
        subcategory_field = Field(
            'subcategory', extractor=ConstantExtractor('travel'))
        mapping_extractor = FieldMappingExtractor('subcategory', self.mapping)
        category_field = Field('category', extractor=mapping_extractor)

        # Only sites and fields are filled in; every other setting is None.
        self.config = Config(
            sites=[example_site],
            tika=None,
            solr=None,
            unique_field=None,
            url_field=None,
            last_modified_field=None,
            fields=[category_field, subcategory_field],
        )
示例#13
0
 sites=[
     Site('https://www.sportamt-bern.ch/',
          attributes={'site_area': 'Sportamt Bern'}),
     Site('http://www.sitemapxml.co.uk/',
          attributes={'site_area': 'Sitemap XML'}),
     Site('http://www.pctipp.ch/', attributes={'site_area': 'PCtipp'}),
     Site('http://mailchimp.com', attributes={'site_area': 'MailChimp'}),
     Site('https://bgs.zg.ch', attributes={'site_area':
                                           'Gesetzessammlung'}),
 ],
 unique_field='UID',
 url_field='path_string',
 last_modified_field='modified',
 fields=[
     Field('allowedRolesAndUsers',
           extractor=ConstantExtractor(['Anonymous']),
           multivalued=True),
     Field('created', extractor=LastModifiedExtractor(), type_=datetime),
     Field('Creator', extractor=CreatorExtractor()),
     Field('Description', extractor=DescriptionExtractor()),
     Field('effective', extractor=IndexingTimeExtractor(), type_=datetime),
     Field('expires',
           extractor=ConstantExtractor(datetime(2050, 12, 31)),
           type_=datetime),
     Field('getId', extractor=SlugExtractor()),
     Field('getRemoteUrl', extractor=TargetURLExtractor()),
     Field('modified', extractor=LastModifiedExtractor(), type_=datetime),
     Field('object_type',
           extractor=FieldMappingExtractor('portal_type',
                                           OBJECT_TYPE_MAPPING,
                                           default='File')),