Exemplo n.º 1
0
    def test_zaincremental_harvester(self):
        """Run gather and fetch against an in-process OAI-PMH server with
        the ``incremental`` source option enabled.

        ``oaipmh.client.Client`` is replaced by a mock that hands back a
        ``ServerClient`` wrapping a local ``BatchingServer``. The real class
        is put back when the test finishes, pass or fail, so the patch does
        not leak into other tests.
        """
        client = CKANServer()
        metadata_registry = metadata.MetadataRegistry()
        metadata_registry.registerReader('oai_dc', oai_dc_reader)
        metadata_registry.registerWriter('oai_dc', oai_dc_writer)
        serv = BatchingServer(client, metadata_registry=metadata_registry)

        # Remember the real client class so it can be restored afterwards.
        original_client = oaipmh.client.Client
        oaipmh.client.Client = mock.Mock(
            return_value=ServerClient(serv, metadata_registry))
        try:
            harv = OAIPMHHarvester()
            harvest_job = HarvestJob()
            harvest_job.source = HarvestSource()
            harvest_job.source.title = "Test"
            harvest_job.source.url = "http://helda.helsinki.fi/oai/request"
            # The job start is dated one day ahead and the package revision
            # two days ahead of "now".
            # NOTE(review): presumably so the package counts as modified
            # after the job started -- confirm against the harvester.
            harvest_job.gather_started = datetime.now() + timedelta(days=1)
            harvest_job.source.config = '{"incremental":"True"}'
            harvest_job.source.type = "OAI-PMH"
            Session.add(harvest_job)

            rev = model.repo.new_revision()
            rev.timestamp = datetime.now() + timedelta(days=2)
            pkg = Package(name='footest', revision=rev)
            Session.add(pkg)
            pkg.save()

            # NOTE(review): assumes a group named 'roger' already exists in
            # the test database -- Group.get() is not checked for None here.
            roger = Group.get('roger')
            roger.add_package_by_name('footest')
            Session.add(roger)
            roger.save()

            gathered = harv.gather_stage(harvest_job)
            # Fail with a clear message instead of an IndexError/TypeError
            # on the indexing below when nothing was gathered.
            self.assertTrue(gathered, 'gather_stage returned no objects')
            harvest_object = HarvestObject.get(gathered[0])
            harv.fetch_stage(harvest_object)
            harvobj = json.loads(harvest_object.content)
            self.assertTrue(harvobj['records'])
        finally:
            oaipmh.client.Client = original_client
Exemplo n.º 2
0
 def gather_stage(self, harvest_job):
     """Pick a concrete harvester for the source and delegate gathering.

     The source URL is probed with an OAI-PMH ``Identify`` request. If the
     response cannot be parsed as XML the source is handled by
     ``DDIHarvester``; if the request succeeds it is handled by
     ``OAIPMHHarvester``. The chosen harvester is serialised with
     jsonpickle into each gathered object's JSON content under the
     ``'harv'`` key.

     :param harvest_job: job whose ``source.url`` is probed and gathered.
     :returns: list of harvest object ids; ``None`` when the source cannot
         be reached; or the delegate's own falsy result when it gathered
         nothing.
     """
     url = harvest_job.source.url
     # Test whether we should use OAI-PMH or DDI.
     metadata_registry = MetadataRegistry()
     metadata_registry.registerReader('oai_dc', oai_dc_reader)
     client = oaipmh.client.Client(url, metadata_registry)
     try:
         client.identify()
     except XMLSyntaxError:
         self.harvester = DDIHarvester()
     except urllib2.URLError:
         self._save_gather_error('Could not identify source!', harvest_job)
         return None
     else:
         # Always choose afresh: a delegate left over from a previous job
         # (or from fetch_stage) must not be reused for this source.
         self.harvester = OAIPMHHarvester()

     object_ids = self.harvester.gather_stage(harvest_job)
     if not object_ids:
         # The delegate gathered nothing (None or empty); pass that on
         # instead of failing while iterating over None.
         return object_ids

     # Encode once; the delegate is not touched inside the loop below.
     pickled_harvester = jsonpickle.encode(self.harvester)
     gathered_ids = []
     for object_id in object_ids:
         harvest_object = HarvestObject.get(object_id)
         content = json.loads(harvest_object.content)
         content['harv'] = pickled_harvester
         harvest_object.content = json.dumps(content)
         harvest_object.save()
         gathered_ids.append(harvest_object.id)
     return gathered_ids
Exemplo n.º 3
0
class MetadataHarvester(HarvesterBase):
    """Harvester that detects the source format and delegates to either
    ``OAIPMHHarvester`` or ``DDIHarvester``.

    The delegate chosen during gathering is serialised with jsonpickle into
    each harvest object's JSON content under the ``'harv'`` key, and is
    restored from there in the fetch and import stages.
    """

    config = None
    # Concrete harvester currently delegated to; set by every stage.
    harvester = None

    def __init__(self):
        self.harvester = None

    def info(self):
        """Return the name, title and description of this harvester."""
        return {
            'name': 'Metadata',
            'title': 'Metadata harvester',
            'description': 'Universal metadata harvester for various formats',
            }

    def _store_harvester(self, harvest_object):
        """Write the pickled delegate into the object's content and save it."""
        content = json.loads(harvest_object.content)
        content['harv'] = jsonpickle.encode(self.harvester)
        harvest_object.content = json.dumps(content)
        harvest_object.save()

    def _restore_harvester(self, harvest_object):
        """Rebuild the delegate from the object's content into ``self.harvester``."""
        # NOTE(review): jsonpickle.decode can instantiate arbitrary classes.
        # This is only safe while harvest object content is written solely
        # by trusted code -- confirm that holds for this deployment.
        pickled = json.loads(harvest_object.content)['harv']
        self.harvester = jsonpickle.decode(pickled)

    def gather_stage(self, harvest_job):
        """Pick a concrete harvester for the source and delegate gathering.

        The source URL is probed with an OAI-PMH ``Identify`` request. If
        the response cannot be parsed as XML the source is handled by
        ``DDIHarvester``; if the request succeeds it is handled by
        ``OAIPMHHarvester``.

        :param harvest_job: job whose ``source.url`` is probed and gathered.
        :returns: list of harvest object ids; ``None`` when the source
            cannot be reached; or the delegate's own falsy result when it
            gathered nothing.
        """
        url = harvest_job.source.url
        # Test whether we should use OAI-PMH or DDI.
        metadata_registry = MetadataRegistry()
        metadata_registry.registerReader('oai_dc', oai_dc_reader)
        client = oaipmh.client.Client(url, metadata_registry)
        try:
            client.identify()
        except XMLSyntaxError:
            self.harvester = DDIHarvester()
        except urllib2.URLError:
            self._save_gather_error('Could not identify source!', harvest_job)
            return None
        else:
            # Always choose afresh: a delegate left over from a previous
            # job (or from fetch_stage) must not be reused for this source.
            self.harvester = OAIPMHHarvester()

        object_ids = self.harvester.gather_stage(harvest_job)
        if not object_ids:
            # The delegate gathered nothing (None or empty); pass that on
            # instead of failing while iterating over None.
            return object_ids

        gathered_ids = []
        for object_id in object_ids:
            harvest_object = HarvestObject.get(object_id)
            self._store_harvester(harvest_object)
            gathered_ids.append(harvest_object.id)
        return gathered_ids

    def fetch_stage(self, harvest_object):
        """Delegate fetching to the harvester stored in the object.

        After the delegate has run, it is pickled back into the object's
        content. The content is re-read first, because the delegate may
        have rewritten it.

        :returns: whatever the delegate's ``fetch_stage`` returned.
        """
        self._restore_harvester(harvest_object)
        fetched = self.harvester.fetch_stage(harvest_object)
        self._store_harvester(harvest_object)
        return fetched

    def import_stage(self, harvest_object):
        """Delegate importing to the harvester stored in the object.

        :returns: whatever the delegate's ``import_stage`` returned.
        """
        self._restore_harvester(harvest_object)
        return self.harvester.import_stage(harvest_object)
Exemplo n.º 4
0
 def setup_class(cls):
     """Rebuild the database, run the plugin model set-up hooks and create
     the harvester shared by the tests in this class.
     """
     # Order matters: the database is rebuilt before the plugin models
     # run their own set-up.
     preparation_steps = (
         ckan.model.repo.rebuild_db,
         harvest_model.setup,
         kata_model.setup,
     )
     for prepare in preparation_steps:
         prepare()
     cls.harvester = OAIPMHHarvester()
Exemplo n.º 5
0
    def setup_class(cls):
        """Rebuild the database, run the plugin model set-up hooks, create
        the shared harvester and push a stand-in Pylons template context.
        """
        # Order matters: the database is rebuilt before the plugin models
        # run their own set-up.
        for prepare in (model.repo.rebuild_db,
                        harvest_model.setup,
                        kata_model.setup):
            prepare()
        cls.harvester = OAIPMHHarvester()

        # Pylons globals only exist inside a request, so a context object
        # is pushed by hand here as a workaround.
        template_context = AttribSafeContextObj()
        pylons_context = PylonsContext()
        pylons_context.tmpl_context = template_context
        pylons.tmpl_context._push_object(template_context)