Example #1
def latest_post():
    '''Return the most recent blog post.

    Returns None if there are no blog posts.

    :rtype: ckanext.sweden.blog.model.post.Post or None

    '''
    try:
        from ckanext.sweden.blog.model.post import Post
        post = Session.query(Post).\
            filter(Post.visible == True).\
            order_by('created desc').\
            first()
    except NoResultFound:
        return None

    if post is None:
        return None

    post.content_markdown = markdown(
        unicode(truncate(post.content, length=320, indicator='...',
                         whole_word=True)))
    post.post_author = (model.User.get(post.user_id)
                        or Session.query(model.User).filter_by(
                        id=post.user_id).first())

    return post
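
A minimal usage sketch (hypothetical caller code): because latest_post() returns None rather than raising when there are no posts, a single check suffices.

post = latest_post()
if post is not None:
    print(post.content_markdown)  # truncated, markdown-rendered body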
Example #2
def record_existing_unique_identifier(package_id, identifier):
    """
    Based on a provided identifier, checks datacite for an existing DOI
    Saves to local CKAN database
    :param package_id: string
    :param identifier: string
    :return DOI object if saved, false if it didn't exist in datacite
    """
    datacite_api = DOIDataCiteAPI()

    # Check this identifier doesn't exist in the table
    existing_doi = Session.query(DOI).filter(DOI.identifier == identifier).first()
    if not existing_doi:
        # And check against the datacite service
        try:
            datacite_doi = datacite_api.get(identifier)
            if datacite_doi.text:
                # Delete any stale local DOI record that already points to
                # this dataset before saving the new identifier
                doi_for_this_pkg = Session.query(DOI).filter(DOI.package_id == package_id).first()
                if doi_for_this_pkg:
                    Session.delete(doi_for_this_pkg)
                doi = DOI(package_id=package_id, identifier=identifier)
                Session.add(doi)
                Session.commit()
                return doi
        except HTTPError:
            pass
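
A usage sketch with hypothetical values: a DOI record only comes back when DataCite already knows the identifier and no local row exists.

doi = record_existing_unique_identifier('some-package-id', '10.5072/FK2ABC12')
if doi is None:
    # unknown to DataCite, already recorded locally, or the API call failed
    pass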
Example #3
    def test_create_extent(self):

        package = factories.Dataset()

        geojson = json.loads(self.geojson_examples["point"])

        shape = asShape(geojson)
        package_extent = PackageExtent(
            package_id=package["id"],
            the_geom=WKTElement(shape.wkt, self.db_srid),
        )
        package_extent.save()

        assert (package_extent.package_id == package["id"])
        if legacy_geoalchemy:
            assert (Session.scalar(
                package_extent.the_geom.x) == geojson["coordinates"][0])
            assert (Session.scalar(
                package_extent.the_geom.y) == geojson["coordinates"][1])
            assert (Session.scalar(
                package_extent.the_geom.srid) == self.db_srid)
        else:
            from sqlalchemy import func

            assert (Session.query(func.ST_X(package_extent.the_geom)).first()
                    [0] == geojson["coordinates"][0])
            assert (Session.query(func.ST_Y(package_extent.the_geom)).first()
                    [0] == geojson["coordinates"][1])
            assert (package_extent.the_geom.srid == self.db_srid)
Example #4
    def make_package_name(self, title, exclude_existing_package):
        '''
        Creates a URL friendly name from a title

        If the name already exists, it will add some random characters at the end
        '''

        name = munge_title_to_name(title).replace('_', '-')
        while '--' in name:
            name = name.replace('--', '-')
        name = name[0:90] # max length is 100

        # Is this slug already in use (and if we're updating a package, is it in
        # use by a different package?).
        pkg_obj = Session.query(Package).filter(Package.name == name).filter(Package.id != exclude_existing_package).first()
        if not pkg_obj:
            # The name is available, so use it. Note that if we're updating an
            # existing package we will be updating this package's URL, so incoming
            # links may break.
            return name

        if exclude_existing_package:
            # The name is not available, and we're updating a package. Chances
            # are the package's name already had some random string attached
            # to it last time. Prevent spurious updates to the package's URL
            # (choosing new random text) by just reusing the existing package's
            # name.
            pkg_obj = Session.query(Package).filter(Package.id == exclude_existing_package).first()
            if pkg_obj: # the package may not exist yet because we may be passed the desired package GUID before a new package is instantiated
                return pkg_obj.name

        # Append some random text to the URL. Hope that with five characters
        # there will be no collision.
        return name + "-" + str(uuid.uuid4())[:5]
Example #5
    def test_create_extent(self):

        package = factories.Dataset()

        geojson = json.loads(self.geojson_examples['point'])

        shape = asShape(geojson)
        package_extent = PackageExtent(package_id=package['id'],
                                       the_geom=WKTElement(
                                           shape.wkt, self.db_srid))
        package_extent.save()

        assert_equals(package_extent.package_id, package['id'])
        if legacy_geoalchemy:
            assert_equals(Session.scalar(package_extent.the_geom.x),
                          geojson['coordinates'][0])
            assert_equals(Session.scalar(package_extent.the_geom.y),
                          geojson['coordinates'][1])
            assert_equals(Session.scalar(package_extent.the_geom.srid),
                          self.db_srid)
        else:
            from sqlalchemy import func
            assert_equals(
                Session.query(func.ST_X(package_extent.the_geom)).first()[0],
                geojson['coordinates'][0])
            assert_equals(
                Session.query(func.ST_Y(package_extent.the_geom)).first()[0],
                geojson['coordinates'][1])
            assert_equals(package_extent.the_geom.srid, self.db_srid)
Example #7
    def test_create_extent(self):

        package = factories.Dataset()

        geojson = json.loads(self.geojson_examples['point'])

        shape = asShape(geojson)
        package_extent = PackageExtent(package_id=package['id'],
                                       the_geom=WKTElement(shape.wkt,
                                                           self.db_srid))
        package_extent.save()

        assert_equals(package_extent.package_id, package['id'])
        if legacy_geoalchemy:
            assert_equals(Session.scalar(package_extent.the_geom.x),
                          geojson['coordinates'][0])
            assert_equals(Session.scalar(package_extent.the_geom.y),
                          geojson['coordinates'][1])
            assert_equals(Session.scalar(package_extent.the_geom.srid),
                          self.db_srid)
        else:
            from sqlalchemy import func
            assert_equals(
                Session.query(func.ST_X(package_extent.the_geom)).first()[0],
                geojson['coordinates'][0])
            assert_equals(
                Session.query(func.ST_Y(package_extent.the_geom)).first()[0],
                geojson['coordinates'][1])
            assert_equals(package_extent.the_geom.srid, self.db_srid)
Example #8
    def test_zfaulty_xml_unknown_errors(self):
        harv, job = self._create_harvester()
        res = "http://www.fsd.uta.fi/fi/aineistot/luettelo/FSD0115/FSD0115.xml"
        urllib2.urlopen = mock.Mock(return_value=StringIO(res))
        gathered = harv.gather_stage(job)

        urllib2.urlopen = mock.Mock(return_value=open("FSD2355.xml"))
        harvest_obj = HarvestObject.get(gathered[0])
        self.assert_(harv.fetch_stage(harvest_obj))
        self.assert_(harv.import_stage(harvest_obj))
        print(Package.text_search(
            Session.query(Package),
            'Kansalaiskeskustelu ydinvoimasta 2006').all())
        self.assert_(len(Package.text_search(\
                            Session.query(Package),
                            'Kansalaiskeskustelu ydinvoimasta 2006').all()) >= 1)

        res = "http://www.fsd.uta.fi/fi/aineistot/luettelo/FSD0115/FSD0115.xml"
        urllib2.urlopen = mock.Mock(return_value=StringIO(res))
        gathered = harv.gather_stage(job)
        urllib2.urlopen = mock.Mock(return_value=open("FSD2362.xml"))
        harvest_obj = HarvestObject.get(gathered[0])
        self.assert_(harv.fetch_stage(harvest_obj))
        self.assert_(harv.import_stage(harvest_obj))
        self.assert_(len(Package.text_search(\
                                Session.query(Package),
                                'Energia-asennetutkimus 2004').all()) >= 1)
Example #9
    def test_spatial_extra_base(self, app):

        user = factories.User()
        env = {"REMOTE_USER": user["name"].encode("ascii")}
        dataset = factories.Dataset(user=user)

        offset = url_for("dataset.edit", id=dataset["id"])
        res = app.get(offset, extra_environ=env)

        data = {
            "name": dataset['name'],
            "extras__0__key": u"spatial",
            "extras__0__value": self.geojson_examples["point"]
        }

        res = app.post(offset, environ_overrides=env, data=data)

        assert "Error" not in res, res

        package_extent = (Session.query(PackageExtent).filter(
            PackageExtent.package_id == dataset["id"]).first())

        geojson = json.loads(self.geojson_examples["point"])

        assert package_extent.package_id == dataset["id"]
        from sqlalchemy import func

        assert (Session.query(func.ST_X(
            package_extent.the_geom)).first()[0] == geojson["coordinates"][0])
        assert (Session.query(func.ST_Y(
            package_extent.the_geom)).first()[0] == geojson["coordinates"][1])
        assert package_extent.the_geom.srid == self.db_srid
Example #10
def _create_unique_identifier(package_doi_identifier=None):
    datacite_api = DOIDataCiteAPI()
    while True:
        if package_doi_identifier:
            identifier = os.path.join(package_doi_identifier,
                                      '{0:03}'.format(random.randint(1, 999)))
            query = Session.query(CeonResourceDOI)
            query = query.filter(CeonResourceDOI.identifier == identifier)
            exists = query.count()
        else:
            identifier = os.path.join(
                get_doi_prefix(), '{0:07}'.format(random.randint(1, 9999999)))
            query = Session.query(CeonPackageDOI)
            query = query.filter(CeonPackageDOI.identifier == identifier)
            exists = query.count()
        # Check this identifier doesn't exist in the table
        if not exists:
            # And check against the datacite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                pass
            # TODO: remove the next two lines that ignore ConnectionError
            except ConnectionError:
                pass
            else:
                if datacite_doi.text:
                    continue
        return identifier
Example #11
    def test_zharvester_import(self, mocked=True):
        harvest_object, harv = self._create_harvester()
        self.assert_(harv.info()['name'] == 'OAI-PMH')
        real_content = json.loads(harvest_object.content)
        self.assert_(real_content)
        self.assert_(harv.import_stage(harvest_object))

        the_package = Session.query(Package).filter(Package.title == u"homer")
        print(the_package)
        the_package = the_package[0]
        self.assert_(the_package)
        self.assert_(len(the_package.get_tags()) == 4)
        self.assert_(len(the_package.get_groups()) == 3)
        self.assert_(the_package.url == "http://helda.helsinki.fi/oai/request?verb=GetRecord&identifier=%s&metadataPrefix=oai_dc" % the_package.id)
        # Test with empty request
        Session.remove()
        CreateTestData.delete()
        Session.query(Package).delete()
        harvest_object, harv = self._create_harvester(config=False)
        real_content = json.loads(harvest_object.content)
        self.assert_(harv.import_stage(harvest_object) == False)
        errs = Session.query(HarvestGatherError).all()
        self.assert_(len(errs) == 2)
        errs = Session.query(HarvestObjectError).all()
        self.assert_(len(errs) == 3)
Example #12
def _create_unique_identifier(package_doi_identifier=None):
    datacite_api = DOIDataCiteAPI()
    while True:
        if package_doi_identifier:
            identifier = os.path.join(package_doi_identifier,
                    '{0:03}'.format(random.randint(1, 999)))
            query = Session.query(CeonResourceDOI)
            query = query.filter(CeonResourceDOI.identifier == identifier)
            exists = query.count()
        else:
            identifier = os.path.join(get_doi_prefix(),
                    '{0:07}'.format(random.randint(1, 9999999)))
            query = Session.query(CeonPackageDOI)
            query = query.filter(CeonPackageDOI.identifier == identifier)
            exists = query.count()
        # Check this identifier doesn't exist in the table
        if not exists:
            # And check against the datacite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                pass
            # TODO: remove the next two lines that ignore ConnectionError
            except ConnectionError:
                pass
            else:
                if datacite_doi.text:
                    continue
        return identifier
Example #13
    def delete_tests(self):

        print('Deleting all test DOIs')
        Session.query(DOI).filter(
            DOI.identifier.like('%' + TEST_PREFIX +
                                '%')).delete(synchronize_session=False)
        Session.commit()
Example #14
 def get(cls, reference):
     """Returns a IssueCategory object referenced by its id or name."""
     if type(reference) is int:
         # if reference is an integer, get by ID
         return Session.query(cls).filter(cls.id == reference).first()
     else:
         # if not, get by name
         return Session.query(cls).filter(cls.name == reference).first()
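
Usage sketch (hypothetical values): the same classmethod resolves either key type.

category = IssueCategory.get(42)       # lookup by integer ID
category = IssueCategory.get('bugs')   # lookup by name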
Example #15
    def delete_tests(self):
        '''Delete all test DOIs.'''

        print(u'Deleting all test DOIs')
        Session.query(DOI).filter(
            DOI.identifier.like(u'%' + get_prefix() +
                                u'%')).delete(synchronize_session=False)
        Session.commit()
Example #17
    def _gather_entry(self, entry, auth=None):
        # Create a harvest object for each entry
        entry_guid = entry['guid']
        log.debug('gathering %s', entry_guid)
        entry_name = entry['identifier'].replace('v101_', '').replace('.hdf5', '')  # noqa: E501
        entry_restart_date = entry['restart_date']

        package_query = Session.query(Package)
        query_filtered = package_query.filter(Package.name == entry_name)
        package = query_filtered.first()

        if package:
            # Meaning we've previously harvested this,
            # but we may want to reharvest it now.
            previous_obj = Session.query(HarvestObject) \
                .filter(HarvestObject.guid == entry_guid) \
                .filter(HarvestObject.current == True) \
                .first()  # noqa: E712
            if previous_obj:
                previous_obj.current = False
                previous_obj.save()

            if self.update_all:
                log.debug('{} already exists and will be updated.'.format(entry_name))  # noqa: E501
                status = 'change'
            else:
                log.debug('{} will not be updated.'.format(entry_name))  # noqa: E501
                status = 'unchanged'

            obj = HarvestObject(guid=entry_guid,
                                job=self.job,
                                extras=[
                                    HOExtra(key='status', value=status),
                                    HOExtra(key='restart_date', value=entry_restart_date)
                                ])

            obj.content = entry['content']
            obj.package = package
            obj.save()
            return obj.id

        elif not package:
            # It's a product we haven't harvested before.
            log.debug(
                '{} has not been harvested before. Creating a new harvest object.'.  # noqa: E501
                format(entry_name))  # noqa: E501
            obj = HarvestObject(
                guid=entry_guid,
                job=self.job,
                extras=[
                    HOExtra(key='status', value='new'),
                    HOExtra(key='restart_date', value=entry_restart_date)
                ])
            obj.content = entry['content']
            obj.package = None
            obj.save()
            return obj.id
Example #18
    def clear(cls):
        Session.query(LocalizedLicenseName).delete()
        Session.query(cls).delete()

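        # CKAN installs that use vdm attach a `revision` attribute to the
        # Session; stash it so the flush below does not lose it (a plain
        # SQLAlchemy session has no such attribute, hence the guard).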
        try:
            rev = Session.revision
        except AttributeError:
            rev = None
        Session.flush()
        Session.revision = rev
Example #19
 def listSets(self, cursor=None, batch_size=None):
     '''List all sets in this repository, where sets are groups.
     '''
     data = []
     if not cursor:
         groups = Session.query(Group).all()
     else:
         # the cursor marks where to resume, so slice from it onwards
         # rather than re-serving the items before it
         groups = Session.query(Group).all()[cursor:]
     for dataset in groups:
         data.append((dataset.id, dataset.name, dataset.description))
     return data
Example #20
    def listIdentifiers(self, metadataPrefix, set=None, cursor=None,
                        from_=None, until=None, batch_size=None):
        '''List all identifiers for this repository.
        '''
        data = []
        packages = []
        group = None
        if not set:
            if not from_ and not until:
                packages = Session.query(Package).filter(Package.type=='dataset').\
                    filter(Package.private!=True).filter(Package.state=='active').all()
            else:
                if from_ and not until:
                    packages = Session.query(Package).filter(Package.type=='dataset').filter(Package.private!=True).\
                        filter(PackageRevision.revision_timestamp > from_).\
                        filter(Package.name==PackageRevision.name).filter(Package.state=='active').all()
                if until and not from_:
                    packages = Session.query(Package).filter(Package.type=='dataset').filter(Package.private!=True).\
                        filter(PackageRevision.revision_timestamp < until).\
                        filter(Package.name==PackageRevision.name).filter(Package.state=='active').all()
                if from_ and until:
                    packages = Session.query(Package).filter(Package.type=='dataset').filter(Package.private!=True).\
                        filter(between(PackageRevision.revision_timestamp, from_, until)).\
                        filter(Package.name==PackageRevision.name).filter(Package.state=='active').all()
        else:
            group = Group.get(set)
            if group:
                packages = group.packages(return_query=True).filter(Package.type=='dataset').\
                    filter(Package.private!=True).filter(Package.state=='active')
                if from_ and not until:
                    packages = packages.filter(PackageRevision.revision_timestamp > from_).\
                        filter(Package.name==PackageRevision.name).filter(Package.state=='active')
                if until and not from_:
                    packages = packages.filter(PackageRevision.revision_timestamp < until).\
                        filter(Package.name==PackageRevision.name).filter(Package.state=='active')
                if from_ and until:
                    packages = packages.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                        filter(Package.name==PackageRevision.name).filter(Package.state=='active')
                packages = packages.all()
        if cursor:
            packages = packages[cursor:]
        for package in packages:
            spec = package.name
            if group:
                spec = group.name
            else:
                if package.owner_org:
                    group = Group.get(package.owner_org)
                    if group and group.name:
                        spec = group.name
                    group = None
            data.append(common.Header('', package.id, package.metadata_created, [spec], False))

        return data
Example #21
    def populate_harvest_job(self, harvest_job, set_ids, config, client):
        # Check if this source has been harvested before
        previous_job = Session.query(HarvestJob) \
            .filter(HarvestJob.source == harvest_job.source) \
            .filter(HarvestJob.gather_finished != None) \
            .filter(HarvestJob.id != harvest_job.id) \
            .order_by(HarvestJob.gather_finished.desc()) \
            .limit(1).first()

        last_time = None
        if previous_job and previous_job.finished and model.Package.get(harvest_job.source.id).metadata_modified < previous_job.gather_started:
            last_time = previous_job.gather_started.isoformat()

        # Collect package ids
        package_ids = list(self.get_package_ids(set_ids, config, last_time, client))
        log.debug('Identifiers: %s', package_ids)

        if not self._recreate(harvest_job) and package_ids:
            converted_identifiers = {}
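            # Map the CKAN-munged names back to their original identifiers
            # (also registering the trailing 'm' -> 's' spelling variant), so
            # identifiers that already exist as packages can be dropped from
            # the harvest list below.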
            for identifier in package_ids:
                converted_identifiers[datapid_to_name(identifier)] = identifier
                if identifier.endswith(u'm'):
                    converted_identifiers[datapid_to_name(u"%ss" % identifier[0:-1])] = identifier

            for package in model.Session.query(model.Package).filter(model.Package.name.in_(converted_identifiers.keys())).all():
                converted_name = package.name
                if converted_identifiers[converted_name] not in package_ids:
                    converted_name = "%sm" % converted_name[0:-1]
                package_ids.remove(converted_identifiers[converted_name])

        if previous_job:
            for previous_error in [error.guid for error in Session.query(HarvestObject).
                                   filter(HarvestObject.harvest_job_id == previous_job.id).
                                   filter(HarvestObject.state == 'ERROR').all()]:
                if previous_error not in package_ids:
                    package_ids.append(previous_error)

        try:
            object_ids = []
            if len(package_ids):
                for package_id in islice(package_ids, config['limit']) if 'limit' in config else package_ids:
                    # Create a new HarvestObject for this identifier
                    obj = HarvestObject(guid=package_id, job=harvest_job)
                    obj.save()
                    object_ids.append(obj.id)
                log.debug('Object ids: {i}'.format(i=object_ids))
                return object_ids
            else:
                self._save_gather_error('No packages received for URL: {u}'.format(
                    u=harvest_job.source.url), harvest_job)
                return None
        except Exception as e:
            self._save_gather_error('Gather: {e}'.format(e=e), harvest_job)
            raise
Example #22
    def test_store_log_data(self):
        Session.query(RequestAuditModel).delete()

        req_audit = RequestAudit()

        req_audit.log({
            'remote_ip': '192.168.1.10',
            'remote_user': '******',
            'session': '123456789',
            'current_language': 'en',
            'access_time': datetime.now(),
            'request_url': '/test/url',
            'http_method': 'GET',
            'http_path': '/test/url?some=value',
            'http_query_params': 'some=value',
            'http_user_agent': 'Test Agent String',
            'client_os': 'linux',
            'client_device': 'firefox',
        })
        req_audit.log({
            'remote_ip': '192.168.1.10',
            'remote_user': '******',
            'session': '123456789',
            'current_language': 'en',
            'access_time': datetime.now(),
            'request_url': '/test/url',
            'http_method': 'GET',
            'http_path': '/test/url?some=value',
            'http_query_params': 'some=value',
            'http_user_agent': 'Test Agent String',
            'client_os': 'linux',
            'client_device': 'firefox',
        })
        req_audit.log({
            'remote_ip': '192.168.1.10',
            'remote_user': '******',
            'session': '123456789',
            'current_language': 'en',
            'access_time': datetime.now(),
            'request_url': '/test/url',
            'http_method': 'GET',
            'http_path': '/test/url?some=value',
            'http_query_params': 'some=value',
            'http_user_agent': 'Test Agent String',
            'client_os': 'linux',
            'client_device': 'firefox',
        })

        sleep(1)
        req_audit.shutdown()

        count, results = RequestAuditModel.get_all(offset=0, limit=10)
        assert_equals(count, 3)
Example #23
 def listRecords(self, metadataPrefix, set=None, cursor=None, from_=None,
                 until=None, batch_size=None):
     '''Show a selection of records, basically lists all datasets.
     '''
     data = []
     packages = []
     group = None
     if not set:
         if not from_ and not until:
             packages = Session.query(Package).filter(Package.type=='dataset').filter(Package.private!=True).\
                 filter(Package.state=='active').all()
         if from_ and not until:
             packages = Session.query(Package).filter(Package.type=='dataset').filter(Package.private!=True).\
                 filter(PackageRevision.revision_timestamp > from_).filter(Package.name==PackageRevision.name).\
                 filter(Package.state=='active').all()
         if until and not from_:
             packages = Session.query(Package).filter(Package.type=='dataset').filter(Package.private!=True).\
                 filter(PackageRevision.revision_timestamp < until).filter(Package.name==PackageRevision.name).\
                 filter(Package.state=='active').all()
         if from_ and until:
             packages = Session.query(Package).filter(Package.type=='dataset').filter(Package.private!=True).\
                 filter(between(PackageRevision.revision_timestamp, from_, until)).\
                 filter(Package.name==PackageRevision.name).filter(Package.state=='active').all()
     else:
         group = Group.get(set)
         if group:
             packages = group.packages(return_query=True)
             if from_ and not until:
                 packages = packages.filter(PackageRevision.revision_timestamp > from_).\
                     filter(Package.type=='dataset').filter(Package.private!=True).\
                     filter(Package.name==PackageRevision.name).filter(Package.state=='active').all()
             if until and not from_:
                 packages = packages.filter(PackageRevision.revision_timestamp < until).\
                     filter(Package.type=='dataset').filter(Package.private!=True).\
                     filter(Package.name==PackageRevision.name).filter(Package.state=='active').all()
             if from_ and until:
                 packages = packages.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                     filter(Package.type=='dataset').filter(Package.private!=True).\
                     filter(Package.name==PackageRevision.name).filter(Package.state=='active').all()
     if cursor:
         packages = packages[cursor:]
     for res in packages:
         spec = res.name
         if group:
             spec = group.name
         else:
             if res.owner_org:
                 group = Group.get(res.owner_org)
                 if group and group.name:
                     spec = group.name
                 group = None
         data.append(self._record_for_dataset(res, spec))
     return data
Example #24
 def update_package(cls, package_id, **kwargs):
     '''
     Update the package_id and/or published fields of a record associated with a given package.
     :param package_id: the id of the package
     :param kwargs: the values to be updated
     :return: the updated record object
     '''
     update_dict = {k: v for k, v in kwargs.items() if k in cls.cols}
     Session.query(DOI).filter(
         DOI.package_id == package_id).update(update_dict)
     Session.commit()
     return cls.read_package(package_id)
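
Hedged usage sketch (hypothetical ids; package_id is one of the columns the docstring names):

record = DOI.update_package('old-package-id', package_id='new-package-id')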
Example #25
    def clear(self):
        sq = Session.query(VocabularyTerm.id).filter(VocabularyTerm.vocabulary_id==self.id)
        Session.query(VocabularyLabel)\
                   .filter(VocabularyLabel.term_id.in_(sq.subquery())).delete(synchronize_session=False)

        #for vl in q:
        #    Session.delete(vl)
        sq.delete()

        Session.flush()
Example #26
 def update_doi(cls, identifier, **kwargs):
     '''
     Update the package_id and/or published fields of a record with a given DOI.
     :param identifier: the DOI string
     :param kwargs: the values to be updated
     :return: the updated record object
     '''
     update_dict = {k: v for k, v in kwargs.items() if k in cls.cols}
     Session.query(DOI).filter(
         DOI.identifier == identifier).update(update_dict)
     Session.commit()
     return cls.read_doi(identifier)
Example #27
def _clean_groups(package):
    """
    Clears a package's group memberships, except those with 'admin' capacity
    """
    if isinstance(package, dict):
        package_id = package['id']
    else:
        package_id = package.id
    Session.query(Member).filter(Member.table_name == 'package',
                                 Member.table_id == package_id,
                                 Member.capacity != 'admin')\
                         .update({'state':'deleted'})
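
Note that query.update() only stages the change in the open transaction; a caller is expected to commit it, e.g. (sketch):

_clean_groups(package)
Session.commit()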
Example #28
 def _filter_packages(set, cursor, from_, until, batch_size):
     '''Get a part of datasets for "listNN" verbs.
     '''
     packages = []
     setspc = None
     if not set:
         packages = Session.query(Package).filter(Package.type=='dataset'). \
             filter(Package.state == 'active').filter(Package.private!=True)
         if from_ and not until:
             packages = packages.filter(PackageRevision.revision_timestamp > from_).\
                 filter(Package.name==PackageRevision.name)
         if until and not from_:
             packages = packages.filter(PackageRevision.revision_timestamp < until).\
                 filter(Package.name==PackageRevision.name)
         if from_ and until:
             packages = packages.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                 filter(Package.name==PackageRevision.name)
         if batch_size:
             packages = packages.limit(batch_size)
         if cursor:
             packages = packages.offset(cursor)
         packages = packages.all()
     elif set == 'openaire_data':
         oa_tag = Session.query(Tag).filter(
             Tag.name == 'openaire_data').first()
         if oa_tag:
             packages = oa_tag.packages
         setspc = set
     else:
         group = Group.get(set)
         if group:
             # Note that group.packages never returns private datasets regardless of 'with_private' parameter.
             packages = group.packages(return_query=True, with_private=False).filter(Package.type=='dataset'). \
                 filter(Package.state == 'active')
             if from_ and not until:
                 packages = packages.filter(PackageRevision.revision_timestamp > from_).\
                     filter(Package.name==PackageRevision.name)
             if until and not from_:
                 packages = packages.filter(PackageRevision.revision_timestamp < until).\
                     filter(Package.name==PackageRevision.name)
             if from_ and until:
                 packages = packages.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                     filter(Package.name==PackageRevision.name)
             if batch_size:
                 packages = packages.limit(batch_size)
             if cursor:
                 packages = packages.offset(cursor)
             packages = packages.all()
     # if cursor is not None:
     #     cursor_end = cursor + batch_size if cursor + batch_size < len(packages) else len(packages)
     #     packages = packages[cursor:cursor_end]
     return packages, setspc
Example #29
    def get_contributions(cls, pkg_id):
        pkg = Session.query(Package).get(pkg_id)
        if pkg is None:
            pkg = Session.query(Package).filter(Package.name == pkg_id).first()
        if pkg is None:
            raise toolkit.ObjectNotFound('Package does not exist.')
        else:
            pkg_id = pkg.id

        link_records = PackageContributionActivityQuery.read_package(pkg_id)
        activities = sorted([r.contribution_activity for r in link_records],
                            key=lambda x: x.activity)
        return activities
Example #30
    def get(cls, vocab, name):
        if isinstance(vocab, Vocabulary):
            item = Session.query(cls).filter(cls.vocabulary_id==vocab.id,
                                             cls.name==name).first()

        else:
            item = Session.query(cls).join(Vocabulary)\
                                     .filter(Vocabulary.name==vocab, cls.name==name).first()

        if not item:
            log.info(_("No term {} for vocabulary {}").format(name, vocab))

        return item
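
Usage sketch (hypothetical names; the classmethod is assumed to live on a VocabularyTerm model): both lookup styles resolve the same term.

term = VocabularyTerm.get(u'themes', u'AGRI')   # by vocabulary name
term = VocabularyTerm.get(vocab_obj, u'AGRI')   # by Vocabulary instance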
Example #31
    def test_update_extent(self):

        package = factories.Dataset()

        geojson = json.loads(self.geojson_examples["point"])

        shape = asShape(geojson)
        package_extent = PackageExtent(
            package_id=package["id"],
            the_geom=WKTElement(shape.wkt, self.db_srid),
        )
        package_extent.save()
        if legacy_geoalchemy:
            assert(
                Session.scalar(package_extent.the_geom.geometry_type) ==
                "ST_Point"
            )
        else:
            from sqlalchemy import func

            assert(
                Session.query(
                    func.ST_GeometryType(package_extent.the_geom)
                ).first()[0] ==
                "ST_Point"
            )

        # Update the geometry (Point -> Polygon)
        geojson = json.loads(self.geojson_examples["polygon"])

        shape = asShape(geojson)
        package_extent.the_geom = WKTElement(shape.wkt, self.db_srid)
        package_extent.save()

        assert(package_extent.package_id == package["id"])
        if legacy_geoalchemy:
            assert(
                Session.scalar(package_extent.the_geom.geometry_type) ==
                "ST_Polygon"
            )
            assert(
                Session.scalar(package_extent.the_geom.srid) == self.db_srid
            )
        else:
            assert(
                Session.query(
                    func.ST_GeometryType(package_extent.the_geom)
                ).first()[0] ==
                "ST_Polygon"
            )
            assert(package_extent.the_geom.srid == self.db_srid)
Example #32
 def test_harvester_urlerror(self):
     harv, job = self._create_harvester()
     urllib2.urlopen = realopen
     self.assert_(harv.gather_stage(job) == None)
     errs = Session.query(HarvestGatherError).all()
     self.assert_(len(errs) == 1)
     harv_obj = HarvestObject()
     harv_obj.job = job
     harv_obj.content = json.dumps({'url': "http://foo"})
     # XML error and URL error, also the lack of url in content
     self.assert_(harv.import_stage(harv_obj) == False)
     errs = Session.query(HarvestObjectError).all()
     print(errs)
     self.assert_(len(errs) == 1)
Example #33
    def rename_term_in_extras(self, old_term, new_term):

        if not self.valid_term(old_term):
            raise ValueError(u"Old term {} is not valid".format(old_term))
        if not self.valid_term(new_term):
            raise ValueError(u"New term {} is not valid".format(new_term))

        if self.is_multivalued:
            q = Session.query(PackageExtra.package_id).join(Package, Package.id==PackageExtra.package_id)\
                                           .filter(PackageExtra.key==self.field_name,
                                                   PackageExtra.value.like('%{}%'.format(old_term)),
                                                   Package.type=='dataset',
                                                   Package.state=='active')

        else:
            q = Session.query(PackageExtra.package_id).join(Package, Package.id==PackageExtra.package_id)\
                                           .filter(PackageExtra.key==self.field_name,
                                                   PackageExtra.value==old_term,
                                                   Package.type=='dataset',
                                                   Package.state=='active')
        
        # import in function to avoid circular dependencies
        from ckanext.faociok.validators import _serialize_to_array, _deserialize_from_array

        ctx = {'ignore_auth': True,
               'user': _get_user()['name']}

        pshow = t.get_action('package_show')
        pupdate = t.get_action('package_update')
        counter = 0
        for pdata in q:
            pkg = pshow(ctx.copy(), {'name_or_id': pdata[0]})
            fdata = pkg.get(self.field_name)
            affected = False
            if self.is_multivalued:
                fdata = _deserialize_from_array(fdata)
                if old_term in fdata:
                    fdata.remove(old_term)
                    fdata.append(new_term)
                    fdata = _serialize_to_array(fdata)
                    affected = True
            else:
                fdata = new_term
                affected = True
            if affected:
                pkg[self.field_name] = fdata
                pkg.pop('metadata_modified', None)
                pupdate(ctx.copy(), pkg)
                counter += 1
        return counter
Example #34
    def test_edit(self):

        name = 'annakarenina'

        offset = url_for(controller='package', action='edit', id=name)
        res = self.app.get(offset, extra_environ=self.extra_environ)
        assert 'Edit - Datasets' in res
        fv = res.forms['dataset-edit']
        prefix = ''
        fv[prefix + 'extras__1__key'] = u'spatial'
        fv[prefix + 'extras__1__value'] = self.geojson_examples['point']

        res = fv.submit('save', extra_environ=self.extra_environ)
        assert 'Error' not in res, res

        package = Package.get(name)

        # Check that a PackageExtent object has been created
        package_extent = Session.query(PackageExtent).filter(
            PackageExtent.package_id == package.id).first()
        geojson = json.loads(self.geojson_examples['point'])

        assert package_extent
        assert package_extent.package_id == package.id
        assert Session.scalar(
            package_extent.the_geom.x) == geojson['coordinates'][0]
        assert Session.scalar(
            package_extent.the_geom.y) == geojson['coordinates'][1]
        assert Session.scalar(package_extent.the_geom.srid) == self.db_srid

        # Update the spatial extra
        offset = url_for(controller='package', action='edit', id=name)
        res = self.app.get(offset, extra_environ=self.extra_environ)
        assert 'Edit - Datasets' in res
        fv = res.forms['dataset-edit']
        prefix = ''
        fv[prefix + 'extras__1__value'] = self.geojson_examples['polygon']

        res = fv.submit('save', extra_environ=self.extra_environ)
        assert 'Error' not in res, res

        # Check that the PackageExtent object has been updated
        package_extent = Session.query(PackageExtent).filter(
            PackageExtent.package_id == package.id).first()
        assert package_extent
        assert package_extent.package_id == package.id
        assert Session.scalar(
            package_extent.the_geom.geometry_type) == 'ST_Polygon'
        assert Session.scalar(package_extent.the_geom.srid) == self.db_srid
Example #35
 def listIdentifiers(self, metadataPrefix, set=None, cursor=None,
                     from_=None, until=None, batch_size=None):
     '''List all identifiers for this repository.
     '''
     data = []
     packages = []
     if not set:
         if not from_ and not until:
             packages = Session.query(Package).all()
         else:
             if from_:
                 packages = Session.query(Package).\
                     filter(PackageRevision.revision_timestamp > from_).\
                     all()
             if until:
                 packages = Session.query(Package).\
                     filter(PackageRevision.revision_timestamp < until).\
                     all()
             if from_ and until:
                 packages = Session.query(Package).\
                     filter(between(PackageRevision.revision_timestamp,
                                    from_,
                                    until)\
                            ).all()
     else:
         group = Group.get(set)
         if group:
             packages = group.active_packages()
             if from_ and not until:
                 packages = packages.\
                     filter(PackageRevision.revision_timestamp > from_)
             if until and not from_:
                 packages = packages.\
                     filter(PackageRevision.revision_timestamp < until)
             if from_ and until:
                 packages = packages.filter(
                     between(PackageRevision.revision_timestamp,
                             from_,
                             until))
             packages = packages.all()
     if cursor:
         # the cursor marks where to resume, so slice from it onwards
         # rather than re-serving the items before it
         packages = packages[cursor:]
     for package in packages:
         data.append(common.Header(package.id,
                                   package.metadata_created,
                                   [package.name],
                                   False))
     return data
Example #36
def harvest_source_url_validator(key, data, errors, context):
    new_url = _normalize_url(data[key])
    source_id = data.get(('id',),'')
    if source_id:
        # When editing a source we need to avoid its own URL
        existing_sources = Session.query(HarvestSource.url,HarvestSource.active) \
                       .filter(HarvestSource.id!=source_id).all()
    else:
        existing_sources = Session.query(HarvestSource.url,HarvestSource.active).all()

    for url,active in existing_sources:
        url = _normalize_url(url)
        if url == new_url:
            raise Invalid('There already is a Harvest Source for this URL: %s' % data[key])

    return data[key] 
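
In ckanext-harvest, validators like this are listed against the source URL field of the form schema; a hedged sketch of that wiring (assuming CKAN's not_empty validator is in scope):

schema = {
    'url': [not_empty, unicode, harvest_source_url_validator],
}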
Example #37
    def list_packages(cls):
        xmlns = "urn:nbn:se:uu:ub:epc-schema:rs-location-mapping"

        def locns(loc):
            return "{%s}%s" % (xmlns, loc)
        xsi = "http://www.w3.org/2001/XMLSchema-instance"
        schemaLocation = "urn:nbn:se:uu:ub:epc-schema:rs-location-mapping http://urn.kb.se/resolve?urn=urn:nbn:se:uu:ub:epc-schema:rs-location-mapping&godirectly"
        records = Element("{" + xmlns + "}records",
                         attrib={"{" + xsi + "}schemaLocation": schemaLocation},
                         nsmap={'xsi': xsi, None: xmlns})
        q = Session.query(Package)
        q = q.filter(Package.name.ilike('urn:nbn:fi:csc-kata%'))
        pkgs = q.all()
        prot = SubElement(records, locns('protocol-version'))
        prot.text = '3.0'
        datestmp = SubElement(records, locns('datestamp'), attrib={'type': 'modified'})
        now = datetime.datetime.now().isoformat()
        datestmp.text = now
        for pkg in pkgs:
            record = SubElement(records, locns('record'))
            header = SubElement(record, locns('header'))
            datestmp = SubElement(header, locns('datestamp'), attrib={'type': 'modified'})
            datestmp.text = now
            identifier = SubElement(header, locns('identifier'))
            identifier.text = pkg.name
            destinations = SubElement(header, locns('destinations'))
            destination = SubElement(destinations, locns('destination'), attrib={'status': 'activated'})
            datestamp = SubElement(destination, locns('datestamp'), attrib={'type': 'activated'})
            url = SubElement(destination, locns('url'))
            url.text = "%s%s" % (config.get('ckan.site_url', ''),
                             helpers.url_for(controller='package',
                                       action='read',
                                       id=pkg.name))
        return tostring(records)
Example #38
def _get_broken_resource_links(organisation_id=None):
    # NOTE: the argument is discarded here, so the query below matches
    # resources from every organisation
    organisation_id = None

    query = Session.query(Package.name, Package.title, PackageExtra.value, Resource)\
            .join(PackageExtra)\
            .join(ResourceGroup, Package.id==ResourceGroup.package_id)\
            .join(Resource)\
            .join(TaskStatus, TaskStatus.entity_id==Resource.id)\
            .filter(TaskStatus.key==u'openness_score')\
            .filter(TaskStatus.value==u'0')\
            .filter(or_(
                and_(PackageExtra.key=='published_by', 
                     PackageExtra.value.like('%%[%s]' % (organisation_id is None and '%' or organisation_id))),
                and_(PackageExtra.key=='published_via', 
                     PackageExtra.value.like('%%[%s]' % (organisation_id is None and '%' or organisation_id))),
                )\
            )\
            .distinct()

    context = {'model': model, 'session': model.Session}
    data = []
    for row in query:
        resource = resource_dictize(row.Resource, context)
        task_data = {'entity_id': resource['id'], 'task_type': 'qa', 'key': 'openness_score_reason'}
        status = get_action('task_status_show')(context, task_data)
        resource['openness_score'] = u'0'
        resource['openness_score_reason'] = status.get('value')

        data.append([row.name, row.title, row.value, resource])

    return _collapse(data, [_extract_publisher, _extract_dataset])
Example #39
def package_update_rest_minimal(context, data_dict):
    setup()
    package = ''
    fulltext = ''
    old_fulltext = ''
    if 'extras' in data_dict:
        if 'full_text_search' in data_dict['extras']:
            fulltext = data_dict['extras']['full_text_search']
            data_dict = _del_extra_field_from_list(data_dict, 'full_text_search')
            package = update.package_update_rest(context, data_dict)
            old_fulltext = None

            if 'id' in package:
                old_fulltext = Session.query(PackageFulltext) \
                                    .filter(PackageFulltext.package_id==package['id']) \
                                    .first()
            fulltext_dict_save(fulltext, old_fulltext, package, context)
        else:
            package = update.package_update(context, data_dict)
    else:
        package = update.package_update_rest(context, data_dict)

    if check_logged_in(context):
        fulltext = _get_fulltext(package['id'])
        if fulltext:
            package['extras']['full_text_search'] = fulltext.text 
        return package
    
    minimal_package = _del_extra_field_from_list(package)
    minimal_package = _del_main_field_from_dict(minimal_package)
    return minimal_package
Example #40
def five_stars(id=None):
    """
    Return a list of dicts: 1 for each dataset that has an openness score.
    
    Each dict is of the form:
        {'name': <string>, 'title': <string>, 'openness_score': <int>} 
    """
    if id:
        pkg = model.Package.get(id)
        if not pkg:
            return "Not found"

    # take the maximum openness score among dataset resources to be the
    # overall dataset openness score
    query = Session.query(Package.name, Package.title, 
                          func.max(TaskStatus.value).label('value'))\
        .join(ResourceGroup, Package.id==ResourceGroup.package_id)\
        .join(Resource)\
        .join(TaskStatus, TaskStatus.entity_id==Resource.id)\
        .filter(TaskStatus.key==u'openness_score')\
        .group_by(Package.name, Package.title)\
        .distinct()

    if id:
        query = query.filter(Package.id==pkg.id)

    results = []
    for row in query:
        results.append({
            'name': row.name,
            'title': row.title,
            'openness_score': row.value
        })

    return results
Example #41
def create_unique_identifier(package_id):
    """
    Create a unique identifier, using the prefix and a random number: 10.5072/0044634
    Checks the random number doesn't exist in the table or the datacite repository
    All unique identifiers are created with
    @return:
    """
    datacite_api = DOIDataCiteAPI()

    while True:

        identifier = os.path.join(get_prefix(), '{0:07}'.format(random.randint(1, 100000)))

        # Check this identifier doesn't exist in the table
        if not Session.query(DOI).filter(DOI.identifier == identifier).count():

            # And check against the datacite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                pass
            else:
                if datacite_doi.text:
                    continue

        doi = DOI(package_id=package_id, identifier=identifier)
        Session.add(doi)
        Session.commit()

        return doi
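
Usage sketch (hypothetical package id): the loop above only exits once the identifier is unused both locally and on DataCite.

doi = create_unique_identifier(pkg_dict['id'])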
Example #42
 def all_active(cls):
     query = Session.query(cls).filter(
         cls.status == 'active',
         cls.board_id.in_([b.id for b in Board.filter_active()]))
     if hasattr(cls, 'order_by') and isCallable(cls.order_by):
         query = cls.order_by(query)
     return query
Example #43
    def test_new(self):
        name = "test-spatial-dataset-1"

        offset = url_for(controller="package", action="new")
        res = self.app.get(offset, extra_environ=self.extra_environ)
        assert "Add - Datasets" in res
        fv = res.forms["dataset-edit"]
        prefix = ""
        fv[prefix + "name"] = name
        fv[prefix + "extras__0__key"] = u"spatial"
        fv[prefix + "extras__0__value"] = self.geojson_examples["point"]

        res = fv.submit("save", extra_environ=self.extra_environ)
        assert not "Error" in res, res

        package = Package.get(name)

        # Check that a PackageExtent object has been created
        package_extent = Session.query(PackageExtent).filter(PackageExtent.package_id == package.id).first()

        geojson = json.loads(self.geojson_examples["point"])

        assert package_extent
        assert package_extent.package_id == package.id
        assert Session.scalar(package_extent.the_geom.x) == geojson["coordinates"][0]
        assert Session.scalar(package_extent.the_geom.y) == geojson["coordinates"][1]
        assert Session.scalar(package_extent.the_geom.srid) == self.db_srid
Example #44
def broken_resource_links_by_dataset():
    """
    Return a list of named tuples, one for each dataset that contains
    broken resource links (defined as resources with an openness score of 0).

    The named tuple is of the form:
        (name (str), title (str), resources (list of dicts))
    """
    query = Session.query(Package.name, Package.title, Resource)\
        .join(ResourceGroup, Package.id==ResourceGroup.package_id)\
        .join(Resource)\
        .join(TaskStatus, TaskStatus.entity_id==Resource.id)\
        .filter(TaskStatus.key==u'openness_score')\
        .filter(TaskStatus.value==u'0')\
        .distinct()

    context = {'model': model, 'session': model.Session}
    results = {}
    for name, title, resource in query:
        resource = resource_dictize(resource, context)

        data = {'entity_id': resource['id'], 'task_type': 'qa', 'key': 'openness_score_reason'}
        status = get_action('task_status_show')(context, data)
        resource['openness_score_reason'] = status.get('value')

        if name in results:
            results[name].resources.append(resource)
        else:
            DatasetTuple = namedtuple('DatasetTuple', ['name', 'title', 'resources'])
            results[name] = DatasetTuple(name, title or name, [resource])

    return results.values()
Example #45
    def gather_stage(self,harvest_job):
        log.debug('In OOEHarvester gather_stage (%s)' % harvest_job.source.url)

        package_ids = []

        self._set_config(harvest_job.source.config)
        
        # Get source URL
        base_url = harvest_job.source.url.rstrip('/')
        
        # Check if previous jobs exist and when they took place
        previous_job = Session.query(HarvestJob) \
                        .filter(HarvestJob.source==harvest_job.source) \
                        .filter(HarvestJob.gather_finished!=None) \
                        .filter(HarvestJob.id!=harvest_job.id) \
                        .order_by(HarvestJob.gather_finished.desc()) \
                        .limit(1).first()
        if (previous_job and not previous_job.gather_errors and not len(previous_job.objects) == 0):
            if not self.config.get('force_all',False):
                get_all_packages = False
    
                # Request only the packages modified since last harvest job
                last_time = harvest_job.gather_started.strftime("%Y-%m-%d")
                url = base_url + '?since_time=%s' % last_time
            else:
                url = base_url                
        else:
            # Request all remote packages
            url = base_url + '/search'
        log.debug("url: %s" % url) 
        try:
            content = self._get_content(url)
        except Exception as e:
            self._save_gather_error('Unable to get content for URL: %s: %s' % (url, str(e)),harvest_job)
            return None
Example #46
def get_controlled_vocabulary_values(vocabulary_id, thesaurus_id, keywords):
    log.debug(
        '::::: Collecting thesaurus data for dcatapit skos {0} from the metadata keywords :::::'
        .format(vocabulary_id))

    values = []

    #
    # Get all the tag names for the given vocabulary id
    #
    tag_names_list = get_vocabulary_tag_names(vocabulary_id)

    if len(tag_names_list) > 0:
        for key in keywords:
            if thesaurus_id and (thesaurus_id in key['thesaurus-identifier']
                                 or thesaurus_id in key['thesaurus-title']):
                for k in key['keyword']:
                    query = Session.query(DCATAPITTagVocabulary) \
                        .filter(DCATAPITTagVocabulary.text==k, DCATAPITTagVocabulary.tag_name.in_(tag_names_list))
                    query = query.autoflush(True)
                    theme = query.first()

                    if theme and theme.tag_name:
                        values.append(theme.tag_name)
    return values
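To make the expected input shape concrete, here is a hedged usage sketch: the vocabulary name, thesaurus URI, and keyword strings are invented for illustration; only the dict keys ('thesaurus-identifier', 'thesaurus-title', 'keyword') match what the function reads.

keywords = [{
    'thesaurus-identifier': 'http://inspire.ec.europa.eu/theme',
    'thesaurus-title': 'GEMET - INSPIRE themes',
    'keyword': ['Atmospheric conditions', 'Environment'],
}]
values = get_controlled_vocabulary_values(
    'eu_themes', 'http://inspire.ec.europa.eu/theme', keywords)
# values now holds the tag names whose DCATAPITTagVocabulary.text matched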
Example #47
    def _search_package(self, identifier):

        name = identifier.lower()
        replace_chars = [',', ':', '.', '/', '-']

        for x in replace_chars:
            name = name.replace(x, '_')

        name = name.replace('oai_ebas_oai_pmh_nilu_no_', '')
        template_name = name[0:42]

        MAX_NUMBER_APPENDED = 999999
        PACKAGE_NAME_MAX_LENGTH = 99
        APPEND_MAX_CHARS = len(str(MAX_NUMBER_APPENDED))

        # Find out which package names have been taken. Restrict it to names
        # derived from the ideal name plus numbers added
        like_q = u'%s%%' % \
            template_name[:PACKAGE_NAME_MAX_LENGTH - APPEND_MAX_CHARS]
        results = Session.query(Package)\
                              .filter(Package.name.ilike(like_q))\
                              .all()
        # Check every candidate rather than returning None on the first
        # mismatch, which would skip packages later in the result set
        for package in results:
            package_dict = self._get_package_dict(package)
            extra_identifier = self._get_package_extra(
                package_dict, 'identifier')

            if identifier == extra_identifier:
                return package
        return None
Example #48
    def gather_stage(self, harvest_job):
        log.debug('In SRDAHarvester gather_stage (%s)' % harvest_job.source.url)

        get_all_packages = True
        package_ids = []

        data = urllib2.urlopen(self.PREFIX_URL + self.CATALOGUE_INDEX_URL)
        doc = html.parse(data)
        for td in doc.findall("//td[@class='left_p12_title']/a"):
            link = td.get('href')
            if re.match(r"/search/fsciitem", link):
                id = sha1(link).hexdigest()
                obj = HarvestObject(guid=id, job=harvest_job, content=link)
                obj.save()
                package_ids.append(obj.id)

        self._set_config(harvest_job.source.config)

        # Check if this source has been harvested before (the query result
        # is not used further below)
        previous_job = Session.query(HarvestJob) \
                        .filter(HarvestJob.source == harvest_job.source) \
                        .filter(HarvestJob.gather_finished != None) \
                        .filter(HarvestJob.id != harvest_job.id) \
                        .order_by(HarvestJob.gather_finished.desc()) \
                        .limit(1).first()

        return package_ids
Example #49
def package_update_rest_minimal(context, data_dict):
    setup()
    package = ''
    fulltext = ''
    old_fulltext = ''
    if 'extras' in data_dict:
        if 'full_text_search' in data_dict['extras']:
            fulltext = data_dict['extras']['full_text_search']
            data_dict = _del_extra_field_from_list(data_dict,
                                                   'full_text_search')
            package = update.package_update_rest(context, data_dict)
            old_fulltext = None

            if 'id' in package:
                old_fulltext = Session.query(PackageFulltext) \
                                    .filter(PackageFulltext.package_id == package['id']) \
                                    .first()
            fulltext_dict_save(fulltext, old_fulltext, package, context)
        else:
            package = update.package_update_rest(context, data_dict)
    else:
        package = update.package_update_rest(context, data_dict)

    if check_logged_in(context):
        fulltext = _get_fulltext(package['id'])
        if fulltext:
            package['extras']['full_text_search'] = fulltext.text
        return package

    minimal_package = _del_extra_field_from_list(package)
    minimal_package = _del_main_field_from_dict(minimal_package)
    return minimal_package
Example #50
    def tags(self):
        idea_tags = Session.query(IdeaTag) \
               .join(Idea) \
               .filter(Idea.id==self.id) \
               .all()

        return idea_tags
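Assuming IdeaTag carries an idea_id foreign key (which the join through Idea implies but the snippet does not show), an equivalent query without the join would be:

idea_tags = Session.query(IdeaTag).filter(IdeaTag.idea_id == self.id).all()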
Example #51
def _get_content(id):
    from ckanext.harvest.model import HarvestObject
    obj = Session.query(HarvestObject).filter(HarvestObject.id == id).first()
    if obj:
        return obj.content
    else:
        return None
Example #52
    def authenticate(self, environ, identity):
        if 'login' not in identity or 'password' not in identity:
            return None
        user = User.by_name(identity.get('login'))
        if user is None:
            log.debug('Login failed - username %r not found',
                      identity.get('login'))
            return None

        seedUser = Session.query(SEEDUser).filter_by(
            name=identity.get('login')).first()
        if seedUser is None:
            # Without a per-user attempt counter the lockout bookkeeping
            # below cannot run; treat this as a failed login
            log.debug('Login as %r failed - no SEED user record',
                      identity.get('login'))
            return None
        if seedUser.login_attempts >= 10:
            log.debug('Login as %r failed - account is locked',
                      identity.get('login'))
        elif user.validate_password(identity.get('password')):
            # reset attempt count to 0
            seedUser.login_attempts = 0
            Session.commit()
            return user.name
        else:
            log.debug('Login as %r failed - password not valid',
                      identity.get('login'))

        seedUser.login_attempts += 1
        Session.commit()
        return None
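The authenticate(environ, identity) signature follows the repoze.who IAuthenticator plugin interface. Below is a minimal sketch of the lockout behaviour; the plugin class name and credentials are assumptions, not part of the snippet above.

plugin = SEEDAuthenticator()  # hypothetical class name
identity = {'login': 'alice', 'password': 'wrong'}
for _ in range(10):
    assert plugin.authenticate({}, identity) is None
# ten failures lock the account: even the correct password is now rejected
identity['password'] = 'correct'
assert plugin.authenticate({}, identity) is None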
Example #53
    def tags(self):
        app_tags = Session.query(ApplicationTag) \
               .join(Application) \
               .filter(Application.id==self.id) \
               .all()

        return app_tags
Example #54
    def data(self):
        # Get the Europe dataset
        rootdir = get_root_dir()
        data_file = os.path.join(rootdir, 'ckanext', 'offenedaten', 'data', 'eu.json')
        with open(data_file, 'r') as f:
            o = json.load(f)

        # Get the package count by country
        q = Session.query(
                distinct(PackageExtra.value),
                func.count(PackageExtra.value)
            ).filter(PackageExtra.key == u'eu_country')\
             .group_by(PackageExtra.value)

        values = dict(q.all())
        # Set the package count for each country
        for ft in o['features']:
            code = ft['properties']['NUTS']
            ft['properties']['packages'] = values.get(code, 0)

        response.content_type = 'application/json'
        response.pragma = None
        response.cache_control = 'public, max-age=3600'
        response.cache_expires(seconds=3600)
        return json.dumps(o)
Example #55
def get_package_ids_in_poly(coords, db_srid):
    """
    TODO: This needs to be removed as spatial backend is changed to Solr.
    """
    # Build a WKT polygon from the (lat, lon) pairs in coords['poly'],
    # closing the ring by repeating the first point at the end
    points = ', '.join('%s %s' % (item[1], item[0]) for item in coords['poly'])
    first = coords['poly'][0]
    wkt = 'POLYGON ((%s, %s %s))' % (points, first[1], first[0])

    # bbox_template = Template('POLYGON (($minx $miny, $minx $maxy, $maxx $maxy, $maxx $miny, $minx $miny))')

    input_geometry = WKTSpatialElement(wkt, db_srid)

    extents = Session.query(PackageExtent)\
        .filter(PackageExtent.package_id == Package.id)\
        .filter(PackageExtent.the_geom.intersects(input_geometry))\
        .filter(Package.state == u'active')\
        .all()

    ids = [extent.package_id for extent in extents]
    return ids
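As a hedged alternative (assuming Shapely is available in the environment), the ring construction can be delegated to Shapely, which closes the ring automatically, so the manual first-point append disappears:

from shapely.geometry import Polygon

def poly_coords_to_wkt(coords):
    # keep the same item[1]/item[0] ordering as the string-building above
    ring = [(item[1], item[0]) for item in coords['poly']]
    return Polygon(ring).wkt  # Shapely repeats the first point for us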
Example #56
    def test_harvest_basic(self):

        # Create source
        source_fixture = {"url": u"http://127.0.0.1:8999/waf/index.html", "type": u"gemini-waf"}

        source, job = self._create_source_and_job(source_fixture)

        harvester = GeminiWafHarvester()

        # We need to send an actual job, not the dict
        object_ids = harvester.gather_stage(job)

        assert len(object_ids) == 2

        # Fetch stage always returns True for Waf harvesters
        assert harvester.fetch_stage(object_ids) == True

        objects = []
        for object_id in object_ids:
            obj = HarvestObject.get(object_id)
            assert obj
            objects.append(obj)
            harvester.import_stage(obj)

        pkgs = Session.query(Package).all()

        assert len(pkgs) == 2

        pkg_ids = [pkg.id for pkg in pkgs]

        for obj in objects:
            assert obj.current == True
            assert obj.package_id in pkg_ids
Example #57
    @classmethod
    def for_select(cls, lang):
        q = Session.query(cls, LocalizedLicenseName.label)\
                   .join(LocalizedLicenseName)\
                   .filter(LocalizedLicenseName.lang == lang,
                           cls.rank_order > 1)\
                   .order_by(cls.path)
        return list(q)
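A hedged usage sketch for building a select widget; the class name License and the id attribute used as the option value are assumptions based on the surrounding names:

# each row is a (license, label) pair for the requested language
options = [(lic.id, label) for lic, label in License.for_select('it')]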
Example #58
def update_extents():
    from ckan.model import PackageExtra, Package, Session
    packages = [extra.package
                for extra in
                Session.query(PackageExtra).filter(PackageExtra.key == 'spatial').all()]

    errors = []
    count = 0
    for package in packages:
        try:
            value = package.extras['spatial']
            log.debug('Received: %r' % value)
            geometry = json.loads(value)

            # Only save the extent once the geometry has parsed cleanly;
            # saving outside the try would reuse a stale (or undefined)
            # geometry after a decoding error
            save_package_extent(package.id, geometry)
            count += 1
        except (ValueError, TypeError) as e:
            errors.append(u'Package %s - Error decoding JSON object: %s' %
                          (package.id, six.text_type(e)))

    Session.commit()

    if errors:
        msg = 'Errors were found:\n%s' % '\n'.join(errors)
        print(msg)

    msg = "Done. Extents generated for %i out of %i packages" % (count,
                                                                 len(packages))

    print(msg)
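For reference, the 'spatial' extra holds a GeoJSON geometry as a string; a minimal value that would parse cleanly above looks like this (coordinates invented):

spatial_value = '{"type": "Point", "coordinates": [18.06, 59.33]}'
geometry = json.loads(spatial_value)  # what gets passed to save_package_extent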
Example #59
def package_update_minimal(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it belongs
    to.
    
    It is recommended to call
    :py:func:`ckan.logic.action.get.package_show`, make the desired changes to
    the result, and then call ``package_update()`` with it.

    Plugins may change the parameters of this function depending on the value
    of the dataset's ``type`` attribute, see the
    :py:class:`~ckan.plugins.interfaces.IDatasetForm` plugin interface.

    For further parameters see
    :py:func:`~ckan.logic.action.create.package_create`.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if ``'return_package_dict'`` is ``True`` in
              the context, which is the default. Otherwise returns just the
              dataset id)
    :rtype: dictionary
    '''
    setup()
    package = ''
    fulltext = ''
    old_fulltext = ''
    if 'extras' in data_dict:
        contains = _contains_key(data_dict['extras'], 'full_text_search')
        if contains:
            fulltext = contains
            data_dict = _del_extra_field_from_dict(data_dict, 'full_text_search')

            package = update.package_update(context, data_dict)
            old_fulltext = None
            if 'id' in package:
                old_fulltext = Session.query(PackageFulltext) \
                                    .filter(PackageFulltext.package_id == package['id']) \
                                    .first()
            fulltext_dict_save(fulltext, old_fulltext, package, context)
        else:
            package = update.package_update(context, data_dict)
    else:
        package = update.package_update(context, data_dict)

    if check_logged_in(context):
        fulltext = _get_fulltext(package['id'])
        if fulltext:
            fulltext_dict = {'key': 'full_text_search',
                             'value': fulltext.text}
            package['extras'].append(fulltext_dict)
        return package

    minimal_package = _del_extra_field_from_dict(package)
    minimal_package = _del_main_field_from_dict(minimal_package)
    return minimal_package
Example #60
    @staticmethod
    def _ensure_name_is_unique(ideal_name, existing_name=None, append_type="number-sequence"):
        """
        Returns a dataset name based on the ideal_name, guaranteed to be
        different from all other dataset names, by adding a number on the
        end if necessary.

        If generating a new name because the title of the dataset has changed,
        specify the existing name, in case the name doesn't need to change
        after all.

        The maximum dataset name length is taken account of.

        :param ideal_name: the desired name for the dataset, if it's not
                           already taken (usually derived by munging the
                           dataset title)
        :type ideal_name: string
        :param existing_name: the current name of the dataset - only specify
                              this if the dataset exists
        :type existing_name: string
        :param append_type: the type of characters to add to make it unique -
                            either 'number-sequence' or 'random-hex'.
        :type append_type: string
        """
        ideal_name = ideal_name[:PACKAGE_NAME_MAX_LENGTH]
        if existing_name == ideal_name:
            return ideal_name
        if append_type == "number-sequence":
            MAX_NUMBER_APPENDED = 999
            APPEND_MAX_CHARS = len(str(MAX_NUMBER_APPENDED))
        elif append_type == "random-hex":
            APPEND_MAX_CHARS = 5  # 16^5 = 1 million combinations
        else:
            raise NotImplementedError("append_type cannot be %s" % append_type)
        # Find out which package names have been taken. Restrict it to names
        # derived from the ideal name plus numbers added
        like_q = u"%s%%" % ideal_name[: PACKAGE_NAME_MAX_LENGTH - APPEND_MAX_CHARS]
        name_results = Session.query(Package.name).filter(Package.name.ilike(like_q)).all()
        taken = set([name_result[0] for name_result in name_results])
        if existing_name and existing_name in taken:
            taken.remove(existing_name)
        if ideal_name not in taken:
            # great, the ideal name is available
            return ideal_name
        elif existing_name and existing_name.startswith(ideal_name):
            # the ideal name is not available, but its an existing dataset with
            # a name based on the ideal one, so there's no point changing it to
            # a different number
            return existing_name
        elif append_type == "number-sequence":
            # find the next available number
            counter = 1
            while counter <= MAX_NUMBER_APPENDED:
                candidate_name = ideal_name[: PACKAGE_NAME_MAX_LENGTH - len(str(counter))] + str(counter)
                if candidate_name not in taken:
                    return candidate_name
                counter += 1
            return None
        elif append_type == "random-hex":
            return ideal_name[: PACKAGE_NAME_MAX_LENGTH - APPEND_MAX_CHARS] + str(uuid.uuid4())[:APPEND_MAX_CHARS]
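A hedged sketch of the two append strategies; the names are illustrative and the actual results depend on which names are already taken in the database:

# no clash: the ideal name comes back unchanged
_ensure_name_is_unique('air-quality')    # -> 'air-quality'
# 'air-quality' already taken: the next free number is appended
_ensure_name_is_unique('air-quality')    # -> 'air-quality1'
# random-hex appends 5 hex characters instead of a number
_ensure_name_is_unique('air-quality', append_type='random-hex')  # -> e.g. 'air-quality3f9a1'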