class DataPreviewController(BaseController):
    def index(self, id):
        '''
        Build the preview payload for a resource and return it, optionally
        wrapped in a JSONP callback.

        The resource must exist, be 'active' and pass the "resource_show"
        access check; otherwise the request is aborted with 404 or 403.
        The file format is taken from the QA record when it has one, falling
        back to the lower-cased resource.format. The 'max-results',
        'encoding' and 'type' request parameters, when present, are copied
        into the query passed to proxy_query.

        :param id: resource identifier, passed to model.Resource.get
        :returns: the value produced by proxy_query (or by _error on
            failure); when a 'callback' request parameter is given the value
            is wrapped as "callback(value)"
        '''
        import re

        resource = model.Resource.get(id)
        if not resource or resource.state != 'active':
            abort(404, "Resource not found")

        context = {'model': model, 'session': model.Session, 'user': c.user}
        try:
            check_access("resource_show", context, {'id': resource.id})
        except NotAuthorized:
            abort(403, "You are not permitted access to this resource")

        # The callback name is echoed verbatim into the response body, so
        # only accept something shaped like a (dotted) JavaScript identifier.
        # Explicit ASCII classes keep the check identical on str and unicode.
        callback = request.params.get('callback')
        if callback and not re.match(
                r'[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*\Z',
                callback):
            abort(400, "Invalid callback name")

        # A value read from the config file may arrive as a string, while the
        # built-in default is an int; normalise so consumers always get an int.
        default_limit = 5242880
        try:
            size_limit = int(
                config.get('ckan.datapreview.limit', default_limit))
        except (TypeError, ValueError):
            size_limit = default_limit

        qa = QA.get_for_resource(resource.id)
        format_ = qa.format if qa else None
        log.debug('File format (according to QA): %r', format_)
        if not format_:
            format_ = resource.format.lower() if resource.format else ''
            log.debug('File format (resource.format): %r', format_)

        query = dict(type=format_, size_limit=size_limit, length=None)
        archival = Archival.get_for_resource(resource.id)
        if archival and archival.size:
            query['length'] = archival.size

        # Add the extra fields if they are set
        for k in ['max-results', 'encoding', 'type']:
            if k in request.params:
                query[k] = request.params[k]

        url, archived = self._get_url(resource, query)
        query['archived'] = archived

        # Set the content type up front so the "not downloadable" error below
        # is served the same way as a proxy failure already was.
        response.content_type = 'application/json'
        if url:
            try:
                result = proxy_query(resource, url, query)
            except ProxyError as e:
                log.warn("Request {0} failed : {1}".format(
                    identify_resource(resource), e))
                result = _error(title=e.title, message=e.message)
        else:
            result = _error(
                title="Remote resource not downloadable",
                message="Unable to find the remote resource for download")

        if callback:
            return "%s(%s)" % (callback, result)

        return result
Пример #2
0
    def _get_url(self, resource, query):
        '''
        Given a resource, return the URL for the data.

        This allows a local cache to be used in preference to the
        resource.url.

        If we are going to use an external URL, then we can do a HEAD request
        to check it works and record the mimetype & length in the query dict.

        Side effects on ``query``: 'mimetype' is always reset to None first;
        'length' and 'mimetype' are overwritten when a HEAD request to the
        cache_url answers 200.

        NOTE(review): this excerpt stops after the cache_url attempt - no
        return statement is visible here, so the return value cannot be
        confirmed from this snippet alone.

        :param resource: resource object
        :param query: dict describing the properties of the data
        '''
        url = None
        query['mimetype'] = None

        # Look for a local cache of the data file
        # e.g. "cache_filepath": "/mnt/shared/ckan_resource_cache/63/63b159d7-90c5-443b-846d-f700f74ea062/bian-anal-mca-2005-dols-eng-1011-0312-tab2.csv"
        cache_filepath = resource.extras.get('cache_filepath')
        if cache_filepath:
            # The path is encoded to UTF-8 bytes before the filesystem check.
            if os.path.exists(cache_filepath.encode('utf8')):
                log.debug('Previewing local cached data: %s', cache_filepath)
                url = cache_filepath
            else:
                log.debug('Local cached data file missing: %s', cache_filepath)

        # Otherwise try the cache_url
        # This works well when running on a database copied from another
        # machine - all the cached files are missing locally, but it can use
        # them from the original machine using the cache_url.
        if not url and hasattr(resource, 'cache_url') and resource.cache_url:
            u = fix_url(resource.cache_url)

            # e.g. resource.cache_url = "http://data.gov.uk/data/resource_cache/07/0791d492-8ab9-4aae-b7e6-7ecae561faa3/bian-anal-mca-2005-dols-eng-1011-0312-qual.pdf"
            try:
                req = urllib2.Request(u)
                # Override the request method so only headers are fetched.
                req.get_method = lambda: 'HEAD'

                r = urllib2.urlopen(req)
                if r.getcode() == 200:
                    url = u
                    # NOTE(review): subscript access presumably raises
                    # KeyError when the header is absent; that would happen
                    # after url is assigned and be swallowed by the except
                    # below, leaving 'mimetype' as None - confirm.
                    query['length'] = r.info()["content-length"]
                    query['mimetype'] = r.info().get('content-type', None)
                    log.debug('Previewing cache URL: %s', url)
            # Best effort: any failure is logged and the method carries on.
            except Exception, e:
                log.error(u"Request {0} with cache url {1}, {2}".format(identify_resource(resource), u, e))
Пример #3
0
        # Otherwise use the URL itself
        if not url:
            u = fix_url(resource.url)
            try:
                req = urllib2.Request(u)
                req.get_method = lambda: 'HEAD'

                r = urllib2.urlopen(req)
                if r.getcode() == 200:
                    url = u
                    query['length'] = r.info()["content-length"]
                    query['mimetype'] = r.info().get('content-type', None)
                    log.debug('Previewing direct from URL: %s', url)
                elif r.getcode() > 400:
                    return None

            except Exception, e:
                log.error(u"Request {0} with url {1}, {2}".format(identify_resource(resource), u, e))

        return url

    def serve(self, path):
        '''
        Return the contents of a file from the archive directory.

        The archive directory is read from the config option
        'ckanext-archiver.archive_dir' (default '/tmp'). Spaces in the joined
        path are replaced with '%20' before the file is looked up.

        Aborts with 404 when the resolved path falls outside the archive
        directory or is not an existing regular file.

        :param path: file path relative to the archive directory
        :returns: the file contents as a str, served as application/json
        '''
        archive_dir = config.get('ckanext-archiver.archive_dir', '/tmp')
        root = os.path.join(archive_dir, path).replace(' ', '%20')

        # ``path`` is presumably supplied by the URL route, so make sure
        # '..' segments or an absolute path cannot escape the archive dir.
        # The base gets the same space substitution as ``root`` so the two
        # stay comparable; joining with '' adds the trailing separator.
        base = os.path.join(
            os.path.abspath(archive_dir.replace(' ', '%20')), '')
        inside_archive = os.path.abspath(root).startswith(base)

        # isfile (rather than exists) also turns a directory into a 404
        # instead of an unhandled error from open().
        if not inside_archive or not os.path.isfile(root):
            abort(404)

        response.content_type = 'application/json'
        # Close the handle deterministically instead of leaking it.
        with open(root) as archived_file:
            return str(archived_file.read())
Пример #4
0
    def _get_url(self, resource, query):
        '''
        Given a resource, return the URL for the data and a flag denoting whether
        the URL is to a local file (and therefore can ignore size limit checks.)

        This allows a local cache to be used in preference to the
        resource.url.

        If we are going to use an external URL, then we can do a HEAD request
        to check it works and record the mimetype & length in the query dict.

        Side effects on ``query``: 'mimetype' is always reset to None first;
        'length' and 'mimetype' are overwritten when a HEAD request to the
        archival cache_url answers 200.

        Returns ``(None, False)`` early when fix_url raises InvalidURL for
        the cache_url.

        NOTE(review): this excerpt stops inside the ``if archival:`` branch;
        the remaining fallbacks and the final return are not visible here.

        :param resource: resource object
        :param query: dict describing the properties of the data
        '''
        # Imported locally rather than at module level.
        from requests.exceptions import InvalidURL

        url = None
        archived = False
        query['mimetype'] = None
        archival = Archival.get_for_resource(resource.id)

        if archival:
            # Look for a local cache of the data file
            # e.g. "cache_filepath": "/mnt/shared/ckan_resource_cache/63/63b159d7-90c5-443b-846d-f700f74ea062/bian-anal-mca-2005-dols-eng-1011-0312-tab2.csv"
            if archival.cache_filepath:
                # The path is encoded to UTF-8 bytes before the filesystem check.
                if os.path.exists(archival.cache_filepath.encode('utf8')):
                    log.debug('Previewing local cached data: %s', archival.cache_filepath)
                    url = archival.cache_filepath
                    # Only a local cached file sets the archived flag.
                    archived = True
                else:
                    log.debug('Local cached data file missing: %s', archival.cache_filepath)
            else:
                log.debug('No cache_filepath for resource %s', identify_resource(resource))

            # Otherwise try the cache_url
            # This works well when running on a database copied from another
            # machine - all the cached files are missing locally, but it can use
            # them from the original machine using the cache_url.
            if not url:
                if archival.cache_url:
                    try:
                        u = fix_url(archival.cache_url)
                    except InvalidURL:
                        log.error("Unable to fix the URL for resource: %s" % identify_resource(resource))
                        # Gives up entirely; resource.url is not tried.
                        return None, False

                    # e.g. resource.cache_url = "http://data.gov.uk/data/resource_cache/07/0791d492-8ab9-4aae-b7e6-7ecae561faa3/bian-anal-mca-2005-dols-eng-1011-0312-qual.pdf"
                    try:
                        req = urllib2.Request(u)
                        # Override the request method so only headers are fetched.
                        req.get_method = lambda: 'HEAD'

                        r = urllib2.urlopen(req)
                        if r.getcode() == 200:
                            url = u
                            # A missing content-length header is recorded as 0.
                            query['length'] = r.info().get("content-length", 0)
                            query['mimetype'] = r.info().get('content-type', None)
                            log.debug('Previewing cache URL: %s', url)
                    # Best effort: any failure is logged and the method carries on.
                    except Exception, e:
                        log.error(u"Request {0} with cache url {1}, {2}".format(identify_resource(resource), u, e))
                else:
                    log.debug('No cache_url for resource %s', identify_resource(resource))
Пример #5
0
                    try:
                        req = urllib2.Request(u)
                        req.get_method = lambda: 'HEAD'

                        r = urllib2.urlopen(req)
                        if r.getcode() == 200:
                            url = u
                            query['length'] = r.info().get("content-length", 0)
                            query['mimetype'] = r.info().get('content-type', None)
                            log.debug('Previewing cache URL: %s', url)
                    except Exception, e:
                        log.error(u"Request {0} with cache url {1}, {2}".format(identify_resource(resource), u, e))
                else:
                    log.debug('No cache_url for resource %s', identify_resource(resource))
        else:
            log.debug('Resource is not archived: %s', identify_resource(resource))

        # Otherwise use the URL itself
        if not url:
            try:
                u = fix_url(resource.url)
            except InvalidURL:
                log.error("Unable to fix the URL for resource: %s" % identify_resource(resource))
                return None, False

            try:
                req = urllib2.Request(u)
                req.get_method = lambda: 'HEAD'

                r = urllib2.urlopen(req)
                if r.getcode() == 200:
    def _get_url(self, resource, query):
        '''
        Given a resource, return the URL for the data and a flag denoting whether
        the URL is to a local file (and therefore can ignore size limit checks.)

        This allows a local cache to be used in preference to the
        resource.url.

        If we are going to use an external URL, then we can do a HEAD request
        to check it works and record the mimetype & length in the query dict.

        :param resource: resource object
        :param query: dict describing the properties of the data
        '''
        from requests.exceptions import InvalidURL

        url = None
        archived = False
        query['mimetype'] = None
        archival = Archival.get_for_resource(resource.id)

        if archival:
            # Look for a local cache of the data file
            # e.g. "cache_filepath": "/mnt/shared/ckan_resource_cache/63/63b159d7-90c5-443b-846d-f700f74ea062/bian-anal-mca-2005-dols-eng-1011-0312-tab2.csv"
            if archival.cache_filepath:
                if os.path.exists(archival.cache_filepath.encode('utf8')):
                    log.debug('Previewing local cached data: %s',
                              archival.cache_filepath)
                    url = archival.cache_filepath
                    archived = True
                else:
                    log.debug('Local cached data file missing: %s',
                              archival.cache_filepath)
            else:
                log.debug('No cache_filepath for resource %s',
                          identify_resource(resource))

            # Otherwise try the cache_url
            # This works well when running on a database copied from another
            # machine - all the cached files are missing locally, but it can use
            # them from the original machine using the cache_url.
            if not url:
                if archival.cache_url:
                    try:
                        u = fix_url(archival.cache_url)
                    except InvalidURL:
                        log.error("Unable to fix the URL for resource: %s" %
                                  identify_resource(resource))
                        return None, False

                    # e.g. resource.cache_url = "http://data.gov.uk/data/resource_cache/07/0791d492-8ab9-4aae-b7e6-7ecae561faa3/bian-anal-mca-2005-dols-eng-1011-0312-qual.pdf"
                    try:
                        req = urllib2.Request(u)
                        req.get_method = lambda: 'HEAD'

                        r = urllib2.urlopen(req)
                        if r.getcode() == 200:
                            url = u
                            query['length'] = r.info().get("content-length", 0)
                            query['mimetype'] = r.info().get(
                                'content-type', None)
                            log.debug('Previewing cache URL: %s', url)
                    except Exception, e:
                        log.error(
                            u"Request {0} with cache url {1}, {2}".format(
                                identify_resource(resource), u, e))
                else:
                    log.debug('No cache_url for resource %s',
                              identify_resource(resource))
                        if r.getcode() == 200:
                            url = u
                            query['length'] = r.info().get("content-length", 0)
                            query['mimetype'] = r.info().get(
                                'content-type', None)
                            log.debug('Previewing cache URL: %s', url)
                    except Exception, e:
                        log.error(
                            u"Request {0} with cache url {1}, {2}".format(
                                identify_resource(resource), u, e))
                else:
                    log.debug('No cache_url for resource %s',
                              identify_resource(resource))
        else:
            log.debug('Resource is not archived: %s',
                      identify_resource(resource))

        # Otherwise use the URL itself
        if not url:
            try:
                u = fix_url(resource.url)
            except InvalidURL:
                log.error("Unable to fix the URL for resource: %s" %
                          identify_resource(resource))
                return None, False

            try:
                req = urllib2.Request(u)
                req.get_method = lambda: 'HEAD'

                r = urllib2.urlopen(req)