Exemplo n.º 1
0
def archive_package(package_id, context, consecutive_errors=0):
    """Download each resource of a dataset and record the first failure.

    The dataset is loaded through the ``package_show`` action and its
    resources are processed in order. Processing stops at the first
    resource that has no URL or whose download raises one of the handled
    errors; that problem is handed to ``save_package_issue``.

    :param package_id: id given to ``package_show`` to load the dataset.
    :param context: action context, forwarded unchanged to
        ``package_show``, ``download`` and ``save_package_issue``.
    :param consecutive_errors: not referenced in this function body.
    :return: the value returned by ``save_package_issue`` for the first
        failing resource; ``None`` if no resource fails in the code shown
        here.
    """
    # Function-scope import, kept where the original code had it.
    from ckanext.archiver import tasks

    package = toolkit.get_action('package_show')(context, {'id': package_id})
    extras_dict = extras_to_dict(package)
    resources = package.get('resources', [])

    # NOTE(review): not read anywhere in the visible part of this function.
    is_activity_package = 'activity' == extras_dict.get('filetype')

    log.debug('Archiving dataset: {0} ({1} resources)'.format(
        package.get('name'), len(resources)))

    for resource in resources:
        if not resource.get('url', ''):
            return save_package_issue(context, package, extras_dict, 'no-url',
                                      'URL missing')

        # NOTE(review): old_hash and result are assigned but not read in the
        # visible part of this function.
        old_hash = resource.get('hash')
        try:
            result = download(context, resource, data_formats=DATA_FORMATS)
        except tasks.LinkCheckerError as e:
            message = str(e)
            if 'URL unobtainable: HTTP' in message:
                # Keep only the text before the first ' on'. partition()
                # leaves the message whole when ' on' is absent; slicing
                # with find()'s -1 would silently drop the last character.
                message = message.partition(' on')[0]
            return save_package_issue(context, package, extras_dict,
                                      'url-error', message)
        except tasks.DownloadError as e:
            message = str(e)
            if 'exceeds maximum allowed value' in message:
                message = 'File too big, not downloading'
            return save_package_issue(context, package, extras_dict,
                                      'download-error', message)
Exemplo n.º 2
0
def archive_package(package_id, context, consecutive_errors=0):
    """Download each resource of a dataset and record the first failure.

    The dataset is loaded through the ``package_show`` action and its
    resources are processed in order. Processing stops at the first
    resource that has no URL or whose download raises one of the handled
    errors; that problem is handed to ``save_package_issue``.

    :param package_id: id given to ``package_show`` to load the dataset.
    :param context: action context, forwarded unchanged to
        ``package_show``, ``download`` and ``save_package_issue``.
    :param consecutive_errors: not referenced in this function body.
    :return: the value returned by ``save_package_issue`` for the first
        failing resource; ``None`` if no resource fails in the code shown
        here.
    """
    # Function-scope import, kept where the original code had it.
    from ckanext.archiver import tasks

    package = toolkit.get_action('package_show')(context, {'id': package_id})
    extras_dict = extras_to_dict(package)
    resources = package.get('resources', [])

    # NOTE(review): not read anywhere in the visible part of this function.
    is_activity_package = 'activity' == extras_dict.get('filetype')

    log.info('Archiving dataset: {0} ({1} resources)'.format(
        package.get('name'), len(resources)))

    for resource in resources:
        if not resource.get('url', ''):
            return save_package_issue(context, package, extras_dict, 'no-url',
                                      'URL missing')

        # NOTE(review): old_hash and result are assigned but not read in the
        # visible part of this function.
        old_hash = resource.get('hash')
        try:
            result = download(context, resource, data_formats=DATA_FORMATS)
        except tasks.LinkCheckerError as e:
            message = str(e)
            if 'URL unobtainable: HTTP' in message:
                # Keep only the text before the first ' on'. partition()
                # leaves the message whole when ' on' is absent; slicing
                # with find()'s -1 would silently drop the last character.
                message = message.partition(' on')[0]
            return save_package_issue(context, package, extras_dict,
                                      'url-error', message)
        except tasks.DownloadError as e:
            message = str(e)
            if 'exceeds maximum allowed value' in message:
                message = 'File too big, not downloading'
            return save_package_issue(context, package, extras_dict,
                                      'download-error', message)
Exemplo n.º 3
0
    def write_to_csv(self, publisher):
        """Build a CSV export of packages and return it as a byte string.

        One row is written per package, with the columns listed in
        ``self.CSV_MAPPING`` (the ``state`` column is left out).

        :param publisher: ``'all'`` exports every package from
            ``package_list`` that the current user is authorized to read;
            ``'template'`` produces only the header row; any other value is
            passed to ``self._get_packages_for_org``.
        :return: the content of the in-memory CSV buffer. When
            ``ObjectNotFound`` is raised, ``p.toolkit.abort(404, ...)`` is
            called instead and nothing is returned explicitly.
        """
        context = {'model': model, 'user': c.user or c.author}
        try:
            if publisher == 'all':
                package_ids = p.toolkit.get_action('package_list')(context, {})
                package_show = p.toolkit.get_action('package_show')
                packages = []
                for pkg_id in package_ids:
                    try:
                        package = package_show(context, {'id': pkg_id})
                    except p.toolkit.NotAuthorized:
                        # Unreadable packages are skipped, not fatal.
                        log.warning(
                            'User %s not authorized to read package %s',
                            c.user, pkg_id)
                        continue
                    package.pop('state', None)
                    packages.append(package)
            elif publisher == 'template':
                # Just return an empty CSV file with just the headers
                packages = []
            else:
                packages = self._get_packages_for_org(context, publisher)

            fieldnames = [n[0] for n in self.CSV_MAPPING if n[0] != 'state']

            # The context manager closes the buffer even if a row fails to
            # serialize.
            with io.BytesIO() as f:
                writer = csv.DictWriter(f,
                                        fieldnames=fieldnames,
                                        quoting=csv.QUOTE_ALL)
                # Header row: every column name mapped to itself.
                writer.writerow(dict(zip(fieldnames, fieldnames)))

                for package in packages:
                    if not package:
                        continue
                    extras_dict = extras_to_dict(package)
                    row = {}
                    for fieldname, entity, key in self.CSV_MAPPING:
                        if key == 'state':
                            continue
                        value = None
                        if entity == 'organization':
                            # A truthiness test instead of len(): the
                            # organization may be None or missing, and
                            # len(None) raises TypeError.
                            organization = package.get('organization')
                            if organization:
                                value = organization['name']
                        elif entity == 'resources':
                            # Only the first resource feeds the CSV.
                            resources = package.get('resources')
                            if resources and key in resources[0]:
                                value = resources[0][key]
                        elif key in package:
                            value = package[key]
                        elif key in extras_dict:
                            value = extras_dict[key]

                        # Only these two columns are encoded to UTF-8.
                        if fieldname in ('title', 'description') and value:
                            value = value.encode('utf-8')
                        row[fieldname] = value

                    writer.writerow(row)
                return f.getvalue()
        except p.toolkit.ObjectNotFound:
            p.toolkit.abort(404, 'Organization not found')
Exemplo n.º 4
0
    def write_csv_file(self, publisher):
        """Build a CSV export of packages and return it as a string.

        One row is written per package, ordered by dataset name, with the
        columns listed in ``CSV_MAPPING`` (the ``state`` column is left
        out).

        :param publisher: ``'all'`` exports every package from
            ``package_list`` that the current user is authorized to read;
            ``'template'`` produces only the header row; any other value is
            passed to ``get_packages_for_org``.
        :return: the content of the in-memory CSV buffer.
        """
        context = {'model': model, 'user': c.user or c.author}
        try:
            if publisher == 'all':
                package_ids = p.toolkit.get_action('package_list')(context, {})
                package_show = p.toolkit.get_action('package_show')
                packages = []
                for pkg_id in package_ids:
                    try:
                        package = package_show(context, {'id': pkg_id})
                    except p.toolkit.NotAuthorized:
                        # Unreadable packages are skipped, not fatal.
                        log.warning(
                            'User %s not authorized to read package %s',
                            c.user, pkg_id)
                        continue
                    package.pop('state', None)
                    packages.append(package)
            elif publisher == 'template':
                # Just return an empty CSV file with just the headers
                packages = []
            else:
                packages = get_packages_for_org(context, publisher)
        except p.toolkit.ObjectNotFound:
            # NOTE(review): presumably abort() raises, so the code below is
            # never reached with ``packages`` unset -- confirm.
            p.toolkit.abort(404, 'Organization not found')

        fieldnames = [n[0] for n in CSV_MAPPING if n[0] != 'state']

        # Order rows by dataset name. Sorting the package dicts directly
        # gives no meaningful order on Python 2 and raises TypeError on
        # Python 3; sorted() also leaves the fetched list untouched.
        ordered_packages = sorted(
            packages, key=lambda pkg: (pkg or {}).get('name') or '')

        f = StringIO.StringIO()
        try:
            writer = csv.DictWriter(f, fieldnames=fieldnames,
                                    quoting=csv.QUOTE_ALL)
            # Header row: every column name mapped to itself.
            writer.writerow(dict(zip(fieldnames, fieldnames)))

            for package in ordered_packages:
                if not package:
                    continue
                extras_dict = extras_to_dict(package)
                row = {}
                for fieldname, entity, key in CSV_MAPPING:
                    if key == 'state':
                        continue
                    value = None
                    if entity == 'organization':
                        # A truthiness test instead of len(): the
                        # organization may be None or missing, and
                        # len(None) raises TypeError.
                        organization = package.get('organization')
                        if organization:
                            value = organization['name']
                    elif entity == 'resources':
                        # Only the first resource feeds the CSV.
                        resources = package.get('resources')
                        if resources and key in resources[0]:
                            value = resources[0][key]
                    elif key in package:
                        value = package[key]
                    elif key in extras_dict:
                        value = extras_dict[key]

                    # Only these two columns are encoded to UTF-8.
                    if fieldname in ('title', 'description') and value:
                        value = value.encode('utf-8')
                    row[fieldname] = value

                writer.writerow(row)
            return f.getvalue()
        finally:
            # Always release the buffer, even if writing a row fails.
            f.close()