示例#1
0
文件: combine.py 项目: jayvdb/ocdskit
def package_releases(stream,
                     uri='',
                     publisher=None,
                     published_date='',
                     extensions=None):
    """
    Builds a single release package out of the releases read from the stream.

    Each item of ``stream`` is parsed with ``json_loads`` and becomes one entry of the package's ``releases``.

    :param stream: an iterable of serialized releases
    :param str uri: the release package's ``uri``
    :param dict publisher: the release package's ``publisher`` (an empty ``OrderedDict`` if omitted)
    :param str published_date: the release package's ``publishedDate``
    :param list extensions: the release package's ``extensions`` (an empty list if omitted)
    :returns: the release package, with its ``version`` set to ``'1.1'``
    :rtype: collections.OrderedDict
    """
    # Parse the whole stream first, so that a malformed item fails before anything else is built.
    parsed_releases = [json_loads(item) for item in stream]

    package = OrderedDict()
    package['uri'] = uri
    # Fresh containers are created per call, rather than shared through mutable default arguments.
    package['publisher'] = OrderedDict() if publisher is None else publisher
    package['publishedDate'] = published_date
    package['version'] = '1.1'
    package['extensions'] = [] if extensions is None else extensions
    package['releases'] = parsed_releases

    return package
示例#2
0
    def handle(self):
        """
        Splits each release package read from the buffer into smaller packages, and prints each one.

        Every printed package keeps the input package's other fields and holds at most ``self.args.size`` releases.
        """
        for line in self.buffer():
            package = json_loads(line)
            all_releases = package['releases']

            chunk_size = self.args.size
            # The same package object is reused for every chunk; only its releases are swapped out.
            for start in range(0, len(all_releases), chunk_size):
                package['releases'] = all_releases[start:start + chunk_size]
                self.print(package)
示例#3
0
    def handle(self):
        """
        Splits each record package read from the buffer into smaller packages, and prints each one.

        Every printed package keeps the input package's other fields, except ``packages``, and holds at most
        ``self.args.size`` records.
        """
        for line in self.buffer():
            package = json_loads(line)
            all_records = package['records']

            # We can't determine which records came from which packages.
            package.pop('packages', None)

            chunk_size = self.args.size
            # The same package object is reused for every chunk; only its records are swapped out.
            for start in range(0, len(all_records), chunk_size):
                package['records'] = all_records[start:start + chunk_size]
                self.print(package)
示例#4
0
    def handle(self):
        """
        Validates each JSON item read from the buffer against the schema at ``self.args.schema``.

        The schema is read from disk if ``self.args.schema`` uses the ``file`` scheme, and is requested over HTTP
        otherwise. One line is printed per validation error. If ``self.args.verbose`` is set, a line is also printed
        for each item without errors. If ``self.args.check_urls`` is set, string values with the ``uri`` format are
        also requested, and fail validation unless the response's status code is 200.

        :raises CommandError: if an item can't be decoded as JSON
        """
        schema_location = self.args.schema
        if urlparse(schema_location).scheme == 'file':
            # Skip the seven characters of the leading "file://".
            with open(schema_location[7:]) as f:
                schema = json_load(f)
        else:
            schema = requests.get(schema_location).json()

        format_checker = FormatChecker()
        if self.args.check_urls:

            def check_url(instance):
                # See https://github.com/Julian/jsonschema/blob/master/jsonschema/_format.py
                if not isinstance(instance, str_types):
                    return True
                rfc3987.parse(instance, rule='URI')  # raises ValueError
                try:
                    response = requests.get(instance,
                                            timeout=self.args.timeout)
                except requests.exceptions.Timeout:
                    print('Timedout on GET {}'.format(instance))
                    return False
                if response.status_code == 200:
                    return True
                print('HTTP {} on GET {}'.format(response.status_code,
                                                 instance))
                return False

            # A ValueError raised by check_url is reported as a format error.
            format_checker.checks('uri', raises=ValueError)(check_url)

        for index, line in enumerate(self.buffer()):
            try:
                data = json_loads(line)
                error_count = 0
                item_validator = validator(schema,
                                           format_checker=format_checker)
                for error in item_validator.iter_errors(data):
                    schema_path = '/'.join(error.absolute_schema_path)
                    print('item {}: {} ({})'.format(index, error.message,
                                                    schema_path))
                    error_count += 1
                if error_count == 0 and self.args.verbose:
                    print('item {}: no errors'.format(index))
            except json.decoder.JSONDecodeError as e:
                raise CommandError('item {}: JSON error: {}'.format(index, e))
示例#5
0
    def handle(self):
        """
        Upgrades (or downgrades) each JSON item read from the buffer between versions, and prints it.

        ``self.args.versions`` has the form ``<from>:<to>``, e.g. ``1.0:1.1``. The conversion is performed by the
        ``upgrade_<from>_<to>`` function of the ``upgrade`` module (with the dots removed from the versions), which
        is called on each item before the item is printed.

        :raises CommandError: if the ``upgrade`` module has no function for the requested versions
        """
        versions = self.args.versions

        def version_key(version):
            # Compare versions by their numeric components: as plain strings, '1.10' sorts before '1.9'.
            try:
                return [int(part) for part in version.split('.')]
            except ValueError:
                return None

        version_from, version_to = versions.split(':')
        key_from = version_key(version_from)
        key_to = version_key(version_to)
        if key_from is not None and key_to is not None:
            is_upgrade = key_from < key_to
        else:
            # Fall back to string comparison if a version has a non-numeric component.
            is_upgrade = version_from < version_to

        # The direction is only used to word the error message below.
        direction = 'up' if is_upgrade else 'down'

        try:
            upgrade_method = getattr(
                upgrade, 'upgrade_{}'.format(
                    versions.replace('.', '').replace(':', '_')))
        except AttributeError:
            raise CommandError('{}grade from {} is not supported'.format(
                direction, versions.replace(':', ' to ')))

        for line in self.buffer():
            data = json_loads(line)
            upgrade_method(data)
            self.print(data)
示例#6
0
文件: combine.py 项目: jayvdb/ocdskit
def combine_record_packages(stream, uri='', publisher=None, published_date=''):
    """
    Merges the record packages read from the stream into one record package.

    The ``records`` of every input package are collected, as are the ``packages`` of the input packages that have
    any. The output's ``packages`` field is removed if nothing was collected into it. The remaining metadata is
    handled by ``_update_package_metadata``, ``_set_extensions_metadata`` and ``_remove_empty_optional_metadata``.

    :param stream: an iterable of serialized record packages
    :param str uri: the record package's ``uri``
    :param dict publisher: the record package's ``publisher`` (an empty ``OrderedDict`` if omitted)
    :param str published_date: the record package's ``publishedDate``
    :returns: the combined record package
    :rtype: collections.OrderedDict
    """
    if publisher is None:
        publisher = OrderedDict()

    # The insertion order below sets the order of the fields in the output.
    output = OrderedDict()
    output['uri'] = uri
    output['publisher'] = publisher
    output['publishedDate'] = published_date
    output['license'] = None
    output['publicationPolicy'] = None
    output['version'] = None
    output['extensions'] = OrderedDict()
    output['packages'] = []
    output['records'] = []

    for item in stream:
        package = json_loads(item)

        _update_package_metadata(output, package, publisher)

        output['records'].extend(package['records'])
        # Input packages don't necessarily list the packages they were built from.
        output['packages'].extend(package.get('packages', ()))

    if not output['packages']:
        output.pop('packages')

    _set_extensions_metadata(output)
    _remove_empty_optional_metadata(output)

    return output
示例#7
0
文件: combine.py 项目: jayvdb/ocdskit
def compile_release_packages(stream,
                             uri='',
                             publisher=None,
                             published_date='',
                             schema=None,
                             return_versioned_release=False,
                             return_package=False,
                             use_linked_releases=False):
    """
    Reads release packages from the stream, merges the releases by OCID, and yields the compiled releases.

    If ``return_versioned_release`` is ``True``, yields the versioned release. If ``return_package`` is ``True``, wraps
    the compiled releases (and versioned releases if ``return_versioned_release`` is ``True``) in a record package.

    If ``return_package`` is set and ``publisher`` isn't set, the output record package will have the same publisher as
    the last input release package.

    If ``use_linked_releases`` is set, each record's ``releases`` contains only the linked releases that share the
    record's OCID.

    :param stream: an iterable of serialized release packages
    :param str uri: if ``return_package`` is ``True``, the record package's ``uri``
    :param dict publisher: if ``return_package`` is ``True``, the record package's ``publisher``
    :param str published_date: if ``return_package`` is ``True``, the record package's ``publishedDate``
    :param str schema: the URL or path of the release schema to use
    :param bool return_package: wrap the compiled releases in a record package
    :param bool use_linked_releases: if ``return_package`` is ``True``, use linked releases instead of full releases
    :param bool return_versioned_release: if ``return_package`` is ``True``, include versioned releases in the record
        package; otherwise, yield versioned releases instead of compiled releases
    :raises InconsistentVersionError: if the input packages don't all use the same version
    """
    if return_package:
        output = OrderedDict([
            ('uri', uri),
            ('publisher', publisher),
            ('publishedDate', published_date),
            ('license', None),
            ('publicationPolicy', None),
            ('version', None),
            ('extensions', OrderedDict()),
            ('packages', []),
            ('records', []),
        ])

    version = None
    releases_by_ocid = defaultdict(list)
    # Linked releases are grouped by OCID, so that each record only links to its own releases.
    linked_releases_by_ocid = defaultdict(list)

    for i, line in enumerate(stream):
        package = json_loads(line)

        if not version:
            version = get_ocds_minor_version(package)
        else:
            v = get_ocds_minor_version(package)
            if v != version:
                raise InconsistentVersionError(
                    'item {}: version error: this package uses version {}, but earlier '
                    'packages used version {}'.format(i, v, version), version,
                    v)

        if not schema:
            # Default to the schema of the last tag returned by get_tags() that matches the packages' version.
            # NOTE(review): if no tag matches, next() raises StopIteration, which surfaces as a RuntimeError on
            # Python 3.7+ because this function is a generator.
            prefix = version.replace('.', '__') + '__'
            tag = next(tag for tag in reversed(get_tags())
                       if tag.startswith(prefix))
            schema = get_release_schema_url(tag)

        for release in package['releases']:
            ocid = release['ocid']
            releases_by_ocid[ocid].append(release)

            if return_package and use_linked_releases:
                linked_releases_by_ocid[ocid].append(
                    OrderedDict([
                        ('url', package['uri'] + '#' + release['id']),
                        ('date', release['date']),
                        ('tag', release['tag']),
                    ]))

        if return_package:
            _update_package_metadata(output, package, publisher)

            output['packages'].append(package['uri'])

    if return_package:
        for ocid, releases in releases_by_ocid.items():
            if use_linked_releases:
                record_releases = linked_releases_by_ocid[ocid]
            else:
                record_releases = releases

            record = OrderedDict([
                ('ocid', ocid),
                ('releases', record_releases),
                ('compiledRelease', merge(releases, schema)),
            ])

            if return_versioned_release:
                record['versionedRelease'] = merge_versioned(releases, schema)

            output['records'].append(record)

        _set_extensions_metadata(output)
        _remove_empty_optional_metadata(output)

        yield output
    else:
        for releases in releases_by_ocid.values():
            if return_versioned_release:
                merge_method = merge_versioned
            else:
                merge_method = merge

            yield merge_method(releases, schema)
示例#8
0
 def json(self):
     """
     Reads the file at ``self.path`` as UTF-8 text, and returns its contents parsed as JSON.
     """
     with open(self.path, encoding='utf-8') as f:
         text = f.read()
     return json_loads(text)