示例#1
0
    def test_write_line_doc(self):
        """Round-trip each sample CSV file through the text-line format.

        Every file is loaded as a ``MetatabDoc`` and rendered with
        ``as_lines()``; those lines are re-parsed through ``TextRowGenerator``
        into a second document. The two documents must then agree as lines,
        as dicts (via ``compare_dict``) and as CSV.
        """

        # Named ``file_names`` rather than ``all`` so the builtin ``all()``
        # is not shadowed inside this method.
        file_names = [
            'example1.csv', 'example2.csv', 'example1-web.csv', 'children.csv',
            'children2.csv', 'issue1.csv'
        ]

        # Do not truncate the diff shown when an assertion fails.
        self.maxDiff = None

        for file_name in file_names:

            doc1 = MetatabDoc(test_data(file_name))
            doc1_lines = doc1.as_lines()

            print(doc1_lines)

            doc2 = MetatabDoc(TextRowGenerator(doc1_lines))
            doc2_lines = doc2.as_lines()

            # The line comparison is made once; both values are computed above
            # and are not regenerated later, so repeating it adds nothing.
            self.assertEqual(doc1_lines, doc2_lines)

            self.compare_dict(doc1.as_dict(), doc2.as_dict())

            self.assertEqual(doc1.as_csv(), doc2.as_csv())
示例#2
0
    def test_datapackage_declare(self):
        """Validate the dict form of a Metatab document as a datapackage.

        The dict produced from ``datapackage_ex2.csv`` is written as JSON to a
        temporary file, loaded with ``datapackage.DataPackage`` and validated.
        When loading or validation fails, the JSON is printed before the error
        is re-raised. The temporary file is removed in either case.

        Afterwards ``example1.csv`` is converted with
        ``convert_to_datapackage`` and the result is printed as JSON.
        """
        import os
        import tempfile

        import datapackage

        doc = MetatabDoc(test_data('datapackage_ex2.csv'))

        d = doc.as_dict()

        # A unique temporary file replaces the fixed '/tmp/package.json', which
        # is not portable and would collide between concurrent test runs.
        # delete=False lets the file be reopened by path once it is closed.
        with tempfile.NamedTemporaryFile(
                mode='w', suffix='.json', delete=False) as f:
            f.write(json.dumps(d, indent=4))

        try:
            dp = datapackage.DataPackage(f.name)
            dp.validate()
        except Exception:
            # Show the generated JSON to make the failure diagnosable.
            with open(f.name) as f2:
                print(f2.read())
            raise
        finally:
            os.unlink(f.name)

        doc = MetatabDoc(test_data('example1.csv'))

        from metatab.datapackage import convert_to_datapackage

        print(json.dumps(convert_to_datapackage(doc), indent=4))
示例#3
0
    def test_children(self):
        """Check the dict form of the ``parent`` terms in ``children.csv``.

        The terms and the JSON form of the document are printed, then every
        entry under the ``parent`` key of ``as_dict()`` must equal the expected
        mapping of the two properties plus the ``@value`` entry.
        """
        import json

        doc = MetatabDoc(test_data('children.csv'))

        for t in doc.terms:
            print(t)

        print(json.dumps(doc.as_dict(), indent=4))

        expected = {
            'prop1': 'prop1',
            'prop2': 'prop2',
            '@value': 'parent'
        }

        for t in doc.as_dict()['parent']:
            # assertEqual, not the deprecated alias assertEquals, which was
            # removed from unittest in Python 3.12.
            self.assertEqual(expected, t)
示例#4
0
    def test_includes(self):
        """Check that ``include1.csv`` pulls in the notes of its included files.

        The ``note`` entry of the dict form must list the three include-file
        notes in order, and the ``include`` entry must mention both
        ``include2.csv`` and ``include3.csv``.
        """

        doc = MetatabDoc(test_data('include1.csv'))
        d = doc.as_dict()

        for t in doc['root'].terms:
            print(t)

        print(d)

        # assertEqual, not the deprecated alias assertEquals, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(
            ['Include File 1', 'Include File 2', 'Include File 3'], d['note'])

        self.assertTrue(any('include2.csv' in e for e in d['include']))
        self.assertTrue(any('include3.csv' in e for e in d['include']))
示例#5
0
    def test_parse_everything(self):
        """Compare the dict form of each sample file with its JSON reference.

        Each file is parsed and converted with ``as_dict()``. The reference
        path is built with ``test_data('json', <name>.json)``; when that file
        does not exist yet it is first written from the current output. The
        reference is then loaded and compared using ``compare_dict``.
        """

        import json

        # Named ``file_names`` rather than ``all`` so the builtin ``all()``
        # is not shadowed inside this method.
        file_names = [
            'example1.csv', 'example2.csv', 'example1-web.csv', 'include1.csv',
            'include2.csv', 'include3.csv', 'children.csv', 'children2.csv',
            'issue1.csv'
        ]

        # The datapackage examples ('datapackage_ex1.csv',
        # 'datapackage_ex1_web.csv', 'datapackage_ex2.csv') are left out:
        # they are currently broken -- as_dict doesn't work properly with the
        # datapackage-latest decl.

        for fn in file_names:

            print('Testing ', fn)

            path = test_data(fn)

            json_path = test_data('json', fn.replace('.csv', '.json'))

            doc = MetatabDoc(path)
            d = doc.as_dict()

            # Bootstrap a missing reference file from the current output.
            if not exists(json_path):
                with open(json_path, 'w') as f:
                    print("Writing", json_path)
                    json.dump(d, f, indent=4)

            with open(json_path) as f:
                d2 = json.load(f)

            self.compare_dict(d, d2)
示例#6
0
def metatab():
    """Command line entry point for the ``metatab`` program.

    Parses ``sys.argv`` and runs the single action selected from the required,
    mutually exclusive option group: show configuration info, create a new
    file, dump resources, or print the parsed file as terms, JSON, YAML or a
    schema. ``-C`` and ``-d`` may be combined with those options.

    The process is terminated with ``sys.exit(0)`` once the selected action
    has run. Failures to open the Metatab document are reported via ``err``.
    """
    import argparse
    parser = argparse.ArgumentParser(
        prog='metatab',
        description='Matatab file parser, version {}'.format(_meta.__version__))

    parser.add_argument('-C', '--clean-cache', default=False, action='store_true',
                        help="Clean the download cache")

    g = parser.add_mutually_exclusive_group(required=True)

    g.add_argument('-i', '--info', default=False, action='store_true',
                   help="Show configuration information")

    g.add_argument('-c', '--create', action='store', nargs='?', default=False,
                   help="Create a new metatab file, from named template. With no argument, uses the 'metatab' template ")

    g.add_argument('-t', '--terms', default=False, action='store_true',
                   help='Parse a file and print out the stream of terms, before interpretation')

    # NOTE(review): args.interp is never read below -- confirm whether -I
    # should still produce output.
    g.add_argument('-I', '--interp', default=False, action='store_true',
                   help='Parse a file and print out the stream of terms, after interpretation')

    g.add_argument('-j', '--json', default=False, action='store_true',
                   help='Parse a file and print out a JSON representation')

    g.add_argument('-y', '--yaml', default=False, action='store_true',
                   help='Parse a file and print out a YAML representation')

    g.add_argument('-R', '--resource', default=False, action='store_true',
                   help='If the URL has no fragment, dump the resources listed in the metatab file. With a fragment, dump a resource as a CSV')

    g.add_argument('-H', '--head', default=False, action='store_true',
                   help="Dump the first 20 lines of a resoruce ")

    g.add_argument('-S', '--schema',
                   help='Dump the schema for one named resource')

    parser.add_argument('-d', '--show-declaration', default=False, action='store_true',
                        help='Parse a declaration file and print out declaration dict. Use -j or -y for the format')

    # NOTE(review): args.declare is never read below -- confirm whether -D
    # is handled elsewhere.
    parser.add_argument('-D', '--declare', help='Parse and incorporate a declaration before parsing the file.' +
                                                ' (Adds the declaration to the start of the file as the first term. )')

    parser.add_argument('file', nargs='?', default=DEFAULT_METATAB_FILE, help='Path to a Metatab file')

    args = parser.parse_args(sys.argv[1:])

    # Specing a fragment screws up setting the default metadata file name
    if args.file.startswith('#'):
        args.file = DEFAULT_METATAB_FILE + args.file

    cache = get_cache('metapack')

    if args.info:
        prt('Version  : {}'.format(_meta.__version__))
        prt('Cache dir: {}'.format(str(cache.getsyspath('/'))))
        sys.exit(0)

    if args.clean_cache:
        clean_cache(cache)

    # With nargs='?', a bare -c yields None, so compare against the False
    # default rather than testing truthiness.
    if args.create is not False:
        new_metatab_file(args.file, args.create)
        sys.exit(0)

    if args.resource or args.head:

        # -H limits the dump to the first 20 lines; -R dumps everything.
        limit = 20 if args.head else None

        u = Url(args.file)
        resource = u.parts.fragment
        metadata_url = u.rebuild_url(False, False)

        package_url, metadata_url = resolve_package_metadata_url(metadata_url)

        try:
            doc = MetatabDoc(metadata_url, cache=cache)
        except OSError as e:
            err("Failed to open Metatab doc: {}".format(e))
            return  # Never reached

        if resource:
            dump_resource(doc, resource, limit)
        else:
            dump_resources(doc)

        sys.exit(0)

    if args.show_declaration:

        doc = MetatabDoc()
        doc.load_declarations([args.file])

        print(json.dumps({
            'terms': doc.decl_terms,
            'sections': doc.decl_sections
        }, indent=4))
        sys.exit(0)
    else:

        package_url, metadata_url = resolve_package_metadata_url(args.file)
        try:
            doc = MetatabDoc(metadata_url, cache=cache)
        except IOError as e:
            # Report through err(), like the resource branch above. A stray
            # ``raise`` previously made this call unreachable.
            err("Failed to open '{}': {}".format(metadata_url, e))
            return  # Only reached if err() does not exit

    if args.terms:
        for t in doc._term_parser:
            print(t)

    elif args.json:
        print(json.dumps(doc.as_dict(), indent=4))

    elif args.yaml:
        import yaml
        print(yaml.safe_dump(doc.as_dict(), default_flow_style=False, indent=4))

    elif args.schema:
        dump_schema(doc, args.schema)

    sys.exit(0)
示例#7
0
文件: cli.py 项目: Metatab/metatab
def metatab():
    """Command line entry point for the ``metatab`` program.

    Parses ``sys.argv``, opens the named Metatab file and emits it in the
    selected output type (``terms``, ``json``, ``yaml``, ``line``, ``csv`` or
    ``prety``; ``csv`` when none is given). ``-C`` creates a new file instead,
    ``-d`` prints the declaration dict, and ``-f`` prints the first value of
    a term. With ``-W`` the json, yaml, line and csv outputs are written to a
    file beside the source rather than printed.

    The process is terminated with ``sys.exit(0)`` once the selected action
    has run. A failure to open the document is reported via ``err``.
    """
    import argparse
    parser = argparse.ArgumentParser(prog='metatab',
                                     description='Matatab file parser',
                                     epilog='Cache dir: {}\n'.format(
                                         str(cache.getsyspath('/'))))

    g = parser.add_mutually_exclusive_group()

    g.add_argument(
        '-C',
        '--create',
        action='store',
        nargs='?',
        default=False,
        help=
        "Create a new metatab file, from named template. With no argument, uses the 'metatab' template "
    )

    g.add_argument(
        '-t',
        '--terms',
        default=False,
        action='store_const',
        dest='out_type',
        const='terms',
        help=
        'Parse a file and print out the stream of terms, before interpretation'
    )

    g.add_argument('-j',
                   '--json',
                   default=False,
                   action='store_const',
                   dest='out_type',
                   const='json',
                   help='Parse a file and print out a JSON representation')

    g.add_argument('-y',
                   '--yaml',
                   default=False,
                   action='store_const',
                   dest='out_type',
                   const='yaml',
                   help='Parse a file and print out a YAML representation')

    g.add_argument(
        '-l',
        '--line',
        default=False,
        action='store_const',
        dest='out_type',
        const='line',
        help='Parse a file and print out a Metatab Line representation')

    # The help text used to repeat the --line description.
    g.add_argument(
        '-c',
        '--csv',
        default=False,
        action='store_const',
        dest='out_type',
        const='csv',
        help='Parse a file and print out a CSV representation')

    g.add_argument('-p',
                   '--prety',
                   default=False,
                   action='store_const',
                   dest='out_type',
                   const='prety',
                   help='Pretty print the python Dict representation ')

    parser.add_argument(
        '-W',
        '--write-in-place',
        help=
        'When outputting as yaml, json, csv or line, write the file instead of printing it, '
        'to a file with same base name and appropriate extension ',
        action='store_true')

    # The parser-level default takes precedence over the per-argument
    # default=False values above, so out_type is 'csv' when no type is chosen.
    parser.set_defaults(out_type='csv')

    parser.add_argument(
        '-f',
        '--find-first',
        help='Find and print the first value for a fully qualified term name')

    parser.add_argument(
        '-d',
        '--show-declaration',
        default=False,
        action='store_true',
        help=
        'Parse a declaration file and print out declaration dict. Use -j or -y for the format'
    )

    parser.add_argument('file',
                        nargs='?',
                        default=DEFAULT_METATAB_FILE,
                        help='Path to a Metatab file')

    cli_init()

    args = parser.parse_args(sys.argv[1:])

    # Specing a fragment screws up setting the default metadata file name
    if args.file.startswith('#'):
        args.file = DEFAULT_METATAB_FILE + args.file

    # With nargs='?', a bare -C yields None, so compare against the False
    # default rather than testing truthiness.
    if args.create is not False:
        if new_metatab_file(args.file, args.create):
            prt("Created ", args.file)
        else:
            warn("File", args.file, 'already exists.')

        sys.exit(0)

    metadata_url = parse_app_url(args.file, proto='metatab')
    try:
        doc = MetatabDoc(metadata_url, cache=cache)
    except IOError as e:
        err("Failed to open '{}': {}".format(metadata_url, e))
        return  # Only reached if err() does not exit; doc would be unbound

    def write_or_print(t):
        """Print ``t``, or with -W write it beside the source file.

        The output file has the source's base name and an extension derived
        from the output type ('txt' for the line format).
        """
        if not args.write_in_place:
            print(t)
            return

        # Only writing needs a local path; printing works for any URL.
        if metadata_url.scheme != 'file':
            err("Can only use -W with local files")
            return

        ext = 'txt' if args.out_type == 'line' else args.out_type

        with metadata_url.fspath.with_suffix('.' + ext).open('w') as f:
            f.write(t)

    if args.show_declaration:

        decl_doc = MetatabDoc('', cache=cache, decl=metadata_url.path)

        d = {'terms': decl_doc.decl_terms, 'sections': decl_doc.decl_sections}

        if args.out_type == 'yaml':
            import yaml
            print(yaml.safe_dump(d, default_flow_style=False, indent=4))
        else:
            # JSON is the fallback, so -d without -j or -y still prints the
            # declaration instead of producing no output.
            print(json.dumps(d, indent=4))

    elif args.find_first:

        t = doc.find_first(args.find_first)
        print(t.value)

    elif args.out_type == 'terms':
        for t in doc._term_parser:
            print(t)

    elif args.out_type == 'json':
        write_or_print(json.dumps(doc.as_dict(), indent=4))

    elif args.out_type == 'yaml':
        import yaml
        from collections import OrderedDict

        def ordered_dump(data, stream=None, Dumper=yaml.Dumper, **kwds):
            """Dump ``data`` as YAML, emitting OrderedDicts as plain mappings."""
            class OrderedDumper(Dumper):
                pass

            def _dict_representer(dumper, data):
                return dumper.represent_mapping(
                    yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
                    data.items())

            OrderedDumper.add_representer(OrderedDict, _dict_representer)
            return yaml.dump(data, stream, OrderedDumper, **kwds)

        write_or_print(
            ordered_dump(doc.as_dict(),
                         default_flow_style=False,
                         indent=4,
                         Dumper=yaml.SafeDumper))

    elif args.out_type == 'line':
        write_or_print(doc.as_lines())

    elif args.out_type == 'csv':
        write_or_print(doc.as_csv())

    elif args.out_type == 'prety':
        from pprint import pprint
        pprint(doc.as_dict())

    sys.exit(0)