Exemplo n.º 1
0
    def test_sync_csv_package(self):
        """Build a CSV package from a remote metadata file and print its resources.

        The local ``_packages`` test-data directory is passed as the package
        root, and the directory of the source URL as the resource root.
        Nothing is asserted; the test passes when no step raises.
        """
        from metapack_build.package import CsvPackageBuilder

        packages_path = test_data(
            'packages/example.com/example.com-simple_example-2017-us/_packages'
        )
        package_root = MetapackPackageUrl(packages_path, downloader=downloader)

        source = MetapackUrl(
            'http://library.metatab.org/example.com-simple_example-2017-us-2/metadata.csv',
            downloader=downloader)

        # Resolve the target up front; the returned value is not used here.
        source.get_resource().get_target()

        resource_root = source.dirname().as_type(MetapackPackageUrl)
        builder = CsvPackageBuilder(source,
                                    package_root,
                                    resource_root=resource_root)

        saved_doc = builder.save().metadata_url.doc

        for resource in saved_doc.resources():
            print(resource.name, resource.url)
Exemplo n.º 2
0
    def test_build_notebook_package(self):
        """Build a filesystem package from the notebook example package.

        Checks that the ``basic_a`` resource yields 2501 rows, then builds a
        filesystem package in the ``PACKAGE_PREFIX`` directory of the source
        package and prints the resulting URL.

        The test is skipped when an ``ImportError`` is raised anywhere in the
        body, which is taken to mean that pandas is not installed.
        """
        try:
            from metapack import MetapackDocumentUrl, get_cache
            from metapack_build.build import make_filesystem_package

            m = MetapackDocumentUrl(test_data(
                'packages/example.com/example.com-notebook/metadata.csv'),
                                    downloader=downloader)

            doc = MetapackDoc(m)

            r = doc.resource('basic_a')

            self.assertEqual(2501, len(list(r)))

            package_dir = m.package_url.join_dir(PACKAGE_PREFIX)

            _, fs_url, created = make_filesystem_package(
                m, package_dir, get_cache(), {}, False, False, False)

            print(fs_url)
        except ImportError:
            # unittest.skip() only builds a decorator, so calling it here
            # skipped nothing and the test was reported as passed.
            # skipTest() raises SkipTest, which the runner records as a skip.
            self.skipTest("Pandas not installed")
Exemplo n.º 3
0
    def test_fixed_resource(self):
        """Check the URLs, generator type and row content of ``simple-fixed``.

        The declared and resolved URLs must both be the same ``fixed+http``
        string, the row generator must be a ``FixedSource``, and the second
        field of the sixth row must match a known value.
        """
        from itertools import islice
        from rowgenerators.generator.fixed import FixedSource

        expected_url = 'fixed+http://public.source.civicknowledge.com/example.com/sources/simple-example.txt'

        metadata_url = MetapackUrl(
            test_data(
                'packages/example.com/example.com-full-2017-us/metadata.csv'),
            downloader=downloader)

        resource = MetapackDoc(metadata_url).resource('simple-fixed')

        self.assertEqual(expected_url, str(resource.url))
        self.assertEqual(expected_url, str(resource.resolved_url))

        generator = resource.row_generator

        print(resource.row_processor_table())

        self.assertIsInstance(generator, FixedSource)

        # Only the first ten rows are needed for the check below.
        first_rows = list(islice(resource, 10))

        print('----')
        for record in first_rows:
            print(record)

        self.assertEqual('f02d53a3-6bbc-4095-a889-c4dde0ccf5',
                         first_rows[5][1])
Exemplo n.º 4
0
    def test_build_geo_package(self):
        """Build a filesystem package from the census-regions package and read it.

        After the build, the ``sra`` resource of the new package must yield
        41 dict rows, and the ``geometry`` value of the second row must be a
        ``ShapeValue``.
        """
        from rowgenerators.valuetype import ShapeValue

        source = MetapackUrl(
            test_data(
                'packages/sangis.org/sangis.org-census_regions/metadata.csv'),
            downloader=downloader)

        target_dir = source.package_url.join_dir(PACKAGE_PREFIX)

        _, fs_url, _created = make_filesystem_package(
            source, target_dir, downloader.cache, {}, True)

        print(fs_url)

        sra = MetapackDoc(fs_url).resource('sra')

        records = list(sra.iterdict)

        self.assertEqual(41, len(records))
        self.assertIsInstance(records[1]['geometry'], ShapeValue)
Exemplo n.º 5
0
    def test_build_simple_package(self):
        """Build filesystem, Excel, ZIP and CSV packages from the simple example.

        The filesystem package is built first; the other three are built from
        it. Each derived package must list the same three resource names, and
        the resource URLs are checked against format-specific expectations.
        """
        cli_init()

        cache = Downloader().cache

        source = MetapackUrl(
            test_data(
                'packages/example.com/example.com-simple_example-2017-us'),
            downloader=downloader)

        package_dir = source.package_url.join_dir(PACKAGE_PREFIX)

        _, fs_url, _created = make_filesystem_package(
            source, package_dir, cache, {}, False)

        fs_doc = MetapackDoc(fs_url, cache=downloader.cache)

        # Looked up only to confirm the resource is retrievable.
        fs_doc.resource('random-names')

        expected_names = ['random-names', 'renter_cost', 'unicode-latin1']

        # Excel: the resource URLs are expected to equal the resource names.
        _, xlsx_url, _created = make_excel_package(
            fs_url, package_dir, cache, {}, False)

        self.assertEqual(expected_names,
                         [res.name for res in xlsx_url.doc.resources()])
        self.assertEqual(expected_names,
                         [res.url for res in xlsx_url.doc.resources()])

        # ZIP: the resource URLs are expected to be paths under data/.
        _, zip_url, _created = make_zip_package(
            fs_url, package_dir, cache, {}, False)

        self.assertEqual(expected_names,
                         [res.name for res in zip_url.doc.resources()])

        expected_zip_urls = [
            'data/random-names.csv',
            'data/renter_cost.csv',
            'data/unicode-latin1.csv',
        ]
        self.assertEqual(expected_zip_urls,
                         [res.url for res in zip_url.doc.resources()])

        # CSV: only the last 50 characters of each URL are compared.
        _, csv_url, _created = make_csv_package(
            fs_url, package_dir, cache, {}, False)

        self.assertEqual(expected_names,
                         [res.name for res in csv_url.doc.resources()])

        expected_csv_tails = [
            'com-simple_example-2017-us-2/data/random-names.csv',
            '.com-simple_example-2017-us-2/data/renter_cost.csv',
            'm-simple_example-2017-us-2/data/unicode-latin1.csv',
        ]
        self.assertEqual(expected_csv_tails,
                         [str(res.url)[-50:] for res in csv_url.doc.resources()])
Exemplo n.º 6
0
    def test_metapack_resources(self):
        """Print the resources and target existence of the ``metab_reuse`` package.

        Nothing is asserted; the test passes when no step raises.
        """
        cli_init()

        metadata_path = test_data(
            'packages/example.com/example.com-metab_reuse/metadata.csv')

        package_url = MetapackUrl(metadata_path, downloader=downloader)

        resources = package_url.doc.resources()
        print(resources)

        target = package_url.get_resource().get_target()
        print(target.exists())
Exemplo n.º 7
0
    def test_build_transform_package(self):
        """Build a filesystem package from the transforms example and print its URL.

        Nothing is asserted; the test passes when the build does not raise.
        """
        source = MetapackUrl(
            test_data(
                'packages/example.com/example.com-transforms/metadata.csv'),
            downloader=downloader)

        target_dir = source.package_url.join_dir(PACKAGE_PREFIX)

        _, fs_url, _created = make_filesystem_package(
            source, target_dir, downloader.cache, {}, False)

        print(fs_url)
Exemplo n.º 8
0
    def test_html(self):
        """Check that the package and one resource render non-trivial HTML.

        The package's ``_repr_html_`` output must be longer than 4500
        characters, and that of the ``random-names`` resource longer than 400.
        """
        p = open_package(
            test_data(
                'packages/example.com/example.com-full-2017-us/metadata.csv'))

        # Render once and use assertGreater so a failure reports the actual
        # length without rendering the HTML a second time for the message.
        package_html = p._repr_html_()
        self.assertGreater(len(package_html), 4500)

        print([e.name for e in p.find('Root.Resource')])

        r = p.find_first('Root.Resource', name='random-names')

        resource_html = r._repr_html_()
        self.assertGreater(len(resource_html), 400)
Exemplo n.º 9
0
    def test_nbconvert(self):
        """Run ``convert_documentation`` on the ``ConversionTest`` notebook.

        Nothing is asserted; the test passes when the conversion does not
        raise.
        """
        from collections import namedtuple
        from metapack.jupyter.convert import convert_documentation

        cli_init()

        notebook_url = parse_app_url(
            test_data('notebooks/ConversionTest.ipynb'))

        # Small record that carries the parsed URL under ``mt_file``.
        M = namedtuple('M', 'mt_file')
        args = M(mt_file=notebook_url)

        convert_documentation(args.mt_file.path)
Exemplo n.º 10
0
    def test_line_doc_parts(self):
        """Assemble one document from several line-oriented files and check it.

        Each file is parsed with a ``TermParser`` bound to the same document
        and loaded into it. The assembled document must have the expected
        identifier, 157 terms, and five entries in both the ``References``
        and ``Resources`` sections.
        """
        doc = MetapackDoc(TextRowGenerator("Declare: metatab-latest"))

        part_files = (
            'line/line-oriented-doc-root.txt',
            'line/line-oriented-doc-contacts.txt',
            'line/line-oriented-doc-datafiles.txt',
            'line/line-oriented-doc-references-1.txt',
            'line/line-oriented-doc-references-2.txt',
            'line/line-oriented-doc-bib.txt',
        )

        for part in part_files:
            with open(test_data(part)) as fh:
                content = fh.read()

            parser = TermParser(TextRowGenerator(content),
                                resolver=doc.resolver,
                                doc=doc)

            doc.load_terms(parser)

        def found(section, term):
            """Return the terms matching ``term`` in the named section."""
            return list(doc[section].find(term))

        self.assertEqual('47bc1089-7584-41f0-b804-602ec42f1249',
                         doc.get_value('Root.Identifier'))
        self.assertEqual(157, len(doc.terms))

        # References section: a Root.Resource search also matches the
        # references, and the matches are Reference instances.
        self.assertEqual(5, len(list(doc['References'])))
        self.assertEqual(5, len(found('References', 'Root.Reference')))
        self.assertEqual(5, len(found('References', 'Root.Resource')))

        first_reference = found('References', 'Root.Resource')[0]
        self.assertIsInstance(first_reference, Reference)

        # Resources section: a Root.Resource search also matches the
        # datafiles, and the matches are Resource instances.
        self.assertEqual(5, len(list(doc['Resources'])))
        self.assertEqual(5, len(found('Resources', 'Root.Datafile')))
        self.assertEqual(5, len(found('Resources', 'Root.Resource')))

        first_resource = found('Resources', 'Root.Resource')[0]
        self.assertIsInstance(first_resource, Resource)

        doc._repr_html_()  # Only checks that rendering does not raise.
Exemplo n.º 11
0
    def test_build_dataframe(self):
        """Check the grand total of the dataframes built from three resources.

        For each resource of the ``example.com-python`` package, the sum over
        every cell of its dataframe must equal the expected total.
        """
        package = open_package(
            test_data('packages/example.com/example.com-python/metadata.csv'))

        expected_totals = (
            ('simple', 270),
            ('explicit_dataframe_source', 435),
            ('implicit_dataframe_source', 435),
        )

        for resource_name, total in expected_totals:
            frame = package.resource(resource_name).dataframe()

            # First sum() gives per-column totals, the second adds them up.
            self.assertEqual(total, frame.sum().sum())
Exemplo n.º 12
0
    def test_petl(self):
        """Open the ``simple-example`` resource as a petl table and print a preview.

        Nothing is asserted; the test passes when no step raises.
        """
        from petl import look

        metadata_url = MetapackUrl(
            test_data(
                'packages/example.com/example.com-full-2017-us/metadata.csv'),
            downloader=downloader)

        resource = MetapackDoc(metadata_url).resource('simple-example')

        # Resolve the target before building the table; the return value is
        # not used.
        resource.resolved_url.get_resource().get_target()

        table = resource.petl()

        print(look(table))
Exemplo n.º 13
0
    def x_test_metatab_line(self):
        """Load ``simple-text.txt``, process its schemas and print the columns.

        The method name does not start with ``test``, so unittest discovery
        does not collect it.
        """
        from metatab.generate import TextRowGenerator
        from metatab.cli.core import process_schemas
        from metatab import MetatabDoc

        cli_init()

        text_path = test_data('simple-text.txt')
        doc = MetatabDoc(TextRowGenerator(text_path, 'simple-text.txt'))

        process_schemas(doc)

        for column in doc.resource('resource').columns():
            print(column)
Exemplo n.º 14
0
    def test_read_geo_packages(self):
        """Load a census dataframe and two geoframes from the line-oriented document.

        The ``B09020`` reference must load as a ``CensusDataFrame``. The
        ``sra_geo`` geoframe must hold 41 geometries, all ``Polygon``. The
        ``ri_tracts`` geoframe must hold 244 geometries whose types are
        ``MultiPolygon`` and ``Polygon``.

        The test is skipped when ``publicdata`` cannot be imported, or when
        loading the ``B09020`` dataframe raises ``HTTPError``.
        """
        import warnings
        from requests.exceptions import HTTPError

        warnings.simplefilter("ignore")

        try:
            from publicdata.census.dataframe import CensusDataFrame
        except ImportError:
            # skipTest() raises SkipTest. The previous
            # ``return unittest.skip(...)`` only returned an unused decorator,
            # so the test was reported as passed instead of skipped.
            self.skipTest("Public data isn't installed")

        with open(test_data('line', 'line-oriented-doc.txt')) as f:
            text = f.read()

        doc = MetapackDoc(TextRowGenerator("Declare: metatab-latest\n" + text))

        r = doc.reference('B09020')

        try:
            df = r.dataframe()
        except HTTPError:  # The Census reporter URLs fail sometimes.
            self.skipTest("Census Reporter vanished")

        self.assertIsInstance(df, CensusDataFrame)

        r = doc.reference('sra_geo')

        gf = r.geoframe()

        self.assertEqual(41, len(gf.geometry.geom_type))

        self.assertEqual({'Polygon'}, set(gf.geometry.geom_type))

        r = doc.reference('ri_tracts')

        gf = r.geoframe()

        self.assertEqual(244, len(gf.geometry.geom_type))

        # sorted() accepts any iterable, so no intermediate list is needed.
        geom_types = sorted(set(gf.geometry.geom_type))

        print(geom_types)

        self.assertEqual(['MultiPolygon', 'Polygon'], geom_types)

        print(gf.head())
Exemplo n.º 15
0
    def test_gen_line_rows(self):
        """Parse the line-oriented document with the ``metapack`` proto and load it.

        The parsed URL, its resource and its target must all be
        ``MetapackDocumentUrl`` instances, the URL's generator must be a
        ``TextRowGenerator``, and the loaded document must carry the expected
        identifier.
        """
        from metatab import parse_app_url
        from metapack import MetapackDocumentUrl
        from metatab.rowgenerators import TextRowGenerator

        doc_url = parse_app_url(test_data('line', 'line-oriented-doc.txt'),
                                proto='metapack')

        self.assertIsInstance(doc_url, MetapackDocumentUrl)
        self.assertIsInstance(doc_url.get_resource(), MetapackDocumentUrl)
        self.assertIsInstance(doc_url.get_resource().get_target(),
                              MetapackDocumentUrl)

        self.assertIsInstance(doc_url.generator, TextRowGenerator)

        identifier = MetapackDoc(doc_url).get_value('Root.Identifier')
        self.assertEqual('47bc1089-7584-41f0-b804-602ec42f1249', identifier)
Exemplo n.º 16
0
    def test_open_package(self):
        """Open the full example package and check its terms and resources.

        Covers the type of the first datafile, the package name, the number
        of resources, the presence of known datafile names, resource lookup
        by name, the resolved URL of the first datafile, the row count of
        ``unicode-latin1``, and the bibliography entry names.
        """
        from metapack import open_package
        from metapack.terms import Resource

        package = open_package(
            test_data(
                'packages/example.com/example.com-full-2017-us/metadata.csv'))

        first_by_lowercase = package.find_first('root.datafile')
        self.assertEqual(Resource, type(first_by_lowercase))

        self.assertEqual('example.com-full-2017-us-1',
                         package.find_first('Root.Name').value)

        resource_terms = list(package['Resources'].find('Root.Resource'))
        self.assertEqual(16, len(resource_terms))

        datafile_names = [term.name for term in package.find('Datafile')]

        expected_names = [
            'renter_cost', 'simple-example-altnames', 'simple-example',
            'unicode-latin1', 'unicode-utf8', 'renter_cost_excel07',
            'renter_cost_excel97', 'renter_cost-2', 'random-names',
            'random-names-fs', 'random-names-csv', 'random-names-xlsx',
            'random-names-zip', 'sra'
        ]
        for expected in expected_names:
            self.assertIn(expected, datafile_names)

        self.assertIsInstance(package.resource('random-names'), Resource)
        self.assertEqual('random-names',
                         package.resource('random-names').name)

        first_datafile = package.find_first('Root.DataFile')
        print(first_datafile.resolved_url)
        self.assertEqual(
            'http://public.source.civicknowledge.com/example.com/sources/test_data.zip#renter_cost.csv',
            str(first_datafile.resolved_url))

        # Only the unicode-latin1 datafile has its declared row count
        # compared with the number of rows it actually yields.
        for datafile in package.find('Root.DataFile'):
            if datafile.name == 'unicode-latin1':
                self.assertEqual(int(datafile.nrows), len(list(datafile)))

        self.assertEqual(['ipums', 'bordley', 'mcdonald', 'majumder'],
                         [entry.name for entry in package['Bibliography']])
Exemplo n.º 17
0
    def test_line_doc(self):
        """Read the ``tracts`` reference and put the ``incv`` library on ``sys.path``.

        The ``tracts`` reference must yield 628 rows, and the integer sum of
        its ``lon`` column must be -73427 whether it is loaded with
        ``dataframe()`` or ``read_csv()``. The library directory derived from
        the ``incv`` reference is then inserted at the front of ``sys.path``
        if it is not already there.
        """
        from os.path import splitext, basename
        import sys

        with open(test_data('line', 'line-oriented-doc.txt')) as fh:
            body = fh.read()

        doc = MetapackDoc(TextRowGenerator("Declare: metatab-latest\n" + body))

        tracts_ref = doc.reference('tracts')

        self.assertEqual(628, len(list(tracts_ref)))

        frame = tracts_ref.dataframe()
        self.assertEqual(-73427, frame.lon.sum().astype(int))

        frame = tracts_ref.read_csv()
        self.assertEqual(-73427, frame.lon.sum().astype(int))

        # Build the dataframe once more; the result is discarded.
        tracts_ref.dataframe()

        # Test loading a Python library from a package.
        incv_ref = doc.reference('incv')

        self.assertIsNotNone(incv_ref)

        archive = parse_app_url(
            incv_ref.url).inner.clear_fragment().get_resource()

        # The path has to be a Metatab ZIP archive whose root directory has
        # the same name as the archive file without its extension.
        archive_name = splitext(basename(archive.path))[0]

        library_dir = archive.join(archive_name).path

        if library_dir not in sys.path:
            sys.path.insert(0, library_dir)
Exemplo n.º 18
0
    def test_build_package(self):
        """Build a filesystem package from the full example package.

        Prints the ``created`` value returned by the build. The test is
        skipped, with the exception text as the reason, when an
        ``ImportError`` is raised during setup or the build.
        """
        try:
            cli_init()

            m = MetapackUrl(test_data(
                'packages/example.com/example.com-full-2017-us/metadata.csv'),
                            downloader=downloader)

            package_dir = m.package_url.join_dir(PACKAGE_PREFIX)

            cache = Downloader().cache

            _, fs_url, created = make_filesystem_package(
                m, package_dir, cache, {}, False)
        except ImportError as e:
            # unittest.skip() only builds a decorator, so calling it here
            # skipped nothing. skipTest() raises SkipTest, which the runner
            # records as a skip.
            self.skipTest(str(e))

        print(created)
Exemplo n.º 19
0
    def test_line_oriented(self):
        """Load the line-oriented document and check its identifier and references.

        The document is built from a ``TextRowGenerator`` that is given the
        test-data path. It must have the expected identifier, 153 terms, and
        six entries in the ``References`` section, which are ``Reference``
        instances.
        """
        generator = TextRowGenerator(
            test_data('line', 'line-oriented-doc.txt'))
        doc = MetapackDoc(generator)

        self.assertEqual('47bc1089-7584-41f0-b804-602ec42f1249',
                         doc.get_value('Root.Identifier'))
        self.assertEqual(153, len(doc.terms))

        self.assertEqual(6, len(list(doc['References'])))

        by_reference = list(doc['References'].find('Root.Reference'))
        self.assertEqual(6, len(by_reference))

        # A Root.Resource search also matches the references.
        by_resource = list(doc['References'].find('Root.Resource'))
        self.assertEqual(6, len(by_resource))

        first_match = list(doc['References'].find('Root.Resource'))[0]

        self.assertIsInstance(first_match, Reference)
Exemplo n.º 20
0
    def test_nbconvert_package(self):
        """Run ``convert_notebook`` on the ``ConversionTest`` notebook.

        Nothing is asserted; the test passes when the conversion does not
        raise. It is skipped when an ``ImportError`` or ``FileNotFoundError``
        is raised, which is taken to mean that Pandoc is not installed.
        """
        try:
            from collections import namedtuple
            from metapack.jupyter.convert import convert_notebook

            cli_init()

            M = namedtuple('M', 'mt_file mtfile_arg init_stage2')

            fn = test_data('notebooks/ConversionTest.ipynb')

            m = M(mt_file=parse_app_url(fn),
                  mtfile_arg=parse_app_url(fn),
                  init_stage2=lambda x, y: None)

            convert_notebook(m.mt_file.path)
        except (ImportError, FileNotFoundError):
            # unittest.skip() only builds a decorator, so calling it here
            # skipped nothing. skipTest() raises SkipTest, which the runner
            # records as a skip.
            self.skipTest("Pandoc is not installed")
Exemplo n.º 21
0
    def test_resolve_resource_urls(self):
        """Test how resources are resolved in packages.

        A resource can be addressed by:
            - a name, for Excel and CSV packages
            - a path, for ZIP and filesystem packages
            - a web URL, for any kind of package

        Each row of ``packages.csv`` supplies a package ``url`` and a
        ``target_file`` to resolve against it, the expected ``resolved_url``
        substring, and ``resolve_error`` / ``generate_error`` flags saying
        whether resolution or row generation is expected to fail.
        """
        with open(test_data('packages.csv')) as fh:
            # Numbering starts at 2 because DictReader consumes the header
            # line, so the first data row is line 2 of the file.
            for line_no, case in enumerate(DictReader(fh), 2):

                package_url = MetapackPackageUrl(case['url'],
                                                 downloader=Downloader())

                try:
                    resolved = package_url.resolve_url(case['target_file'])
                    self.assertFalse(bool(case['resolve_error']))
                except ResourceError:
                    self.assertTrue(bool(case['resolve_error']))
                    continue
                except DownloadError:
                    raise

                # Containment rather than equality: the resolved URL can hold
                # a local filesystem path that depends on where the test runs.
                self.assertTrue(case['resolved_url'] in str(resolved),
                                (line_no, case['resolved_url'], str(resolved)))

                try:
                    row_source = get_generator(
                        resolved.get_resource().get_target())

                    self.assertEqual(101, len(list(row_source)))
                    self.assertFalse(bool(case['generate_error']))
                except DownloadError:
                    raise
                except RowGeneratorError:
                    self.assertTrue(bool(case['generate_error']))
Exemplo n.º 22
0
    def test_program_resource(self):
        """Exercise the ``rowgen`` program resource (currently disabled).

        The test is skipped unconditionally because it is known to be broken.
        The statements after the skip are unreachable and are kept as the
        intended body for when the test is re-enabled.
        """
        # A bare ``return`` here made the runner report this broken test as
        # passed; skipTest() records it as skipped, with the reason.
        self.skipTest("Actually, completely broken right now")

        m = MetapackUrl(test_data(
            'packages/example.com/example.com-full-2017-us/metadata.csv'),
                        downloader=downloader)

        doc = MetapackDoc(m)

        r = doc.resource('rowgen')

        self.assertEqual('program+file:scripts/rowgen.py', str(r.url))

        print(r.resolved_url)

        g = r.row_generator

        print(type(g))

        for row in r:
            print(row)
Exemplo n.º 23
0
    def test_notebook_url(self):
        """Execute the ``GenerateDataTest`` notebook and check the files it writes.

        The notebook path must parse to a ``JupyterNotebookUrl``. After
        ``execute_notebook`` runs with ``/tmp/nbtest`` and the names ``dfa``
        and ``dfb``, both ``dfa.csv`` and ``dfb.csv`` must exist in that
        directory; the rows of ``dfa.csv`` are then printed.

        The test is skipped when an ``ImportError`` is raised, which is taken
        to mean that pandas or the Jupyter client is missing.
        """
        try:
            from metapack.appurl import JupyterNotebookUrl
            from metapack.jupyter.exec import execute_notebook
            from os.path import exists

            u = parse_app_url(test_data('notebooks', 'GenerateDataTest.ipynb'))

            self.assertIsInstance(u, JupyterNotebookUrl)

            execute_notebook(u.path, '/tmp/nbtest', ['dfa', 'dfb'], True)

            self.assertTrue(exists('/tmp/nbtest/dfa.csv'))
            self.assertTrue(exists('/tmp/nbtest/dfb.csv'))

            g = get_generator(parse_app_url('/tmp/nbtest/dfa.csv'))

            print(list(g))
        except ImportError:
            # unittest.skip() only builds a decorator, so calling it here
            # skipped nothing. skipTest() raises SkipTest, which the runner
            # records as a skip.
            self.skipTest("Missing pandas or jupyter client")
Exemplo n.º 24
0
    def x_test_ipy(self):
        """Parse ``ipynb`` URLs and execute a notebook-backed row generator.

        For each sample URL, the ``SourceSpec`` scheme must be ``file`` or
        ``http`` and its proto must be ``ipynb``. A ``RowGenerator`` for
        ``Py3Notebook.ipynb`` is then executed and its row count printed.

        The method name does not start with ``test``, so unittest discovery
        does not collect it.
        """
        from rowgenerators import SourceSpec, Url, RowGenerator, get_cache

        urls = ('ipynb+file:foobar.ipynb',
                'ipynb+http://example.com/foobar.ipynb', 'ipynb:foobar.ipynb')

        for url in urls:
            u = Url(url)
            print(u, u.path, u.resource_url)

            s = SourceSpec(url)
            print(s, s.proto, s.scheme, s.resource_url, s.target_file,
                  s.target_format)
            self.assertIn(s.scheme, ('file', 'http'))
            # assertEquals is a deprecated alias that Python 3.12 removed.
            self.assertEqual('ipynb', s.proto)

        gen = RowGenerator(cache=get_cache(),
                           url='ipynb:scripts/Py3Notebook.ipynb#lst',
                           working_dir=test_data(),
                           generator_args={'mult': lambda x: x * 3})

        rows = gen.generator.execute()

        print(len(rows))
Exemplo n.º 25
0
    def test_dataframe(self):
        """Check summary statistics of the ``random-names`` resource.

        Whether the frame is loaded with ``dataframe()`` or ``read_csv()``,
        the ``Size`` column must have a count of 100 and a mean of 49.8032
        after rounding to four decimals.

        The test is skipped when an ``ImportError`` is raised, which is taken
        to mean that pandas is not installed.
        """
        try:
            p = open_package(
                test_data(
                    'packages/example.com/example.com-full-2017-us/metadata.csv'
                ))

            r = p.resource('random-names')

            # Both loaders must produce frames with the same statistics.
            for load in (r.dataframe, r.read_csv):
                stats = load().describe()

                # assertEqual reports both values on failure, which
                # assertTrue(a == b) does not.
                self.assertEqual(100, stats.loc['count', 'Size'])
                self.assertEqual(49.8032,
                                 stats.loc['mean', 'Size'].round(4))
        except ImportError:
            # unittest.skip() only builds a decorator, so calling it here
            # skipped nothing. skipTest() raises SkipTest, which the runner
            # records as a skip.
            self.skipTest("Pandas not installed")