Example #1
0
    def test_partition_2(self):
        """Create a partition for every combination of name components and
        verify each can be found again by its vid."""

        bundle = Bundle()
        bundle.clean()
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()

        first_table = self.bundle.schema.tables[0]

        components = (('time', 'time2'), ('space', 'space3'),
                      ('table', first_table.name), ('grain', 'grain4'))
        # Double the tuple so slices can wrap past the end.
        doubled = components + components

        pids = {}
        for start in range(4):
            for width in range(1, 5):
                name_parts = dict(doubled[start:start + width])
                pid = self.bundle.identity.as_partition(**name_parts)
                pids[pid.fqname] = pid

        for pid in pids.values():
            partition = bundle.partitions.new_db_partition(**pid.dict)
            partition.create()

            found = bundle.partitions._find_orm(
                PartitionNameQuery(vid=pid.vid)).all()
            self.assertIn(pid.sname, [f.name for f in found])
Example #2
0
    def setUp(self):
        """Prepare a test bundle, a run configuration, and the manifest text."""
        import bundles.testbundle.bundle
        from ambry.run import RunConfig
        import manifests
        import configs

        self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)
        self.config_dir = os.path.dirname(configs.__file__)

        config_paths = (
            os.path.join(self.config_dir, 'test.yaml'),
            os.path.join(self.bundle_dir, 'bundle.yaml'),
            RunConfig.USER_ACCOUNTS,
        )
        self.rc = get_runconfig(config_paths)

        self.copy_or_build_bundle()

        self.bundle = Bundle()

        # Keep the test manifest's path and contents handy for the tests.
        self.m = os.path.join(os.path.dirname(manifests.__file__),
                              'test.ambry')

        with open(self.m) as f:
            self.m_contents = f.read()
Example #3
0
    def setUp(self):
        """Build (or copy) the test bundle and record its directory."""
        super(Test, self).setUp()

        self.copy_or_build_bundle()

        bundle = Bundle()
        self.bundle = bundle
        self.bundle_dir = bundle.bundle_dir
Example #4
0
    def setUp(self):
        """Build (or copy) the test bundle and record its directory."""
        super(Test, self).setUp()

        self.copy_or_build_bundle()

        bundle = Bundle()
        self.bundle = bundle
        self.bundle_dir = bundle.bundle_dir
Example #5
0
File: test_base.py  Project: kball/ambry
    def copy_or_build_bundle(self):
        """Set up a clean bundle build, either by re-building the bundle, or
        by copying it from a saved bundle directory.

        Side effects: may delete and recreate the build and save
        directories on disk via rm_rf and shelled-out rsync.
        """

        # For most cases, re-set the bundle by copying from a saved version. If
        # the bundle doesn't exist and the saved version doesn't exist, 
        # build a new one. 

        bundle, marker, build_dir, save_dir = self.bundle_dirs()

        idnt = bundle.identity

        if str(idnt.name.version) != "0.0.1":
            # Rebuild the bundle if the test_library.py:test_versions
            # script didn't reset the bundle at the end
            from ambry.util import rm_rf
            rm_rf(build_dir)
            rm_rf(save_dir)

        # Pin the identity to canonical test values so every run starts
        # from the same vid / revision.
        idnt = Identity.from_dict({'subset': 'subset',
                                   'vid': 'piEGPXmDC8001001',
                                   'variation': 'variation',
                                   'dataset': 'dataset',
                                   'source': 'source',
                                   'version': '0.0.1',
                                   'id': 'diEGPXmDC8',
                                   'revision': 1}
        )

        bundle.config.rewrite(
            identity = idnt.ident_dict,
            names = idnt.names_dict
        )

        # The marker file records when the build dir was last populated.
        if not os.path.exists(marker):
            logger.info( "Build dir marker ({}) is missing".format(marker))
            # There is a good reason to create a seperate instance, 
            # but don't remember what it is ... 

            bundle.clean()
            bundle = Bundle()   
            if not os.path.exists(save_dir):
                logger.info( "Save dir is missing; re-build bundle. ")
                bundle.prepare()

                if str(bundle.identity.name.version) != '0.0.1':
                    raise Exception("Can only save bundle if version is 0.0.1")

                bundle.build()
                bundle.close()

                with open(marker, 'w') as f:
                    f.write(str(time.time()))
                # Copy the newly built bundle to the save directory    
                os.system("rm -rf {1}; rsync -arv {0} {1} > /dev/null ".format(build_dir, save_dir))

        # Always copy, just to be safe. 
        logger.info(  "Copying bundle from {}".format(save_dir))
        os.system("rm -rf {0}; rsync -arv {1} {0}  > /dev/null ".format(build_dir, save_dir))
Example #6
0
    def bundle_dirs(self):
        """Return (bundle, marker_path, build_dir, save_dir) for the test bundle.

        The trailing slashes matter: rsync copies the *contents* of a
        source directory when its path ends in '/'.
        """
        bundle = Bundle()

        marker = bundle.filesystem.build_path('test-marker')
        build_dir = bundle.filesystem.build_path() + '/'
        save_dir = bundle.filesystem.build_path() + '-save/'

        return bundle, marker, build_dir, save_dir
Example #7
0
    def test_wkb(self):

        from shapely.wkb import dumps, loads

        b = Bundle()
        p = b.partitions.find(table='geot2')

        for row in p.query(
                "SELECT quote(AsBinary(GEOMETRY)) as wkb, quote(GEOMETRY) FROM geot2"
        ):
            print row
Example #8
0
    def setUp(self):
        """Build the test bundle and load the geo test run configuration."""
        import os
        from ambry.run import get_runconfig, RunConfig

        self.copy_or_build_bundle()

        bundle = Bundle()
        self.bundle = bundle
        self.bundle_dir = bundle.bundle_dir

        config_paths = (
            os.path.join(self.bundle_dir, 'geo-test-config.yaml'),
            os.path.join(self.bundle_dir, 'bundle.yaml'),
            RunConfig.USER_ACCOUNTS,
        )
        self.rc = get_runconfig(config_paths)
Example #9
0
    def test_db_bundle(self):
        """Build a bundle database and verify that BuildBundle and DbBundle
        report the same identity, then run the full geo build sequence."""
        from ambry.bundle import BuildBundle, DbBundle

        b = BuildBundle(self.bundle_dir)
        b.clean()

        self.assertTrue(b.identity.id_ is not None)
        # Use assertEqual throughout; the block previously mixed it with
        # the deprecated assertEquals alias.
        self.assertEqual('source-dataset-subset-variation', b.identity.sname)
        self.assertEqual('source-dataset-subset-variation-0.0.1',
                         b.identity.vname)

        b.database.create()

        db_path = b.database.path

        # A DbBundle opened on the built database must agree on identity.
        dbb = DbBundle(db_path)

        self.assertEqual("source-dataset-subset-variation", dbb.identity.sname)
        self.assertEqual("source-dataset-subset-variation-0.0.1",
                         dbb.identity.vname)

        # Reset and recreate the database before the full build.
        b = Bundle()
        b.database.enable_delete = True
        b.clean()
        b.database.create()

        b = Bundle()
        b.exit_on_fatal = False
        b.pre_prepare()
        b.prepare()
        b.post_prepare()
        b.pre_build()
        b.build_geo()
        b.post_build()
        b.close()
Example #10
0
    def setUp(self):
        """Load the library test config, build the bundle, and clear the
        filesystem root."""
        super(Test, self).setUp()

        import bundles.testbundle.bundle

        self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)

        config_paths = (
            os.path.join(self.bundle_dir, 'library-test-config.yaml'),
            os.path.join(self.bundle_dir, 'bundle.yaml'),
            RunConfig.USER_ACCOUNTS,
        )
        self.rc = get_runconfig(config_paths)

        self.copy_or_build_bundle()

        self.bundle = Bundle()

        # Start each test with an empty filesystem root.
        Test.rm_rf(self.rc.group('filesystem').root)
Example #11
0
    def setUp(self):
        """Create client and server run configurations and the test bundle."""
        self.copy_or_build_bundle()

        test_dir = os.path.dirname(os.path.abspath(__file__))
        self.bundle_dir = os.path.join(test_dir, 'testbundle')

        self.rc = RunConfig([
            os.path.join(self.bundle_dir, 'client-test-config.yaml'),
            os.path.join(self.bundle_dir, 'bundle.yaml'),
            RunConfig.USER_CONFIG,
        ])

        self.server_rc = RunConfig([
            os.path.join(self.bundle_dir, 'server-test-config.yaml'),
            RunConfig.USER_CONFIG,
        ])

        bundle = Bundle()
        self.bundle = bundle
        # NOTE(review): this overwrites the 'testbundle' path set above;
        # the RunConfigs were already built from the earlier value.
        self.bundle_dir = bundle.bundle_dir
Example #12
0
    def test_partition_2(self):
        """Create a partition for every combination of name components and
        verify each can be found again by its vid."""

        bundle = Bundle()
        bundle.clean()
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()

        first_table = self.bundle.schema.tables[0]

        components = (('time', 'time2'), ('space', 'space3'),
                      ('table', first_table.name), ('grain', 'grain4'))
        # Double the tuple so slices can wrap past the end.
        doubled = components + components

        pids = {}
        for start in range(4):
            for width in range(1, 5):
                name_parts = dict(doubled[start:start + width])
                pid = self.bundle.identity.as_partition(**name_parts)
                pids[pid.fqname] = pid

        for pid in pids.values():
            partition = bundle.partitions.new_db_partition(**pid.dict)
            partition.create()

            found = bundle.partitions._find_orm(
                PartitionNameQuery(vid=pid.vid)).all()
            self.assertIn(pid.sname, [f.name for f in found])
Example #13
0
    def setUp(self):
        """Prepare a test bundle, a run configuration, and the manifest text."""
        import bundles.testbundle.bundle
        from ambry.run import RunConfig
        import manifests
        import configs

        self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)
        self.config_dir = os.path.dirname(configs.__file__)

        config_paths = (
            os.path.join(self.config_dir, 'test.yaml'),
            os.path.join(self.bundle_dir, 'bundle.yaml'),
            RunConfig.USER_ACCOUNTS,
        )
        self.rc = get_runconfig(config_paths)

        self.copy_or_build_bundle()

        self.bundle = Bundle()

        # Keep the test manifest's path and contents handy for the tests.
        self.m = os.path.join(os.path.dirname(manifests.__file__),
                              'test.ambry')

        with open(self.m) as f:
            self.m_contents = f.read()
Example #14
0
    def copy_or_build_bundle(self):
        """Set up a clean bundle build, either by re-building the bundle, or
        by copying it from a saved bundle directory.

        Side effects: may delete and recreate the build and save
        directories on disk via rm_rf and shelled-out rsync.
        """

        # For most cases, re-set the bundle by copying from a saved version. If
        # the bundle doesn't exist and the saved version doesn't exist,
        # build a new one.

        bundle, marker, build_dir, save_dir = self.bundle_dirs()

        idnt = bundle.identity

        if str(idnt.name.version) != "0.0.1":
            # Rebuild the bundle if the test_library.py:test_versions
            # script didn't reset the bundle at the end
            from ambry.util import rm_rf
            rm_rf(build_dir)
            rm_rf(save_dir)

        # Round-trip the identity through Identity.from_dict to normalize
        # it, then write it back to the bundle's metadata directory.
        idnt = Identity.from_dict(dict(bundle.metadata.identity))

        bundle.metadata.identity = idnt.ident_dict
        bundle.metadata.names = idnt.names_dict

        bundle.metadata.write_to_dir()

        # The marker file records when the build dir was last populated.
        if not os.path.exists(marker):
            global_logger.info(
                "Build dir marker ({}) is missing".format(marker))
            # There is a good reason to create a seperate instance,
            # but don't remember what it is ...

            bundle.clean()
            bundle = Bundle()
            if not os.path.exists(save_dir):
                global_logger.info("Save dir is missing; re-build bundle. ")

                bundle.pre_prepare()
                bundle.prepare()
                bundle.post_prepare()

                if str(bundle.identity.name.version) != '0.0.1':
                    raise Exception(
                        "Can only save bundle if version is 0.0.1. This one is version: {} "
                        .format(bundle.identity.name.version))

                bundle.pre_build()
                bundle.build()
                bundle.post_build()

                bundle.close()

                with open(marker, 'w') as f:
                    f.write(str(time.time()))
                # Copy the newly built bundle to the save directory
                os.system("rm -rf {1}; rsync -arv {0} {1} > /dev/null ".format(
                    build_dir, save_dir))

        # Always copy, just to be safe.
        #global_logger.info(  "Copying bundle from {}".format(save_dir))
        os.system("rm -rf {0}; rsync -arv {1} {0}  > /dev/null ".format(
            build_dir, save_dir))
Example #15
0
class Test(TestBase):
    def setUp(self):
        """Build (or copy) the test bundle and record its directory."""
        super(Test, self).setUp()

        self.copy_or_build_bundle()

        bundle = Bundle()
        self.bundle = bundle
        self.bundle_dir = bundle.bundle_dir

    def test_db_bundle(self):
        """A DbBundle opened on a freshly-created database must report the
        same identity as the BuildBundle that created it."""
        from ambry.bundle import BuildBundle, DbBundle

        b = BuildBundle(self.bundle_dir)
        b.clean()

        self.assertTrue(b.identity.id_ is not None)
        # assertEqual, not the deprecated assertEquals alias, matching
        # the checks on the DbBundle below.
        self.assertEqual('source-dataset-subset-variation', b.identity.sname)
        self.assertEqual('source-dataset-subset-variation-0.0.1',
                         b.identity.vname)

        b.database.create()

        db_path = b.database.path

        dbb = DbBundle(db_path)

        self.assertEqual("source-dataset-subset-variation", dbb.identity.sname)
        self.assertEqual("source-dataset-subset-variation-0.0.1",
                         dbb.identity.vname)

    def test_paths(self):
        ''' Test that a build bundle and a db bundle both produce the same paths. '''

        from ambry.bundle import DbBundle

        b = self.bundle
        db = DbBundle(b.database.path)

        self.assertEqual(b.path, db.path)
        self.assertTrue(os.path.exists(b.path))

        self.assertEqual(b.database.path, db.database.path)
        self.assertTrue(os.path.exists(b.database.path))

        self.assertEqual(b.identity.path, db.identity.path)

        for bp, dbp in zip(b.partitions, db.partitions):
            # Both sides must have a non-empty path. The original asserted
            # p[0].path twice, leaving the db-side partition unchecked.
            self.assertTrue(bool(bp.path))
            self.assertEqual(bp.path, dbp.path)
            self.assertTrue(bool(dbp.path))

    def test_schema_direct(self):
        '''Test adding tables directly to the schema'''

        # If we don't explicitly set the id_, it will change for every run.
        self.bundle.metadata.identity.id = 'aTest'

        self.bundle.schema.clean()

        with self.bundle.session:
            s = self.bundle.schema
            s.add_table('table 1', altname='alt name a')
            s.add_table('table 2', altname='alt name b')

            # Adding a duplicate table name must fail.
            self.assertRaises(Exception, s.add_table, ('table 1', ))

            t = s.add_table('table 3', altname='alt name')

            s.add_column(t, 'col 1', altname='altname1')
            s.add_column(t, 'col 2', altname='altname2')
            s.add_column(t, 'col 3', altname='altname3')

        #print self.bundle.schema.as_csv()

        # Table and column ids are derived from the dataset id fixed
        # above, so they are stable across runs.
        self.assertIn('tiEGPXmDC801',
                      [t.id_ for t in self.bundle.schema.tables])
        self.assertIn('tiEGPXmDC802',
                      [t.id_ for t in self.bundle.schema.tables])
        self.assertNotIn('cTest03', [t.id_ for t in self.bundle.schema.tables])

        t = self.bundle.schema.table('table_3')

        self.assertIn('ciEGPXmDC803001', [c.id_ for c in t.columns])
        self.assertIn('ciEGPXmDC803002', [c.id_ for c in t.columns])
        self.assertIn('ciEGPXmDC803003', [c.id_ for c in t.columns])

        # Try with a nested session, b/c we need to test it somewhere ...
        with self.bundle.session:

            with self.bundle.session:

                t = s.add_table('table 4', altname='alt name')

                s.add_column(t, 'col 1', altname='altname1')
                s.add_column(t, 'col 2', altname='altname2')
                s.add_column(t, 'col 3', altname='altname3')

    def x_test_generate_schema(self):
        '''Uses the generateSchema method in the bundle (disabled: x_ prefix).'''
        from ambry.orm import Column

        with self.bundle.session:
            schema = self.bundle.schema
            schema.clean()

            first = schema.add_table('table1')
            schema.add_column(first, name='col1', datatype=Column.DATATYPE_REAL)
            schema.add_column(first, name='col2', datatype=Column.DATATYPE_INTEGER)
            schema.add_column(first, name='col3', datatype=Column.DATATYPE_TEXT)

            # table2's columns have no explicit datatype.
            second = schema.add_table('table2')
            for col_name in ('col1', 'col2', 'col3'):
                schema.add_column(second, name=col_name)

            third = schema.add_table('table3')
            schema.add_column(third, name='col1', datatype=Column.DATATYPE_REAL)
            schema.add_column(third, name='col2', datatype=Column.DATATYPE_INTEGER)
            schema.add_column(third, name='col3', datatype=Column.DATATYPE_TEXT)

    def test_column_processor(self):
        from ambry.orm import Column
        from ambry.transform import BasicTransform, CensusTransform

        self.bundle.schema.clean()

        with self.bundle.session:
            s = self.bundle.schema

            t = s.add_table('table3')
            s.add_column(t,
                         name='col1',
                         datatype=Column.DATATYPE_INTEGER,
                         default=-1,
                         illegal_value='999')
            s.add_column(t, name='col2', datatype=Column.DATATYPE_TEXT)
            s.add_column(t, name='col3', datatype=Column.DATATYPE_REAL)

            c1 = t.column('col1')

            self.assertEquals(1, BasicTransform(c1)({'col1': ' 1 '}))

            with self.assertRaises(ValueError):
                print "PROCESSOR '{}'".format(
                    CensusTransform(c1)({
                        'col1': ' B '
                    }))

            self.assertEquals(1, CensusTransform(c1)({'col1': ' 1 '}))
            self.assertEquals(-1, CensusTransform(c1)({'col1': ' 999 '}))
            self.assertEquals(-3, CensusTransform(c1)({'col1': ' # '}))
            self.assertEquals(-2, CensusTransform(c1)({'col1': ' ! '}))

    def test_validator(self):
        """Exercise table row validators with AND and OR column joins, then
        the row-hash functions.

        Each case is (table_name, expected_validity, row); rows carry a
        leading None for the id column. The hash cases depend on the
        d_test values in geoschema.csv.
        """

        #
        # Validators
        #

        tests = [
            ('tone', True, (None, 'VALUE', 0, 0)),
            ('tone', True, (None, 'VALUE', -1, 0)),
            ('tone', False, (None, 'DEFAULT', 0, 0)),
            ('tone', False, (None, 'DEFAULT', -1, 0)),
            ('ttwo', True, (None, 'DEFAULT', 0, 0)),
            ('ttwo', True, (None, 'DEFAULT', 0, 3.14)),
            ('ttwo', False, (None, 'DEFAULT', -1, 0)),
            ('tthree', True, (None, 'DEFAULT', 0, 0)),
            ('tthree', True, (None, 'DEFAULT', 0, 3.14)),
            ('all', True, (None, 'text1', 'text2', 1, 2, 3, 3.14)),
            ('all', False, (None, 'text1', 'text2', -1, -1, 3, 3.14)),
            ('all', False, (None, 'text1', 'text2', -1, 2, 3, 3.14)),
            ('all', False, (None, 'text1', 'text2', 1, -1, 3, 3.14)),
        ]

        # Default (AND) join: every validated column must pass.
        for i, test in enumerate(tests):
            table_name, truth, row = test
            table = self.bundle.schema.table(table_name)
            vd = table._get_validator()

            if truth:
                self.assertTrue(
                    vd(row), "Test {} not 'true' for table '{}': {}".format(
                        i + 1, table_name, row))

            else:
                self.assertFalse(
                    vd(row), "Test {} not 'false' for table '{}': {}".format(
                        i + 1, table_name, row))

        # Testing the "OR" join of multiple columns.

        tests = [
            ('tone', True, (None, 'VALUE', 0, 0)),  #1
            ('tone', True, (None, 'VALUE', -1, 0)),
            ('tone', False, (None, 'DEFAULT', 0, 0)),
            ('tone', False, (None, 'DEFAULT', -1, 0)),
            ('ttwo', True, (None, 'DEFAULT', 0, 0)),  #5
            ('ttwo', True, (None, 'DEFAULT', 0, 3.14)),
            ('ttwo', False, (None, 'DEFAULT', -1, 0)),
            ('tthree', True, (None, 'DEFAULT', 0, 0)),  #8
            ('tthree', True, (None, 'DEFAULT', 0, 3.14)),
            ('all', True, (None, 'text1', 'text2', 1, 2, 3, 3.14)),  #10
            ('all', False, (None, 'text1', 'text2', -1, -1, 3, 3.14)),  #11
            ('all', True, (None, 'text1', 'text2', -1, 2, 3, 3.14)),  #12
            ('all', True, (None, 'text1', 'text2', 1, -1, 3, 3.14)),  #13
        ]

        for i, test in enumerate(tests):
            table_name, truth, row = test
            table = self.bundle.schema.table(table_name)
            vd = table._get_validator(and_join=False)
            if truth:
                self.assertTrue(
                    vd(row), "Test {} not 'true' for table '{}': {}".format(
                        i + 1, table_name, row))
            else:
                self.assertFalse(
                    vd(row), "Test {} not 'false' for table '{}': {}".format(
                        i + 1, table_name, row))

        # Test the hash functions. This test depends on the d_test values in geoschema.csv
        tests = [('tone', 'A|1|', (None, 'A', 1, 2)),
                 ('ttwo', '1|2|', (None, 'B', 1, 2)),
                 ('tthree', 'C|2|', (None, 'C', 1, 2))]

        import hashlib

        for i, test in enumerate(tests):
            table_name, hashed_str, row = test
            table = self.bundle.schema.table(table_name)

            # row_hash must equal the first 14 hex digits of the md5 of
            # the expected hash string, interpreted as an integer.
            m = hashlib.md5()
            m.update(hashed_str)

            self.assertEquals(int(m.hexdigest()[:14], 16), table.row_hash(row))

    def test_partition(self):
        """Exercise partition creation, duplicate detection, find/find_or_new
        lookups, session persistence of partition data, and cleanup of
        partition records and files."""
        from ambry.dbexceptions import ConflictError
        from ambry.identity import PartitionNameQuery
        from ambry.partition.csv import CsvPartition

        self.bundle.clean()
        self.bundle.prepare()

        p = self.bundle.partitions.new_db_partition(time=10,
                                                    space=10,
                                                    data={'pid': 'pid1'})

        # Creating the same partition twice must conflict.
        with self.assertRaises(ConflictError):
            self.bundle.partitions.new_db_partition(time=10,
                                                    space=10,
                                                    data={'pid': 'pid1'})

        self.assertEqual(1, len(self.bundle.partitions.all))

        p = self.bundle.partitions.find_or_new(time=10, space=10)
        p.database.create(
        )  # Find will go to the library if the database doesn't exist.
        self.assertEqual(1, len(self.bundle.partitions.all))
        self.assertEquals('pid1', p.data['pid'])

        p = self.bundle.partitions.find(PartitionNameQuery(time=10, space=10))
        self.assertEquals('pid1', p.data['pid'])

        p = self.bundle.partitions.find(time=10, space=10)
        self.assertEquals('pid1', p.data['pid'])

        pnq3 = PartitionNameQuery(space=10)

        # Mutate partition data inside a session, then verify a fresh
        # Bundle sees the committed change.
        with self.bundle.session as s:
            p = self.bundle.partitions._find_orm(pnq3).first()
            p.data['foo'] = 'bar'
            s.add(p)

        bundle = Bundle()
        p = bundle.partitions.find(pnq3)

        self.assertEquals('bar', p.data['foo'])

        #p = self.bundle.partitions.find(PartitionNameQuery(name='source-dataset-subset-variation-30-hdf'))
        #self.assertTrue(p is not None)
        #self.assertEquals('source-dataset-subset-variation-30-hdf', p.identity.sname)

        #
        # Create all possible combinations of partition names
        #

        table = self.bundle.schema.tables[0]

        p = (('time', 'time2'), ('space', 'space3'), ('table', table.name),
             ('grain', 'grain4'))
        p += p
        pids = {}
        for i in range(4):
            for j in range(4):
                pid = self.bundle.identity.as_partition(**dict(p[i:i + j + 1]))
                pids[pid.fqname] = pid

        with self.bundle.session as s:

            s.commit()

            # These two deletey bits clear out all of the old
            # partitions, to avoid a conflict with the next section. We also have
            # to delete the files, since create() adds a partition record to the database,
            # and if one already exists, it will throw an Integrity Error.
            for p in self.bundle.partitions:
                if os.path.exists(p.database.path):
                    os.remove(p.database.path)

            for p in self.bundle.dataset.partitions:
                # Using SQL instead of s.delete() because we want to avoid the cascade to stored_partitions, since
                # that table doesn't exist in the bundle, only in the library
                s.execute("DELETE FROM partitions WHERE p_vid = :vid",
                          {'vid': p.vid})
                #s.delete(p)

    def test_partition_2(self):
        """Create a partition for every combination of name components and
        verify each can be found again by its vid."""

        bundle = Bundle()
        bundle.clean()
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()

        first_table = self.bundle.schema.tables[0]

        components = (('time', 'time2'), ('space', 'space3'),
                      ('table', first_table.name), ('grain', 'grain4'))
        # Double the tuple so slices can wrap past the end.
        doubled = components + components

        pids = {}
        for start in range(4):
            for width in range(1, 5):
                name_parts = dict(doubled[start:start + width])
                pid = self.bundle.identity.as_partition(**name_parts)
                pids[pid.fqname] = pid

        for pid in pids.values():
            partition = bundle.partitions.new_db_partition(**pid.dict)
            partition.create()

            found = bundle.partitions._find_orm(
                PartitionNameQuery(vid=pid.vid)).all()
            self.assertIn(pid.sname, [f.name for f in found])

    def test_runconfig(self):
        """Check that the RunConfig expands the library configuration."""
        from ambry.run import get_runconfig, RunConfig

        rc = get_runconfig((os.path.join(self.bundle_dir,
                                         'test-run-config.yaml'),
                            RunConfig.USER_CONFIG, RunConfig.USER_ACCOUNTS))

        l = rc.library('library1')

        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual('database1', l['database']['_name'])
        self.assertEqual('filesystem1', l['filesystem']['_name'])
        self.assertEqual('filesystem2', l['filesystem']['upstream']['_name'])
        self.assertEqual('filesystem3',
                         l['filesystem']['upstream']['upstream']['_name'])
        self.assertEqual(
            'devtest.sandiegodata.org',
            l['filesystem']['upstream']['upstream']['account']['_name'])

    def test_build_bundle(self):
        """Run the build twice to exercise schema changes made by the first
        run, then restore the pristine schema and rebuild.

        The original had a vestigial `try: pass / finally:` (and a
        commented-out `try:`) that left the cleanup outside any exception
        path; the try now wraps the build runs so the schema is restored
        even if a build fails.
        """
        import shutil

        bundle = Bundle()

        # Start from the editable copy of the schema.
        shutil.copyfile(bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                        bundle.filesystem.path('meta', 'schema.csv'))

        try:
            bundle.database.enable_delete = True
            bundle.clean()
            bundle = Bundle()
            bundle.exit_on_fatal = False
            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare()
            bundle.pre_build()
            bundle.build_db_inserter_codes()
            bundle.post_build()
            bundle.close()

            # The second run will use the changes to the schema made in the
            # first run, due to the types errors in the 'coding' table.
            bundle.clean()
            bundle = Bundle()
            bundle.exit_on_fatal = False
            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare()
            bundle.pre_build()
            bundle.build_db_inserter_codes()
            bundle.post_build()
            bundle.close()

        finally:

            # Need to clean up to ensure that we're back to a good state.
            # This runs the normal build, which will be used by the other
            # tests.

            shutil.copyfile(
                bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                bundle.filesystem.path('meta', 'schema.csv'))

            bundle.clean()
            bundle = Bundle()
            bundle.exit_on_fatal = False
            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare()
            bundle.pre_build()
            bundle.build()
            bundle.post_build()

    def test_simple_build(self):
        """Run a single clean prepare/build cycle on a fresh schema."""
        import shutil

        bundle = Bundle()

        shutil.copyfile(bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                        bundle.filesystem.path('meta', 'schema.csv'))

        bundle.clean()

        # Re-open after clean so the build starts from a fresh state.
        bundle = Bundle()
        bundle.exit_on_fatal = False

        for step in (bundle.pre_prepare, bundle.prepare, bundle.post_prepare,
                     bundle.pre_build, bundle.build, bundle.post_build):
            step()

    def test_config_update(self):
        """update_configuration must run without error on a fresh bundle."""
        Bundle().update_configuration()

    def test_session(self):
        """Nested sessions commit values that survive a close and can be
        overwritten later."""
        import uuid

        b = self.bundle

        first = str(uuid.uuid4())

        # Nested sessions: the inner write must be committed by the outer.
        with b.session:
            with b.session:
                b.set_value('test', 'uuid', first)

        b.close()

        self.assertEqual(first, b.get_value('test', 'uuid').value)

        second = str(uuid.uuid4())
        self.assertNotEqual(first, second)

        with b.session:
            with b.session:
                b.set_value('test', 'uuid', second)

        self.assertEqual(second, b.get_value('test', 'uuid').value)

        # Setting outside a session must also work.
        b.set_value('test', 'uuid', second)
Example #16
0
    def setUp(self):
        """Build (or copy) the test bundle and record its directory."""
        self.copy_or_build_bundle()

        bundle = Bundle()
        self.bundle = bundle
        self.bundle_dir = bundle.bundle_dir
Example #17
0
class Test(TestBase):
    """Warehouse integration tests.

    Builds a library from the test bundle, installs partitions into
    sqlite/postgres/spatialite warehouses, and exercises manifest
    round-tripping, extraction and SQL parsing.
    """

    def setUp(self):
        """Locate the test bundle and configs, load the run configuration,
        build (or copy) the bundle, and read the test manifest contents."""
        import bundles.testbundle.bundle
        from ambry.run import RunConfig
        import manifests, configs

        self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)
        self.config_dir = os.path.dirname(configs.__file__)

        self.rc = get_runconfig(
            (os.path.join(self.config_dir, 'test.yaml'),
             os.path.join(self.bundle_dir,
                          'bundle.yaml'), RunConfig.USER_ACCOUNTS))

        self.copy_or_build_bundle()

        self.bundle = Bundle()

        #print "Deleting: {}".format(self.rc.group('filesystem').root)
        #ambry.util.rm_rf(self.rc.group('filesystem').root)

        # Path and contents of the manifest used by the round-trip tests.
        self.m = os.path.join(os.path.dirname(manifests.__file__),
                              'test.ambry')

        with open(self.m) as f:
            self.m_contents = f.read()

    def tearDown(self):
        # No per-test cleanup; each test rebuilds its own state in setUp.
        pass

    def resolver(self, name):
        """Resolve a bundle name or vname to the test bundle, else False."""
        if name == self.bundle.identity.name or name == self.bundle.identity.vname:
            return self.bundle
        else:
            return False

    def get_library(self, name='default'):
        """Clear out the database before the test run"""
        from ambry.library import new_library

        config = self.rc.library(name)

        l = new_library(config, reset=True)

        return l

    def get_warehouse(self, l, name, delete=True):
        """Create a warehouse from run config; optionally recreate its DB."""
        from ambry.util import get_logger
        from ambry.warehouse import new_warehouse

        w = new_warehouse(self.rc.warehouse(name), l)
        w.logger = get_logger('unit_test')

        # Replace the plain logger with a rate-limited test logger.
        lr = self.bundle.init_log_rate(10000)
        w.logger = TestLogger(lr)

        if delete:
            w.database.enable_delete = True
            w.database.delete()
            w.create()

        return w

    def get_fs_cache(self, name):
        """Return a fresh filesystem cache rooted under /tmp for *name*."""
        from ckcache.filesystem import FsCache
        import shutil

        #cache_dir = os.path.join(temp_file_name(), 'warehouse-test', name)

        cache_dir = os.path.join('/tmp/ambry/test-warehouse', 'warehouse-test',
                                 name)

        if os.path.exists(cache_dir):
            shutil.rmtree(cache_dir)

        return FsCache(cache_dir)

    def _test_local_install(self, name):
        """Install the test partitions into the named local warehouse, then
        into a spatialite warehouse."""
        l = self.get_library()

        l.put_bundle(self.bundle)

        w = self.get_warehouse(l, name)
        print "Warehouse: ", w.database.dsn
        print "Library: ", l.database.dsn

        w.install("source-dataset-subset-variation-tone-0.0.1")
        w.install("source-dataset-subset-variation-tthree-0.0.1")
        w.install("source-dataset-subset-variation-geot1-geo-0.0.1")

        w = self.get_warehouse(l, 'spatialite')
        print "WAREHOUSE: ", w.database.dsn

        w.install("source-dataset-subset-variation-tone-0.0.1")
        w.install("source-dataset-subset-variation-tthree-0.0.1")
        w.install("source-dataset-subset-variation-geot1-geo-0.0.1")

    def test_local_sqlite_install(self):
        self._test_local_install('sqlite')

    def test_local_postgres_install(self):
        self._test_local_install('postgres1')

    def _test_remote_install(self, name):
        """Like _test_local_install, but through a running library server."""
        self.start_server(self.rc.library('server'))

        l = self.get_library('client')
        l.put_bundle(self.bundle)

        w = self.get_warehouse(l, name)
        print "WAREHOUSE: ", w.database.dsn

        w.install("source-dataset-subset-variation-tone-0.0.1")
        w.install("source-dataset-subset-variation-tthree-0.0.1")
        w.install("source-dataset-subset-variation-geot1-geo-0.0.1")

        w = self.get_warehouse(l, 'spatialite')
        print "WAREHOUSE: ", w.database.dsn

        w.install("source-dataset-subset-variation-tone-0.0.1")
        w.install("source-dataset-subset-variation-tthree-0.0.1")
        w.install("source-dataset-subset-variation-geot1-geo-0.0.1")

    def test_manifest(self):
        """Load the manifest and convert it to a string to check the round-trip"""
        from ambry.warehouse.manifest import Manifest
        from ambry.util import get_logger
        from ambry.util import print_yaml

        m = Manifest(self.m, get_logger('TL'))

        # The string form must reproduce the file contents exactly.
        self.assertEqual(self.m_contents.strip(), str(m).strip())

        l = self.get_library()
        l.put_bundle(self.bundle)

        for k, ident in l.list().items():
            print ident

        w = self.get_warehouse(l, 'sqlite')
        print 'Installing to ', w.database.path

        w.title = "This is the Warehouse!"

        w.about = "A Warehouse full of wonder"

        w.install_manifest(m)

        extracts = w.extract(force=True)

        print print_yaml(extracts)

    def test_extract(self):
        """Extract from an existing (not recreated) sqlite warehouse."""
        l = self.get_library()
        l.put_bundle(self.bundle)
        w = self.get_warehouse(l, 'sqlite', delete=False)

        print 'WAREHOUSE: ', w.database.dsn

        #cache = new_cache('s3://warehouse.sandiegodata.org/test', run_config = get_runconfig())

        extracts = w.extract(force=True)

        from ambry.util import print_yaml
        print_yaml(extracts)

    def test_manifest_parser(self):
        """Print parse results for a sample of manifest partition lines."""
        import pprint
        lines = [
            "sangis.org-business-sites-orig-businesses-geo-0.1.1",
            "table from sangis.org-business-sites-orig-businesses-geo-0.1.1",
            "table FROM sangis.org-business-sites-orig-businesses-geo-0.1.1",
            "table1, table2 FROM sangis.org-business-sites-orig-businesses-geo-0.1.1",
            "table1, table2 FROM sangis.org-business-sites-orig-businesses-geo-0.1.1 WHERE foo and bar and bas",
            "table1, table2 , table3,table4 FROM sangis.org-business-sites-orig-businesses-geo-0.1.1 # Wot you got?",
        ]

        # NOTE(review): this relies on Manifest being importable at module
        # level; other tests import it locally -- confirm the module import.
        for line in lines:
            print '----', line
            pprint.pprint(Manifest.parse_partition_line(line))

    def test_manifest_parts(self):
        """Drive the IPython manifest magics against an empty Manifest."""
        from ambry.warehouse.manifest import Manifest
        from ambry.util import get_logger
        from old.ipython.manifest import ManifestMagicsImpl

        m = Manifest('', get_logger('TL'))
        mmi = ManifestMagicsImpl(m)

        m_head = """
TITLE:  A Test Manifest, For Testing
UID: b4303f85-7d07-471d-9bcb-6980ea1bbf18
DATABASE: spatialite:///tmp/census-race-ethnicity.db
DIR: /tmp/warehouse
        """

        mmi.manifest('', m_head)

        # Repeated extract/view/mview calls check idempotent handling of
        # duplicate section declarations.
        mmi.extract('foobar AS csv TO /bin/bar/bingo')
        mmi.extract('foobar AS csv TO /bin/bar/bingo')
        mmi.extract('foobar AS csv TO /bin/bar/bingo2')
        mmi.extract('foobar AS csv TO /bin/bar/bingo')

        mmi.partitions('', 'one\ntwo\nthree\nfour')

        mmi.view('foo_view_1', '1234\n5678\n')
        mmi.view('foo_view_2', '1234\n5678\n')

        mmi.mview('foo_mview_1', '1234\n5678\n')
        mmi.mview('foo_mview_2', '1234\n5678\n')

        mmi.view('foo_view_1', '1234\n5678\n')
        mmi.view('foo_view_2', '1234\n5678\n')

        mmi.mview('foo_mview_1', '1234\n5678\n')
        mmi.mview('foo_mview_2', '1234\n5678\n')

        #print yaml.dump(m.sections, default_flow_style=False)

        print str(m)

    def test_sql_parser(self):
        """Tokenize a sample SELECT and print its identifier lists."""
        sql = """
SELECT
    geo.state, -- comment 1
    geo.county, -- comment 2
    geo.tract,
    geo.blkgrp,
    bb.geometry,
    CAST(Area(Transform(geometry,26946)) AS REAL) AS area,
    CAST(b02001001 AS INTEGER) AS total_pop,
FROM d02G003_geofile  AS geo
 JOIN d024004_b02001_estimates AS b02001e ON geo.stusab = b02001e.stusab AND geo.logrecno = b02001e.logrecno
 JOIN blockgroup_boundaries AS bb ON geo.state = bb.state AND geo.county = bb.county AND bb.tract = geo.tract AND bb.blkgrp = geo.blkgrp
WHERE geo.sumlevel = 150 AND geo.state = 6 and geo.county = 73
"""

        import sqlparse
        import sqlparse.sql

        r = sqlparse.parse(sql)

        for t in r[0].tokens:
            if isinstance(t, sqlparse.sql.IdentifierList):
                for i in t.get_identifiers():
                    print i, type(i)

        #print sqlparse.format(sql, strip_comments = True, reindent = True)

    def x_test_install(self):
        """Disabled (x_ prefix): full install/remove cycle against postgres."""
        def resolver(name):
            # Local copy of the bundle resolver used by the warehouse.
            if name == self.bundle.identity.name or name == self.bundle.identity.vname:
                return self.bundle
            else:
                return False

        def progress_cb(lr, type, name, n):
            # Rate-limited log when a count is given; plain log otherwise.
            if n:
                lr("{} {}: {}".format(type, name, n))
            else:
                self.bundle.log("{} {}".format(type, name))

        from ambry.warehouse import new_warehouse
        from functools import partial
        print "Getting warehouse"
        w = new_warehouse(self.rc.warehouse('postgres'))

        print "Re-create database"
        w.database.enable_delete = True
        w.resolver = resolver
        w.progress_cb = progress_cb

        try:
            w.drop()
        except:
            # Best-effort drop: the database may not exist yet.
            pass

        w.create()

        ps = self.bundle.partitions.all

        print "{} partitions".format(len(ps))

        for p in self.bundle.partitions:
            lr = self.bundle.init_log_rate(10000)
            w.install(p, progress_cb=partial(progress_cb, lr))

        self.assertTrue(w.has(self.bundle.identity.vname))

        for p in self.bundle.partitions:
            self.assertTrue(w.has(p.identity.vname))

        for p in self.bundle.partitions:
            w.remove(p.identity.vname)

        print w.get(self.bundle.identity.name)
        print w.get(self.bundle.identity.vname)
        print w.get(self.bundle.identity.id_)

        w.install(self.bundle)

        print w.get(self.bundle.identity.name)
        print w.get(self.bundle.identity.vname)
        print w.get(self.bundle.identity.id_)

        for p in self.bundle.partitions:
            lr = self.bundle.init_log_rate(10000)
            w.install(p, progress_cb=partial(progress_cb, lr))
示例#18
0
    def test_partition(self):
        """Exercise partition creation for db/csv/hdf types, duplicate
        conflict detection, find/find_or_new lookups, and creation of
        every combination of partition-name components."""
        from ambry.dbexceptions import ConflictError
        from ambry.identity import PartitionIdentity, PartitionNameQuery
        from ambry.partition.csv import CsvPartition
        from ambry.partition.hdf import HdfPartition

        self.bundle.clean()
        self.bundle.prepare()

        p = self.bundle.partitions.new_db_partition(time=10, space=10, data={'pid':'pid1'})

        p = self.bundle.partitions.new_csv_partition(time=20, space=20, data={'pid':'pid2'})
        self.assertIsInstance(p, CsvPartition )
        p = self.bundle.partitions.find_or_new_csv(time=20, space=20)
        self.assertIsInstance(p, CsvPartition)

        p = self.bundle.partitions.new_hdf_partition(space=30, data={'pid':'pid3'})
        self.assertIsInstance(p, HdfPartition)
        p = self.bundle.partitions.find_or_new_hdf(space=30)
        self.assertIsInstance(p, HdfPartition)

        # Re-creating a partition with the same identity must conflict.
        with self.assertRaises(ConflictError):
            self.bundle.partitions.new_db_partition(time=10, space=10, data={'pid':'pid1'})

        with self.assertRaises(ConflictError):
            self.bundle.partitions.new_csv_partition(time=20, space=20, data={'pid':'pid21'})

        with self.assertRaises(ConflictError):
            self.bundle.partitions.new_hdf_partition(space=30, data={'pid':'pid31'})


        self.assertEqual(3, len(self.bundle.partitions.all))

        p = self.bundle.partitions.find_or_new(time=10, space=10)
        p.database.create() # Find will go to the library if the database doesn't exist.
        self.assertEqual(3, len(self.bundle.partitions.all))
        self.assertEquals('pid1',p.data['pid'] )

        p = self.bundle.partitions.find_or_new_csv(time=20, space=20)
        p.database.create()
        self.assertEquals('pid2',p.data['pid'] )

        p = self.bundle.partitions.find_or_new_hdf(space=30)
        self.assertEquals('pid3',p.data['pid'] )

        p = self.bundle.partitions.find(PartitionNameQuery(time=10, space=10))
        self.assertEquals('pid1',p.data['pid'] )

        p = self.bundle.partitions.find(time=10, space=10)
        self.assertEquals('pid1', p.data['pid'])

        p = self.bundle.partitions.find(PartitionNameQuery(time=20, space=20))
        self.assertEquals('pid2',p.data['pid'] )

        p = self.bundle.partitions.find(time=20, space=20)
        self.assertEquals('pid2',p.data['pid'] )

        pnq3 = PartitionNameQuery(space=30)

        p = self.bundle.partitions.find(pnq3)
        self.assertEquals('pid3',p.data['pid'] )

        with self.bundle.session as s:
            p = self.bundle.partitions._find_orm(pnq3).first()
            p.data['foo'] = 'bar'
            s.add(p)


        # A fresh Bundle must see the data change committed above.
        bundle = Bundle()
        p = bundle.partitions.find(pnq3)
        print p.data
        self.assertEquals('bar',p.data['foo'] )

        p = self.bundle.partitions.find(PartitionNameQuery(name='source-dataset-subset-variation-30-hdf'))
        self.assertTrue(p is not None)
        self.assertEquals('source-dataset-subset-variation-30-hdf', p.identity.sname)

        #
        # Create all possible combinations of partition names
        #

        table = self.bundle.schema.tables[0]

        p = (('time','time2'),('space','space3'),('table',table.name),('grain','grain4'))
        p += p
        pids = {}
        for i in range(4):
            for j in range(4):
                pid = self.bundle.identity.as_partition(**dict(p[i:i+j+1]))
                pids[pid.fqname] = pid


        with self.bundle.session as s:

            # These two deletion bits clear out all of the old
            # partitions, to avoid a conflict with the next section. We also have
            # to delete the files, since create() adds a partition record to the database,
            # and if one already exists, it will throw an Integrity Error.
            for p in self.bundle.partitions:
                if os.path.exists(p.database.path):
                    os.remove(p.database.path)

            for p in self.bundle.dataset.partitions:
                s.delete(p)

        import pprint

        pprint.pprint(sorted([ pid.fqname for pid in pids.values()]))

        bundle = Bundle()
        bundle.clean()
        bundle.prepare()

        for pid in pids.values():
            part = bundle.partitions.new_db_partition(**pid.dict)
            part.create()

            parts = bundle.partitions._find_orm(PartitionNameQuery(vid=pid.vid)).all()
            self.assertIn(pid.sname, [p.name for p in parts])
示例#19
0
class Test(TestBase):
    """Warehouse tests (unformatted variant of the class above): library
    setup, warehouse installs, manifest round-trip and magics, extraction,
    and SQL parsing."""

    def setUp(self):
        """Load run config, build/copy the test bundle, read the manifest."""
        import bundles.testbundle.bundle
        from ambry.run import RunConfig
        import manifests, configs

        self.bundle_dir = os.path.dirname( bundles.testbundle.bundle.__file__)
        self.config_dir = os.path.dirname(configs.__file__)

        self.rc = get_runconfig((os.path.join(self.config_dir, 'test.yaml'),
                                 os.path.join(self.bundle_dir,'bundle.yaml'),
                                 RunConfig.USER_ACCOUNTS))

        self.copy_or_build_bundle()

        self.bundle = Bundle()

        #print "Deleting: {}".format(self.rc.group('filesystem').root)
        #ambry.util.rm_rf(self.rc.group('filesystem').root)

        # Path and contents of the manifest used by the round-trip tests.
        self.m = os.path.join(os.path.dirname(manifests.__file__), 'test.ambry')

        with open(self.m) as f:
            self.m_contents = f.read()

    def tearDown(self):
        # No per-test cleanup; each test rebuilds its own state in setUp.
        pass

    def resolver(self,name):
        """Resolve a bundle name or vname to the test bundle, else False."""
        if name == self.bundle.identity.name or name == self.bundle.identity.vname:
            return self.bundle
        else:
            return False

    def get_library(self, name='default'):
        """Clear out the database before the test run"""
        from ambry.library import new_library

        config = self.rc.library(name)

        l = new_library(config, reset=True)

        return l


    def get_warehouse(self, l, name, delete = True):
        """Create a warehouse; optionally delete and recreate its database."""
        from  ambry.util import get_logger
        from ambry.warehouse import new_warehouse

        w = new_warehouse(self.rc.warehouse(name), l)
        w.logger = get_logger('unit_test')

        # Replace the plain logger with a rate-limited test logger.
        lr = self.bundle.init_log_rate(10000)
        w.logger = TestLogger(lr)

        if delete:
            w.database.enable_delete = True
            w.database.delete()
            w.create()

        return w

    def get_fs_cache(self,name):
        """Return a fresh FsCache rooted under /tmp for *name*."""
        from ckcache.filesystem import FsCache
        import shutil

        #cache_dir = os.path.join(temp_file_name(), 'warehouse-test', name)

        cache_dir = os.path.join('/tmp/ambry/test-warehouse', 'warehouse-test', name)

        if os.path.exists(cache_dir):
            shutil.rmtree(cache_dir)

        return FsCache(cache_dir)

    def _test_local_install(self, name):
        """Install the test partitions into the named warehouse, then into
        a spatialite warehouse."""
        l = self.get_library()

        l.put_bundle(self.bundle)

        w = self.get_warehouse(l, name)
        print "Warehouse: ", w.database.dsn
        print "Library: ", l.database.dsn

        w.install("source-dataset-subset-variation-tone-0.0.1")
        w.install("source-dataset-subset-variation-tthree-0.0.1")
        w.install("source-dataset-subset-variation-geot1-geo-0.0.1")

        w = self.get_warehouse(l, 'spatialite')
        print "WAREHOUSE: ", w.database.dsn

        w.install("source-dataset-subset-variation-tone-0.0.1")
        w.install("source-dataset-subset-variation-tthree-0.0.1")
        w.install("source-dataset-subset-variation-geot1-geo-0.0.1")

    def test_local_sqlite_install(self):
        self._test_local_install('sqlite')

    def test_local_postgres_install(self):
        self._test_local_install('postgres1')


    def _test_remote_install(self, name):
        """Like _test_local_install, but through a running library server."""
        self.start_server(self.rc.library('server'))

        l = self.get_library('client')
        l.put_bundle(self.bundle)

        w = self.get_warehouse(l, name)
        print "WAREHOUSE: ", w.database.dsn

        w.install("source-dataset-subset-variation-tone-0.0.1")
        w.install("source-dataset-subset-variation-tthree-0.0.1")
        w.install("source-dataset-subset-variation-geot1-geo-0.0.1")

        w = self.get_warehouse(l, 'spatialite')
        print "WAREHOUSE: ", w.database.dsn

        w.install("source-dataset-subset-variation-tone-0.0.1")
        w.install("source-dataset-subset-variation-tthree-0.0.1")
        w.install("source-dataset-subset-variation-geot1-geo-0.0.1")




    def test_manifest(self):
        """Load the manifest and convert it to a string to check the round-trip"""
        from ambry.warehouse.manifest import Manifest
        from ambry.util import get_logger
        from ambry.util import print_yaml

        m = Manifest(self.m,get_logger('TL') )

        # The string form must reproduce the file contents exactly.
        self.assertEqual(self.m_contents.strip(), str(m).strip())


        l = self.get_library()
        l.put_bundle(self.bundle)

        for k, ident in  l.list().items():
            print ident

        w = self.get_warehouse(l, 'sqlite')
        print 'Installing to ', w.database.path

        w.title = "This is the Warehouse!"

        w.about = "A Warehouse full of wonder"

        w.install_manifest(m)

        extracts = w.extract(force=True)

        print print_yaml(extracts)

    def test_extract(self):
        """Extract from an existing (not recreated) sqlite warehouse."""
        l = self.get_library()
        l.put_bundle(self.bundle)
        w = self.get_warehouse(l, 'sqlite', delete=False)

        print 'WAREHOUSE: ', w.database.dsn

        #cache = new_cache('s3://warehouse.sandiegodata.org/test', run_config = get_runconfig())

        extracts = w.extract(force = True)

        from ambry.util import print_yaml
        print_yaml(extracts)



    def test_manifest_parser(self):
        """Print parse results for a sample of manifest partition lines."""
        import pprint
        lines = [
            "sangis.org-business-sites-orig-businesses-geo-0.1.1",
            "table from sangis.org-business-sites-orig-businesses-geo-0.1.1",
            "table FROM sangis.org-business-sites-orig-businesses-geo-0.1.1",
            "table1, table2 FROM sangis.org-business-sites-orig-businesses-geo-0.1.1",
            "table1, table2 FROM sangis.org-business-sites-orig-businesses-geo-0.1.1 WHERE foo and bar and bas",
            "table1, table2 , table3,table4 FROM sangis.org-business-sites-orig-businesses-geo-0.1.1 # Wot you got?",
        ]


        # NOTE(review): this relies on Manifest being importable at module
        # level; other tests import it locally -- confirm the module import.
        for line in lines:
            print '----', line
            pprint.pprint( Manifest.parse_partition_line(line))


    def test_manifest_parts(self):
        """Drive the IPython manifest magics against an empty Manifest."""
        from ambry.warehouse.manifest import Manifest
        from ambry.util import get_logger
        from old.ipython.manifest import ManifestMagicsImpl

        m = Manifest('', get_logger('TL'))
        mmi = ManifestMagicsImpl(m)

        m_head = """
TITLE:  A Test Manifest, For Testing
UID: b4303f85-7d07-471d-9bcb-6980ea1bbf18
DATABASE: spatialite:///tmp/census-race-ethnicity.db
DIR: /tmp/warehouse
        """

        mmi.manifest('',m_head)

        # Repeated extract/view/mview calls check idempotent handling of
        # duplicate section declarations.
        mmi.extract('foobar AS csv TO /bin/bar/bingo')
        mmi.extract('foobar AS csv TO /bin/bar/bingo')
        mmi.extract('foobar AS csv TO /bin/bar/bingo2')
        mmi.extract('foobar AS csv TO /bin/bar/bingo')

        mmi.partitions('','one\ntwo\nthree\nfour')

        mmi.view('foo_view_1','1234\n5678\n')
        mmi.view('foo_view_2', '1234\n5678\n')

        mmi.mview('foo_mview_1', '1234\n5678\n')
        mmi.mview('foo_mview_2', '1234\n5678\n')

        mmi.view('foo_view_1', '1234\n5678\n')
        mmi.view('foo_view_2', '1234\n5678\n')

        mmi.mview('foo_mview_1', '1234\n5678\n')
        mmi.mview('foo_mview_2', '1234\n5678\n')

        #print yaml.dump(m.sections, default_flow_style=False)

        print str(m)

    def test_sql_parser(self):
        """Tokenize a sample SELECT and print its identifier lists."""
        sql = """
SELECT
    geo.state, -- comment 1
    geo.county, -- comment 2
    geo.tract,
    geo.blkgrp,
    bb.geometry,
    CAST(Area(Transform(geometry,26946)) AS REAL) AS area,
    CAST(b02001001 AS INTEGER) AS total_pop,
FROM d02G003_geofile  AS geo
 JOIN d024004_b02001_estimates AS b02001e ON geo.stusab = b02001e.stusab AND geo.logrecno = b02001e.logrecno
 JOIN blockgroup_boundaries AS bb ON geo.state = bb.state AND geo.county = bb.county AND bb.tract = geo.tract AND bb.blkgrp = geo.blkgrp
WHERE geo.sumlevel = 150 AND geo.state = 6 and geo.county = 73
"""

        import sqlparse
        import sqlparse.sql


        r =  sqlparse.parse(sql)

        for t in  r[0].tokens:
            if isinstance(t, sqlparse.sql.IdentifierList):
                for i in t.get_identifiers():
                    print i,  type(i)


        #print sqlparse.format(sql, strip_comments = True, reindent = True)


    def x_test_install(self):
        """Disabled (x_ prefix): full install/remove cycle against postgres."""

        def resolver(name):
            # Local copy of the bundle resolver used by the warehouse.
            if name == self.bundle.identity.name or name == self.bundle.identity.vname:
                return self.bundle
            else:
                return False

        def progress_cb(lr, type,name,n):
            # Rate-limited log when a count is given; plain log otherwise.
            if n:
                lr("{} {}: {}".format(type, name, n))
            else:
                self.bundle.log("{} {}".format(type, name))

        from ambry.warehouse import new_warehouse
        from functools import partial
        print "Getting warehouse"
        w = new_warehouse(self.rc.warehouse('postgres'))

        print "Re-create database"
        w.database.enable_delete = True
        w.resolver = resolver
        w.progress_cb = progress_cb

        # Best-effort drop: the database may not exist yet.
        try: w.drop()
        except: pass

        w.create()

        ps = self.bundle.partitions.all

        print "{} partitions".format(len(ps))

        for p in self.bundle.partitions:
            lr = self.bundle.init_log_rate(10000)
            w.install(p, progress_cb = partial(progress_cb, lr) )

        self.assertTrue(w.has(self.bundle.identity.vname))

        for p in self.bundle.partitions:
            self.assertTrue(w.has(p.identity.vname))

        for p in self.bundle.partitions:
            w.remove(p.identity.vname)

        print w.get(self.bundle.identity.name)
        print w.get(self.bundle.identity.vname)
        print w.get(self.bundle.identity.id_)

        w.install(self.bundle)

        print w.get(self.bundle.identity.name)
        print w.get(self.bundle.identity.vname)
        print w.get(self.bundle.identity.id_)

        for p in self.bundle.partitions:
            lr = self.bundle.init_log_rate(10000)
            w.install(p, progress_cb = partial(progress_cb, lr))
示例#20
0
    def test_build_bundle(self):
        """Run the build twice with an edited schema, then restore state.

        The first pass records schema changes via build_db_inserter_codes()
        (type errors in the 'coding' table); the second pass builds using
        those changes. The cleanup in ``finally`` restores the pristine
        schema and runs a normal build so later tests see a good state.

        Fix: the original had ``#try:`` commented out and a vestigial
        ``try: pass / finally:`` at the end, so the cleanup did NOT run if
        a build step raised — defeating its stated purpose. The build runs
        are now guarded by a real try/finally.
        """
        import shutil

        def run_cycle(build_step):
            # One clean-state prepare/build cycle on a fresh Bundle;
            # returns the Bundle so the caller keeps a handle for cleanup.
            b = Bundle()
            b.exit_on_fatal = False
            b.pre_prepare()
            b.prepare()
            b.post_prepare()
            b.pre_build()
            build_step(b)
            b.post_build()
            return b

        bundle = Bundle()

        shutil.copyfile(bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                        bundle.filesystem.path('meta', 'schema.csv'))

        try:
            bundle.database.enable_delete = True
            bundle.clean()
            bundle = run_cycle(lambda b: b.build_db_inserter_codes())
            bundle.close()

            # The second run will use the changes to the schema made in the
            # first run, due to the types errors in the 'coding' table.
            bundle.clean()
            bundle = run_cycle(lambda b: b.build_db_inserter_codes())
            bundle.close()

        finally:
            # Need to clean up to ensure that we're back to a good state.
            # This runs the normal build, which will be used by the other
            # tests.
            shutil.copyfile(
                bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                bundle.filesystem.path('meta', 'schema.csv'))

            bundle.clean()
            run_cycle(lambda b: b.build())
示例#21
0
    def test_simple_build(self):
        """One clean prepare/build cycle starting from the pristine schema."""
        import shutil

        seed = Bundle()

        # Reset the working schema from the editable master copy.
        shutil.copyfile(
            seed.filesystem.path('meta', 'schema-edit-me.csv'),
            seed.filesystem.path('meta', 'schema.csv'))

        seed.clean()

        b = Bundle()
        b.exit_on_fatal = False
        b.pre_prepare()
        b.prepare()
        b.post_prepare()
        b.pre_build()
        b.build()
        b.post_build()
示例#22
0
    def test_build_bundle(self):
        """Two schema-mutating build passes followed by a guaranteed cleanup.

        Pass one records schema changes via build_db_inserter_codes() (type
        errors in the 'coding' table); pass two builds with those changes.
        The cleanup restores the pristine schema and reruns a normal build
        so other tests start from a good state even when a step raises.

        Fix: the original's ``# try:`` was commented out, leaving a
        vestigial ``try: pass / finally:`` — the cleanup never ran when a
        build step failed. The build runs are now inside a real try/finally.
        """
        import shutil

        def cycle(build_step):
            # One clean-state prepare/build cycle; returns the Bundle used.
            b = Bundle()
            b.exit_on_fatal = False
            b.pre_prepare()
            b.prepare()
            b.post_prepare()
            b.pre_build()
            build_step(b)
            b.post_build()
            return b

        bundle = Bundle()

        shutil.copyfile(
            bundle.filesystem.path('meta', 'schema-edit-me.csv'),
            bundle.filesystem.path('meta', 'schema.csv'))

        try:
            bundle.database.enable_delete = True
            bundle.clean()
            bundle = cycle(lambda b: b.build_db_inserter_codes())
            bundle.close()

            # The second run will use the changes to the schema made in the
            # first run, due to the types errors in the 'coding' table.
            bundle.clean()
            bundle = cycle(lambda b: b.build_db_inserter_codes())
            bundle.close()

        finally:
            # Need to clean up to ensure that we're back to a good state.
            # This runs the normal build, which will be used by the other
            # tests.
            shutil.copyfile(
                bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                bundle.filesystem.path('meta', 'schema.csv'))

            bundle.clean()
            cycle(lambda b: b.build())
示例#23
0
    def test_build_bundle_hdf(self):
        """Exercise the HDF build path through a full clean/build cycle."""
        Bundle().clean()

        b = Bundle()
        b.exit_on_fatal = False
        for step in (b.pre_prepare, b.prepare, b.post_prepare,
                     b.pre_build, b.build_hdf, b.post_build):
            step()
示例#24
0
 def setUp(self):
     """Create a fresh Bundle for each test and record its directory."""
     super(Test, self).setUp()
     bundle = Bundle()
     self.bundle = bundle
     self.bundle_dir = bundle.bundle_dir
示例#25
0
    def test_config_update(self):
        """update_configuration() on a fresh Bundle should not raise."""
        Bundle().update_configuration()
示例#26
0
    def setUp(self):
        """Ensure the test bundle exists, then open it."""
        self.copy_or_build_bundle()
        b = Bundle()
        self.bundle = b
        self.bundle_dir = b.bundle_dir
示例#27
0
    def test_versions(self):
        """Build three revisions of the bundle, store them in the library,
        dump the resulting datasets to JSON for other tests, and resolve a
        versioned reference."""
        from ambry.run import get_runconfig
        from ambry.library.query import Resolver
        import shutil
        idnt = self.bundle.identity

        l = self.get_library()

        l.purge()

        # Save bundle.yaml; each loop iteration rewrites the identity in it.
        orig = os.path.join(self.bundle.bundle_dir, 'bundle.yaml')
        save = os.path.join(self.bundle.bundle_dir, 'bundle.yaml.save')
        shutil.copyfile(orig, save)

        datasets = {}

        try:
            for i in [1, 2, 3]:
                # Bump revision and version numbers for this pass.
                idnt._on.revision = i
                idnt.name.version_major = i
                idnt.name.version_minor = i * 10

                bundle = Bundle()
                get_runconfig.clear()  # clear runconfig cache so the rewritten bundle.yaml is re-read

                bundle.metadata.load_all()

                bundle.metadata.identity = idnt.ident_dict
                bundle.metadata.names = idnt.names_dict

                bundle.metadata.write_to_dir(write_all=True)

                # Re-open so the new identity takes effect, then build.
                bundle = Bundle()

                bundle.clean()
                bundle.pre_prepare()
                bundle.prepare()
                bundle.post_prepare()
                bundle.pre_build()
                bundle.build_small()
                #bundle.build()
                bundle.post_build()

                bundle = Bundle()

                l.put_bundle(bundle)

        finally:
            pass
            # Always restore the original bundle.yaml.
            os.rename(save, orig)

        #
        # Save the list of datasets for version analysis in other
        # tests
        #

        db = l.database

        for d in db.list(with_partitions=True).values():
            datasets[d.vid] = d.dict
            datasets[d.vid]['partitions'] = {}

            for p_vid, p in d.partitions.items():
                datasets[d.vid]['partitions'][p_vid] = p.dict

        with open(self.bundle.filesystem.path('meta', 'version_datasets.json'),
                  'w') as f:
            import json
            f.write(json.dumps(datasets))

        r = Resolver(db.session)

        ref = idnt.id_

        # NOTE(review): this immediately overwrites the id_ ref above; the
        # version-spec reference is the one actually tested.
        ref = "source-dataset-subset-variation-=2.20"

        ip, results = r.resolve_ref_all(ref)

        for row in results:
            print row
示例#28
0
    def test_partition(self):
        """Exercise partition creation, duplicate detection, lookup queries,
        and direct ORM-level edits of partition records."""
        from ambry.dbexceptions import ConflictError
        from ambry.identity import PartitionNameQuery
        # NOTE(review): removed unused local import of
        # ambry.partition.csv.CsvPartition -- it was never referenced.

        self.bundle.clean()
        self.bundle.prepare()

        p = self.bundle.partitions.new_db_partition(time=10,
                                                    space=10,
                                                    data={'pid': 'pid1'})

        # Creating a partition with the same name components must conflict.
        with self.assertRaises(ConflictError):
            self.bundle.partitions.new_db_partition(time=10,
                                                    space=10,
                                                    data={'pid': 'pid1'})

        self.assertEqual(1, len(self.bundle.partitions.all))

        # find_or_new must return the existing record, not create another.
        p = self.bundle.partitions.find_or_new(time=10, space=10)
        p.database.create(
        )  # Find will go to the library if the database doesn't exist.
        self.assertEqual(1, len(self.bundle.partitions.all))
        self.assertEquals('pid1', p.data['pid'])

        # Lookup via an explicit name query and via keyword arguments.
        p = self.bundle.partitions.find(PartitionNameQuery(time=10, space=10))
        self.assertEquals('pid1', p.data['pid'])

        p = self.bundle.partitions.find(time=10, space=10)
        self.assertEquals('pid1', p.data['pid'])

        pnq3 = PartitionNameQuery(space=10)

        # Mutate the partition's data dict through the ORM and persist it.
        with self.bundle.session as s:
            p = self.bundle.partitions._find_orm(pnq3).first()
            p.data['foo'] = 'bar'
            s.add(p)

        # A freshly opened Bundle must see the persisted change.
        bundle = Bundle()
        p = bundle.partitions.find(pnq3)

        self.assertEquals('bar', p.data['foo'])

        #p = self.bundle.partitions.find(PartitionNameQuery(name='source-dataset-subset-variation-30-hdf'))
        #self.assertTrue(p is not None)
        #self.assertEquals('source-dataset-subset-variation-30-hdf', p.identity.sname)

        #
        # Create all possible combinations of partition names
        #

        table = self.bundle.schema.tables[0]

        # The tuple is doubled so the slices p[i:i + j + 1] can wrap around,
        # generating every contiguous combination of name components.
        p = (('time', 'time2'), ('space', 'space3'), ('table', table.name),
             ('grain', 'grain4'))
        p += p
        pids = {}
        for i in range(4):
            for j in range(4):
                pid = self.bundle.identity.as_partition(**dict(p[i:i + j + 1]))
                pids[pid.fqname] = pid

        with self.bundle.session as s:

            s.commit()

            # These two deletey bits clear out all of the old
            # partitions, to avoid a conflict with the next section. We also have
            # to delete the files, since create() adds a partition record to the database,
            # and if one already exists, it will throw an Integrity Error.
            for p in self.bundle.partitions:
                if os.path.exists(p.database.path):
                    os.remove(p.database.path)

            for p in self.bundle.dataset.partitions:
                # Using SQL instead of s.delete() because we want to avoid the cascade to stored_partitions, since
                # that table doesn't exist in the bundle, only in the library
                s.execute("DELETE FROM partitions WHERE p_vid = :vid",
                          {'vid': p.vid})
示例#29
0
    def x_test_rewrite(self):
        """Replace bundle.yaml with a minimal identity, rebuild, and verify
        that post_prepare() rewrites the file with a generated 'names' section.

        Prefixed x_ so the test runner skips it by default.
        """
        from  testbundle.bundle import Bundle
        import json
        from ambry.run import get_runconfig

        # Prepare to rewrite the bundle.yaml file.
        bundle = Bundle()
        orig = os.path.join(bundle.bundle_dir,'bundle.yaml')
        save = os.path.join(bundle.bundle_dir,'bundle.yaml.save')

        try:
            # Keep the real config aside; it is restored in the finally block.
            os.rename(orig,save)

            print 'Write to ', orig
            # YAML is a superset of JSON, so json.dumps output is valid here.
            with open(orig,'w') as f:
                f.write(json.dumps(
                    {
                        "identity":{
                            "dataset": "dataset1",
                            "id": "dfoo",
                            "revision": 100,
                            "source": "source1",
                            "subset": "subset1",
                            "variation": "variation1",
                            "version": "1.0.1",
                            "vid": "dfob001",
                        },
                        "about": {
                            "author": "*****@*****.**"
                        }
                    }
                ))

            get_runconfig.clear() # clear config cache.
            bundle = Bundle()
            bundle.clean()
            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare() # Does the rewrite, adding the 'names'

            # Need to clear and reload one more time for the 'names' to appear
            get_runconfig.clear() # clear config cache.
            bundle = Bundle()
            bundle.exit_on_fatal = False

            # The reloaded config must reflect the identity written above.
            self.assertEquals('dataset1', bundle.config.identity.dataset)
            self.assertEquals('dfoo', bundle.config.identity.id)
            self.assertEquals(100, bundle.config.identity.revision)

            self.assertEquals("source1-dataset1-subset1-variation1-1.0.100~dfoo01C", bundle.config.names.fqname)

            self.assertEquals("*****@*****.**", bundle.config.about.author)

        finally:
            # Always put the original bundle.yaml back and drop the test build.
            os.rename(save, orig)
            self.delete_bundle()
示例#30
0
 def test_config_update(self):
     """Rebuilding the stored configuration should complete without error."""
     b = Bundle()
     b.update_configuration()
示例#31
0
    def test_simple_build(self):
        """Run a plain end-to-end build: all prepare phases, then all build phases."""
        import shutil

        # Reset the schema to the editable source copy before building.
        seed = Bundle()
        shutil.copyfile(seed.filesystem.path('meta', 'schema-edit-me.csv'),
                        seed.filesystem.path('meta', 'schema.csv'))
        seed.clean()

        # Build with a fresh bundle instance.
        b = Bundle()
        b.exit_on_fatal = False
        b.pre_prepare()
        b.prepare()

        b.post_prepare()
        b.pre_build()
        b.build()
        b.post_build()
示例#32
0
    def test_bundle_build(self):
        """Create partitions at three API levels (_new_orm_partition,
        _new_partition, new_db_partition/find_or_new), verify name queries,
        then run a full build and check the resulting bundle identity."""

        from ambry.dbexceptions import ConflictError

        bundle = Bundle()

        # Need to clear the library, or the Bundle's pre_prepare
        # will cancel the build if this version is already installed
        bundle.library.purge()

        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()

        bp = bundle.partitions

        # Lowest level: create ORM partition records directly inside a session.
        with bundle.session:
            bp._new_orm_partition(PartialPartitionName(time = 't1', space='s1'))
            bp._new_orm_partition(PartialPartitionName(time = 't1', space='s2'))
            bp._new_orm_partition(PartialPartitionName(time = 't1', space=None))
            bp._new_orm_partition(PartialPartitionName(time = 't2', space='s1'))
            bp._new_orm_partition(PartialPartitionName(time = 't2', space='s2'))
            bp._new_orm_partition(PartialPartitionName(time = 't2', space=None))


        # Re-creating an already-existing partition name must raise.
        with self.assertRaises(ConflictError):
            with bundle.session:
                bp._new_orm_partition(PartialPartitionName(time = 't1', space='s1'))

        # ANY matches any present value for a component; NONE requires absence.
        pnq = PartitionNameQuery(time=NameQuery.ANY, space='s1')

        names = [p.vname
                 for p in bp._find_orm(pnq).all()]


        self.assertEqual({u'source-dataset-subset-variation-t2-s1-0.0.1',
                          u'source-dataset-subset-variation-t1-s1-0.0.1'},
                         set(names))

        names = [p.vname
                 for p in bp._find_orm(PartitionNameQuery(space=NameQuery.ANY)).all()]

        self.assertEqual(6,len(names))

        names = [p.vname
                 for p in bp._find_orm(PartitionNameQuery(time='t1',space=NameQuery.ANY)).all()]

        self.assertEqual({'source-dataset-subset-variation-t1-s2-0.0.1',
                              'source-dataset-subset-variation-t1-0.0.1',
                              'source-dataset-subset-variation-t1-s1-0.0.1'},
                         set(names))


        names = [p.vname
                 for p in bp._find_orm(PartitionNameQuery(time='t1',space=NameQuery.NONE)).all()]

        self.assertEqual({'source-dataset-subset-variation-t1-0.0.1'},
                         set(names))

        # Start over, use a higher level function to create the partitions

        bundle.close() # Or you'll get an OperationalError
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()
        bp = bundle.partitions

        # _new_partition manages its own session; count grows with each call.
        bp._new_partition(PartialPartitionName(time = 't1', space='s1'))
        self.assertEquals(1, len(bp.all))

        bp._new_partition(PartialPartitionName(time = 't1', space='s2'))
        self.assertEquals(2, len(bp.all))

        bp._new_partition(PartialPartitionName(time = 't1', space=None))
        bp._new_partition(PartialPartitionName(time = 't2', space='s1'))
        bp._new_partition(PartialPartitionName(time = 't2', space='s2'))
        bp._new_partition(PartialPartitionName(time = 't2', space=None))
        self.assertEquals(6, len(bp.all))

        names = [p.vname
                 for p in bp._find_orm(PartitionNameQuery(time='t1',space=NameQuery.ANY)).all()]

        self.assertEqual({'source-dataset-subset-variation-t1-s2-0.0.1',
                              'source-dataset-subset-variation-t1-0.0.1',
                              'source-dataset-subset-variation-t1-s1-0.0.1'},
                         set(names))


        # Start over, use a higher level function to create the partitions
        bundle.close()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()
        bp = bundle.partitions

        # Highest level: fqnames embed sequential partition vids (...001001, ...).
        p = bp.new_db_partition(time = 't1', space='s1')
        self.assertEquals('source-dataset-subset-variation-t1-s1-0.0.1~piEGPXmDC8001001', p.identity.fqname)

        p = bp.find_or_new(time = 't1', space='s2')
        self.assertEquals('source-dataset-subset-variation-t1-s2-0.0.1~piEGPXmDC8002001', p.identity.fqname)

        # Duplicate
        p = bp.find_or_new(time = 't1', space='s2')
        self.assertEquals('source-dataset-subset-variation-t1-s2-0.0.1~piEGPXmDC8002001', p.identity.fqname)


        p = bp.find_or_new_geo(time = 't2', space='s1')

        # Which it is depends on whether GDAL is installed.
        self.assertIn(p.identity.fqname,[
            'source-dataset-subset-variation-t2-s1-geo-0.0.1~piEGPXmDC8003001',
            'source-dataset-subset-variation-t2-s1-0.0.1~piEGPXmDC8003001' ]
        )


        # Ok! Build!

        bundle.close()
        bundle = Bundle()
        bundle.exit_on_fatal = False

        bundle.clean()
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()

        # The built bundle's identity must match the fixed test dataset ids.
        self.assertEquals('diEGPXmDC8001',bundle.identity.vid)
        self.assertEquals('source-dataset-subset-variation',bundle.identity.sname)
        self.assertEquals('source-dataset-subset-variation-0.0.1',bundle.identity.vname)
        self.assertEquals('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',bundle.identity.fqname)
示例#33
0
    def test_bundle_build(self):
        """Like the plain partition-build test, but every partition carries an
        explicit table ('tone'), so vnames include the table component."""

        from ambry.dbexceptions import ConflictError

        bundle = Bundle()

        # Need to clear the library, or the Bundle's pre_prepare
        # will cancel the build if this version is already installed
        bundle.library.purge()

        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()
        bundle.prepare()

        bp = bundle.partitions


        # Lowest level: create ORM partition records directly inside a session.
        with bundle.session:
            bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't1', space='s1'))
            bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't1', space='s2'))
            bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't1', space=None))
            bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't2', space='s1'))
            bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't2', space='s2'))
            bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't2', space=None))

        # Re-creating an already-existing partition name must raise.
        with self.assertRaises(ConflictError):
            with bundle.session:
                bp._new_orm_partition(PartialPartitionName(table = 'tone',time = 't1', space='s1'))

        # ANY matches any present value for a component; NONE requires absence.
        pnq = PartitionNameQuery(table = 'tone', time=NameQuery.ANY, space='s1')

        names = [p.vname
                 for p in bp._find_orm(pnq).all()]


        self.assertEqual({u'source-dataset-subset-variation-tone-t1-s1-0.0.1',
                          u'source-dataset-subset-variation-tone-t2-s1-0.0.1'},
                         set(names))

        names = [p.vname
                 for p in bp._find_orm(PartitionNameQuery(space=NameQuery.ANY)).all()]

        self.assertEqual(6, len(names))

        names = [p.vname
                 for p in bp._find_orm(PartitionNameQuery(table = 'tone',time='t1',space=NameQuery.ANY)).all()]

        self.assertEqual({'source-dataset-subset-variation-tone-t1-s2-0.0.1',
                              'source-dataset-subset-variation-tone-t1-0.0.1',
                              'source-dataset-subset-variation-tone-t1-s1-0.0.1'},
                         set(names))

        names = [p.vname
                 for p in bp._find_orm(PartitionNameQuery(table = 'tone',time='t1',space=NameQuery.NONE)).all()]

        self.assertEqual({'source-dataset-subset-variation-tone-t1-0.0.1'},
                         set(names))

        # Start over, use a higher level function to create the partitions

        bundle.close()  # Or you'll get an OperationalError
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()
        bundle.prepare()
        bp = bundle.partitions

        # _new_partition manages its own session; count grows with each call.
        bp._new_partition(PartialPartitionName(table = 'tone',time = 't1', space='s1'))
        self.assertEquals(1, len(bp.all))

        bp._new_partition(PartialPartitionName(table = 'tone',time = 't1', space='s2'))
        self.assertEquals(2, len(bp.all))

        bp._new_partition(PartialPartitionName(table = 'tone',time = 't1', space=None))
        bp._new_partition(PartialPartitionName(table = 'tone',time = 't2', space='s1'))
        bp._new_partition(PartialPartitionName(table = 'tone',time = 't2', space='s2'))
        bp._new_partition(PartialPartitionName(table = 'tone',time = 't2', space=None))
        self.assertEquals(6, len(bp.all))

        names = [p.vname
                 for p in bp._find_orm(PartitionNameQuery(time='t1', space=NameQuery.ANY)).all()]

        self.assertEqual({'source-dataset-subset-variation-tone-t1-s2-0.0.1',
                              'source-dataset-subset-variation-tone-t1-0.0.1',
                              'source-dataset-subset-variation-tone-t1-s1-0.0.1'},
                         set(names))

        # Start over, use a higher level function to create the partitions
        bundle.close()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()
        bundle.prepare()
        bp = bundle.partitions

        # Highest level: fqnames embed sequential partition vids (...001001, ...).
        p = bp.new_db_partition(table = 'tone',time = 't1', space='s1')
        self.assertEquals('source-dataset-subset-variation-tone-t1-s1-0.0.1~piEGPXmDC8001001', p.identity.fqname)

        p = bp.find_or_new(table = 'tone',time = 't1', space='s2')
        self.assertEquals('source-dataset-subset-variation-tone-t1-s2-0.0.1~piEGPXmDC8002001', p.identity.fqname)

        # Duplicate
        p = bp.find_or_new(table = 'tone',time = 't1', space='s2')
        self.assertEquals('source-dataset-subset-variation-tone-t1-s2-0.0.1~piEGPXmDC8002001', p.identity.fqname)

        # Ok! Build!
        bundle.close()
        bundle = Bundle()
        bundle.exit_on_fatal = False

        bundle.clean()
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()

        # The built bundle's identity must match the fixed test dataset ids.
        self.assertEquals('diEGPXmDC8001', bundle.identity.vid)
        self.assertEquals('source-dataset-subset-variation', bundle.identity.sname)
        self.assertEquals('source-dataset-subset-variation-0.0.1', bundle.identity.vname)
        self.assertEquals('source-dataset-subset-variation-0.0.1~diEGPXmDC8001', bundle.identity.fqname)
示例#34
0
class Test(TestBase):
    def setUp(self):
        """Build (or copy) the shared test bundle and remember its directory."""
        super(Test, self).setUp()

        self.copy_or_build_bundle()

        self.bundle = Bundle()
        self.bundle_dir = self.bundle.bundle_dir

    def test_db_bundle(self):
        """A BuildBundle and a DbBundle opened on its database must report the
        same identity names."""
        from ambry.bundle import BuildBundle, DbBundle

        build = BuildBundle(self.bundle_dir)
        build.clean()

        # The build bundle knows its identity before the database exists.
        self.assertTrue(build.identity.id_ is not None)
        self.assertEqual('source-dataset-subset-variation', build.identity.sname)
        self.assertEqual('source-dataset-subset-variation-0.0.1', build.identity.vname)

        build.database.create()

        # Opening the created database as a DbBundle yields the same names.
        opened = DbBundle(build.database.path)

        self.assertEqual("source-dataset-subset-variation", opened.identity.sname)
        self.assertEqual("source-dataset-subset-variation-0.0.1", opened.identity.vname)

    def test_paths(self):
        """ Test that a build bundle and a db bundle both produce the same paths. """

        from ambry.bundle import DbBundle

        b = self.bundle
        db = DbBundle(b.database.path)

        self.assertEqual(b.path, db.path)
        self.assertTrue(os.path.exists(b.path))

        self.assertEqual(b.database.path, db.database.path)
        self.assertTrue(os.path.exists(b.database.path))

        self.assertEqual(b.identity.path, db.identity.path)

        # Each partition pair must have equal, non-empty paths on both sides.
        for p in zip(b.partitions, db.partitions):
            self.assertTrue(bool(p[0].path))
            self.assertEqual(p[0].path, p[1].path)
            # Fixed: the original asserted bool(p[0].path) a second time;
            # the db-side partition path is the one that needed checking.
            self.assertTrue(bool(p[1].path))

    def test_schema_direct(self):
        """Test adding tables directly to the schema"""

        # If we don't explicitly set the id_, it will change for every run.
        self.bundle.metadata.identity.id = 'aTest'

        self.bundle.schema.clean()

        with self.bundle.session:
            s = self.bundle.schema
            s.add_table('table 1', altname='alt name a')
            s.add_table('table 2', altname='alt name b')

            # Adding a table with a duplicate name must fail.
            self.assertRaises(Exception, s.add_table, ('table 1', ))

            t = s.add_table('table 3', altname='alt name')

            s.add_column(t, 'col 1', altname='altname1')
            s.add_column(t, 'col 2', altname='altname2')
            s.add_column(t, 'col 3', altname='altname3')

        # print self.bundle.schema.as_csv()

        # Table ids are derived from the dataset vid plus a sequence number;
        # 'cTest03' is a column-style id and must not appear among tables.
        self.assertIn('tiEGPXmDC801', [t.id_ for t in self.bundle.schema.tables])
        self.assertIn('tiEGPXmDC802', [t.id_ for t in self.bundle.schema.tables])
        self.assertNotIn('cTest03', [t.id_ for t in self.bundle.schema.tables])

        # Table names are normalized: 'table 3' becomes 'table_3'.
        t = self.bundle.schema.table('table_3')

        # Column ids extend the table id with a column sequence number.
        self.assertIn('ciEGPXmDC803001', [c.id_ for c in t.columns])
        self.assertIn('ciEGPXmDC803002', [c.id_ for c in t.columns])
        self.assertIn('ciEGPXmDC803003', [c.id_ for c in t.columns])

        # Try with a nested session, b/c we need to test it somewhere ... 
        with self.bundle.session:
            with self.bundle.session:
                t = s.add_table('table 4', altname='alt name')

                s.add_column(t, 'col 1', altname='altname1')
                s.add_column(t, 'col 2', altname='altname2')
                s.add_column(t, 'col 3', altname='altname3')

    def x_test_generate_schema(self):
        """Uses the generateSchema method in the bundle.

        Prefixed x_ so the test runner skips it by default.
        """
        from ambry.orm import Column

        # table1 and table3 get explicitly typed columns; table2 uses defaults.
        typed_cols = (('col1', Column.DATATYPE_REAL),
                      ('col2', Column.DATATYPE_INTEGER),
                      ('col3', Column.DATATYPE_TEXT))

        with self.bundle.session:
            schema = self.bundle.schema
            schema.clean()

            t1 = schema.add_table('table1')
            for name, datatype in typed_cols:
                schema.add_column(t1, name=name, datatype=datatype)

            t2 = schema.add_table('table2')
            for name in ('col1', 'col2', 'col3'):
                schema.add_column(t2, name=name)

            t3 = schema.add_table('table3')
            for name, datatype in typed_cols:
                schema.add_column(t3, name=name, datatype=datatype)

    def test_column_processor(self):
        """Check BasicTransform and CensusTransform value coercion, including
        the census sentinel codes for illegal/missing/suppressed values."""
        from ambry.orm import Column
        from ambry.transform import BasicTransform, CensusTransform

        self.bundle.schema.clean()

        with self.bundle.session:
            s = self.bundle.schema

            # col1: integer with a default of -1 and '999' flagged as illegal.
            t = s.add_table('table3')
            s.add_column(t, name='col1', datatype=Column.DATATYPE_INTEGER, default=-1, illegal_value='999')
            s.add_column(t, name='col2', datatype=Column.DATATYPE_TEXT)
            s.add_column(t, name='col3', datatype=Column.DATATYPE_REAL)

            c1 = t.column('col1')

            # Both transforms strip whitespace and cast to the column type.
            self.assertEquals(1, BasicTransform(c1)({'col1': ' 1 '}))

            # A non-numeric value must raise for an integer column.
            with self.assertRaises(ValueError):
                print "PROCESSOR '{}'".format(CensusTransform(c1)({'col1': ' B '}))

            # Census sentinel mapping: illegal value -> default (-1),
            # '#' -> -3, '!' -> -2.
            self.assertEquals(1, CensusTransform(c1)({'col1': ' 1 '}))
            self.assertEquals(-1, CensusTransform(c1)({'col1': ' 999 '}))
            self.assertEquals(-3, CensusTransform(c1)({'col1': ' # '}))
            self.assertEquals(-2, CensusTransform(c1)({'col1': ' ! '}))

    def test_validator(self):
        """Run table validators in AND mode and OR mode over fixed rows, then
        check the row-hash function against precomputed md5 prefixes.

        Each test case is (table_name, expected_truth, row); rows start with
        None for the id column.
        """

        #
        # Validators
        #

        # Default (AND-joined) validation: every validated column must pass.
        tests = [
            ('tone', True, (None, 'VALUE', 0, 0)),
            ('tone', True, (None, 'VALUE', -1, 0)),
            ('tone', False, (None, 'DEFAULT', 0, 0)),
            ('tone', False, (None, 'DEFAULT', -1, 0)),

            ('ttwo', True, (None, 'DEFAULT', 0, 0)),
            ('ttwo', True, (None, 'DEFAULT', 0, 3.14)),
            ('ttwo', False, (None, 'DEFAULT', -1, 0)),

            ('tthree', True, (None, 'DEFAULT', 0, 0)),
            ('tthree', True, (None, 'DEFAULT', 0, 3.14)),

            ('all', True, (None, 'text1', 'text2', 1, 2, 3, 3.14)),
            ('all', False, (None, 'text1', 'text2', -1, -1, 3, 3.14)),
            ('all', False, (None, 'text1', 'text2', -1, 2, 3, 3.14)),
            ('all', False, (None, 'text1', 'text2', 1, -1, 3, 3.14)),
        ]

        for i, test in enumerate(tests):
            table_name, truth, row = test
            table = self.bundle.schema.table(table_name)
            vd = table._get_validator()

            if truth:
                self.assertTrue(vd(row), "Test {} not 'true' for table '{}': {}".format(i + 1, table_name, row))

            else:
                self.assertFalse(vd(row), "Test {} not 'false' for table '{}': {}".format(i + 1, table_name, row))

        # Testing the "OR" join of multiple columns. 

        # With and_join=False a row passes if any validated column passes,
        # which flips the expected result of cases 12 and 13.
        tests = [
            ('tone', True, (None, 'VALUE', 0, 0)),  # 1
            ('tone', True, (None, 'VALUE', -1, 0)),
            ('tone', False, (None, 'DEFAULT', 0, 0)),
            ('tone', False, (None, 'DEFAULT', -1, 0)),

            ('ttwo', True, (None, 'DEFAULT', 0, 0)),  # 5
            ('ttwo', True, (None, 'DEFAULT', 0, 3.14)),
            ('ttwo', False, (None, 'DEFAULT', -1, 0)),

            ('tthree', True, (None, 'DEFAULT', 0, 0)),  # 8
            ('tthree', True, (None, 'DEFAULT', 0, 3.14)),

            ('all', True, (None, 'text1', 'text2', 1, 2, 3, 3.14)),  # 10
            ('all', False, (None, 'text1', 'text2', -1, -1, 3, 3.14)),  # 11
            ('all', True, (None, 'text1', 'text2', -1, 2, 3, 3.14)),  # 12
            ('all', True, (None, 'text1', 'text2', 1, -1, 3, 3.14)),  # 13
        ]

        for i, test in enumerate(tests):
            table_name, truth, row = test
            table = self.bundle.schema.table(table_name)
            vd = table._get_validator(and_join=False)
            if truth:
                self.assertTrue(vd(row), "Test {} not 'true' for table '{}': {}".format(i + 1, table_name, row))
            else:
                self.assertFalse(vd(row), "Test {} not 'false' for table '{}': {}".format(i + 1, table_name, row))

        # Test the hash functions. This test depends on the d_test values in geoschema.csv
        # Cases are (table_name, string_that_gets_hashed, row).
        tests = [
            ('tone', 'A|1|', (None, 'A', 1, 2)),
            ('ttwo', '1|2|', (None, 'B', 1, 2)),
            ('tthree', 'C|2|', (None, 'C', 1, 2))]

        import hashlib

        for i, test in enumerate(tests):
            table_name, hashed_str, row = test
            table = self.bundle.schema.table(table_name)

            # row_hash must equal the first 14 hex digits of md5(hashed_str).
            m = hashlib.md5()
            m.update(hashed_str)

            self.assertEquals(int(m.hexdigest()[:14], 16), table.row_hash(row))

    def test_partition(self):
        """Exercise partition creation, conflict detection and lookups with an
        explicit table component ('tone'), then clear out the partitions."""
        from ambry.dbexceptions import ConflictError
        from ambry.identity import PartitionNameQuery

        self.bundle.clean()
        self.bundle.prepare()

        p = self.bundle.partitions.new_db_partition(table='tone', time=10, space=10, data={'pid':'pid1'})

        # Creating a partition with the same name components must conflict.
        with self.assertRaises(ConflictError):
            self.bundle.partitions.new_db_partition(table='tone',time=10, space=10, data={'pid':'pid1'})

        self.assertEqual(1, len(self.bundle.partitions.all))

        # find_or_new must return the existing record, not create another.
        p = self.bundle.partitions.find_or_new(table='tone',time=10, space=10)
        p.database.create() # Find will go to the library if the database doesn't exist.
        self.assertEqual(1, len(self.bundle.partitions.all))
        self.assertEquals('pid1', p.data['pid'])

        # Lookup via an explicit name query and via keyword arguments.
        p = self.bundle.partitions.find(PartitionNameQuery(table='tone',time=10, space=10))
        self.assertEquals('pid1',p.data['pid'] )

        p = self.bundle.partitions.find(table='tone',time=10, space=10)
        self.assertEquals('pid1', p.data['pid'])

        pnq3 = PartitionNameQuery(space=10)

        # Mutate the partition's data dict through the ORM and persist it.
        with self.bundle.session as s:
            p = self.bundle.partitions._find_orm(pnq3).first()
            p.data['foo'] = 'bar'
            s.add(p)

        # A freshly opened Bundle must see the persisted change.
        bundle = Bundle()
        p = bundle.partitions.find(pnq3)

        self.assertEquals('bar', p.data['foo'])

        # p = self.bundle.partitions.find(PartitionNameQuery(name='source-dataset-subset-variation-30-hdf'))
        # self.assertTrue(p is not None)
        # self.assertEquals('source-dataset-subset-variation-30-hdf', p.identity.sname)

        #
        # Create all possible combinations of partition names
        # 

        table = self.bundle.schema.tables[0]

        # The tuple is doubled so the slices p[i:i + j + 1] can wrap around,
        # generating every contiguous combination of name components.
        p = (('time', 'time2'), ('space', 'space3'), ('table', table.name), ('grain', 'grain4'))
        p += p
        pids = {}
        for i in range(4):
            for j in range(4):
                pid = self.bundle.identity.as_partition(**dict(p[i:i + j + 1]))
                pids[pid.fqname] = pid

        with self.bundle.session as s:

            s.commit()

            # These two deletey bits clear out all of the old
            # partitions, to avoid a conflict with the next section. We also have
            # to delete the files, since create() adds a partition record to the database, 
            # and if one already exists, it will throw an Integrity Error.
            for p in self.bundle.partitions:
                if os.path.exists(p.database.path):
                    os.remove(p.database.path)

            for p in self.bundle.dataset.partitions:
                # Using SQL instead of s.delete() because we want to avoid the cascade to stored_partitions, since
                # that table doesn't exist in the bundle, only in the library
                s.execute("DELETE FROM partitions WHERE p_vid = :vid", {'vid': p.vid})
                # s.delete(p)



        
    def test_runconfig(self):
        """Verify that RunConfig expands nested library configuration references."""
        from ambry.run import get_runconfig, RunConfig

        config_sources = (
            os.path.join(self.bundle_dir, 'test-run-config.yaml'),
            RunConfig.USER_CONFIG,
            RunConfig.USER_ACCOUNTS,
        )
        rc = get_runconfig(config_sources)

        lib = rc.library('library1')

        # Each level of the upstream chain should be expanded to a named entry.
        self.assertEqual('database1', lib['database']['_name'])
        self.assertEqual('filesystem1', lib['filesystem']['_name'])
        self.assertEqual('filesystem2', lib['filesystem']['upstream']['_name'])
        self.assertEqual('filesystem3', lib['filesystem']['upstream']['upstream']['_name'])

    def test_build_bundle(self):
        """Build the bundle twice -- the second run picks up schema changes
        made during the first -- then restore the schema and rebuild.

        Fixed: the original had its `try:` commented out, leaving a vestigial
        `try: pass / finally:` so the restore/rebuild cleanup was skipped if a
        build phase raised. The cleanup now runs in a real finally block.
        """
        import shutil

        bundle = Bundle()

        # Start from the editable source copy of the schema.
        shutil.copyfile(
            bundle.filesystem.path('meta', 'schema-edit-me.csv'),
            bundle.filesystem.path('meta', 'schema.csv'))

        try:
            bundle.database.enable_delete = True
            bundle.clean()
            bundle = Bundle()
            bundle.exit_on_fatal = False
            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare()
            bundle.pre_build()
            bundle.build_db_inserter_codes()
            bundle.post_build()
            bundle.close()

            # The second run will use the changes to the schema made in the
            # first run, due to the types errors in the 'coding' table.

            bundle.clean()
            bundle = Bundle()
            bundle.exit_on_fatal = False
            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare()
            bundle.pre_build()
            bundle.build_db_inserter_codes()
            bundle.post_build()
            bundle.close()

        finally:

            # Need to clean up to ensure that we're back to a good state.
            # This runs the normal build, which will be used by the other
            # tests.

            shutil.copyfile(
                bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                bundle.filesystem.path('meta', 'schema.csv'))

            bundle.clean()
            bundle = Bundle()
            bundle.exit_on_fatal = False
            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare()
            bundle.pre_build()
            bundle.build()
            bundle.post_build()

    def test_simple_build(self):
        """End-to-end build starting from the pristine editable schema."""
        import shutil

        # Reset the schema to the editable source copy and wipe prior output.
        seed = Bundle()
        shutil.copyfile(
            seed.filesystem.path('meta', 'schema-edit-me.csv'),
            seed.filesystem.path('meta', 'schema.csv'))
        seed.clean()

        # Run every build phase, in order, on a fresh bundle instance.
        build = Bundle()
        build.exit_on_fatal = False
        for phase in (build.pre_prepare, build.prepare, build.post_prepare,
                      build.pre_build, build.build, build.post_build):
            phase()

    def test_config_update(self):
        """update_configuration should run cleanly on a fresh bundle."""
        Bundle().update_configuration()

    def test_session(self):
        import uuid

        b = self.bundle

        uv = str(uuid.uuid4())

        with b.session as s1:
            with b.session as s2:
                b.set_value('test', 'uuid', uv)

        b.close()

        self.assertEqual(uv, b.get_value('test', 'uuid').value)

        uv2 = str(uuid.uuid4())

        self.assertNotEqual(uv, uv2)

        with b.session as s1:
            with b.session as s2:
                b.set_value('test', 'uuid', uv2)

        self.assertEqual(uv2, b.get_value('test', 'uuid').value)

        b.set_value('test', 'uuid', uv2)