Example No. 1
    def test_md5(self):
        import os

        from ambry.run import get_runconfig, RunConfig
        from ambry.cache import new_cache
        from ambry.util import md5_for_file
        from ambry.cache.filesystem import make_metadata

        rc = get_runconfig((os.path.join(self.bundle_dir, "test-run-config.yaml"), RunConfig.USER_CONFIG))

        fn = self.make_test_file()

        md5 = md5_for_file(fn)

        cache = new_cache(rc.filesystem("fscache"))

        cache.put(fn, "foo1")

        abs_path = cache.path("foo1")

        self.assertEqual(md5, cache.md5("foo1"))

        cache = new_cache(rc.filesystem("compressioncache"))

        cache.put(fn, "foo2", metadata=make_metadata(fn))

        abs_path = cache.path("foo2")

        self.assertEqual(md5, cache.md5("foo2"))

        os.remove(fn)
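
Both assertions hinge on md5_for_file producing a digest of the file's
contents. For reference, a minimal sketch of such a helper using only the
standard library might look like this (the chunk size is an arbitrary choice,
not ambry's):

    import hashlib

    def md5_for_file_sketch(path, chunk_size=8192):
        # Hash the file in fixed-size chunks so large files never have to
        # fit in memory at once.
        h = hashlib.md5()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                h.update(chunk)
        return h.hexdigest()
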
Example No. 2
    def test_caches(self):
        '''Basic test of put(), get() and has() for all cache types'''
        import os

        from ambry.run import get_runconfig, RunConfig
        from ambry.cache import new_cache
        from ambry.util import md5_for_file
        from ambry.bundle import DbBundle

        #self.start_server() # For the rest-cache

        #fn = '/tmp/1mbfile'
        #with open(fn, 'wb') as f:
        #    f.write('.'*(1024))

        fn = self.bundle.database.path

        # Opening the file might run the database updates in
        # database.sqlite._on_connect_update_schema, which can affect the md5.
        b = DbBundle(fn)

        md5 = md5_for_file(fn)

        print "MD5 {}  = {}".format(fn, md5)

        rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'),
                            RunConfig.USER_ACCOUNTS))

        for fsname in ['fscache', 'limitedcache', 'compressioncache',
                       'cached-s3', 'cached-compressed-s3']:

            config = rc.filesystem(fsname)
            cache = new_cache(config)
            print '---', fsname, cache
            identity = self.bundle.identity

            relpath = identity.cache_key

            r = cache.put(fn, relpath, identity.to_meta(md5=md5))

            r = cache.get(relpath)

            if not r.startswith('http'):
                self.assertTrue(os.path.exists(r), 'Not a url: {}: {}'.format(r,str(cache)))

            self.assertTrue(cache.has(relpath, md5=md5))

            cache.remove(relpath, propagate=True)

            self.assertFalse(os.path.exists(r), str(cache))
            self.assertFalse(cache.has(relpath))

        cache = new_cache(rc.filesystem('s3cache-noupstream'))
        r = cache.put(fn, 'a')
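
Each backend in the loop is held to the same put/get/has/remove contract.
Written out as a reusable helper, the round trip looks roughly like this (a
sketch against the same API; the helper name is ours):

    def roundtrip(cache, fn, relpath, meta, md5):
        # put() stores the file, get() returns a local path or a URL,
        # has() can verify by md5, and remove(propagate=True) deletes
        # from the upstream as well.
        cache.put(fn, relpath, metadata=meta)
        r = cache.get(relpath)
        assert cache.has(relpath, md5=md5)
        cache.remove(relpath, propagate=True)
        assert not cache.has(relpath)
        return r
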
Example No. 3
    def test_caches(self):
        '''Basic test of put(), get() and has() for all cache types'''
        import os

        from ambry.run import get_runconfig, RunConfig
        from ambry.cache import new_cache
        from ambry.util import md5_for_file
        from ambry.bundle import DbBundle

        self.start_server()  # For the rest-cache

        #fn = '/tmp/1mbfile'
        #with open(fn, 'wb') as f:
        #    f.write('.'*(1024))

        fn = self.bundle.database.path

        # Opening the file might run the database updates in
        # database.sqlite._on_connect_update_schema, which can affect the md5.
        b = DbBundle(fn)

        md5 = md5_for_file(fn)

        print "MD5 {}  = {}".format(fn, md5)

        rc = get_runconfig(
            (os.path.join(self.bundle_dir,
                          'test-run-config.yaml'), RunConfig.USER_CONFIG))

        for fsname in ['fscache', 'limitedcache', 'compressioncache',
                       'cached-s3', 'cached-compressed-s3']:

            config = rc.filesystem(fsname)
            cache = new_cache(config)
            print '---', fsname, cache
            identity = self.bundle.identity

            relpath = identity.cache_key

            r = cache.put(fn, relpath, identity.to_meta(md5=md5))
            r = cache.get(relpath)

            if not r.startswith('http'):
                self.assertTrue(os.path.exists(r), str(cache))

            self.assertTrue(cache.has(relpath, md5=md5))

            cache.remove(relpath, propagate=True)

            self.assertFalse(os.path.exists(r), str(cache))
            self.assertFalse(cache.has(relpath))

        cache = new_cache(rc.filesystem('s3cache-noupstream'))
        r = cache.put(fn, 'a')
Example No. 4
    def test_s3(self):
        import os

        from ambry.run import get_runconfig, RunConfig
        from ambry.cache import new_cache
        from ambry.bundle import DbBundle

        rc = get_runconfig(
            (os.path.join(self.bundle_dir,
                          'test-run-config.yaml'), RunConfig.USER_CONFIG))

        fn = self.bundle.database.path

        # Opening the file might run the database updates in
        # database.sqlite._on_connect_update_schema, which can affect the md5.
        b = DbBundle(fn)
        identity = b.identity

        fsname = 'cached-compressed-s3'

        config = rc.filesystem(fsname)
        cache = new_cache(config)

        r = cache.put(fn, b.identity.cache_key,
                      b.identity.to_meta(md5=b.database.md5))

        for p in b.partitions:
            r = cache.put(p.database.path, p.identity.cache_key,
                          p.identity.to_meta(md5=p.database.md5))

        r = cache.get(b.identity.cache_key)
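
After pushing the bundle and its partitions, a verification pass can confirm
each object landed intact. A sketch reusing the same cache API and
md5_for_file (the helper name is ours):

    from ambry.util import md5_for_file

    def verify_upload(cache, bundle):
        # Check that every partition is present in the cache under its
        # cache_key and that the stored md5 matches the local file.
        for p in bundle.partitions:
            md5 = md5_for_file(p.database.path)
            assert cache.has(p.identity.cache_key, md5=md5)
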
Example No. 5
    def test_files(self):
        '''Test some of the server's file functions.'''

        from ambry.cache import new_cache
        from ambry.bundle import DbBundle
        from ambry.library import new_library  # assumed home of new_library, used below

        fs = new_cache(self.server_rc.filesystem('rrc-fs'))
        fs.clean()
        remote = new_cache(self.server_rc.filesystem('rrc'))

        config = self.start_server()

        l = new_library(config)

        l.put_bundle(self.bundle)
        l.push()

        ident = self.bundle.identity
        ck = ident.cache_key

        # The remote is tied to the REST server, so it has the
        # bundle, but the new filesystem cache does not.

        self.assertFalse(fs.has(ck))
        self.assertTrue(remote.has(ck))

        # But if we tie them together, the FS cache should have it

        fs.upstream = remote
        self.assertTrue(fs.has(ck))

        path = fs.get(ck)

        b = DbBundle(path)
        self.assertEqual(ck, b.identity.cache_key)

        # It should have been copied, so the fs should still have
        # it after disconnecting.

        fs.upstream = None
        self.assertTrue(fs.has(ck))
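
The upstream wiring this test exercises (a local cache that falls through to
a remote on a miss and keeps the copy afterwards) can be sketched generically.
A toy illustration, not ambry's implementation; it assumes upstream.get()
returns a local path:

    class FallthroughCache(object):
        # Toy two-tier cache: check local first, then copy down from upstream.
        def __init__(self, local, upstream=None):
            self.local = local
            self.upstream = upstream

        def has(self, key):
            if self.local.has(key):
                return True
            return self.upstream is not None and self.upstream.has(key)

        def get(self, key):
            if not self.local.has(key) and self.upstream is not None:
                # Copy down, so the object survives disconnecting the upstream.
                self.local.put(self.upstream.get(key), key)
            return self.local.get(key)
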
Example No. 6
    def test_basic(self):

        from ambry.cache import new_cache

        c = new_cache(self.rc.filesystem('google'))

        print c

        print c.list()
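
This example hits a live Google storage backend, so when the 'google'
filesystem isn't configured the test will error rather than skip. A hedged
sketch of a guard using unittest's skipTest (the broad except is an
assumption, since the failure shape of a missing config isn't pinned down
here):

    def test_basic_guarded(self):
        from ambry.cache import new_cache

        try:
            config = self.rc.filesystem('google')
        except Exception:  # assumed failure mode for a missing config entry
            self.skipTest("no 'google' filesystem configured")

        c = new_cache(config)
        print c.list()
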
Example No. 7
    def test_s3(self):

        # ambry.util.get_logger('ambry.filesystem').setLevel(logging.DEBUG)
        # Set up the test directory and make some test files.
        import os
        import logging

        from ambry.cache import new_cache

        # 'logger' is presumably module-level in the original test; defined
        # here so the snippet stands alone.
        logger = logging.getLogger(__name__)

        root = self.rc.group('filesystem').root
        os.makedirs(root)
                
        testfile = os.path.join(root,'testfile')
        
        with open(testfile,'w+') as f:
            for i in range(1024):
                f.write('.'*1023)
                f.write('\n')
         
        #fs = self.bundle.filesystem
        #local = fs.get_cache('downloads')
        
        cache = new_cache(self.rc.filesystem('s3'))
        repo_dir = cache.cache_dir
      
        print "Repo Dir: {}".format(repo_dir)
      
        for i in range(10):
            logger.info("Putting " + str(i))
            cache.put(testfile, 'many' + str(i))
        
        # The local side of the cache is size-limited: after ten puts, the
        # earliest entries have been evicted from disk and live only in S3.
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many1')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many2')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))
                
        # get() pulls an evicted file back down from S3 into the local cache.
        p = cache.get('many1')
        self.assertTrue(p is not None)
                
        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many1')))   
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many2')))
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))
        
        # Fetching more files back down evicts other local copies to stay
        # under the size limit.
        p = cache.get('many2')
        self.assertTrue(p is not None)

        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many3')))
        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many7')))
 
        p = cache.get('many3')
        self.assertTrue(p is not None)
                
        self.assertTrue(os.path.exists(os.path.join(repo_dir, 'many3')))      
        self.assertFalse(os.path.exists(os.path.join(repo_dir, 'many7'))) 
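
The eviction pattern the assertions trace (a bounded local directory in front
of S3) is essentially a size-weighted LRU. A toy sketch of the bookkeeping,
independent of ambry:

    from collections import OrderedDict

    class LRUIndex(object):
        # Toy size-bounded LRU index of which keys are held locally.
        def __init__(self, max_size):
            self.max_size = max_size
            self.entries = OrderedDict()  # key -> size, oldest first
            self.used = 0

        def touch(self, key, size):
            # Record a put or a get-from-upstream, evicting the oldest
            # entries until the new item fits.
            if key in self.entries:
                self.used -= self.entries.pop(key)
            while self.entries and self.used + size > self.max_size:
                _, old_size = self.entries.popitem(last=False)
                self.used -= old_size
            self.entries[key] = size
            self.used += size
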
Example No. 8
    def test_compression(self):
        import os

        from ambry.run import get_runconfig, RunConfig
        from ambry.cache import new_cache
        from ambry.util import temp_file_name, md5_for_file, copy_file_or_flo

        rc = get_runconfig((os.path.join(self.bundle_dir, "test-run-config.yaml"), RunConfig.USER_CONFIG))

        comp_cache = new_cache(rc.filesystem("compressioncache"))

        test_file_name = "test_file"

        fn = temp_file_name()
        print "orig file ", fn
        with open(fn, "wb") as f:
            for i in range(1000):
                f.write("{:03d}:".format(i))

        cf = comp_cache.put(fn, test_file_name)

        with open(cf, 'rb') as stream:
            from ambry.util.sgzip import GzipFile

            stream = GzipFile(stream)

            uncomp_cache = new_cache(rc.filesystem("fscache"))

            uncomp_stream = uncomp_cache.put_stream("decomp")

            copy_file_or_flo(stream, uncomp_stream)

        uncomp_stream.close()

        dcf = uncomp_cache.get("decomp")

        self.assertEqual(md5_for_file(fn), md5_for_file(dcf))

        os.remove(fn)
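
The compression round trip (write through the compressing cache, then
decompress the stored file and compare digests) can be reproduced with the
standard library alone. A sketch, with gzip standing in for ambry.util.sgzip:

    import gzip
    import hashlib
    import os
    import tempfile

    # Write a small file, compress a copy, and check the decompressed
    # bytes hash identically.
    fd, src = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as f:
        f.write(b'0123' * 1000)

    with open(src, 'rb') as f, gzip.open(src + '.gz', 'wb') as gz:
        gz.write(f.read())

    with open(src, 'rb') as f, gzip.open(src + '.gz', 'rb') as gz:
        assert hashlib.md5(f.read()).digest() == hashlib.md5(gz.read()).digest()
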
Example No. 9
    def test_load(self):

        import os

        from ambry.run import get_runconfig, RunConfig
        from ambry.client.rest import RemoteLibrary
        from ambry.cache import new_cache
        from ambry.util import md5_for_file
        from ambry.library import new_library  # assumed home of new_library, used below

        config = self.start_server()
        l = new_library(config)

        rl = RemoteLibrary(self.server_url)

        #
        # Check that the library can list datasets that are inserted externally
        #

        l.put_bundle(self.bundle)

        s = set([i.fqname for i in rl.list().values()])

        self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001', s)

        dsident = rl.dataset('diEGPXmDC8001')

        s = set([i.fqname for i in dsident.partitions.values()])

        self.assertEqual(4, len(s))

        self.assertIn('source-dataset-subset-variation-tthree-0.0.1~piEGPXmDC8003001', s)
        self.assertIn('source-dataset-subset-variation-geot1-geo-0.0.1~piEGPXmDC8001001', s)
        self.assertIn('source-dataset-subset-variation-geot2-geo-0.0.1~piEGPXmDC8002001', s)

        #
        # Upload the dataset to S3, clear the library, then load it back in
        #

        rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'),
                            RunConfig.USER_ACCOUNTS))
        cache = new_cache(rc.filesystem('cached-compressed-s3'))

        fn = self.bundle.database.path
        identity = self.bundle.identity
        relpath = identity.cache_key

        r = cache.put(fn, relpath, identity.to_meta(file=fn))

        self.assertTrue(bool(cache.has(relpath)))

        # clear the library.

        l.purge()
        self.assertNotIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                         set([i.fqname for i in rl.list().values()]))

        # Load from  S3, directly in to the local library

        identity.add_md5(md5_for_file(fn))

        l.load(identity.cache_key, identity.md5)

        self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                      set([i.fqname for i in rl.list().values()]))

        # Do it one more time, using the remote library

        l.purge()
        self.assertNotIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                         set([i.fqname for i in rl.list().values()]))

        rl.load_dataset(identity)

        self.assertIn('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                      set([i.fqname for i in rl.list().values()]))

        # Check that we can get the record from the library

        self.assertEqual(identity.vid, rl.resolve(identity.vid).vid)
        self.assertEqual(identity.vid, rl.resolve(identity.vname).vid)
        self.assertEqual(identity.vid, rl.resolve(identity.cache_key).vid)
        self.assertEqual(identity.vid, rl.resolve(identity.sname).vid)
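
The final four assertions pin down the resolver contract: every naming form
of an identity (vid, vname, cache_key, sname) resolves to the same record. A
toy index satisfying that contract, with illustrative names only:

    class ToyResolver(object):
        # Index one record under each of its naming forms.
        def __init__(self):
            self._index = {}

        def add(self, identity):
            for key in (identity.vid, identity.vname,
                        identity.cache_key, identity.sname):
                self._index[key] = identity

        def resolve(self, ref):
            return self._index.get(ref)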