def test_chown():
    """Chown form behavior: superuser gets full controls, regular users don't."""
    cluster = mini_cluster.shared_cluster(conf=True)
    try:
        # Only the Hadoop superuser really has carte blanche here
        client = make_logged_in_client(cluster.superuser)
        cluster.fs.setuser(cluster.superuser)

        path = u"/test-chown-en-Español"
        cluster.fs.mkdir(path)

        # Change owner/group through the form, verify via stats().
        client.post("/filebrowser/chown", dict(path=path, user="******", group="y"))
        stats = cluster.fs.stats(path)
        assert_equal("x", stats["user"])
        assert_equal("y", stats["group"])

        # user_other supplies a free-form user name when "__other__" is chosen.
        client.post("/filebrowser/chown",
                    dict(path=path, user="******", user_other="z", group="y"))
        assert_equal("z", cluster.fs.stats(path)["user"])

        # Make sure that the regular user chown form doesn't have useless fields,
        # and that the superuser's form has all the fields it could dream of.
        path = '/filebrowser/chown-regular-user'
        cluster.fs.mkdir(path)
        cluster.fs.chown(path, 'chown_test', 'chown_test')

        response = client.get('/filebrowser/chown',
                              dict(path=path, user='******', group='chown_test'))
        assert_true('<option value="__other__"' in response.content)

        client = make_logged_in_client('chown_test')
        response = client.get('/filebrowser/chown',
                              dict(path=path, user='******', group='chown_test'))
        assert_false('<option value="__other__"' in response.content)
    finally:
        cluster.shutdown()
def test_edit_i18n(): cluster = mini_cluster.shared_cluster(conf=True) try: cluster.fs.setuser(cluster.superuser) cluster.fs.mkdir('/test-filebrowser/') # Test utf-8 pass_1 = u'en-hello pt-Olá ch-你好 ko-안녕 ru-Здравствуйте' pass_2 = pass_1 + u'yi-העלא' edit_helper(cluster, 'utf-8', pass_1, pass_2) # Test utf-16 edit_helper(cluster, 'utf-16', pass_1, pass_2) # Test cjk pass_1 = u'big5-你好' pass_2 = pass_1 + u'世界' edit_helper(cluster, 'big5', pass_1, pass_2) pass_1 = u'shift_jis-こんにちは' pass_2 = pass_1 + u'世界' edit_helper(cluster, 'shift_jis', pass_1, pass_2) pass_1 = u'johab-안녕하세요' pass_2 = pass_1 + u'세상' edit_helper(cluster, 'johab', pass_1, pass_2) finally: try: cluster.fs.rmtree('/test-filebrowser/') except Exception, ex: LOG.error('Failed to remove tree /test-filebrowser: %s' % (ex, )) cluster.shutdown()
def test_upload(): """Test file upload""" cluster = mini_cluster.shared_cluster(conf=True) try: USER_NAME = cluster.fs.superuser cluster.fs.setuser(USER_NAME) DEST = "/tmp/fb-upload-test" client = make_logged_in_client(USER_NAME) # Just upload the current python file resp = client.post('/filebrowser/upload', dict(dest=DEST, hdfs_file=file(__file__))) assert_true("Upload Complete" in resp.content) stats = cluster.fs.stats(DEST) assert_equal(stats['user'], USER_NAME) assert_equal(stats['group'], USER_NAME) f = cluster.fs.open(DEST) actual = f.read() expected = file(__file__).read() assert_equal(actual, expected) finally: try: cluster.fs.remove(DEST) except Exception, ex: pass cluster.shutdown()
def test_view_avro():
    """Viewing Avro files: autodetection, offsets, and non-Avro fallback."""
    cluster = mini_cluster.shared_cluster(conf=True)
    try:
        c = make_logged_in_client()
        cluster.fs.setuser(cluster.superuser)
        # Start from a clean test directory.
        if cluster.fs.isdir("/test-avro-filebrowser"):
            cluster.fs.rmtree('/test-avro-filebrowser/')
        cluster.fs.mkdir('/test-avro-filebrowser/')

        # Minimal record schema: one string field, one int field.
        test_schema = schema.parse("""
          {
            "name": "test",
            "type": "record",
            "fields": [
              { "name": "name", "type": "string" },
              { "name": "integer", "type": "int" }
            ]
          }
        """)

        # Write a single deflate-compressed record into HDFS.
        f = cluster.fs.open('/test-avro-filebrowser/test-view.avro', "w")
        data_file_writer = datafile.DataFileWriter(f, io.DatumWriter(),
                                                   writers_schema=test_schema,
                                                   codec='deflate')
        dummy_datum = {
            'name': 'Test',
            'integer': 10,
        }
        data_file_writer.append(dummy_datum)
        data_file_writer.close()

        # autodetect
        response = c.get('/filebrowser/view/test-avro-filebrowser/test-view.avro')
        # (Note: we use eval here cause of an incompatibility issue between
        # the representation string of JSON dicts in simplejson vs. json)
        assert_equal(eval(response.context['view']['contents']), dummy_datum)

        # offsetting should work as well
        response = c.get('/filebrowser/view/test-avro-filebrowser/test-view.avro?offset=1')
        assert_true(response.context.has_key('view'))

        # A plain-text file with an .avro extension.
        f = cluster.fs.open('/test-avro-filebrowser/test-view2.avro', "w")
        f.write("hello")
        f.close()

        # we shouldn't autodetect non avro files
        response = c.get('/filebrowser/view/test-avro-filebrowser/test-view2.avro')
        assert_equal(response.context['view']['contents'], "hello")

        # we should fail to do a bad thing if they specify compression when it's not set.
        response = c.get('/filebrowser/view/test-avro-filebrowser/test-view2.avro?compression=gzip')
        assert_false(response.context.has_key('view'))
    finally:
        try:
            cluster.fs.rmtree('/test-avro-filebrowser/')
        except:
            pass  # Don't let cleanup errors mask earlier failures
        cluster.shutdown()
def test_seek_across_blocks():
    """Makes a file with a lot of blocks, seeks around"""
    cluster = mini_cluster.shared_cluster()
    try:
        fs = cluster.fs
        fs.setuser(cluster.superuser)
        # A tiny 1KB block size forces the payload to span many HDFS blocks.
        writer = fs.open("/fortest-blocks.txt", "w", block_size=1024)
        try:
            payload = "abcdefghijklmnopqrstuvwxyz" * 3000
            writer.write(payload)
            writer.close()

            # Re-open repeatedly and seek to random offsets; every read must
            # match the in-memory payload no matter which block is hit.
            for attempt in xrange(1, 10):
                reader = fs.open("/fortest-blocks.txt", "r")
                for trial in xrange(1, 100):
                    pos = random.randint(0, len(payload) - 1)
                    reader.seek(pos, posixfile.SEEK_SET)
                    assert_equals(payload[pos:pos + 50], reader.read(50))
                reader.close()
        finally:
            fs.remove("/fortest-blocks.txt")
    finally:
        cluster.shutdown()
def test_config_validator_more(): # TODO: Setup DN to not load the plugin, which is a common user error. # We don't actually use the mini_cluster. But the cluster sets up the correct # configuration that forms the test basis. cluster = mini_cluster.shared_cluster() if not cluster.fs.exists('/tmp'): cluster.fs.setuser(cluster.fs.superuser) cluster.fs.mkdir('/tmp', 0777) cli = make_logged_in_client() reset = ( conf.HADOOP_BIN.set_for_testing(cluster.fs.hadoop_bin_path), conf.HDFS_CLUSTERS['default'].NN_HOST.set_for_testing('localhost'), conf.HDFS_CLUSTERS['default'].NN_HDFS_PORT.set_for_testing(22), conf.HDFS_CLUSTERS["default"].NN_THRIFT_PORT.set_for_testing(cluster.fs.thrift_port), conf.MR_CLUSTERS["default"].JT_HOST.set_for_testing("localhost"), conf.MR_CLUSTERS['default'].JT_THRIFT_PORT.set_for_testing(23), ) try: resp = cli.get('/debug/check_config') assert_false('Failed to contact Namenode plugin' in resp.content) assert_false('Failed to see HDFS root' in resp.content) assert_true('Failed to upload files' in resp.content) assert_true('Failed to contact JobTracker plugin' in resp.content) finally: for old_conf in reset: old_conf() cluster.shutdown()
def test_live_jobtracker():
    """
    Checks that LiveJobTracker never raises exceptions for most of its calls.
    """
    cluster = mini_cluster.shared_cluster()
    try:
        jt = cluster.jt
        # Make sure that none of the following raise; each should also
        # return something truthy.
        # not tested: task_tracker, get_job_counters, get_job_xml
        for probe in (jt.queues,
                      jt.cluster_status,
                      jt.all_task_trackers,
                      jt.active_trackers,
                      jt.blacklisted_trackers,
                      jt.running_jobs,
                      jt.completed_jobs,
                      jt.failed_jobs,
                      jt.all_jobs,
                      jt.get_current_time):
            assert_true(probe())
    finally:
        cluster.shutdown()
def test_edit_i18n():
    """Round-trip the filebrowser edit/save cycle in several encodings."""
    cluster = mini_cluster.shared_cluster(conf=True)
    try:
        cluster.fs.setuser(cluster.superuser)
        cluster.fs.mkdir('/test-filebrowser/')

        # Test utf-8
        pass_1 = u'en-hello pt-Olá ch-你好 ko-안녕 ru-Здравствуйте'
        pass_2 = pass_1 + u'yi-העלא'
        edit_helper(cluster, 'utf-8', pass_1, pass_2)

        # Test utf-16
        edit_helper(cluster, 'utf-16', pass_1, pass_2)

        # Test cjk
        pass_1 = u'big5-你好'
        pass_2 = pass_1 + u'世界'
        edit_helper(cluster, 'big5', pass_1, pass_2)

        pass_1 = u'shift_jis-こんにちは'
        pass_2 = pass_1 + u'世界'
        edit_helper(cluster, 'shift_jis', pass_1, pass_2)

        pass_1 = u'johab-안녕하세요'
        pass_2 = pass_1 + u'세상'
        edit_helper(cluster, 'johab', pass_1, pass_2)
    finally:
        # Best-effort cleanup; the cluster must be shut down regardless.
        try:
            cluster.fs.rmtree('/test-filebrowser/')
        except Exception, ex:
            LOG.error('Failed to remove tree /test-filebrowser: %s' % (ex,))
        cluster.shutdown()
def test_seek_across_blocks():
    """Makes a file with a lot of blocks, seeks around"""
    cluster = mini_cluster.shared_cluster()
    try:
        fs = cluster.fs
        fs.setuser(cluster.superuser)
        # Tiny 1KB block size so the 26*3000-byte payload spans many blocks.
        f = fs.open("/fortest-blocks.txt", "w", block_size=1024)
        try:
            data = "abcdefghijklmnopqrstuvwxyz" * 3000
            f.write(data)
            f.close()
            # Randomly seek and read; results must match the local copy
            # regardless of which block boundary the offset straddles.
            for i in xrange(1, 10):
                f = fs.open("/fortest-blocks.txt", "r")
                for j in xrange(1, 100):
                    offset = random.randint(0, len(data) - 1)
                    f.seek(offset, posixfile.SEEK_SET)
                    assert_equals(data[offset:offset + 50], f.read(50))
                f.close()
        finally:
            fs.remove("/fortest-blocks.txt")
    finally:
        cluster.shutdown()
def test_view_i18n(): cluster = mini_cluster.shared_cluster(conf=True) try: cluster.fs.setuser(cluster.superuser) cluster.fs.mkdir('/test-filebrowser/') # Test viewing files in different encodings content = u'pt-Olá en-hello ch-你好 ko-안녕 ru-Здравствуйте' view_helper(cluster, 'utf-8', content) view_helper(cluster, 'utf-16', content) content = u'你好-big5' view_helper(cluster, 'big5', content) content = u'こんにちは-shift-jis' view_helper(cluster, 'shift_jis', content) content = u'안녕하세요-johab' view_helper(cluster, 'johab', content) # Test that the default view is home c = make_logged_in_client() response = c.get('/filebrowser/view/') assert_equal(response.context['path'], '/') cluster.fs.mkdir('/user/test') cluster.fs.chown("/user/test", "test", "test") response = c.get('/filebrowser/view/?default_to_home=1') assert_equal("http://testserver/filebrowser/view/user/test", response["location"]) finally: try: cluster.fs.rmtree('/test-filebrowser/') cluster.fs.rmtree('/user/test') except Exception, ex: LOG.error('Failed to cleanup test directory: %s' % (ex,)) cluster.shutdown()
def test_config_validator_more(): # TODO: Setup DN to not load the plugin, which is a common user error. # We don't actually use the mini_cluster. But the cluster sets up the correct # configuration that forms the test basis. cluster = mini_cluster.shared_cluster() if not cluster.fs.exists('/tmp'): cluster.fs.setuser(cluster.fs.superuser) cluster.fs.mkdir('/tmp', 0777) cli = make_logged_in_client() reset = ( conf.HADOOP_BIN.set_for_testing(cluster.fs.hadoop_bin_path), conf.HDFS_CLUSTERS['default'].NN_HOST.set_for_testing('localhost'), conf.HDFS_CLUSTERS['default'].NN_HDFS_PORT.set_for_testing(22), conf.HDFS_CLUSTERS["default"].NN_THRIFT_PORT.set_for_testing( cluster.fs.thrift_port), conf.MR_CLUSTERS["default"].JT_HOST.set_for_testing("localhost"), conf.MR_CLUSTERS['default'].JT_THRIFT_PORT.set_for_testing(23), ) try: resp = cli.get('/debug/check_config') assert_false('Failed to contact Namenode plugin' in resp.content) assert_false('Failed to see HDFS root' in resp.content) assert_true('Failed to upload files' in resp.content) assert_true('Failed to contact JobTracker plugin' in resp.content) finally: for old_conf in reset: old_conf() cluster.shutdown()
def test_seek():
    """Test for DESKTOP-293 - ensure seek works in python2.4"""
    cluster = mini_cluster.shared_cluster()
    try:
        fs = cluster.fs
        fs.setuser(cluster.superuser)
        out = fs.open("/fortest.txt", "w")
        try:
            out.write("hello")
            out.close()

            inp = fs.open("/fortest.txt", "r")
            # Absolute seeks.
            inp.seek(0, posixfile.SEEK_SET)
            assert_equals("he", inp.read(2))
            inp.seek(1, posixfile.SEEK_SET)
            assert_equals("el", inp.read(2))
            # Seek relative to the end of the file.
            inp.seek(-1, posixfile.SEEK_END)
            assert_equals("o", inp.read())
            # Seek relative to the current position.
            inp.seek(0, posixfile.SEEK_SET)
            inp.seek(2, posixfile.SEEK_CUR)
            assert_equals("ll", inp.read(2))
        finally:
            fs.remove("/fortest.txt")
    finally:
        cluster.shutdown()
def test_view(): cluster = mini_cluster.shared_cluster(conf=True) try: c = make_logged_in_client() cluster.fs.setuser(cluster.superuser) cluster.fs.mkdir('/test-filebrowser/') f = cluster.fs.open('/test-filebrowser/test-view', "w") f.write("hello") f.close() response = c.get('/filebrowser/view/test-filebrowser/test-view') assert_equal(response.context['view']['contents'], "hello") response = c.get('/filebrowser/view/test-filebrowser/test-view?end=2&begin=1') assert_equal(response.context['view']['contents'], "he") response = c.get('/filebrowser/view/') assert_equal(response.context['path'], '/') cluster.fs.mkdir('/user/test') cluster.fs.chown("/user/test", "test", "test") response = c.get('/filebrowser/view/?default_to_home=1') assert_equal("http://testserver/filebrowser/view/user/test", response["location"]) finally: cluster.shutdown()
def test_chown():
    """Chown via the filebrowser: superuser form vs. regular-user form."""
    cluster = mini_cluster.shared_cluster(conf=True)
    try:
        # Only the Hadoop superuser really has carte blanche here
        c = make_logged_in_client(cluster.superuser)
        cluster.fs.setuser(cluster.superuser)

        PATH = u"/test-chown-en-Español"
        cluster.fs.mkdir(PATH)
        c.post("/filebrowser/chown", dict(path=PATH, user="******", group="y"))
        assert_equal("x", cluster.fs.stats(PATH)["user"])
        assert_equal("y", cluster.fs.stats(PATH)["group"])
        # user_other provides a free-form user name via the "__other__" option.
        c.post("/filebrowser/chown", dict(path=PATH, user="******", user_other="z", group="y"))
        assert_equal("z", cluster.fs.stats(PATH)["user"])

        # Make sure that the regular user chown form doesn't have useless fields,
        # and that the superuser's form has all the fields it could dream of.
        PATH = '/filebrowser/chown-regular-user'
        cluster.fs.mkdir(PATH)
        cluster.fs.chown(PATH, 'chown_test', 'chown_test')
        response = c.get(
            '/filebrowser/chown', dict(path=PATH, user='******', group='chown_test'))
        assert_true('<option value="__other__"' in response.content)

        c = make_logged_in_client('chown_test')
        response = c.get(
            '/filebrowser/chown', dict(path=PATH, user='******', group='chown_test'))
        assert_false('<option value="__other__"' in response.content)
    finally:
        cluster.shutdown()
def start_helper_servers(self):
    """
    Starts Hadoop daemons. This currently doesn't start app-specific
    other servers.
    """
    # conf=True makes the shared cluster generate a configuration directory
    # as well; the cluster handle is kept on self for later use/shutdown.
    self.cluster = mini_cluster.shared_cluster(conf=True)
def get_shared_beeswax_server():
    """Start (at most once per process) and return the shared beeswax server.

    Returns a (cluster, finish_fn) pair; calling finish_fn reverts the
    configuration overrides and shuts the cluster down.
    """
    # Override beeswax settings for the test; each entry is a closure that
    # restores the original value when called.
    finish = (
        beeswax.conf.BEESWAX_SERVER_HOST.set_for_testing("localhost"),
        beeswax.conf.BEESWAX_SERVER_PORT.set_for_testing(BEESWAXD_TEST_PORT),
        beeswax.conf.BEESWAX_META_SERVER_HOST.set_for_testing("localhost"),
        beeswax.conf.BEESWAX_META_SERVER_PORT.set_for_testing(BEESWAXD_TEST_PORT + 1),
        # Use a bogus path to avoid loading the normal hive-site.xml
        beeswax.conf.BEESWAX_HIVE_CONF_DIR.set_for_testing('/my/bogus/path'),
    )
    cluster = mini_cluster.shared_cluster(conf=True)

    # Start the server process once per test run; make sure it is killed
    # when the interpreter exits.
    global _SHARED_BEESWAX_SERVER_PROCESS
    if _SHARED_BEESWAX_SERVER_PROCESS is None:
        p = _start_server(cluster)
        _SHARED_BEESWAX_SERVER_PROCESS = p

        def kill():
            LOG.info("Killing beeswax server (pid %d)." % p.pid)
            os.kill(p.pid, 9)
            p.wait()
        atexit.register(kill)

        # Wait for server to come up, by repeatedly trying.
        start = time.time()
        started = False
        sleep = 0.001  # exponential backoff between probes, 20s overall cap
        while not started and time.time() - start < 20.0:
            try:
                client = beeswax.db_utils.db_client()
                meta_client = beeswax.db_utils.meta_client()
                client.echo("echo")
                if meta_client.getStatus() == fb303.ttypes.fb_status.ALIVE:
                    started = True
                    break
                time.sleep(sleep)
                sleep *= 2
            except:
                # Connection refused etc. while the server boots; retry.
                time.sleep(sleep)
                sleep *= 2
                pass
        if not started:
            raise Exception("Beeswax server took too long to come up.")

    # Make sure /tmp is 0777
    cluster.fs.setuser(cluster.superuser)
    if not cluster.fs.isdir('/tmp'):
        cluster.fs.mkdir('/tmp', 0777)
    else:
        cluster.fs.chmod('/tmp', 0777)

    def s():
        # Undo the config overrides, then take down the cluster.
        for f in finish:
            f()
        cluster.shutdown()
    return cluster, s
def test_quota_space():
    """
    Lets make sure we can violate the quota in regards to diskspace
    """
    cluster = mini_cluster.shared_cluster()
    fs = cluster.fs
    try:
        fs.setuser(cluster.superuser)
        if fs.exists('/tmp/foo2'):
            fs.rmtree('/tmp/foo2')
        fs.mkdir("/tmp/foo2", 0777)  # this also tests more restrictive subdirectories

        ONE_HUNDRED_192_MEGS = 1024 * 1024 * 192
        fs.set_diskspace_quota("/tmp/foo2", ONE_HUNDRED_192_MEGS)
        assert_equals(fs.get_diskspace_quota("/tmp/foo2"), ONE_HUNDRED_192_MEGS)

        # A tiny write fits comfortably under the 192MB quota.
        f = fs.open('/tmp/foo2/asdf', 'w')  # we should be able to do this
        f.write('a')
        f.close()
        assert_equals(fs.get_diskspace_quota("/tmp/foo2"), ONE_HUNDRED_192_MEGS)

        # Shrink the quota to 1 byte; the next write must fail.  The
        # violation surfaces lazily, at close().
        fs.set_diskspace_quota("/tmp/foo2", 1)
        assert_equals(fs.get_diskspace_quota("/tmp/foo2"), 1)
        f = fs.open('/tmp/foo2/asdfsd', 'w')
        f.write('a')
        assert_raises(IOError, f.close)

        # Clearing the quota lifts the restriction again.
        fs.clear_diskspace_quota("/tmp/foo2")
        assert_equals(fs.get_diskspace_quota("/tmp/foo2"), None)
        f = fs.open('/tmp/foo2/asdfsda', 'w')
        f.write('a')
        f.close()

        fs.mkdir("/tmp/baz/bar", 0777)  # this tests more permissive subdirectories
        fs.set_diskspace_quota("/tmp/baz", 1)
        fs.set_diskspace_quota("/tmp/baz/bar", ONE_HUNDRED_192_MEGS)
        # NOTE(review): this opens the *directory* path /tmp/baz/bar for
        # write rather than a file inside it -- presumably it only passes
        # because errors are raised lazily at close(); confirm the intent
        # was a path like /tmp/baz/bar/somefile.
        f = fs.open('/tmp/baz/bar', 'w')
        f.write('aaaa')  # should violate the subquota
        assert_raises(IOError, f.close)
    finally:
        if fs.exists('/tmp/baz'):
            fs.rmtree("/tmp/baz")
        if fs.exists('/tmp/foo2'):
            fs.rmtree("/tmp/foo2")
        cluster.shutdown()
def test_edit():
    """Exercise the edit form: view a missing file, save it, overwrite it."""
    cluster = mini_cluster.shared_cluster(conf=True)
    try:
        c = make_logged_in_client(cluster.superuser)
        cluster.fs.setuser(cluster.superuser)
        cluster.fs.mkdir('/test-filebrowser/')

        # File doesn't exist - should be empty
        test_path = '//test-filebrowser//test-edit'
        # (this path is non-normalized to test normalization too)
        edit_url = '/filebrowser/edit' + test_path
        response = c.get(edit_url)
        assert_equal(response.context['form'].data['path'], test_path)
        assert_equal(response.context['form'].data['contents'], "")

        # Just going to the edit page and not hitting save should not
        # create the file
        assert_false(cluster.fs.exists(test_path))

        def save_and_verify(contents):
            # POST to save, then confirm both the form echo and the actual
            # HDFS file contents.
            resp = c.post("/filebrowser/save",
                          dict(path=test_path, contents=contents),
                          follow=True)
            assert_equal(resp.context['form'].data['path'], test_path)
            assert_equal(resp.context['form'].data['contents'], contents)
            fh = cluster.fs.open(test_path)
            assert_equal(fh.read(), contents)
            fh.close()

        # Put some data in there and post; the file should then exist.
        save_and_verify("hello world from editor")
        assert_true(cluster.fs.exists(test_path))

        # We should be able to overwrite the file with another save
        save_and_verify("hello world again from editor")

        # TODO(todd) add test for maintaining ownership/permissions
    finally:
        cluster.shutdown()
def test_view_gz():
    """Viewing gzipped files: explicit compression, autodetection, errors."""
    cluster = mini_cluster.shared_cluster(conf=True)
    try:
        c = make_logged_in_client()
        cluster.fs.setuser(cluster.superuser)
        if cluster.fs.isdir("/test-gz-filebrowser"):
            cluster.fs.rmtree('/test-gz-filebrowser/')
        cluster.fs.mkdir('/test-gz-filebrowser/')

        # Pre-built gzip bytes whose decompressed payload is "sdf\n".
        f = cluster.fs.open('/test-gz-filebrowser/test-view.gz', "w")
        sdf_string = '\x1f\x8b\x08\x082r\xf4K\x00\x03f\x00+NI\xe3\x02\x00\xad\x96b\xc4\x04\x00\x00\x00'
        f.write(sdf_string)
        f.close()

        # Explicit compression=gzip decompresses the content.
        response = c.get(
            '/filebrowser/view/test-gz-filebrowser/test-view.gz?compression=gzip'
        )
        assert_equal(response.context['view']['contents'], "sdf\n")

        # autodetect
        response = c.get('/filebrowser/view/test-gz-filebrowser/test-view.gz')
        assert_equal(response.context['view']['contents'], "sdf\n")

        # offset should do nothing
        response = c.get(
            '/filebrowser/view/test-gz-filebrowser/test-view.gz?compression=gzip&offset=1'
        )
        assert_false(response.context.has_key('view'))

        # Plain text with a .gz extension.
        f = cluster.fs.open('/test-gz-filebrowser/test-view2.gz', "w")
        f.write("hello")
        f.close()

        # we shouldn't autodetect non gzip files
        response = c.get('/filebrowser/view/test-gz-filebrowser/test-view2.gz')
        assert_equal(response.context['view']['contents'], "hello")

        # we should fail to do a bad thing if they specify compression when it's not set.
        response = c.get(
            '/filebrowser/view/test-gz-filebrowser/test-view2.gz?compression=gzip'
        )
        assert_false(response.context.has_key('view'))
    finally:
        try:
            cluster.fs.rmtree('/test-gz-filebrowser/')
        except:
            pass  # Don't let cleanup errors mask earlier failures
        cluster.shutdown()
def test_i18n_namespace(): cluster = mini_cluster.shared_cluster() cluster.fs.setuser(cluster.superuser) def check_existence(name, parent, present=True): assertion = present and assert_true or assert_false listing = cluster.fs.listdir(parent) assertion(name in listing, "%s should be in %s" % (name, listing)) name = u'pt-Olá_ch-你好_ko-안녕_ru-Здравствуйте' prefix = '/tmp/i18n' dir_path = '%s/%s' % (prefix, name) file_path = '%s/%s' % (dir_path, name) try: # Create a directory cluster.fs.mkdir(dir_path) # Directory is there check_existence(name, prefix) # Create a file (same name) in the directory cluster.fs.open(file_path, 'w').close() # File is there check_existence(name, dir_path) # Test rename new_file_path = file_path + '.new' cluster.fs.rename(file_path, new_file_path) # New file is there check_existence(name + '.new', dir_path) # Test remove cluster.fs.remove(new_file_path) check_existence(name + '.new', dir_path, present=False) # Test rmtree cluster.fs.rmtree(dir_path) check_existence(name, prefix, present=False) # Test exception can handle non-ascii characters try: cluster.fs.rmtree(dir_path) except IOError, ex: LOG.info('Successfully caught error: %s' % (ex, )) finally: try: cluster.fs.rmtree(prefix) except Exception, ex: LOG.error('Failed to cleanup %s: %s' % (prefix, ex)) cluster.shutdown()
def test_i18n_namespace():
    """Filesystem namespace operations round-trip non-ascii path names."""
    cluster = mini_cluster.shared_cluster()
    cluster.fs.setuser(cluster.superuser)

    def check_existence(name, parent, present=True):
        # Assert that `name` is (or is not) in the listing of `parent`.
        assertion = present and assert_true or assert_false
        listing = cluster.fs.listdir(parent)
        assertion(name in listing, "%s should be in %s" % (name, listing))

    name = u'pt-Olá_ch-你好_ko-안녕_ru-Здравствуйте'
    prefix = '/tmp/i18n'
    dir_path = '%s/%s' % (prefix, name)
    file_path = '%s/%s' % (dir_path, name)
    try:
        # Create a directory
        cluster.fs.mkdir(dir_path)
        # Directory is there
        check_existence(name, prefix)

        # Create a file (same name) in the directory
        cluster.fs.open(file_path, 'w').close()
        # File is there
        check_existence(name, dir_path)

        # Test rename
        new_file_path = file_path + '.new'
        cluster.fs.rename(file_path, new_file_path)
        # New file is there
        check_existence(name + '.new', dir_path)

        # Test remove
        cluster.fs.remove(new_file_path)
        check_existence(name + '.new', dir_path, present=False)

        # Test rmtree
        cluster.fs.rmtree(dir_path)
        check_existence(name, prefix, present=False)

        # Test exception can handle non-ascii characters
        try:
            cluster.fs.rmtree(dir_path)
        except IOError, ex:
            LOG.info('Successfully caught error: %s' % (ex,))
    finally:
        try:
            cluster.fs.rmtree(prefix)
        except Exception, ex:
            LOG.error('Failed to cleanup %s: %s' % (prefix, ex))
        cluster.shutdown()
def test_listdir():
    """Listings include unicode names and undecodable byte-string names."""
    cluster = mini_cluster.shared_cluster(conf=True)
    try:
        c = make_logged_in_client()
        cluster.fs.setuser(cluster.superuser)

        # These paths contain non-ascii characters. Your editor will need the
        # corresponding font library to display them correctly.
        #
        # We test that mkdir can handle unicode strings as well as byte strings.
        # And even when the byte string can't be decoded properly (big5), the listdir
        # still succeeds.
        orig_paths = [
            u'greek-Ελληνικά',
            u'chinese-漢語',
            'listdir',
            'non-utf-8-(big5)-\xb2\xc4\xa4@\xb6\xa5\xacq',
        ]
        prefix = '/test-filebrowser/'
        for path in orig_paths:
            cluster.fs.mkdir(prefix + path)

        response = c.get('/filebrowser/view' + prefix)
        paths = [f['path'] for f in response.context['files']]
        for path in orig_paths:
            # Byte strings are compared via errors='replace' decoding, so
            # even undecodable names appear (with replacement characters).
            if isinstance(path, unicode):
                uni_path = path
            else:
                uni_path = unicode(path, 'utf-8', errors='replace')
            assert_true(
                prefix + uni_path in paths,
                '%s should be in dir listing %s' % (prefix + uni_path, paths))

        # Delete user's home if there's already something there
        if cluster.fs.isdir("/user/test"):
            cluster.fs.rmtree("/user/test")
        assert_false(response.context['home_directory'])

        # test's home directory now exists. Should be returned.
        cluster.fs.mkdir('/user/test')
        response = c.get('/filebrowser/view/test-filebrowser/')
        assert_equal(response.context['home_directory'], '/user/test')
    finally:
        try:
            cluster.fs.rmtree('/test-filebrowser')
            cluster.fs.rmtree('/user/test')
        except:
            pass  # Don't let cleanup errors mask earlier failures
        cluster.shutdown()
def setup_class(cls):
    """Bring up the shared cluster, jobsubd, and a logged-in test client."""
    test_client = make_logged_in_client('test')
    test_cluster = mini_cluster.shared_cluster(conf=True)
    daemon = in_process_jobsubd(test_cluster.config_dir)

    # Make home directory
    test_cluster.fs.setuser(test_cluster.superuser)
    if not test_cluster.fs.exists("/user/test"):
        test_cluster.fs.mkdir("/user/test")
    test_cluster.fs.chown("/user/test", "test", "test")
    test_cluster.fs.setuser("test")

    cls.cluster = test_cluster
    cls.client = test_client
    cls.jobsubd = daemon
def test_listdir():
    """Directory listings handle unicode and undecodable byte-string names."""
    cluster = mini_cluster.shared_cluster(conf=True)
    try:
        c = make_logged_in_client()
        cluster.fs.setuser(cluster.superuser)

        # These paths contain non-ascii characters. Your editor will need the
        # corresponding font library to display them correctly.
        #
        # We test that mkdir can handle unicode strings as well as byte strings.
        # And even when the byte string can't be decoded properly (big5), the
        # listdir still succeeds.
        orig_paths = [
            u'greek-Ελληνικά',
            u'chinese-漢語',
            'listdir',
            'non-utf-8-(big5)-\xb2\xc4\xa4@\xb6\xa5\xacq',
        ]
        prefix = '/test-filebrowser/'
        for entry_name in orig_paths:
            cluster.fs.mkdir(prefix + entry_name)

        response = c.get('/filebrowser/view' + prefix)
        listed = [entry['path'] for entry in response.context['files']]
        for entry_name in orig_paths:
            # Undecodable byte strings show up decoded with replacement chars.
            if isinstance(entry_name, unicode):
                uni_path = entry_name
            else:
                uni_path = unicode(entry_name, 'utf-8', errors='replace')
            assert_true(prefix + uni_path in listed,
                        '%s should be in dir listing %s' % (prefix + uni_path, listed))

        # Delete user's home if there's already something there
        if cluster.fs.isdir("/user/test"):
            cluster.fs.rmtree("/user/test")
        assert_false(response.context['home_directory'])

        # test's home directory now exists. Should be returned.
        cluster.fs.mkdir('/user/test')
        response = c.get('/filebrowser/view/test-filebrowser/')
        assert_equal(response.context['home_directory'], '/user/test')
    finally:
        try:
            cluster.fs.rmtree('/test-filebrowser')
            cluster.fs.rmtree('/user/test')
        except:
            pass  # Don't let cleanup errors mask earlier failures
        cluster.shutdown()
def test_chown():
    """The Hadoop superuser can chown through the filebrowser form."""
    cluster = mini_cluster.shared_cluster(conf=True)
    try:
        # Only the Hadoop superuser really has carte blanche here
        client = make_logged_in_client(cluster.superuser)
        cluster.fs.setuser(cluster.superuser)

        path = "/test-chown"
        cluster.fs.mkdir(path)

        # Plain user/group change, verified via stats().
        client.post("/filebrowser/chown", dict(path=path, user="******", group="y"))
        stats = cluster.fs.stats(path)
        assert_equal("x", stats["user"])
        assert_equal("y", stats["group"])

        # user_other supplies a free-form user name.
        client.post("/filebrowser/chown",
                    dict(path=path, user="******", user_other="z", group="y"))
        assert_equal("z", cluster.fs.stats(path)["user"])
    finally:
        cluster.shutdown()
def test_view_gz():
    """Gzip view: explicit compression, autodetection, and failure modes."""
    cluster = mini_cluster.shared_cluster(conf=True)
    try:
        client = make_logged_in_client()
        cluster.fs.setuser(cluster.superuser)
        if cluster.fs.isdir("/test-gz-filebrowser"):
            cluster.fs.rmtree('/test-gz-filebrowser/')
        cluster.fs.mkdir('/test-gz-filebrowser/')

        # Pre-built gzip stream whose payload is "sdf\n".
        out = cluster.fs.open('/test-gz-filebrowser/test-view.gz', "w")
        sdf_string = '\x1f\x8b\x08\x082r\xf4K\x00\x03f\x00+NI\xe3\x02\x00\xad\x96b\xc4\x04\x00\x00\x00'
        out.write(sdf_string)
        out.close()

        # Explicit compression=gzip decompresses.
        response = client.get('/filebrowser/view/test-gz-filebrowser/test-view.gz?compression=gzip')
        assert_equal(response.context['view']['contents'], "sdf\n")

        # autodetect
        response = client.get('/filebrowser/view/test-gz-filebrowser/test-view.gz')
        assert_equal(response.context['view']['contents'], "sdf\n")

        # offset should do nothing
        response = client.get('/filebrowser/view/test-gz-filebrowser/test-view.gz?compression=gzip&offset=1')
        assert_false(response.context.has_key('view'))

        # Plain text with a .gz extension.
        out = cluster.fs.open('/test-gz-filebrowser/test-view2.gz', "w")
        out.write("hello")
        out.close()

        # we shouldn't autodetect non gzip files
        response = client.get('/filebrowser/view/test-gz-filebrowser/test-view2.gz')
        assert_equal(response.context['view']['contents'], "hello")

        # we should fail to do a bad thing if they specify compression when it's not set.
        response = client.get('/filebrowser/view/test-gz-filebrowser/test-view2.gz?compression=gzip')
        assert_false(response.context.has_key('view'))
    finally:
        try:
            cluster.fs.rmtree('/test-gz-filebrowser/')
        except:
            pass  # Don't let cleanup errors mask earlier failures
        cluster.shutdown()
def test_quota_namespace_count():
    """
    Lets make sure we can violate the number of names in a directory limitation
    """
    cluster = mini_cluster.shared_cluster()
    try:
        fs = cluster.fs
        fs.setuser(cluster.superuser)
        if fs.exists('/tmp/foo2'):
            fs.rmtree('/tmp/foo2')
        fs.mkdir("/tmp/foo2", 0777)

        # check the get_namespace_quota function
        fs.set_namespace_quota("/tmp/foo2", 4)
        assert_equals(fs.get_namespace_quota("/tmp/foo2"), 4)

        # violate the namespace count
        # (3 files succeed under a quota of 4 -- presumably the directory
        # itself counts as one name; the 4th create fails lazily at close)
        for i in range(3):
            f = fs.open('/tmp/foo2/works' + str(i), 'w')
            f.write('a')
            f.close()
        f = fs.open('/tmp/foo2/asdfsdc', 'w')
        f.write('a')
        assert_raises(IOError, f.close)

        # Check summary stats
        summary = fs.get_usage_and_quota('/tmp/foo2')
        assert_equals(3, summary["file_count"])
        assert_equals(4, summary["file_quota"])
        assert_equals(None, summary["space_quota"])
        assert_true(None is not summary["space_used"])

        # make sure the clear works
        fs.clear_namespace_quota("/tmp/foo2")
        assert_equals(fs.get_namespace_quota("/tmp/foo2"), None)
        f = fs.open('/tmp/foo2/asdfsdd', 'w')
        f.write('a')
        f.close()
    finally:
        if fs.exists('/tmp/foo2'):
            fs.rmtree("/tmp/foo2")
        cluster.shutdown()
def test_threadedness():
    """The fs user identity is tracked per-thread, not globally."""
    # Start a second thread to change the user, and make sure that isn't
    # reflected in this thread.
    cluster = mini_cluster.shared_cluster()
    try:
        fs = cluster.fs
        fs.setuser("alpha")

        class SwitchUser(Thread):
            def run(self):
                fs.setuser("beta")
                assert_equals("beta", fs.user)

        switcher = SwitchUser()
        switcher.start()
        switcher.join()

        # The other thread's setuser must not have leaked into this one.
        assert_equals("alpha", fs.user)
        fs.setuser("gamma")
        assert_equals("gamma", fs.user)
    finally:
        cluster.shutdown()
def test_two_files_open():
    """
    See DESKTOP-510.  There was a bug where you couldn't open two files at
    the same time.  It boils down to a close_fds=True issue.  If this
    doesn't hang, all is good.
    """
    cluster = mini_cluster.shared_cluster()
    try:
        fs = cluster.fs
        fs.setuser(cluster.superuser)
        # Hold both handles open simultaneously while writing.
        first = fs.open("/test_one.txt", "w")
        second = fs.open("/test_two.txt", "w")
        first.write("foo")
        second.write("bar")
        first.close()
        second.close()
        # This should work, not hang, etc.
    finally:
        cluster.shutdown()
def setup_class(cls):
    """Set up cluster, jobsubd, and client; ensure /user/test and /tmp exist."""
    client = make_logged_in_client('test')
    cluster = mini_cluster.shared_cluster(conf=True)
    jobsubd = in_process_jobsubd(cluster.config_dir)

    # Make home directory
    cluster.fs.setuser(cluster.superuser)
    if not cluster.fs.exists("/user/test"):
        cluster.fs.mkdir("/user/test")
    cluster.fs.chown("/user/test", "test", "test")

    # World-writable /tmp -- presumably scratch space for job submission;
    # only created (and chmod'ed) when missing.
    if not cluster.fs.exists("/tmp"):
        cluster.fs.mkdir("/tmp")
        cluster.fs.chmod("/tmp", int('777', 8))

    cluster.fs.setuser("test")

    cls.cluster = cluster
    cls.client = client
    cls.jobsubd = jobsubd
def test_exceptions(): """ Tests that appropriate exceptions are raised. """ cluster = mini_cluster.shared_cluster() try: fs = cluster.fs fs.setuser(cluster.superuser) f = fs.open("/for_exception_test.txt", "w") f.write("foo") f.close() fs.chmod("/for_exception_test.txt", 0400) fs.setuser("notsuperuser") f = fs.open("/for_exception_test.txt") # Arguably, this should have thrown already, at open, but # we throw the exception lazily, when getting block locations. assert_raises(PermissionDeniedException, f.read) assert_raises(IOError, fs.open, "/test/doesnotexist.txt") finally: cluster.shutdown()
def test_hadoopfs():
    """
    Minimal tests for a few basic file system operations.
    """
    cluster = mini_cluster.shared_cluster()
    try:
        fs = cluster.fs
        fs.setuser(cluster.superuser)
        out = fs.open("/fortest.txt", "w")
        try:
            out.write("hello")
            out.close()

            # Content and size metadata round-trip.
            assert_equals("hello", fs.open("/fortest.txt").read())
            assert_equals(5, fs.stats("/fortest.txt")["size"])

            # isfile/isdir agree on files vs. directories.
            assert_true(fs.isfile("/fortest.txt"))
            assert_false(fs.isfile("/"))
            assert_true(fs.isdir("/"))
            assert_false(fs.isdir("/fortest.txt"))
        finally:
            fs.remove("/fortest.txt")
    finally:
        cluster.shutdown()
def test_quota_argument_smarts():
    """
    Test invalid quota parameters
    """
    cluster = mini_cluster.shared_cluster()
    fs = cluster.fs
    try:
        fs.setuser(cluster.superuser)
        fs.mkdir("/tmp/foo2", 0777)
        fs.set_diskspace_quota("/tmp/foo2", 1)
        fs.set_namespace_quota("/tmp/foo2", 1)

        # Negative quotas and quotas on the root directory are rejected.
        assert_raises(ValueError, fs.set_diskspace_quota, "/tmp/foo2", -5)
        assert_raises(ValueError, fs.set_diskspace_quota, '/', 10)
        assert_raises(ValueError, fs.set_namespace_quota, '/', 10)

        # NOTE(review): a fractional quota is currently accepted silently;
        # no assertion guards this call.
        fs.set_diskspace_quota("/tmp/foo2", 1.1)  # This should actually fail i think
    finally:
        fs.rmtree("/tmp/foo2")
        cluster.shutdown()
def test_view_i18n(): cluster = mini_cluster.shared_cluster(conf=True) try: cluster.fs.setuser(cluster.superuser) cluster.fs.mkdir('/test-filebrowser/') # Test viewing files in different encodings content = u'pt-Olá en-hello ch-你好 ko-안녕 ru-Здравствуйте' view_helper(cluster, 'utf-8', content) view_helper(cluster, 'utf-16', content) content = u'你好-big5' view_helper(cluster, 'big5', content) content = u'こんにちは-shift-jis' view_helper(cluster, 'shift_jis', content) content = u'안녕하세요-johab' view_helper(cluster, 'johab', content) # Test that the default view is home c = make_logged_in_client() response = c.get('/filebrowser/view/') assert_equal(response.context['path'], '/') cluster.fs.mkdir('/user/test') cluster.fs.chown("/user/test", "test", "test") response = c.get('/filebrowser/view/?default_to_home=1') assert_equal("http://testserver/filebrowser/view/user/test", response["location"]) finally: try: cluster.fs.rmtree('/test-filebrowser/') cluster.fs.rmtree('/user/test') except Exception, ex: LOG.error('Failed to cleanup test directory: %s' % (ex, )) cluster.shutdown()
def test_listdir():
  """
  Directory listings via the filebrowser, including the
  home-directory hint in the template context.
  """
  cluster = mini_cluster.shared_cluster(conf=True)
  try:
    c = make_logged_in_client()
    cluster.fs.setuser(cluster.superuser)

    # Start from a clean slate: no pre-existing home for "test".
    if cluster.fs.isdir("/user/test"):
      cluster.fs.rmtree("/user/test")

    cluster.fs.mkdir('/test-filebrowser/listdir')
    response = c.get('/filebrowser/view/test-filebrowser/')
    listed = [entry['path'] for entry in response.context['files']]
    assert_true("/test-filebrowser/listdir" in listed)
    # test's home dir doesn't exist yet
    assert_false(response.context['home_directory'])

    # test's home directory now exists. Should be returned.
    cluster.fs.mkdir('/user/test')
    response = c.get('/filebrowser/view/test-filebrowser/')
    assert_equal(response.context['home_directory'], '/user/test')
  finally:
    cluster.shutdown()
def get_shared_beeswax_server(): # Copy hive-default.xml from BEESWAX_HIVE_CONF_DIR before it is set to # /my/bogus/path default_xml = file(beeswax.conf.BEESWAX_HIVE_CONF_DIR.get() + "/hive-default.xml").read() finish = ( beeswax.conf.BEESWAX_SERVER_HOST.set_for_testing("localhost"), beeswax.conf.BEESWAX_SERVER_PORT.set_for_testing(BEESWAXD_TEST_PORT), beeswax.conf.BEESWAX_META_SERVER_HOST.set_for_testing("localhost"), beeswax.conf.BEESWAX_META_SERVER_PORT.set_for_testing( BEESWAXD_TEST_PORT + 1), # Use a bogus path to avoid loading the normal hive-site.xml beeswax.conf.BEESWAX_HIVE_CONF_DIR.set_for_testing('/my/bogus/path')) cluster = mini_cluster.shared_cluster(conf=True) # Copy hive-default.xml into the mini_cluster's conf dir, which happens to be # in the cluster's tmpdir. This tmpdir is determined during the mini_cluster # startup, during which BEESWAX_HIVE_CONF_DIR needs to be set to # /my/bogus/path. Hence the step of writing to memory. # hive-default.xml will get picked up by the beeswax_server during startup file(cluster.tmpdir + "/conf/hive-default.xml", 'w').write(default_xml) global _SHARED_BEESWAX_SERVER_PROCESS if _SHARED_BEESWAX_SERVER_PROCESS is None: p = _start_server(cluster) _SHARED_BEESWAX_SERVER_PROCESS = p def kill(): LOG.info("Killing beeswax server (pid %d)." % p.pid) os.kill(p.pid, 9) p.wait() atexit.register(kill) # Wait for server to come up, by repeatedly trying. 
start = time.time() started = False sleep = 0.001 while not started and time.time() - start < 20.0: try: client = beeswax.db_utils.db_client() meta_client = beeswax.db_utils.meta_client() client.echo("echo") if meta_client.getStatus() == fb303.ttypes.fb_status.ALIVE: started = True break time.sleep(sleep) sleep *= 2 except: time.sleep(sleep) sleep *= 2 pass if not started: raise Exception("Beeswax server took too long to come up.") # Make sure /tmp is 0777 cluster.fs.setuser(cluster.superuser) if not cluster.fs.isdir('/tmp'): cluster.fs.mkdir('/tmp', 0777) else: cluster.fs.chmod('/tmp', 0777) def s(): for f in finish: f() cluster.shutdown() return cluster, s
def test_jobsub_setup_and_samples():
  """
  Merely exercises jobsub_setup, and then runs all the examples.
  """
  # NOTE(review): a second definition of this function appears later in the
  # file (guarded by SkipTest) and shadows this one at import time -- confirm
  # which copy is intended to survive.
  cluster = mini_cluster.shared_cluster(conf=True)
  jobsubd = in_process_jobsubd(cluster.config_dir)
  try:
    c = make_logged_in_client()

    # Create a job, to make sure that it sticks around
    response = c.post("/jobsub/new/jar", dict(
      name="should_stick_around",
      jarfile="foo",
      arguments="foo",
      submit="Save"))
    design_id = response.context["saved"]

    # Run the jobsub setup command (idempotent) to install the examples.
    import jobsub.management.commands.jobsub_setup as jobsub_setup
    if not jobsub_setup.Command().has_been_setup():
      jobsub_setup.Command().handle()

    # Make sure we have three job designs now.
    assert_equal(3, JobDesign.objects.filter(name__startswith="Example: ").count())
    # Make sure "should_stick_around" is still there
    assert_equal(1, JobDesign.objects.filter(name="should_stick_around").count())
    # Make sure sample user got created.
    assert_equal(1, User.objects.filter(username="******").count())
    assert_equal(1, User.objects.filter(username="******").count())

    # And now submit and run the samples
    # pi Example
    # Irritatingly, /user/test needs to exist first
    setup_cluster_fs(cluster)
    id = JobDesign.objects.get(name__contains="Example: Pi").id
    response = c.get("/jobsub/submit/%d" % id)
    assert_true("Iterations per mapper" in response.content)
    assert_true("Num of mappers" in response.content)
    response = c.post("/jobsub/submit/%d" % id, dict(
      iterations_per_mapper=10,
      num_of_mappers=1))
    # Block until the submitted job finishes, then check its output.
    response = watch_till_complete(c, parse_out_id(response))
    assert_true("Estimated value of Pi is" in response.context["job_data"].stdout_tail)
    assert_true("bin/hadoop returned 0" in response.content)

    # Wordcount example
    id = JobDesign.objects.get(name__contains="Example: Streaming Wordcount").id
    response = c.get("/jobsub/submit/%d" % id)
    response = c.post("/jobsub/submit/%d" % id, dict(
      output="/user/test/jobsub-streaming-test"))
    response = watch_till_complete(c, parse_out_id(response))
    assert_true("streaming.StreamJob: Job complete:" in
      response.context["job_data"].stderr_tail)
    assert_true(cluster.fs.exists("/user/test/jobsub-streaming-test/part-00000"))
    # Not running sleep example, since it adds little.
  finally:
    # NOTE(review): if jobsubd.exit() raises, cluster.shutdown() never runs --
    # consider nesting these in try/finally.
    jobsubd.exit()
    cluster.shutdown()
def test_job_submission():
  """
  End-to-end jar job submission: design creation, parameterization,
  submission, output verification, and watch-page behavior.
  """
  # Permanently skipped: everything below the raise is dead code.
  # NOTE(review): an active duplicate of this function appears later in the
  # file and shadows this one -- confirm whether this copy can be deleted.
  raise SkipTest
  JARNAME = posixpath.basename(hadoop.conf.HADOOP_EXAMPLES_JAR.get())
  c = make_logged_in_client()
  cluster = mini_cluster.shared_cluster(conf=True)
  jobsubd = in_process_jobsubd(cluster.config_dir)
  # Remember the number of pending jobs beforehand
  n_pending = c.get("/jobsub/status_bar/").context["pending_count"]
  try:
    # Create a job
    response = c.post("/jobsub/new/jar", dict(
      name="wordcount",
      jarfile="/user/test/%s" % JARNAME,
      arguments="wordcount $input $output",
      submit="Save"))
    design_id = response.context["saved"]

    # Submission should get a parameterization form
    response = c.get("/jobsub/submit/%d" % design_id)
    assert_true("<form " in response.content)

    # Create home dir
    setup_cluster_fs(cluster)

    # Prepare sample data
    f = cluster.fs.open("/user/test/input", "w")
    f.write("alpha beta gamma\nepsilon zeta theta\nalpha beta\n")
    f.close()

    # We also have to upload the jar file
    src = file(hadoop.conf.HADOOP_EXAMPLES_JAR.get())
    try:
      dst = cluster.fs.open("/user/test/%s" % JARNAME, "w")
      try:
        shutil.copyfileobj(src, dst)
      finally:
        dst.close()
    finally:
      src.close()

    # Status_bar should be at original
    assert_equal(n_pending, c.get("/jobsub/status_bar/").context["pending_count"])

    # Let's parameterize and submit
    INPUT, OUTPUT = "/user/test/input", "/user/test/output"
    response = c.post("/jobsub/submit/%d" % design_id, dict(input=INPUT, output=OUTPUT))
    watch_id = parse_out_id(response)

    # Status bar at original + 1
    assert_equal(n_pending + 1, c.get("/jobsub/status_bar/").context["pending_count"])

    # Let's take a look
    response = watch_till_complete(c, watch_id)
    assert_equal(1, len(response.context["job_data"].hadoop_job_ids),
      "Should have launched and captured exactly one Hadoop job")
    submission = Submission.objects.get(id=watch_id)
    assert_equal(["wordcount", INPUT, OUTPUT],
      submission.submission_plan.steps[1].bin_hadoop_step.arguments[2:])
    hadoop_job_id = response.context["job_data"].hadoop_job_ids[0]

    # Status bar back to original
    assert_equal(n_pending,
      c.get("/jobsub/status_bar/").context["pending_count"])

    # Make sure the counts are right:
    lines = cluster.fs.open("/user/test/output/part-r-00000").read().splitlines()
    counts = {}
    for line in lines:
      # NOTE(review): maxsplit=2 can produce three fields and break this
      # two-name unpack; maxsplit=1 matches the word/count pair.
      word, count = line.split("\t", 2)
      count = int(count)
      counts[word] = count
    assert_equal(dict(alpha=2, beta=2, gamma=1, epsilon=1, zeta=1, theta=1), counts)

    # And check that the output file has correct permissions.
    assert_equal("test", cluster.fs.stats("/user/test/output/part-r-00000")["user"],
      "Wrong username for job output.")
    assert_equal("test", cluster.fs.stats("/user/test/output/part-r-00000")["group"],
      "Wrong groupname for job output.")

    # Just to be sure it really happened, check the Job struct
    # There's no way to get just one job (eek!)...
    job_map = dict([ (x.jobID.asString, x) for x in cluster.jt.completed_jobs().jobs ])
    this_job = job_map[hadoop_job_id]

    # Check username and group
    assert_equal("test", this_job.profile.user)

    # Let's kill the temporary directory, and make sure watch
    # output still works.  We do file deletion very explicitly,
    # because tests that might mistakenly delete your home directory
    # tend to cause unhappiness.
    server_id = Submission.objects.get(id=watch_id).submission_handle.id
    tmp_dir = ServerSubmissionState.objects.get(id=server_id).tmp_dir
    for filename in ("jobs", "stderr", "stdout", os.path.join("work", "tmp.jar")):
      os.remove(os.path.join(tmp_dir, filename))
    os.rmdir(os.path.join(tmp_dir, "work"))
    os.rmdir(tmp_dir)
    response = c.get("/jobsub/watch/%d" % watch_id)
    assert_true("No longer available" in response.content)
  finally:
    # NOTE(review): if cluster.shutdown() raises, jobsubd.exit() never runs.
    cluster.shutdown()
    jobsubd.exit()
def test_jobsub_setup_and_samples():
  """
  Merely exercises jobsub_setup, and then runs all the examples.
  """
  # Permanently skipped: everything below the raise is dead code, kept
  # for reference.  This later definition shadows the earlier copy of
  # the same function, so the test is effectively disabled file-wide.
  raise SkipTest
  cluster = mini_cluster.shared_cluster(conf=True)
  jobsubd = in_process_jobsubd(cluster.config_dir)
  try:
    c = make_logged_in_client()

    # Create a job, to make sure that it sticks around
    response = c.post("/jobsub/new/jar", dict(
      name="should_stick_around",
      jarfile="foo",
      arguments="foo",
      submit="Save"))
    design_id = response.context["saved"]

    # Run the jobsub setup command (idempotent) to install the examples.
    import jobsub.management.commands.jobsub_setup as jobsub_setup
    if not jobsub_setup.Command().has_been_setup():
      jobsub_setup.Command().handle()

    # Make sure we have three job designs now.
    assert_equal(3, JobDesign.objects.filter(name__startswith="Example: ").count())
    # Make sure "should_stick_around" is still there
    assert_equal(1, JobDesign.objects.filter(name="should_stick_around").count())
    # Make sure sample user got created.
    assert_equal(1, User.objects.filter(username="******").count())
    assert_equal(1, User.objects.filter(username="******").count())

    # And now submit and run the samples
    # pi Example
    # Irritatingly, /user/test needs to exist first
    setup_cluster_fs(cluster)
    id = JobDesign.objects.get(name__contains="Example: Pi").id
    response = c.get("/jobsub/submit/%d" % id)
    assert_true("Iterations per mapper" in response.content)
    assert_true("Num of mappers" in response.content)
    response = c.post("/jobsub/submit/%d" % id, dict(
      iterations_per_mapper=10,
      num_of_mappers=1))
    # Block until the submitted job finishes, then check its output.
    response = watch_till_complete(c, parse_out_id(response))
    assert_true("Estimated value of Pi is" in response.context["job_data"].stdout_tail)
    assert_true("bin/hadoop returned 0" in response.content)

    # Wordcount example
    id = JobDesign.objects.get(name__contains="Example: Streaming Wordcount").id
    response = c.get("/jobsub/submit/%d" % id)
    response = c.post("/jobsub/submit/%d" % id, dict(
      output="/user/test/jobsub-streaming-test"))
    response = watch_till_complete(c, parse_out_id(response))
    assert_true("streaming.StreamJob: Job complete:" in
      response.context["job_data"].stderr_tail)
    assert_true(cluster.fs.exists("/user/test/jobsub-streaming-test/part-00000"))
    # Not running sleep example, since it adds little.
  finally:
    # NOTE(review): if jobsubd.exit() raises, cluster.shutdown() never runs.
    jobsubd.exit()
    cluster.shutdown()
def test_job_submission():
  """
  End-to-end jar job submission: create a wordcount design, upload
  data and jar, submit, verify counts/permissions, and check the
  watch page after the server's temp dir is deleted.
  """
  JARNAME = posixpath.basename(hadoop.conf.HADOOP_EXAMPLES_JAR.get())
  c = make_logged_in_client()
  cluster = mini_cluster.shared_cluster(conf=True)
  jobsubd = in_process_jobsubd(cluster.config_dir)
  # Remember the number of pending jobs beforehand
  n_pending = c.get("/jobsub/status_bar/").context["pending_count"]
  try:
    # Create a job
    response = c.post("/jobsub/new/jar", dict(
      name="wordcount",
      jarfile="/user/test/%s" % JARNAME,
      arguments="wordcount $input $output",
      submit="Save"))
    design_id = response.context["saved"]

    # Submission should get a parameterization form
    response = c.get("/jobsub/submit/%d" % design_id)
    assert_true("<form " in response.content)

    # Create home dir
    setup_cluster_fs(cluster)

    # Prepare sample data
    f = cluster.fs.open("/user/test/input", "w")
    f.write("alpha beta gamma\nepsilon zeta theta\nalpha beta\n")
    f.close()

    # We also have to upload the jar file
    src = file(hadoop.conf.HADOOP_EXAMPLES_JAR.get())
    try:
      dst = cluster.fs.open("/user/test/%s" % JARNAME, "w")
      try:
        shutil.copyfileobj(src, dst)
      finally:
        dst.close()
    finally:
      src.close()

    # Status_bar should be at original
    assert_equal(n_pending, c.get("/jobsub/status_bar/").context["pending_count"])

    # Let's parameterize and submit
    INPUT, OUTPUT = "/user/test/input", "/user/test/output"
    response = c.post("/jobsub/submit/%d" % design_id, dict(input=INPUT, output=OUTPUT))
    watch_id = parse_out_id(response)

    # Status bar at original + 1
    assert_equal(n_pending + 1, c.get("/jobsub/status_bar/").context["pending_count"])

    # Let's take a look
    response = watch_till_complete(c, watch_id)
    assert_equal(1, len(response.context["job_data"].hadoop_job_ids),
      "Should have launched and captured exactly one Hadoop job")
    submission = Submission.objects.get(id=watch_id)
    assert_equal(["wordcount", INPUT, OUTPUT],
      submission.submission_plan.steps[1].bin_hadoop_step.arguments[2:])
    hadoop_job_id = response.context["job_data"].hadoop_job_ids[0]

    # Status bar back to original
    assert_equal(n_pending,
      c.get("/jobsub/status_bar/").context["pending_count"])

    # Make sure the counts are right:
    lines = cluster.fs.open("/user/test/output/part-r-00000").read().splitlines()
    counts = {}
    for line in lines:
      # Output lines are "word\tcount"; split on the first tab only so the
      # two-name unpack cannot fail on an unexpected extra tab (the original
      # maxsplit=2 could yield three fields).
      word, count = line.split("\t", 1)
      count = int(count)
      counts[word] = count
    assert_equal(dict(alpha=2, beta=2, gamma=1, epsilon=1, zeta=1, theta=1), counts)

    # And check that the output file has correct permissions.
    assert_equal("test", cluster.fs.stats("/user/test/output/part-r-00000")["user"],
      "Wrong username for job output.")
    assert_equal("test", cluster.fs.stats("/user/test/output/part-r-00000")["group"],
      "Wrong groupname for job output.")

    # Just to be sure it really happened, check the Job struct
    # There's no way to get just one job (eek!)...
    job_map = dict([ (x.jobID.asString, x) for x in cluster.jt.completed_jobs().jobs ])
    this_job = job_map[hadoop_job_id]

    # Check username and group
    assert_equal("test", this_job.profile.user)

    # Let's kill the temporary directory, and make sure watch
    # output still works.  We do file deletion very explicitly,
    # because tests that might mistakenly delete your home directory
    # tend to cause unhappiness.
    server_id = Submission.objects.get(id=watch_id).submission_handle.id
    tmp_dir = ServerSubmissionState.objects.get(id=server_id).tmp_dir
    for filename in ("jobs", "stderr", "stdout", os.path.join("work", "tmp.jar")):
      os.remove(os.path.join(tmp_dir, filename))
    os.rmdir(os.path.join(tmp_dir, "work"))
    os.rmdir(tmp_dir)
    response = c.get("/jobsub/watch/%d" % watch_id)
    assert_true("No longer available" in response.content)
  finally:
    # Guarantee jobsubd exits even if the cluster shutdown raises
    # (previously a shutdown failure leaked the jobsubd process).
    try:
      cluster.shutdown()
    finally:
      jobsubd.exit()
def get_shared_beeswax_server(): # Copy hive-default.xml from BEESWAX_HIVE_CONF_DIR before it is set to # /my/bogus/path default_xml = file(beeswax.conf.BEESWAX_HIVE_CONF_DIR.get()+"/hive-default.xml").read() finish = ( beeswax.conf.BEESWAX_SERVER_HOST.set_for_testing("localhost"), beeswax.conf.BEESWAX_SERVER_PORT.set_for_testing(BEESWAXD_TEST_PORT), beeswax.conf.BEESWAX_META_SERVER_HOST.set_for_testing("localhost"), beeswax.conf.BEESWAX_META_SERVER_PORT.set_for_testing(BEESWAXD_TEST_PORT + 1), # Use a bogus path to avoid loading the normal hive-site.xml beeswax.conf.BEESWAX_HIVE_CONF_DIR.set_for_testing('/my/bogus/path') ) cluster = mini_cluster.shared_cluster(conf=True) # Copy hive-default.xml into the mini_cluster's conf dir, which happens to be # in the cluster's tmpdir. This tmpdir is determined during the mini_cluster # startup, during which BEESWAX_HIVE_CONF_DIR needs to be set to # /my/bogus/path. Hence the step of writing to memory. # hive-default.xml will get picked up by the beeswax_server during startup file(cluster.tmpdir+"/conf/hive-default.xml", 'w').write(default_xml) global _SHARED_BEESWAX_SERVER_PROCESS if _SHARED_BEESWAX_SERVER_PROCESS is None: p = _start_server(cluster) _SHARED_BEESWAX_SERVER_PROCESS = p def kill(): LOG.info("Killing beeswax server (pid %d)." % p.pid) os.kill(p.pid, 9) p.wait() atexit.register(kill) # Wait for server to come up, by repeatedly trying. 
start = time.time() started = False sleep = 0.001 while not started and time.time() - start < 20.0: try: client = beeswax.db_utils.db_client() meta_client = beeswax.db_utils.meta_client() client.echo("echo") if meta_client.getStatus() == fb303.ttypes.fb_status.ALIVE: started = True break time.sleep(sleep) sleep *= 2 except: time.sleep(sleep) sleep *= 2 pass if not started: raise Exception("Beeswax server took too long to come up.") # Make sure /tmp is 0777 cluster.fs.setuser(cluster.superuser) if not cluster.fs.isdir('/tmp'): cluster.fs.mkdir('/tmp', 0777) else: cluster.fs.chmod('/tmp', 0777) def s(): for f in finish: f() cluster.shutdown() return cluster, s