def test_browse_partition(self):
  if is_live_cluster():
    raise SkipTest('HUE-2902: Test is not re-entrant')

  partition_spec = "baz='baz_one',boom='boom_two'"
  response = self.client.get("/metastore/table/%s/test_partitions/partitions/%s/browse" % (self.db_name, partition_spec), follow=True)
  if is_live_cluster():
    path = '/user/hive/warehouse/%s.db/test_partitions/baz=baz_one/boom=boom_two' % self.db_name
  else:
    path = '/user/hive/warehouse/test_partitions/baz=baz_one/boom=boom_two'
  filebrowser_path = urllib.unquote(reverse("filebrowser.views.view", kwargs={'path': path}))
  assert_equal(response.request['PATH_INFO'], filebrowser_path)

def test_has_write_access_backend(self):
  if is_live_cluster():
    raise SkipTest('HUE-2900: Needs debugging on live cluster')

  client = make_logged_in_client(username='******', groupname='write_access_backend', is_superuser=False)
  grant_access("write_access_backend", "write_access_backend", "metastore")
  grant_access("write_access_backend", "write_access_backend", "beeswax")
  user = User.objects.get(username='******')

  resp = _make_query(client, 'CREATE TABLE test_perm_1 (a int);', database=self.db_name)  # Only fails if we were using Sentry and won't allow SELECT to user
  resp = wait_for_query_to_finish(client, resp, max=30.0)

  def check(client, http_codes):
    resp = client.get('/metastore/tables/drop/%s' % self.db_name)
    assert_true(resp.status_code in http_codes, resp.content)

    resp = client.post('/metastore/tables/drop/%s' % self.db_name, {u'table_selection': [u'test_perm_1']})
    assert_true(resp.status_code in http_codes, resp.content)

  check(client, [301])  # Denied

  # Add access
  group, created = Group.objects.get_or_create(name='write_access_backend')
  perm, created = HuePermission.objects.get_or_create(app='metastore', action='write')
  GroupPermission.objects.get_or_create(group=group, hue_permission=perm)

  check(client, [200, 302])  # Ok

def teardown_class(cls):
  if is_live_cluster():
    # Delete test DB and tables
    query_server = get_query_server_config()
    client = make_logged_in_client()
    user = User.objects.get(username='******')
    db = dbms.get(user, query_server)

    # Kill Spark context if running
    if is_hive_on_spark() and cluster.is_yarn():
      # TODO: We should clean up the running Hive on Spark job here
      pass

    for db_name in [cls.db_name, '%s_other' % cls.db_name]:
      databases = db.get_databases()
      if db_name in databases:
        tables = db.get_tables(database=db_name)
        for table in tables:
          make_query(client, 'DROP TABLE IF EXISTS `%(db)s`.`%(table)s`' % {'db': db_name, 'table': table}, wait=True)
        make_query(client, 'DROP VIEW IF EXISTS `%(db)s`.`myview`' % {'db': db_name}, wait=True)
        make_query(client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db_name}, wait=True)

        # Check the cleanup
        databases = db.get_databases()
        assert_false(db_name in databases)

  global _INITIALIZED
  _INITIALIZED = False

def test_useradmin_ldap_case_sensitivity(self):
  if is_live_cluster():
    raise SkipTest('HUE-2897: Cannot yet guarantee database is case sensitive')

  done = []
  try:
    # Set up LDAP tests to use a LdapTestConnection instead of an actual LDAP connection
    ldap_access.CACHED_LDAP_CONN = LdapTestConnection()

    # Test import case sensitivity
    done.append(desktop.conf.LDAP.IGNORE_USERNAME_CASE.set_for_testing(True))
    import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Lårry', sync_groups=False, import_by_dn=False)
    assert_false(User.objects.filter(username='******').exists())
    assert_true(User.objects.filter(username='******').exists())

    # Test lower case
    User.objects.filter(username__iexact='Rock').delete()
    import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Rock', sync_groups=False, import_by_dn=False)
    assert_false(User.objects.filter(username='******').exists())
    assert_true(User.objects.filter(username='******').exists())

    done.append(desktop.conf.LDAP.FORCE_USERNAME_LOWERCASE.set_for_testing(True))
    import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Rock', sync_groups=False, import_by_dn=False)
    assert_false(User.objects.filter(username='******').exists())
    assert_true(User.objects.filter(username='******').exists())

    User.objects.filter(username='******').delete()
    import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Rock', sync_groups=False, import_by_dn=False)
    assert_false(User.objects.filter(username='******').exists())
    assert_true(User.objects.filter(username='******').exists())
  finally:
    for finish in done:
      finish()

def get_shared_server(cls, username='******', language=settings.LANGUAGE_CODE):
  callback = lambda: None

  service_lock.acquire()

  if not SqoopServerProvider.is_running:
    # Setup
    cluster = pseudo_hdfs4.shared_cluster()

    if is_live_cluster():
      finish = ()
    else:
      LOG.info('\nStarting a Mini Sqoop. Requires "tools/jenkins/jenkins.sh" to be previously ran.\n')
      finish = (
        SERVER_URL.set_for_testing("http://%s:%s/sqoop" % (socket.getfqdn(), SqoopServerProvider.TEST_PORT)),
      )

      p = cls.start(cluster)

      def kill():
        with open(os.path.join(cluster._tmpdir, 'sqoop/sqoop.pid'), 'r') as pidfile:
          pid = pidfile.read()
          LOG.info("Killing Sqoop server (pid %s)." % pid)
          os.kill(int(pid), 9)
          p.wait()
      atexit.register(kill)

    start = time.time()
    started = False
    sleep = 0.01

    client = SqoopClient(SERVER_URL.get(), username, language)

    while not started and time.time() - start < 60.0:
      try:
        LOG.info('Check Sqoop status...')
        version = client.get_version()
        if version:
          started = True
          break
        time.sleep(sleep)
        sleep *= 2
      except Exception, e:
        LOG.info('Sqoop server not started yet: %s' % e)
        time.sleep(sleep)
        sleep *= 2

    if not started:
      service_lock.release()
      raise Exception("Sqoop server took too long to come up.")

    def shutdown():
      for f in finish:
        f()
      cluster.stop()
    callback = shutdown

    SqoopServerProvider.is_running = True

def test_fetch_result_abbreviated(self):
  if not is_live_cluster():
    raise SkipTest

  # Create session so that session object is saved to DB for server URL lookup
  session = self.api.create_session(lang='impala')

  try:
    # Assert that abbreviated rows returned (e.g. - 1.00K) still returns actual rows
    statement = "SELECT * FROM web_logs;"
    doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

    self.client.post(reverse('notebook:fetch_result_data'),
                     {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'rows': 100, 'startOver': 'false'})

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_equal(1000, data['result']['rows'])
  finally:
    self.api.close_session(session)

def test_useradmin_ldap_force_uppercase(self):
  if is_live_cluster():
    raise SkipTest('HUE-2897: Skipping because the DB may not be case sensitive')

  done = []
  # Set to nonsensical value just to force new config usage.
  # Should continue to use cached connection.
  done.append(desktop.conf.LDAP.LDAP_SERVERS.set_for_testing(get_nonsense_config()))

  try:
    # Set up LDAP tests to use a LdapTestConnection instead of an actual LDAP connection
    ldap_access.CACHED_LDAP_CONN = LdapTestConnection()

    # Test upper case
    User.objects.filter(username__iexact='Rock').delete()
    done.append(desktop.conf.LDAP.IGNORE_USERNAME_CASE.set_for_testing(False))
    done.append(desktop.conf.LDAP.FORCE_USERNAME_LOWERCASE.set_for_testing(False))
    done.append(desktop.conf.LDAP.FORCE_USERNAME_UPPERCASE.set_for_testing(True))
    import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Rock', sync_groups=False, import_by_dn=False)
    assert_true(User.objects.filter(username='******').exists())
  finally:
    for finish in done:
      finish()

def setup_class(cls):
  if not is_live_cluster():
    raise SkipTest()

  cls.client = make_logged_in_client(username="******", is_superuser=False)
  cls.user = User.objects.get(username="******")
  add_to_group("test")
  grant_access("test", "test", "libzookeeper")

  # Create a ZKNode namespace
  cls.namespace = "TestWithZooKeeper"

  # Create temporary test directory and file with contents
  cls.local_directory = tempfile.mkdtemp()

  # Create subdirectory
  cls.subdir_name = "subdir"
  subdir_path = "%s/%s" % (cls.local_directory, cls.subdir_name)
  os.mkdir(subdir_path, 0755)

  # Create file
  cls.filename = "test.txt"
  file_path = "%s/%s" % (subdir_path, cls.filename)
  cls.file_contents = "This is a test"
  file = open(file_path, "w+")
  file.write(cls.file_contents)
  file.close()

def test_fetch_result_size_mr(self):
  if not is_live_cluster():  # Mini-cluster does not have JHS
    raise SkipTest

  # Assert that a query with no job will return no rows or size
  statement = "SELECT 'hello world';"

  settings = [
    {'key': 'hive.execution.engine', 'value': 'mr'}
  ]
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0)

  response = self.client.post(reverse('notebook:fetch_result_size'),
                              {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

  data = json.loads(response.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true('size' in data['result'])
  assert_equal(None, data['result']['rows'])
  assert_equal(None, data['result']['size'])

  # Assert that a query with map & reduce task returns rows
  statement = "SELECT DISTINCT code FROM sample_07;"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

  response = self.client.post(reverse('notebook:fetch_result_size'),
                              {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

  data = json.loads(response.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true('size' in data['result'])
  assert_equal(823, data['result']['rows'])
  assert_true(data['result']['size'] > 0, data['result'])

  # Assert that a query with multiple jobs returns rows
  statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

  response = self.client.post(reverse('notebook:fetch_result_size'),
                              {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

  data = json.loads(response.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_equal(23, data['result']['rows'])
  assert_true(data['result']['size'] > 0, data['result'])

def get_shared_beeswax_server(db_name='default'):
  global _SHARED_HIVE_SERVER
  global _SHARED_HIVE_SERVER_CLOSER

  if _SHARED_HIVE_SERVER is None:
    cluster = pseudo_hdfs4.shared_cluster()

    if is_live_cluster():
      def s():
        pass
    else:
      s = _start_mini_hs2(cluster)

    start = time.time()
    started = False
    sleep = 1

    make_logged_in_client()
    user = User.objects.get(username='******')
    query_server = get_query_server_config()
    db = dbms.get(user, query_server)

    while not started and time.time() - start <= 30:
      try:
        db.open_session(user)
        started = True
        break
      except Exception, e:
        LOG.info('HiveServer2 server could not be found after: %s' % e)
        time.sleep(sleep)

    if not started:
      raise Exception("Server took too long to come up.")

    _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s

def test_load_data(self):
  """
  Test load data queries.
  These require Hadoop, because they ask the metastore about whether a table is partitioned.
  """
  if is_live_cluster():
    raise SkipTest('HUE-2902: Test is not re-entrant')

  # Check that view works
  resp = self.client.get("/metastore/table/%s/test/load" % self.db_name, follow=True)
  assert_true('Path' in resp.content)

  data_path = '%(prefix)s/tmp/foo' % {'prefix': self.cluster.fs_prefix}

  # Try the submission
  self.client.post("/metastore/table/%s/test/load" % self.db_name, {'path': data_path, 'overwrite': True}, follow=True)
  query = QueryHistory.objects.latest('id')
  assert_equal_mod_whitespace("LOAD DATA INPATH '%(data_path)s' OVERWRITE INTO TABLE `%(db)s`.`test`" % {'data_path': data_path, 'db': self.db_name}, query.query)

  resp = self.client.post("/metastore/table/%s/test/load" % self.db_name, {'path': data_path, 'overwrite': False}, follow=True)
  query = QueryHistory.objects.latest('id')
  assert_equal_mod_whitespace("LOAD DATA INPATH '%(data_path)s' INTO TABLE `%(db)s`.`test`" % {'data_path': data_path, 'db': self.db_name}, query.query)

  # Try it with partitions
  resp = self.client.post("/metastore/table/%s/test_partitions/load" % self.db_name, {'path': data_path, 'partition_0': "alpha", 'partition_1': "beta"}, follow=True)
  query = QueryHistory.objects.latest('id')
  assert_equal_mod_whitespace(query.query, "LOAD DATA INPATH '%(data_path)s' INTO TABLE `%(db)s`.`test_partitions` PARTITION (baz='alpha', boom='beta')" % {'data_path': data_path, 'db': self.db_name})

def test_seek_across_blocks(self):
  """Makes a file with a lot of blocks, seeks around"""
  if is_live_cluster():
    raise SkipTest('HUE-2946: Skipping because requires more memory')

  fs = self.cluster.fs
  test_file = self.prefix + "/fortest-blocks.txt"
  fs.create(test_file, replication=1, blocksize=1024**2)
  f = fs.open(test_file, "w")
  try:
    data = "abcdefghijklmnopqrstuvwxyz" * 30 * 1024**2
    f.write(data)
    f.close()

    for i in xrange(1, 10):
      f = fs.open(test_file, "r")

      for j in xrange(1, 100):
        offset = random.randint(0, len(data) - 1)
        f.seek(offset, os.SEEK_SET)
        assert_equals(data[offset:offset + 50], f.read(50))
      f.close()
  finally:
    fs.remove(test_file)

def test_list_tables(self):
  if not is_live_cluster():
    raise SkipTest('HUE-2910: Skipping because test is not reentrant')

  for cluster in HbaseApi(self.user).getClusters():
    resp = self.client.post('/hbase/api/getTableList/' + cluster['name'])
    content = json.loads(resp.content)
    assert_true('data' in content, content)

def setup_class(cls):
  if not is_live_cluster():
    raise SkipTest()

  cls.cluster = pseudo_hdfs4.shared_cluster()
  cls.client, callback = cls.get_shared_server()
  cls.shutdown = [callback]

def test_list_tables(self):
  if not is_live_cluster():
    raise SkipTest('HUE-2910: Skipping because test is not reentrant')

  resp = self.client.post('/hbase/api/getTableList/Cluster')
  content = json.loads(resp.content)
  assert_true('data' in content, content)

def setup_class(cls):
  if not is_live_cluster():
    raise SkipTest()

  cls.client = make_logged_in_client(username='******', is_superuser=False)
  cls.user = User.objects.get(username='******')
  add_to_group('test')
  grant_access("test", "test", "libzookeeper")

def test_oozie_status(self):
  user = getpass.getuser()

  assert_equal(get_oozie(user).get_oozie_status()['systemMode'], 'NORMAL')

  if is_live_cluster():
    assert_true(self.cluster.fs.exists('/user/oozie/share/lib'))
  else:
    assert_true(self.cluster.fs.exists('/user/%(user)s/share/lib' % {'user': user}))

def setup_class(cls):
  cls.client = make_logged_in_client(username='******', is_superuser=False)
  cls.user = User.objects.get(username='******')
  cls.user = rewrite_user(cls.user)
  add_to_group('test')
  grant_access("test", "test", "metadata")

  if not is_live_cluster() or not has_navigator(cls.user):
    raise SkipTest

def setup_class(cls):
  if not is_live_cluster():
    raise SkipTest('These tests can only run on a live cluster')

  cls.client = make_logged_in_client(username='******', is_superuser=False)
  cls.user = User.objects.get(username='******')
  add_to_group('test')
  grant_access("test", "test", "indexer")

def test_read_partitions(self):
  if not is_live_cluster():
    raise SkipTest

  partition_spec = "baz='baz_one',boom='boom_two'"
  response = self.client.get("/metastore/table/%s/test_partitions/partitions/%s/read" % (self.db_name, partition_spec), follow=True)

  response = self.client.get(reverse("beeswax:api_watch_query_refresh_json", kwargs={'id': response.context['query'].id}), follow=True)
  response = wait_for_query_to_finish(self.client, response, max=30.0)
  results = fetch_query_result_data(self.client, response)

  assert_true(len(results['results']) > 0, results)

def test_browse_partition(self):
  partition_spec = "baz='baz_one',boom='boom_two'"
  response = self.client.get("/metastore/table/%s/test_partitions/partitions/%s/browse" % (self.db_name, partition_spec), follow=True)
  if is_live_cluster():
    path = "/user/hive/warehouse/%s.db/test_partitions/baz=baz_one/boom=boom_two" % self.db_name
  else:
    path = "/user/hive/warehouse/test_partitions/baz=baz_one/boom=boom_two"
  filebrowser_path = urllib.unquote(reverse("filebrowser.views.view", kwargs={"path": path}))
  assert_equal(response.request["PATH_INFO"], filebrowser_path)

def setup_class(cls):
  if not is_live_cluster():
    raise SkipTest()

  cls.client = make_logged_in_client(username='******', is_superuser=False)
  cls.user = User.objects.get(username='******')
  add_to_group('test')
  grant_access("test", "test", "libsentry")

  cls.db = SentryClient(HOSTNAME.get(), PORT.get(), 'test')

def setup_class(cls):
  if not is_live_cluster() or not cls.is_navigator_enabled():
    raise SkipTest

  cls.client = make_logged_in_client(username='******', is_superuser=False)
  cls.user = User.objects.get(username='******')
  add_to_group('test')
  grant_access("test", "test", "metadata")
  grant_access("test", "test", "navigator")

  cls.api = NavigatorApi()

def test_read_partitions(self):
  if not is_live_cluster():
    raise SkipTest

  partition_spec = "baz='baz_one',boom=12345"
  response = self.client.get("/metastore/table/%s/test_partitions/partitions/%s/read" % (self.db_name, partition_spec), follow=True)

  response = self.client.get(reverse("beeswax:api_watch_query_refresh_json", kwargs={'id': response.context[0]['query'].id}), follow=True)
  response = wait_for_query_to_finish(self.client, response, max=30.0)
  results = fetch_query_result_data(self.client, response)

  assert_true(len(results['results']) > 0, results)

def setup_class(cls):
  if not is_live_cluster() or not has_optimizer():
    raise SkipTest

  cls.client = make_logged_in_client(username='******', is_superuser=False)
  cls.user = User.objects.get(username='******')
  cls.user = rewrite_user(cls.user)
  add_to_group('test')
  grant_access("test", "test", "metadata")
  grant_access("test", "test", "optimizer")

  cls.api = OptimizerApi()

def test_describe_partitioned_table_with_limit(self):
  if is_live_cluster():
    raise SkipTest('HUE-2902: Test is not re-entrant')

  # Limit to 90
  finish = BROWSE_PARTITIONED_TABLE_LIMIT.set_for_testing("90")
  try:
    response = self.client.get("/metastore/table/%s/test_partitions" % self.db_name)
    assert_true("0x%x" % 89 in response.content, response.content)
    assert_false("0x%x" % 90 in response.content, response.content)
  finally:
    finish()

def test_describe_view(self):
  if is_live_cluster():
    raise SkipTest('HUE-2902: Test is not re-entrant')

  resp = self.client.get('/metastore/table/%s/myview' % self.db_name)
  assert_equal(None, resp.context['sample'])
  assert_true(resp.context['table'].is_view)
  assert_true("View" in resp.content)
  assert_true("Drop View" in resp.content)
  # Breadcrumbs
  assert_true(self.db_name in resp.content)
  assert_true("myview" in resp.content)

def setup_class(cls):
  if not is_live_cluster() or not is_navigator_enabled():
    raise SkipTest

  cls.client = make_logged_in_client(username='******', is_superuser=False)
  cls.user = User.objects.get(username='******')
  add_to_group('test')
  grant_access("test", "test", "metadata")
  grant_access("test", "test", "navigator")

  cls.api = NavigatorApi()

def test_useradmin_ldap_case_sensitivity():
  if is_live_cluster():
    raise SkipTest('HUE-2897: Cannot yet guarantee database is case sensitive')

  done = []
  try:
    reset_all_users()
    reset_all_groups()

    # Set up LDAP tests to use a LdapTestConnection instead of an actual LDAP connection
    ldap_access.CACHED_LDAP_CONN = LdapTestConnection()

    # Test import case sensitivity
    done.append(desktop.conf.LDAP.IGNORE_USERNAME_CASE.set_for_testing(True))
    import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Lårry', sync_groups=False, import_by_dn=False)
    assert_false(User.objects.filter(username='******').exists())
    assert_true(User.objects.filter(username='******').exists())

    # Test lower case
    User.objects.filter(username__iexact='Rock').delete()
    import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Rock', sync_groups=False, import_by_dn=False)
    assert_false(User.objects.filter(username='******').exists())
    assert_true(User.objects.filter(username='******').exists())

    done.append(desktop.conf.LDAP.FORCE_USERNAME_LOWERCASE.set_for_testing(True))
    import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Rock', sync_groups=False, import_by_dn=False)
    assert_false(User.objects.filter(username='******').exists())
    assert_true(User.objects.filter(username='******').exists())

    User.objects.filter(username='******').delete()
    import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Rock', sync_groups=False, import_by_dn=False)
    assert_false(User.objects.filter(username='******').exists())
    assert_true(User.objects.filter(username='******').exists())
  finally:
    for finish in done:
      finish()

def test_add_ldap_users_case_sensitivity(self):
  if is_live_cluster():
    raise SkipTest('HUE-2897: Cannot yet guarantee database is case sensitive')

  done = []
  try:
    URL = reverse(add_ldap_users)

    reset_all_users()
    reset_all_groups()

    # Set up LDAP tests to use a LdapTestConnection instead of an actual LDAP connection
    ldap_access.CACHED_LDAP_CONN = LdapTestConnection()

    c = make_logged_in_client('test', is_superuser=True)

    # Test ignore case
    done.append(desktop.conf.LDAP.IGNORE_USERNAME_CASE.set_for_testing(True))
    User.objects.filter(username='******').delete()
    assert_false(User.objects.filter(username='******').exists())
    assert_false(User.objects.filter(username='******').exists())
    response = c.post(URL, dict(username_pattern='Moe', password1='test', password2='test'))
    assert_true('Location' in response, response)
    assert_true('/useradmin/users' in response['Location'], response)
    assert_false(User.objects.filter(username='******').exists())
    assert_true(User.objects.filter(username='******').exists())

    # Test lower case
    done.append(desktop.conf.LDAP.FORCE_USERNAME_LOWERCASE.set_for_testing(True))
    User.objects.filter(username__iexact='Rock').delete()
    assert_false(User.objects.filter(username='******').exists())
    assert_false(User.objects.filter(username='******').exists())
    response = c.post(URL, dict(username_pattern='rock', password1='test', password2='test'))
    assert_true('Location' in response, response)
    assert_true('/useradmin/users' in response['Location'], response)
    assert_false(User.objects.filter(username='******').exists())
    assert_true(User.objects.filter(username='******').exists())
  finally:
    for finish in done:
      finish()

def test_fetch_result_size_spark(self):
  if not is_live_cluster() or not is_hive_on_spark():
    raise SkipTest

  # TODO: Add session cleanup here so we don't have orphan spark sessions

  # Assert that a query with no job will return no rows or size
  statement = "SELECT 'hello world';"

  settings = [
    {'key': 'hive.execution.engine', 'value': 'spark'}
  ]
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0)

  response = self.client.post(reverse('notebook:fetch_result_size'),
                              {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

  data = json.loads(response.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true('size' in data['result'])
  assert_equal(None, data['result']['rows'])
  assert_equal(None, data['result']['size'])

  # Assert that a query that runs a job will return rows and size
  statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

  response = self.client.post(reverse('notebook:fetch_result_size'),
                              {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

  data = json.loads(response.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true('size' in data['result'])
  assert_true(data['result']['rows'] > 0)
  assert_true(data['result']['size'] > 0)

def setup_class(cls):
  cls.finish = []

  if not is_live_cluster():
    raise SkipTest

  cls.client = make_logged_in_client()
  cls.user = User.objects.get(username='******')
  add_to_group('test')
  cls.db = dbms.get(cls.user, get_query_server_config(name='impala'))
  cls.DATABASE = get_db_prefix(name='impala')

  hql = """
    USE default;
    DROP TABLE IF EXISTS %(db)s.tweets;
    DROP DATABASE IF EXISTS %(db)s CASCADE;
    CREATE DATABASE %(db)s;
    USE %(db)s;
  """ % {'db': cls.DATABASE}

  resp = _make_query(cls.client, hql, database='default', local=False, server_name='impala')
  resp = wait_for_query_to_finish(cls.client, resp, max=180.0)
  content = json.loads(resp.content)
  assert_true(content['status'] == 0, resp.content)

  hql = """
    CREATE TABLE tweets (row_num INTEGER, id_str STRING, text STRING) STORED AS PARQUET;
    INSERT INTO TABLE tweets VALUES (1, "531091827395682000", "My dad looks younger than costa");
    INSERT INTO TABLE tweets VALUES (2, "531091827781550000", "There is a thin line between your partner being vengeful and you reaping the consequences of your bad actions towards your partner.");
    INSERT INTO TABLE tweets VALUES (3, "531091827768979000", "@Mustang_Sally83 and they need to get into you :))))");
    INSERT INTO TABLE tweets VALUES (4, "531091827114668000", "@RachelZJohnson thank you rach!xxx");
    INSERT INTO TABLE tweets VALUES (5, "531091827949309000", "i think @WWERollins was robbed of the IC title match this week on RAW also i wonder if he will get a rematch i hope so @WWE");
  """

  resp = _make_query(cls.client, hql, database=cls.DATABASE, local=False, server_name='impala')
  resp = wait_for_query_to_finish(cls.client, resp, max=180.0)
  content = json.loads(resp.content)
  assert_true(content['status'] == 0, resp.content)

def setup_class(cls):
  if not is_live_cluster():
    raise SkipTest('TestSentryWithHadoop requires a live cluster.')

  if not os.path.exists(os.path.join(SENTRY_CONF_DIR.get(), 'sentry-site.xml')):
    raise SkipTest('Could not find sentry-site.xml, skipping sentry tests')

  cls.client = make_logged_in_client(username='******', is_superuser=False)
  cls.user = User.objects.get(username='******')
  add_to_group('test')
  grant_access("test", "test", "libsentry")

  cls.config_path = os.path.join(SENTRY_CONF_DIR.get(), 'sentry-site.xml')

def setup_class(cls):
  if not is_live_cluster():
    raise SkipTest('Sentry tests require a live sentry server')

  if not os.path.exists(os.path.join(SENTRY_CONF_DIR.get(), 'sentry-site.xml')):
    raise SkipTest('Could not find sentry-site.xml, skipping sentry tests')

  cls.client = make_logged_in_client(username='******', is_superuser=False)
  cls.user = User.objects.get(username='******')
  add_to_group('test')
  grant_access("test", "test", "libsentry")

  cls.db = SentryClient(HOSTNAME.get(), PORT.get(), 'test')

def setup_class(cls):
  if not is_live_cluster():
    raise SkipTest()

  cls.client = make_logged_in_client(username='******', is_superuser=False)
  cls.user = User.objects.get(username='******')
  add_to_group('test')
  grant_access("test", "test", "indexer")

  resp = cls.client.post(reverse('indexer:install_examples'))
  content = json.loads(resp.content)
  assert_equal(content.get('status'), 0)

def get_shared_beeswax_server(db_name='default'):
  global _SHARED_HIVE_SERVER
  global _SHARED_HIVE_SERVER_CLOSER

  with _SHARED_HIVE_SERVER_LOCK:
    if _SHARED_HIVE_SERVER is None:
      cluster = pseudo_hdfs4.shared_cluster()

      if is_live_cluster():
        def s():
          pass
      else:
        s = _start_mini_hs2(cluster)

      start = time.time()
      started = False
      sleep = 1

      make_logged_in_client()
      user = User.objects.get(username='******')
      query_server = get_query_server_config()
      db = dbms.get(user, query_server)

      while not started and time.time() - start <= 60:
        try:
          db.open_session(user)
        except StructuredThriftTransportException as e:
          LOG.exception('Failed to open Hive Server session')

          # Don't loop if we had an authentication error.
          if 'Bad status: 3' in e.message:
            raise
        except Exception as e:
          LOG.exception('Failed to open Hive Server session')
        else:
          started = True
          break

        time.sleep(sleep)
        sleep *= 2

      if not started:
        raise Exception("Server took too long to come up.")

      _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s

  return _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER

def get_shared_beeswax_server(db_name='default'):
  global _SHARED_HIVE_SERVER
  global _SHARED_HIVE_SERVER_CLOSER

  with _SHARED_HIVE_SERVER_LOCK:
    if _SHARED_HIVE_SERVER is None:
      cluster = pseudo_hdfs4.shared_cluster()

      if is_live_cluster():
        def s():
          pass
      else:
        s = _start_mini_hs2(cluster)

      start = time.time()
      started = False
      sleep = 1

      make_logged_in_client()
      user = User.objects.get(username='******')
      query_server = get_query_server_config()
      db = dbms.get(user, query_server)

      while not started and time.time() - start <= 60:
        try:
          db.open_session(user)
        except StructuredThriftTransportException, e:
          LOG.exception('Failed to open Hive Server session')

          # Don't loop if we had an authentication error.
          if 'Bad status: 3' in e.message:
            raise
        except Exception, e:
          LOG.exception('Failed to open Hive Server session')
        else:
          started = True
          break

        time.sleep(sleep)
        sleep *= 2

      if not started:
        raise Exception("Server took too long to come up.")

      _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s

def test_fetch_result_size_impala(self):
  if not is_live_cluster():
    raise SkipTest

  # Create session so that session object is saved to DB for server URL lookup
  session = self.api.create_session(lang='impala')

  try:
    # Assert that a query that runs a job will return rows
    statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
    doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

    self.client.post(reverse('notebook:fetch_result_data'),
                     {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'rows': 100, 'startOver': 'false'})

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_true('size' in data['result'])
    assert_equal(23, data['result']['rows'])
    assert_equal(None, data['result']['size'])

    # Assert that selecting all from partitioned table works
    statement = "SELECT * FROM web_logs;"
    doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

    self.client.post(reverse('notebook:fetch_result_data'),
                     {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'rows': 100, 'startOver': 'false'})

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_equal(1000, data['result']['rows'])
  finally:
    self.api.close_session(session)

def test_show_tables(self):
  if is_live_cluster():
    raise SkipTest('HUE-2902: Test is not re-entrant')

  # Set max limit to 3
  resets = [HS2_GET_TABLES_MAX.set_for_testing(3)]

  try:
    hql = """
      CREATE TABLE test_show_tables_1 (a int) COMMENT 'Test for show_tables';
      CREATE TABLE test_show_tables_2 (a int) COMMENT 'Test for show_tables';
      CREATE TABLE test_show_tables_3 (a int) COMMENT 'Test for show_tables';
    """
    resp = _make_query(self.client, hql, database=self.db_name)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Table should have been created
    response = self.client.get("/metastore/tables/%s?filter=show_tables" % self.db_name)
    assert_equal(200, response.status_code)
    assert_equal(len(response.context['tables']), 3)
    assert_equal(response.context['has_metadata'], True)
    assert_true('name' in response.context["tables"][0])
    assert_true('comment' in response.context["tables"][0])
    assert_true('type' in response.context["tables"][0])

    hql = """
      CREATE TABLE test_show_tables_4 (a int) COMMENT 'Test for show_tables';
      CREATE TABLE test_show_tables_5 (a int) COMMENT 'Test for show_tables';
    """
    resp = _make_query(self.client, hql, database=self.db_name)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Table should have been created
    response = self.client.get("/metastore/tables/%s?filter=show_tables" % self.db_name)
    assert_equal(200, response.status_code)
    assert_equal(len(response.context['tables']), 5)
    assert_equal(response.context['has_metadata'], False)
    assert_true('name' in response.context["tables"][0])
    assert_false('comment' in response.context["tables"][0], response.context["tables"])
    assert_false('type' in response.context["tables"][0])
  finally:
    for reset in resets:
      reset()

def test_job_single_logs(self):
  if not is_live_cluster():
    raise SkipTest

  response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s/single_logs?format=json' % (TestJobBrowserWithHadoop.hadoop_job_id))
  json_resp = json.loads(response.content)

  assert_true('logs' in json_resp)
  assert_true('Log Type: stdout' in json_resp['logs'][1])
  assert_true('Log Type: stderr' in json_resp['logs'][2])
  assert_true('Log Type: syslog' in json_resp['logs'][3])

  # Verify that syslog contains log information for a completed oozie job
  match = re.search(r"^Log Type: syslog(.+)Log Length: (?P<log_length>\d+)(.+)$", json_resp['logs'][3], re.DOTALL)
  assert_true(match and match.group(2), 'Failed to parse log length from syslog')
  log_length = match.group(2)
  assert_true(int(log_length) > 0, 'Log Length is 0, expected content in syslog.')

def test_drop_multi_tables(self):
  if is_live_cluster():
    raise SkipTest('HUE-2902: Test is not re-entrant')

  hql = """
    CREATE TABLE test_drop_1 (a int);
    CREATE TABLE test_drop_2 (a int);
    CREATE TABLE test_drop_3 (a int);
  """
  resp = _make_query(self.client, hql, database=self.db_name)
  resp = wait_for_query_to_finish(self.client, resp, max=30.0)

  # Drop them
  resp = self.client.get('/metastore/tables/drop/%s' % self.db_name, follow=True)
  assert_true('want to delete' in resp.content, resp.content)

  resp = self.client.post('/metastore/tables/drop/%s' % self.db_name, {u'table_selection': [u'test_drop_1', u'test_drop_2', u'test_drop_3']})
  assert_equal(resp.status_code, 302)

def test_end_to_end(self):
  if not is_live_cluster():  # Skipping as requires morphlines libs to be setup
    raise SkipTest()

  cluster = shared_cluster()
  fs = cluster.fs
  make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)
  user = User.objects.get(username="******")
  collection_name = "test_collection"
  indexer = MorphlineIndexer("test", fs=fs, jt=cluster.jt, solr_client=self.solr_client)
  input_loc = "/tmp/test.csv"

  # upload the test file to hdfs
  fs.create(input_loc, data=TestIndexer.simpleCSVString, overwrite=True)

  # open a filestream for the file on hdfs
  stream = fs.open(input_loc)

  # guess the format of the file
  file_type_format = indexer.guess_format({'file': {"stream": stream, "name": "test.csv"}})

  field_types = indexer.guess_field_types({"file": {"stream": stream, "name": "test.csv"}, "format": file_type_format})

  format_ = field_types.copy()
  format_['format'] = file_type_format

  # find a field name available to use for the record's uuid
  unique_field = indexer.get_unique_field(format_)
  is_unique_generated = indexer.is_unique_generated(format_)

  # generate morphline
  morphline = indexer.generate_morphline_config(collection_name, format_, unique_field)

  schema_fields = indexer.get_kept_field_list(format_['columns'])
  if is_unique_generated:
    schema_fields += [{"name": unique_field, "type": "string"}]

  # create the collection from the specified fields
  collection_manager = CollectionManagerController("test")
  if collection_manager.collection_exists(collection_name):
    collection_manager.delete_collection(collection_name, None)
  collection_manager.create_collection(collection_name, schema_fields, unique_key_field=unique_field)

  # index the file
  indexer.run_morphline(MockedRequest(user=user, fs=cluster.fs, jt=cluster.jt), collection_name, morphline, input_loc)

def test_basic_flow(self):
  if is_live_cluster():
    raise SkipTest('HUE-2902: Test is not re-entrant')

  # Default database should exist
  response = self.client.get("/metastore/databases")
  assert_true(self.db_name in response.context["databases"])

  # Table should have been created
  response = self.client.get("/metastore/tables/")
  assert_equal(200, response.status_code)

  # Switch databases
  response = self.client.get("/metastore/tables/%s" % self.db_name)
  assert_true('name' in response.context["tables"][0])
  assert_true("test" in response.context["table_names"])

  # Should default to "default" database
  response = self.client.get("/metastore/tables/not_there")
  assert_equal(200, response.status_code)

  # And have detail
  response = self.client.get("/metastore/table/%s/test" % self.db_name)
  assert_true("foo" in response.content)
  assert_true("SerDe Library" in response.content, response.content)

  # Remember the number of history items. Use a generic fragment 'test' to pass verification.
  history_cnt = verify_history(self.client, fragment='test')

  # Show table data.
  response = self.client.get("/metastore/table/%s/test/read" % self.db_name, follow=True)
  response = self.client.get(reverse("beeswax:api_watch_query_refresh_json", kwargs={'id': response.context['query'].id}), follow=True)
  response = wait_for_query_to_finish(self.client, response, max=30.0)
  # Note that it may not return all rows at once. But we expect at least 10.
  results = fetch_query_result_data(self.client, response)
  assert_true(len(results['results']) > 0)
  # This should NOT go into the query history.
  assert_equal(verify_history(self.client, fragment='test'), history_cnt, 'Implicit queries should not be saved in the history')

def teardown_class(cls):
  if is_live_cluster():
    # Delete test DB and tables
    client = make_logged_in_client()
    user = User.objects.get(username='******')
    query_server = get_query_server_config()
    db = dbms.get(user, query_server)

    for db_name in [cls.db_name, '%s_other' % cls.db_name]:
      tables = db.get_tables(database=db_name)
      for table in tables:
        make_query(client, 'DROP TABLE IF EXISTS `%(db)s`.`%(table)s`' % {'db': db_name, 'table': table}, wait=True)
      make_query(client, 'DROP VIEW IF EXISTS `%(db)s`.`myview`' % {'db': db_name}, wait=True)
      make_query(client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db_name}, wait=True)

    # Check the cleanup
    databases = db.get_databases()
    assert_false(cls.db_name in databases)
    assert_false('%(db)s_other' % {'db': cls.db_name} in databases)

def test_fetch_result_size_impala(self):
  if not is_live_cluster():
    raise SkipTest

  # Create session so that session object is saved to DB for server URL lookup
  session = self.api.create_session(lang='impala')

  try:
    # Assert that a query that runs a job will return rows
    statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
    doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

    self.client.post(reverse('notebook:fetch_result_data'),
                     {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'rows': 100, 'startOver': 'false'})

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_true('size' in data['result'])
    assert_equal(23, data['result']['rows'])
    assert_equal(None, data['result']['size'])
  finally:
    self.api.close_session(session)

def test_describe_partitions(self):
  if is_live_cluster():
    raise SkipTest('HUE-2902: Test is not re-entrant')

  response = self.client.get("/metastore/table/%s/test_partitions" % self.db_name)
  assert_true("Show Partitions (2)" in response.content, response.content)

  response = self.client.get("/metastore/table/%s/test_partitions/partitions" % self.db_name, follow=True)
  assert_true("baz_one" in response.content)
  assert_true("boom_two" in response.content)
  assert_true("baz_foo" in response.content)
  assert_true("boom_bar" in response.content)

  # Breadcrumbs
  assert_true(self.db_name in response.content)
  assert_true("test_partitions" in response.content)
  assert_true("partitions" in response.content)

  # Not partitioned
  response = self.client.get("/metastore/table/%s/test/partitions" % self.db_name, follow=True)
  assert_true("is not partitioned." in response.content)

def test_fetch_result_abbreviated(self):
  if not is_live_cluster():
    raise SkipTest

  # Create session so that session object is saved to DB for server URL lookup
  session = self.api.create_session(lang='impala')

  try:
    # Assert that abbreviated rows returned (e.g. - 1.00K) still returns actual rows
    statement = "SELECT * FROM web_logs;"
    doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=5.0)

    self.client.post(reverse('notebook:fetch_result_data'),
                     {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'rows': 100, 'startOver': 'false'})

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_equal(1000, data['result']['rows'])
  finally:
    self.api.close_session(session)

def setup_class(cls):
  if not is_live_cluster() or not search_enabled:
    raise SkipTest

  cls.client = make_logged_in_client(username='******', is_superuser=False)
  cls.user = User.objects.get(username='******')
  add_to_group('test')
  grant_access("test", "test", "libsolr")
  grant_access("test", "test", "search")

  cls.user.is_superuser = True
  cls.user.save()
  resp = cls.client.post(reverse('search:install_examples'))
  content = json.loads(resp.content)
  cls.user.is_superuser = False
  cls.user.save()

  assert_equal(content.get('status'), 0)

def test_submit(self):
  if is_live_cluster():
    raise SkipTest('HUE-2909: Skipping because test is not reentrant')

  script = PigScript.objects.get(id=1100713)
  script_dict = script.dict
  post_data = {
    'id': script.id,
    'name': script_dict['name'],
    'script': script_dict['script'],
    'user': script.owner,
    'parameters': json.dumps(script_dict['parameters']),
    'resources': json.dumps(script_dict['resources']),
    'hadoopProperties': json.dumps(script_dict['hadoopProperties']),
    'submissionVariables': json.dumps([{"name": "output", "value": self.cluster.fs_prefix + '/test_pig_script_submit'}]),
  }

  response = self.c.post(reverse('pig:run'), data=post_data, follow=True)
  job_id = json.loads(response.content)['id']

  self.wait_until_completion(job_id)

def test_fetch_result_size_mr(self):
  if not is_live_cluster():  # Mini-cluster does not have JHS
    raise SkipTest

  # Assert that a query with no job will return no rows or size
  statement = "SELECT 'hello world';"

  settings = [
    {'key': 'hive.execution.engine', 'value': 'mr'}
  ]
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0)

  response = self.client.post(reverse('notebook:fetch_result_size'),
                              {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

  data = json.loads(response.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true('size' in data['result'])
  assert_equal(None, data['result']['rows'])
  assert_equal(None, data['result']['size'])

  # Assert that a query with map & reduce task returns rows
  statement = "SELECT DISTINCT code FROM sample_07;"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

  response = self.client.post(reverse('notebook:fetch_result_size'),
                              {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

  data = json.loads(response.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true(data['result']['rows'] > 0)

  # Assert that a query with multiple jobs returns rows
  statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

  response = self.client.post(reverse('notebook:fetch_result_size'),
                              {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

  data = json.loads(response.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true(data['result']['rows'] > 0)

def test_failed_jobs(self):
  """
  Test jobs with genuine failure, not just killed
  """
  if is_live_cluster():
    raise SkipTest('HUE-2902: Skipping because test is not reentrant')

  # Create design that will fail because the script file isn't there
  INPUT_DIR = TestJobBrowserWithHadoop.home_dir + '/input'
  OUTPUT_DIR = TestJobBrowserWithHadoop.home_dir + '/output'
  try:
    TestJobBrowserWithHadoop.cluster.fs.mkdir(TestJobBrowserWithHadoop.home_dir + "/jt-test_failed_jobs")
    TestJobBrowserWithHadoop.cluster.fs.mkdir(INPUT_DIR)
    TestJobBrowserWithHadoop.cluster.fs.rmtree(OUTPUT_DIR)
  except:
    LOG.exception('failed to teardown tests')

  job_name = '%s_%s' % (TestJobBrowserWithHadoop.username, 'test_failed_jobs-1')
  response = TestJobBrowserWithHadoop.client.post(reverse('jobsub.views.new_design', kwargs={'node_type': 'mapreduce'}), {
      'name': [job_name],
      'description': ['description test_failed_jobs-1'],
      'args': '',
      'jar_path': '/user/hue/oozie/workspaces/lib/hadoop-examples.jar',
      'prepares': '[]',
      'archives': '[]',
      'files': '[]',
      'job_properties': ['[{"name":"mapred.input.dir","value":"%s"},\
          {"name":"mapred.output.dir","value":"%s"},\
          {"name":"mapred.mapper.class","value":"org.apache.hadoop.mapred.lib.dne"},\
          {"name":"mapred.combiner.class","value":"org.apache.hadoop.mapred.lib.dne"},\
          {"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]' % (INPUT_DIR, OUTPUT_DIR)]
    }, HTTP_X_REQUESTED_WITH='XMLHttpRequest', follow=True)

  # Submit the job
  design_dict = json.loads(response.content)
  design_id = int(design_dict['id'])
  response = TestJobBrowserWithHadoop.client.post(reverse('oozie:submit_workflow', args=[design_id]), data={
      u'form-MAX_NUM_FORMS': [u''],
      u'form-INITIAL_FORMS': [u'1'],
      u'form-0-name': [u'REDUCER_SLEEP_TIME'],
      u'form-0-value': [u'1'],
      u'form-TOTAL_FORMS': [u'1']
    }, follow=True)
  oozie_jobid = response.context['oozie_workflow'].id

  job = OozieServerProvider.wait_until_completion(oozie_jobid)
  hadoop_job_id = get_hadoop_job_id(TestJobBrowserWithHadoop.oozie, oozie_jobid, 1)
  hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

  # Select only killed jobs (should be absent)
  # Taking advantage of the fact new jobs are at the top of the list!
  response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/?format=json&state=killed')
  assert_false(hadoop_job_id_short in response.content)

  # Select only failed jobs (should be present)
  # Map job should succeed. Reduce job should fail.
  response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/?format=json&state=failed')
  assert_true(hadoop_job_id_short in response.content)

  raise SkipTest  # Not compatible with MR2

  # The single job view should have the failed task table
  response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s' % (hadoop_job_id,))
  html = response.content.lower()
  assert_true('failed task' in html, html)

  # The map task should say success (empty input)
  map_task_id = TestJobBrowserWithHadoop.hadoop_job_id.replace('job', 'task') + '_m_000000'
  response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, map_task_id))
  assert_true('succeed' in response.content)
  assert_true('failed' not in response.content)

  # The reduce task should say failed
  reduce_task_id = hadoop_job_id.replace('job', 'task') + '_r_000000'
  response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, reduce_task_id))
  assert_true('succeed' not in response.content)
  assert_true('failed' in response.content)

  # Selecting by failed state should include the failed map
  response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s/tasks?taskstate=failed' % (hadoop_job_id,))
  assert_true('r_000000' in response.content)
  assert_true('m_000000' not in response.content)