Example #1
  def test_browse_partition(self):
    if is_live_cluster():
      raise SkipTest('HUE-2902: Test is not re-entrant')

    partition_spec = "baz='baz_one',boom='boom_two'"
    response = self.client.get("/metastore/table/%s/test_partitions/partitions/%s/browse" % (self.db_name, partition_spec), follow=True)
    if is_live_cluster():
      path = '/user/hive/warehouse/%s.db/test_partitions/baz=baz_one/boom=boom_two' % self.db_name
    else:
      path = '/user/hive/warehouse/test_partitions/baz=baz_one/boom=boom_two'
    filebrowser_path = urllib.unquote(reverse("filebrowser.views.view", kwargs={'path': path}))
    assert_equal(response.request['PATH_INFO'], filebrowser_path)
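Nearly every example in this list gates on is_live_cluster(), but the helper itself never appears in the scrape. A minimal sketch of such a guard, assuming an environment-variable-driven flag (the variable name is hypothetical):

  import os

  def is_live_cluster():
    # Sketch only: report whether tests target a real cluster rather than
    # the bundled mini-cluster. The env var name is an assumption.
    return os.environ.get('LIVE_CLUSTER', 'false').lower() == 'true'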
Example #2
  def test_has_write_access_backend(self):
    if is_live_cluster():
      raise SkipTest('HUE-2900: Needs debugging on live cluster')

    client = make_logged_in_client(username='******', groupname='write_access_backend', is_superuser=False)
    grant_access("write_access_backend", "write_access_backend", "metastore")
    grant_access("write_access_backend", "write_access_backend", "beeswax")
    user = User.objects.get(username='******')

    resp = _make_query(client, 'CREATE TABLE test_perm_1 (a int);', database=self.db_name) # Only fails if we were using Sentry and won't allow SELECT to user
    resp = wait_for_query_to_finish(client, resp, max=30.0)

    def check(client, http_codes):
      resp = client.get('/metastore/tables/drop/%s' % self.db_name)
      assert_true(resp.status_code in http_codes, resp.content)

      resp = client.post('/metastore/tables/drop/%s' % self.db_name, {u'table_selection': [u'test_perm_1']})
      assert_true(resp.status_code in http_codes, resp.content)

    check(client, [301]) # Denied

    # Add access
    group, created = Group.objects.get_or_create(name='write_access_backend')
    perm, created = HuePermission.objects.get_or_create(app='metastore', action='write')
    GroupPermission.objects.get_or_create(group=group, hue_permission=perm)

    check(client, [200, 302]) # Ok
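Note how the test first calls grant_access() for each app and later wires Group, HuePermission and GroupPermission by hand to add write access. A sketch of what a grant_access helper plausibly does, inferred from that manual wiring (the module path and the 'access' action name are assumptions):

  from django.contrib.auth.models import Group
  # Assumed location of Hue's models, as used in the test above:
  # from useradmin.models import HuePermission, GroupPermission

  def grant_access(username, groupname, appname):
    # Sketch only: mirror the manual wiring done at the end of the test,
    # but for the default 'access' action instead of 'write'.
    group, _ = Group.objects.get_or_create(name=groupname)
    perm, _ = HuePermission.objects.get_or_create(app=appname, action='access')
    GroupPermission.objects.get_or_create(group=group, hue_permission=perm)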
Example #3
  def teardown_class(cls):
    if is_live_cluster():
      # Delete test DB and tables
      query_server = get_query_server_config()
      client = make_logged_in_client()
      user = User.objects.get(username='******')

      db = dbms.get(user, query_server)

      # Kill Spark context if running
      if is_hive_on_spark() and cluster.is_yarn():
        # TODO: We should clean up the running Hive on Spark job here
        pass

      for db_name in [cls.db_name, '%s_other' % cls.db_name]:
        databases = db.get_databases()

        if db_name in databases:
          tables = db.get_tables(database=db_name)
          for table in tables:
            make_query(client, 'DROP TABLE IF EXISTS `%(db)s`.`%(table)s`' % {'db': db_name, 'table': table}, wait=True)
          make_query(client, 'DROP VIEW IF EXISTS `%(db)s`.`myview`' % {'db': db_name}, wait=True)
          make_query(client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db_name}, wait=True)

          # Check the cleanup
          databases = db.get_databases()
          assert_false(db_name in databases)

      global _INITIALIZED
      _INITIALIZED = False
Example #4
  def test_useradmin_ldap_case_sensitivity(self):
    if is_live_cluster():
      raise SkipTest('HUE-2897: Cannot yet guarantee database is case sensitive')

    done = []
    try:
      # Set up LDAP tests to use a LdapTestConnection instead of an actual LDAP connection
      ldap_access.CACHED_LDAP_CONN = LdapTestConnection()

      # Test import case sensitivity
      done.append(desktop.conf.LDAP.IGNORE_USERNAME_CASE.set_for_testing(True))
      import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Lårry', sync_groups=False, import_by_dn=False)
      assert_false(User.objects.filter(username='******').exists())
      assert_true(User.objects.filter(username='******').exists())

      # Test lower case
      User.objects.filter(username__iexact='Rock').delete()
      import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Rock', sync_groups=False, import_by_dn=False)
      assert_false(User.objects.filter(username='******').exists())
      assert_true(User.objects.filter(username='******').exists())

      done.append(desktop.conf.LDAP.FORCE_USERNAME_LOWERCASE.set_for_testing(True))

      import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Rock', sync_groups=False, import_by_dn=False)
      assert_false(User.objects.filter(username='******').exists())
      assert_true(User.objects.filter(username='******').exists())

      User.objects.filter(username='******').delete()
      import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Rock', sync_groups=False, import_by_dn=False)
      assert_false(User.objects.filter(username='******').exists())
      assert_true(User.objects.filter(username='******').exists())
    finally:
      for finish in done:
        finish()
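Examples #4, #7 and #26 all push the return value of set_for_testing() onto a done list and call each entry in a finally block. A minimal sketch of that contract, with hypothetical names:

  class ConfigEntry(object):
    # Sketch of a config entry whose value tests can override temporarily.
    def __init__(self, value):
      self.value = value

    def set_for_testing(self, new_value):
      old_value = self.value
      self.value = new_value
      def finish():
        # Restores the original value; callers stash this closure in a
        # `done` list and invoke every entry in a `finally` block.
        self.value = old_value
      return finish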
Example #5
  def get_shared_server(cls, username='******', language=settings.LANGUAGE_CODE):
    callback = lambda: None

    service_lock.acquire()

    if not SqoopServerProvider.is_running:
      # Setup
      cluster = pseudo_hdfs4.shared_cluster()

      if is_live_cluster():
        finish = ()
      else:
        LOG.info('\nStarting a Mini Sqoop. Requires "tools/jenkins/jenkins.sh" to have been run previously.\n')

        finish = (
          SERVER_URL.set_for_testing("http://%s:%s/sqoop" % (socket.getfqdn(), SqoopServerProvider.TEST_PORT)),
        )

        p = cls.start(cluster)

        def kill():
          with open(os.path.join(cluster._tmpdir, 'sqoop/sqoop.pid'), 'r') as pidfile:
            pid = pidfile.read()
            LOG.info("Killing Sqoop server (pid %s)." % pid)
            os.kill(int(pid), 9)
            p.wait()
        atexit.register(kill)

      start = time.time()
      started = False
      sleep = 0.01

      client = SqoopClient(SERVER_URL.get(), username, language)

      while not started and time.time() - start < 60.0:
        try:
          LOG.info('Check Sqoop status...')
          version = client.get_version()
          if version:
            started = True
            break
          time.sleep(sleep)
          sleep *= 2
        except Exception as e:
          LOG.info('Sqoop server not started yet: %s' % e)
          time.sleep(sleep)
          sleep *= 2

      if not started:
        service_lock.release()
        raise Exception("Sqoop server took too long to come up.")

      def shutdown():
        for f in finish:
          f()
        cluster.stop()
      callback = shutdown

      SqoopServerProvider.is_running = True
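The scrape cuts this function off after is_running is set. Example #14 unpacks two values from cls.get_shared_server(), so the missing tail presumably releases service_lock and returns a (client, callback) pair; the lines below are an assumption, not part of the scraped source:

    # Assumed continuation (not in the scraped snippet):
    service_lock.release()   # pairs with the acquire() at the top
    return client, callback  # the pair Example #14 unpacks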
Example #6
  def test_fetch_result_abbreviated(self):
    if not is_live_cluster():
      raise SkipTest

    # Create session so that session object is saved to DB for server URL lookup
    session = self.api.create_session(lang='impala')

    try:

      # Assert that abbreviated rows returned (e.g. - 1.00K) still returns actual rows
      statement = "SELECT * FROM web_logs;"
      doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
      notebook = Notebook(document=doc)
      snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

      self.client.post(reverse('notebook:fetch_result_data'),
                       {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'rows': 100, 'startOver': 'false'})

      response = self.client.post(reverse('notebook:fetch_result_size'),
                                  {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

      data = json.loads(response.content)
      assert_equal(0, data['status'], data)
      assert_true('result' in data)
      assert_true('rows' in data['result'])
      assert_equal(1000, data['result']['rows'])
    finally:
      self.api.close_session(session)
Example #7
  def test_useradmin_ldap_force_uppercase(self):
    if is_live_cluster():
      raise SkipTest('HUE-2897: Skipping because the DB may not be case sensitive')

    done = []

    # Set to nonsensical value just to force new config usage.
    # Should continue to use cached connection.
    done.append(desktop.conf.LDAP.LDAP_SERVERS.set_for_testing(get_nonsense_config()))

    try:
      # Set up LDAP tests to use a LdapTestConnection instead of an actual LDAP connection
      ldap_access.CACHED_LDAP_CONN = LdapTestConnection()

      # Test upper case
      User.objects.filter(username__iexact='Rock').delete()
      done.append(desktop.conf.LDAP.IGNORE_USERNAME_CASE.set_for_testing(False))
      done.append(desktop.conf.LDAP.FORCE_USERNAME_LOWERCASE.set_for_testing(False))
      done.append(desktop.conf.LDAP.FORCE_USERNAME_UPPERCASE.set_for_testing(True))

      import_ldap_users(ldap_access.CACHED_LDAP_CONN, 'Rock', sync_groups=False, import_by_dn=False)
      assert_true(User.objects.filter(username='******').exists())
    finally:
      for finish in done:
        finish()
Example #8
File: tests.py  Project: shobull/hue
    def setup_class(cls):

        if not is_live_cluster():
            raise SkipTest()

        cls.client = make_logged_in_client(username="******", is_superuser=False)
        cls.user = User.objects.get(username="******")
        add_to_group("test")
        grant_access("test", "test", "libzookeeper")

        # Create a ZKNode namespace
        cls.namespace = "TestWithZooKeeper"

        # Create temporary test directory and file with contents
        cls.local_directory = tempfile.mkdtemp()
        # Create subdirectory
        cls.subdir_name = "subdir"
        subdir_path = "%s/%s" % (cls.local_directory, cls.subdir_name)
        os.mkdir(subdir_path, 0755)
        # Create file
        cls.filename = "test.txt"
        file_path = "%s/%s" % (subdir_path, cls.filename)
        cls.file_contents = "This is a test"
        with open(file_path, "w+") as f:
            f.write(cls.file_contents)
Example #9
  def test_fetch_result_size_mr(self):
    if not is_live_cluster():  # Mini-cluster does not have JHS
      raise SkipTest

    # Assert that a query with no job will return no rows or size
    statement = "SELECT 'hello world';"

    settings = [
        {
            'key': 'hive.execution.engine',
            'value': 'mr'
        }
    ]
    doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0)

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_true('size' in data['result'])
    assert_equal(None, data['result']['rows'])
    assert_equal(None, data['result']['size'])

    # Assert that a query with map & reduce task returns rows
    statement = "SELECT DISTINCT code FROM sample_07;"
    doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_true('size' in data['result'])
    assert_equal(823, data['result']['rows'])
    assert_true(data['result']['size'] > 0, data['result'])

    # Assert that a query with multiple jobs returns rows
    statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
    doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_equal(23, data['result']['rows'])
    assert_true(data['result']['size'] > 0, data['result'])
Example #10
def get_shared_beeswax_server(db_name='default'):
  global _SHARED_HIVE_SERVER
  global _SHARED_HIVE_SERVER_CLOSER
  if _SHARED_HIVE_SERVER is None:

    cluster = pseudo_hdfs4.shared_cluster()

    if is_live_cluster():
      def s():
        pass
    else:
      s = _start_mini_hs2(cluster)

    start = time.time()
    started = False
    sleep = 1

    make_logged_in_client()
    user = User.objects.get(username='******')
    query_server = get_query_server_config()
    db = dbms.get(user, query_server)

    while not started and time.time() - start <= 30:
      try:
        db.open_session(user)
        started = True
        break
      except Exception as e:
        LOG.info('HiveServer2 server could not be found after: %s' % e)
        time.sleep(sleep)

    if not started:
      raise Exception("Server took too long to come up.")

    _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s
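This variant polls with a fixed one-second sleep, while Examples #41 and #43 double the sleep each round. The shared wait-until-up pattern could be factored into a helper along these lines (names are illustrative):

  import time

  def wait_until(probe, timeout=30.0, initial_sleep=1.0, backoff=2.0):
    # Poll `probe` until it stops raising or `timeout` seconds elapse.
    # `probe` is any zero-argument callable, e.g. lambda: db.open_session(user).
    start = time.time()
    sleep = initial_sleep
    while time.time() - start <= timeout:
      try:
        probe()
        return True
      except Exception:
        time.sleep(sleep)
        sleep *= backoff  # exponential backoff, as in Examples #41 and #43
    return False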
Example #11
  def test_load_data(self):
    """
    Test load data queries.
    These require Hadoop, because they ask the metastore
    about whether a table is partitioned.
    """

    if is_live_cluster():
      raise SkipTest('HUE-2902: Test is not re-entrant')

    # Check that view works
    resp = self.client.get("/metastore/table/%s/test/load" % self.db_name, follow=True)
    assert_true('Path' in resp.content)

    data_path = '%(prefix)s/tmp/foo' % {'prefix': self.cluster.fs_prefix}

    # Try the submission
    self.client.post("/metastore/table/%s/test/load" % self.db_name, {'path': data_path, 'overwrite': True}, follow=True)
    query = QueryHistory.objects.latest('id')

    assert_equal_mod_whitespace("LOAD DATA INPATH '%(data_path)s' OVERWRITE INTO TABLE `%(db)s`.`test`" % {'data_path': data_path, 'db': self.db_name}, query.query)

    resp = self.client.post("/metastore/table/%s/test/load" % self.db_name, {'path': data_path, 'overwrite': False}, follow=True)
    query = QueryHistory.objects.latest('id')
    assert_equal_mod_whitespace("LOAD DATA INPATH '%(data_path)s' INTO TABLE `%(db)s`.`test`" % {'data_path': data_path, 'db': self.db_name}, query.query)

    # Try it with partitions
    resp = self.client.post("/metastore/table/%s/test_partitions/load" % self.db_name, {'path': data_path, 'partition_0': "alpha", 'partition_1': "beta"}, follow=True)
    query = QueryHistory.objects.latest('id')
    assert_equal_mod_whitespace(query.query, "LOAD DATA INPATH '%(data_path)s' INTO TABLE `%(db)s`.`test_partitions` PARTITION (baz='alpha', boom='beta')" % {'data_path': data_path, 'db': self.db_name})
Example #12
  def test_seek_across_blocks(self):
    """Makes a file with a lot of blocks, seeks around"""
    if is_live_cluster():
      raise SkipTest('HUE-2946: Skipping because requires more memory')

    fs = self.cluster.fs
    test_file = self.prefix + "/fortest-blocks.txt"
    fs.create(test_file, replication=1, blocksize=1024**2)
    f = fs.open(test_file, "w")
    try:
      data = "abcdefghijklmnopqrstuvwxyz" * 30 * 1024**2
      f.write(data)
      f.close()

      for i in xrange(1, 10):
        f = fs.open(test_file, "r")

        for j in xrange(1, 100):
          offset = random.randint(0, len(data) - 1)
          f.seek(offset, os.SEEK_SET)
          assert_equals(data[offset:offset+50], f.read(50))
        f.close()

    finally:
      fs.remove(test_file)
Example #13
File: tests.py  Project: mapr/hue
  def test_list_tables(self):
    if not is_live_cluster():
      raise SkipTest('HUE-2910: Skipping because test is not reentrant')

    for cluster in HbaseApi(self.user).getClusters():
      resp = self.client.post('/hbase/api/getTableList/' + cluster['name'])
      content = json.loads(resp.content)
      assert_true('data' in content, content)
Example #14
  def setup_class(cls):

    if not is_live_cluster():
      raise SkipTest()

    cls.cluster = pseudo_hdfs4.shared_cluster()
    cls.client, callback = cls.get_shared_server()
    cls.shutdown = [callback]
Example #15
  def test_list_tables(self):
    if not is_live_cluster():
      raise SkipTest('HUE-2910: Skipping because test is not reentrant')

    resp = self.client.post('/hbase/api/getTableList/Cluster')
    content = json.loads(resp.content)

    assert_true('data' in content, content)
Example #16
  def setup_class(cls):

    if not is_live_cluster():
      raise SkipTest()

    cls.client = make_logged_in_client(username='******', is_superuser=False)
    cls.user = User.objects.get(username='******')
    add_to_group('test')
    grant_access("test", "test", "libzookeeper")
Example #17
  def test_oozie_status(self):
    user = getpass.getuser()

    assert_equal(get_oozie(user).get_oozie_status()['systemMode'], 'NORMAL')

    if is_live_cluster():
      assert_true(self.cluster.fs.exists('/user/oozie/share/lib'))
    else:
      assert_true(self.cluster.fs.exists('/user/%(user)s/share/lib' % {'user': user}))
Example #18
  def setup_class(cls):
    cls.client = make_logged_in_client(username='******', is_superuser=False)
    cls.user = User.objects.get(username='******')
    cls.user = rewrite_user(cls.user)
    add_to_group('test')
    grant_access("test", "test", "metadata")

    if not is_live_cluster() or not has_navigator(cls.user):
      raise SkipTest
Example #19
  def setup_class(cls):

    if not is_live_cluster():
      raise SkipTest('These tests can only run on a live cluster')

    cls.client = make_logged_in_client(username='******', is_superuser=False)
    cls.user = User.objects.get(username='******')
    add_to_group('test')
    grant_access("test", "test", "indexer")
Example #20
  def test_read_partitions(self):
    if not is_live_cluster():
      raise SkipTest

    partition_spec = "baz='baz_one',boom='boom_two'"
    response = self.client.get("/metastore/table/%s/test_partitions/partitions/%s/read" % (self.db_name, partition_spec), follow=True)
    response = self.client.get(reverse("beeswax:api_watch_query_refresh_json", kwargs={'id': response.context['query'].id}), follow=True)
    response = wait_for_query_to_finish(self.client, response, max=30.0)
    results = fetch_query_result_data(self.client, response)
    assert_true(len(results['results']) > 0, results)
Example #21
 def test_browse_partition(self):
     partition_spec = "baz='baz_one',boom='boom_two'"
     response = self.client.get(
         "/metastore/table/%s/test_partitions/partitions/%s/browse" % (self.db_name, partition_spec), follow=True
     )
     if is_live_cluster():
         path = "/user/hive/warehouse/%s.db/test_partitions/baz=baz_one/boom=boom_two" % self.db_name
     else:
         path = "/user/hive/warehouse/test_partitions/baz=baz_one/boom=boom_two"
     filebrowser_path = urllib.unquote(reverse("filebrowser.views.view", kwargs={"path": path}))
     assert_equal(response.request["PATH_INFO"], filebrowser_path)
Example #22
  def setup_class(cls):

    if not is_live_cluster():
      raise SkipTest()

    cls.client = make_logged_in_client(username='******', is_superuser=False)
    cls.user = User.objects.get(username='******')
    add_to_group('test')
    grant_access("test", "test", "libsentry")

    cls.db = SentryClient(HOSTNAME.get(), PORT.get(), 'test')
Example #23
    def setup_class(cls):

        if not is_live_cluster() or not cls.is_navigator_enabled():
            raise SkipTest

        cls.client = make_logged_in_client(username='******', is_superuser=False)
        cls.user = User.objects.get(username='******')
        add_to_group('test')
        grant_access("test", "test", "metadata")
        grant_access("test", "test", "navigator")

        cls.api = NavigatorApi()
Example #24
 def test_read_partitions(self):
   if not is_live_cluster():
     raise SkipTest
   
   partition_spec = "baz='baz_one',boom=12345"
   response = self.client.get(
     "/metastore/table/%s/test_partitions/partitions/%s/read" % (self.db_name, partition_spec), follow=True)
   response = self.client.get(
     reverse("beeswax:api_watch_query_refresh_json", kwargs={'id': response.context[0]['query'].id}), follow=True)
   response = wait_for_query_to_finish(self.client, response, max=30.0)
   results = fetch_query_result_data(self.client, response)
   assert_true(len(results['results']) > 0, results)
Example #25
  def setup_class(cls):
    if not is_live_cluster() or not has_optimizer():
      raise SkipTest

    cls.client = make_logged_in_client(username='******', is_superuser=False)
    cls.user = User.objects.get(username='******')
    cls.user = rewrite_user(cls.user)
    add_to_group('test')
    grant_access("test", "test", "metadata")
    grant_access("test", "test", "optimizer")

    cls.api = OptimizerApi()
Example #26
  def test_describe_partitioned_table_with_limit(self):
    if is_live_cluster():
      raise SkipTest('HUE-2902: Test is not re-entrant')

    # Limit to 90
    finish = BROWSE_PARTITIONED_TABLE_LIMIT.set_for_testing("90")
    try:
      response = self.client.get("/metastore/table/%s/test_partitions" % self.db_name)
      assert_true("0x%x" % 89 in response.content, response.content)
      assert_false("0x%x" % 90 in response.content, response.content)
    finally:
      finish()
Example #27
  def test_describe_partitioned_table_with_limit(self):
    if is_live_cluster():
      raise SkipTest('HUE-2902: Test is not re-entrant')

    # Limit to 90
    finish = BROWSE_PARTITIONED_TABLE_LIMIT.set_for_testing("90")
    try:
      response = self.client.get("/metastore/table/%s/test_partitions" % self.db_name)
      assert_true("0x%x" % 89 in response.content, response.content)
      assert_false("0x%x" % 90 in response.content, response.content)
    finally:
      finish()
Example #28
  def test_describe_view(self):
    if is_live_cluster():
      raise SkipTest('HUE-2902: Test is not re-entrant')

    resp = self.client.get('/metastore/table/%s/myview' % self.db_name)
    assert_equal(None, resp.context['sample'])
    assert_true(resp.context['table'].is_view)
    assert_true("View" in resp.content)
    assert_true("Drop View" in resp.content)
    # Breadcrumbs
    assert_true(self.db_name in resp.content)
    assert_true("myview" in resp.content)
Example #29
  def setup_class(cls):

    if not is_live_cluster() or not is_navigator_enabled():
      raise SkipTest

    cls.client = make_logged_in_client(username='******', is_superuser=False)
    cls.user = User.objects.get(username='******')
    add_to_group('test')
    grant_access("test", "test", "metadata")
    grant_access("test", "test", "navigator")

    cls.api = NavigatorApi()
Example #30
    def test_oozie_status(self):
        user = getpass.getuser()

        assert_equal(
            get_oozie(user).get_oozie_status()['systemMode'], 'NORMAL')

        if is_live_cluster():
            assert_true(self.cluster.fs.exists('/user/oozie/share/lib'))
        else:
            assert_true(
                self.cluster.fs.exists('/user/%(user)s/share/lib' %
                                       {'user': user}))
Example #31
  def test_describe_view(self):
    if is_live_cluster():
      raise SkipTest('HUE-2902: Test is not re-entrant')

    resp = self.client.get('/metastore/table/%s/myview' % self.db_name)
    assert_equal(None, resp.context['sample'])
    assert_true(resp.context['table'].is_view)
    assert_true("View" in resp.content)
    assert_true("Drop View" in resp.content)
    # Breadcrumbs
    assert_true(self.db_name in resp.content)
    assert_true("myview" in resp.content)
Example #32
def test_useradmin_ldap_case_sensitivity():
    if is_live_cluster():
        raise SkipTest(
            'HUE-2897: Cannot yet guarantee database is case sensitive')

    done = []
    try:
        reset_all_users()
        reset_all_groups()

        # Set up LDAP tests to use a LdapTestConnection instead of an actual LDAP connection
        ldap_access.CACHED_LDAP_CONN = LdapTestConnection()

        # Test import case sensitivity
        done.append(
            desktop.conf.LDAP.IGNORE_USERNAME_CASE.set_for_testing(True))
        import_ldap_users(ldap_access.CACHED_LDAP_CONN,
                          'Lårry',
                          sync_groups=False,
                          import_by_dn=False)
        assert_false(User.objects.filter(username='******').exists())
        assert_true(User.objects.filter(username='******').exists())

        # Test lower case
        User.objects.filter(username__iexact='Rock').delete()
        import_ldap_users(ldap_access.CACHED_LDAP_CONN,
                          'Rock',
                          sync_groups=False,
                          import_by_dn=False)
        assert_false(User.objects.filter(username='******').exists())
        assert_true(User.objects.filter(username='******').exists())

        done.append(
            desktop.conf.LDAP.FORCE_USERNAME_LOWERCASE.set_for_testing(True))

        import_ldap_users(ldap_access.CACHED_LDAP_CONN,
                          'Rock',
                          sync_groups=False,
                          import_by_dn=False)
        assert_false(User.objects.filter(username='******').exists())
        assert_true(User.objects.filter(username='******').exists())

        User.objects.filter(username='******').delete()
        import_ldap_users(ldap_access.CACHED_LDAP_CONN,
                          'Rock',
                          sync_groups=False,
                          import_by_dn=False)
        assert_false(User.objects.filter(username='******').exists())
        assert_true(User.objects.filter(username='******').exists())
    finally:
        for finish in done:
            finish()
Example #33
    def test_add_ldap_users_case_sensitivity(self):
        if is_live_cluster():
            raise SkipTest(
                'HUE-2897: Cannot yet guarantee database is case sensitive')

        done = []
        try:
            URL = reverse(add_ldap_users)

            reset_all_users()
            reset_all_groups()

            # Set up LDAP tests to use a LdapTestConnection instead of an actual LDAP connection
            ldap_access.CACHED_LDAP_CONN = LdapTestConnection()

            c = make_logged_in_client('test', is_superuser=True)

            # Test ignore case
            done.append(
                desktop.conf.LDAP.IGNORE_USERNAME_CASE.set_for_testing(True))
            User.objects.filter(username='******').delete()
            assert_false(User.objects.filter(username='******').exists())
            assert_false(User.objects.filter(username='******').exists())
            response = c.post(
                URL,
                dict(username_pattern='Moe',
                     password1='test',
                     password2='test'))
            assert_true('Location' in response, response)
            assert_true('/useradmin/users' in response['Location'], response)
            assert_false(User.objects.filter(username='******').exists())
            assert_true(User.objects.filter(username='******').exists())

            # Test lower case
            done.append(
                desktop.conf.LDAP.FORCE_USERNAME_LOWERCASE.set_for_testing(
                    True))
            User.objects.filter(username__iexact='Rock').delete()
            assert_false(User.objects.filter(username='******').exists())
            assert_false(User.objects.filter(username='******').exists())
            response = c.post(
                URL,
                dict(username_pattern='rock',
                     password1='test',
                     password2='test'))
            assert_true('Location' in response, response)
            assert_true('/useradmin/users' in response['Location'], response)
            assert_false(User.objects.filter(username='******').exists())
            assert_true(User.objects.filter(username='******').exists())
        finally:
            for finish in done:
                finish()
Example #34
    def test_fetch_result_size_spark(self):
        if not is_live_cluster() or not is_hive_on_spark():
            raise SkipTest

        # TODO: Add session cleanup here so we don't have orphan spark sessions

        # Assert that a query with no job will return no rows or size
        statement = "SELECT 'hello world';"

        settings = [{'key': 'hive.execution.engine', 'value': 'spark'}]
        doc = self.create_query_document(owner=self.user,
                                         statement=statement,
                                         settings=settings)
        notebook = Notebook(document=doc)
        snippet = self.execute_and_wait(doc, snippet_idx=0)

        response = self.client.post(reverse('notebook:fetch_result_size'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })

        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_true('result' in data)
        assert_true('rows' in data['result'])
        assert_true('size' in data['result'])
        assert_equal(None, data['result']['rows'])
        assert_equal(None, data['result']['size'])

        # Assert that a query that runs a job will return rows and size
        statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
        doc = self.create_query_document(owner=self.user,
                                         statement=statement,
                                         settings=settings)
        notebook = Notebook(document=doc)
        snippet = self.execute_and_wait(doc,
                                        snippet_idx=0,
                                        timeout=60.0,
                                        wait=2.0)

        response = self.client.post(reverse('notebook:fetch_result_size'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })

        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_true('result' in data)
        assert_true('rows' in data['result'])
        assert_true('size' in data['result'])
        assert_true(data['result']['rows'] > 0)
        assert_true(data['result']['size'] > 0)
Example #35
    def setup_class(cls):
        cls.finish = []

        if not is_live_cluster():
            raise SkipTest

        cls.client = make_logged_in_client()
        cls.user = User.objects.get(username='******')
        add_to_group('test')
        cls.db = dbms.get(cls.user, get_query_server_config(name='impala'))
        cls.DATABASE = get_db_prefix(name='impala')

        hql = """
      USE default;
      DROP TABLE IF EXISTS %(db)s.tweets;
      DROP DATABASE IF EXISTS %(db)s CASCADE;
      CREATE DATABASE %(db)s;

      USE %(db)s;
    """ % {
            'db': cls.DATABASE
        }

        resp = _make_query(cls.client,
                           hql,
                           database='default',
                           local=False,
                           server_name='impala')
        resp = wait_for_query_to_finish(cls.client, resp, max=180.0)

        content = json.loads(resp.content)
        assert_true(content['status'] == 0, resp.content)

        hql = """
      CREATE TABLE tweets (row_num INTEGER, id_str STRING, text STRING) STORED AS PARQUET;

      INSERT INTO TABLE tweets VALUES (1, "531091827395682000", "My dad looks younger than costa");
      INSERT INTO TABLE tweets VALUES (2, "531091827781550000", "There is a thin line between your partner being vengeful and you reaping the consequences of your bad actions towards your partner.");
      INSERT INTO TABLE tweets VALUES (3, "531091827768979000", "@Mustang_Sally83 and they need to get into you :))))");
      INSERT INTO TABLE tweets VALUES (4, "531091827114668000", "@RachelZJohnson thank you rach!xxx");
      INSERT INTO TABLE tweets VALUES (5, "531091827949309000", "i think @WWERollins was robbed of the IC title match this week on RAW also i wonder if he will get a rematch i hope so @WWE");
    """

        resp = _make_query(cls.client,
                           hql,
                           database=cls.DATABASE,
                           local=False,
                           server_name='impala')
        resp = wait_for_query_to_finish(cls.client, resp, max=180.0)

        content = json.loads(resp.content)
        assert_true(content['status'] == 0, resp.content)
Example #36
  def setup_class(cls):
    if not is_live_cluster():
      raise SkipTest('TestSentryWithHadoop requires a live cluster.')

    if not os.path.exists(os.path.join(SENTRY_CONF_DIR.get(), 'sentry-site.xml')):
      raise SkipTest('Could not find sentry-site.xml, skipping sentry tests')

    cls.client = make_logged_in_client(username='******', is_superuser=False)
    cls.user = User.objects.get(username='******')
    add_to_group('test')
    grant_access("test", "test", "libsentry")

    cls.config_path = os.path.join(SENTRY_CONF_DIR.get(), 'sentry-site.xml')
Example #37
 def test_browse_partition(self):
     partition_spec = "baz='baz_one',boom='boom_two'"
     response = self.client.get(
         "/metastore/table/%s/test_partitions/partitions/%s/browse" %
         (self.db_name, partition_spec),
         follow=True)
     if is_live_cluster():
         path = '/user/hive/warehouse/%s.db/test_partitions/baz=baz_one/boom=boom_two' % self.db_name
     else:
         path = '/user/hive/warehouse/test_partitions/baz=baz_one/boom=boom_two'
     filebrowser_path = urllib.unquote(
         reverse("filebrowser.views.view", kwargs={'path': path}))
     assert_equal(response.request['PATH_INFO'], filebrowser_path)
Example #38
  def setup_class(cls):

    if not is_live_cluster():
      raise SkipTest('Sentry tests require a live sentry server')

    if not os.path.exists(os.path.join(SENTRY_CONF_DIR.get(), 'sentry-site.xml')):
      raise SkipTest('Could not find sentry-site.xml, skipping sentry tests')

    cls.client = make_logged_in_client(username='******', is_superuser=False)
    cls.user = User.objects.get(username='******')
    add_to_group('test')
    grant_access("test", "test", "libsentry")

    cls.db = SentryClient(HOSTNAME.get(), PORT.get(), 'test')
Example #39
  def setup_class(cls):

    if not is_live_cluster():
      raise SkipTest()

    cls.client = make_logged_in_client(username='******', is_superuser=False)
    cls.user = User.objects.get(username='******')
    add_to_group('test')
    grant_access("test", "test", "indexer")

    resp = cls.client.post(reverse('indexer:install_examples'))
    content = json.loads(resp.content)

    assert_equal(content.get('status'), 0)
Example #40
    def setup_class(cls):

        if not is_live_cluster():
            raise SkipTest()

        cls.client = make_logged_in_client(username='******', is_superuser=False)
        cls.user = User.objects.get(username='******')
        add_to_group('test')
        grant_access("test", "test", "indexer")

        resp = cls.client.post(reverse('indexer:install_examples'))
        content = json.loads(resp.content)

        assert_equal(content.get('status'), 0)
Example #41
def get_shared_beeswax_server(db_name='default'):
    global _SHARED_HIVE_SERVER
    global _SHARED_HIVE_SERVER_CLOSER

    with _SHARED_HIVE_SERVER_LOCK:
        if _SHARED_HIVE_SERVER is None:
            cluster = pseudo_hdfs4.shared_cluster()

            if is_live_cluster():

                def s():
                    pass
            else:
                s = _start_mini_hs2(cluster)

            start = time.time()
            started = False
            sleep = 1

            make_logged_in_client()
            user = User.objects.get(username='******')
            query_server = get_query_server_config()
            db = dbms.get(user, query_server)

            while not started and time.time() - start <= 60:
                try:
                    db.open_session(user)
                except StructuredThriftTransportException as e:
                    LOG.exception('Failed to open Hive Server session')

                    # Don't loop if we had an authentication error.
                    if 'Bad status: 3' in e.message:
                        raise
                except Exception as e:
                    LOG.exception('Failed to open Hive Server session')
                else:
                    started = True
                    break

                time.sleep(sleep)
                sleep *= 2

            if not started:
                raise Exception("Server took too long to come up.")

            _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s

        return _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER
Example #42
    def setup_class(cls):

        if not is_live_cluster():
            raise SkipTest('Sentry tests require a live sentry server')

        if not os.path.exists(
                os.path.join(SENTRY_CONF_DIR.get(), 'sentry-site.xml')):
            raise SkipTest(
                'Could not find sentry-site.xml, skipping sentry tests')

        cls.client = make_logged_in_client(username='******', is_superuser=False)
        cls.user = User.objects.get(username='******')
        add_to_group('test')
        grant_access("test", "test", "libsentry")

        cls.db = SentryClient(HOSTNAME.get(), PORT.get(), 'test')
Example #43
def get_shared_beeswax_server(db_name='default'):
  global _SHARED_HIVE_SERVER
  global _SHARED_HIVE_SERVER_CLOSER

  with _SHARED_HIVE_SERVER_LOCK:
    if _SHARED_HIVE_SERVER is None:
      cluster = pseudo_hdfs4.shared_cluster()

      if is_live_cluster():
        def s():
          pass
      else:
        s = _start_mini_hs2(cluster)

      start = time.time()
      started = False
      sleep = 1

      make_logged_in_client()
      user = User.objects.get(username='******')
      query_server = get_query_server_config()
      db = dbms.get(user, query_server)

      while not started and time.time() - start <= 60:
        try:
          db.open_session(user)
        except StructuredThriftTransportException as e:
          LOG.exception('Failed to open Hive Server session')

          # Don't loop if we had an authentication error.
          if 'Bad status: 3' in e.message:
            raise
        except Exception as e:
          LOG.exception('Failed to open Hive Server session')
        else:
          started = True
          break

        time.sleep(sleep)
        sleep *= 2

      if not started:
        raise Exception("Server took too long to come up.")

      _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s
Example #44
  def test_fetch_result_size_impala(self):
    if not is_live_cluster():
      raise SkipTest

    # Create session so that session object is saved to DB for server URL lookup
    session = self.api.create_session(lang='impala')

    try:
      # Assert that a query that runs a job will return rows
      statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
      doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
      notebook = Notebook(document=doc)
      snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

      self.client.post(reverse('notebook:fetch_result_data'),
                       {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'rows': 100, 'startOver': 'false'})

      response = self.client.post(reverse('notebook:fetch_result_size'),
                                  {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

      data = json.loads(response.content)
      assert_equal(0, data['status'], data)
      assert_true('result' in data)
      assert_true('rows' in data['result'])
      assert_true('size' in data['result'])
      assert_equal(23, data['result']['rows'])
      assert_equal(None, data['result']['size'])

      # Assert that selecting all from partitioned table works
      statement = "SELECT * FROM web_logs;"
      doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
      notebook = Notebook(document=doc)
      snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

      self.client.post(reverse('notebook:fetch_result_data'),
                       {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'rows': 100, 'startOver': 'false'})

      response = self.client.post(reverse('notebook:fetch_result_size'),
                                 {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

      data = json.loads(response.content)
      assert_equal(0, data['status'], data)
      assert_true('result' in data)
      assert_true('rows' in data['result'])
      assert_equal(1000, data['result']['rows'])
    finally:
      self.api.close_session(session)
Example #45
    def test_show_tables(self):
        if is_live_cluster():
            raise SkipTest('HUE-2902: Test is not re-entrant')

        # Set max limit to 3
        resets = [HS2_GET_TABLES_MAX.set_for_testing(3)]

        try:
            hql = """
        CREATE TABLE test_show_tables_1 (a int) COMMENT 'Test for show_tables';
        CREATE TABLE test_show_tables_2 (a int) COMMENT 'Test for show_tables';
        CREATE TABLE test_show_tables_3 (a int) COMMENT 'Test for show_tables';
      """
            resp = _make_query(self.client, hql, database=self.db_name)
            resp = wait_for_query_to_finish(self.client, resp, max=30.0)

            # Table should have been created
            response = self.client.get(
                "/metastore/tables/%s?filter=show_tables" % self.db_name)
            assert_equal(200, response.status_code)
            assert_equal(len(response.context['tables']), 3)
            assert_equal(response.context['has_metadata'], True)
            assert_true('name' in response.context["tables"][0])
            assert_true('comment' in response.context["tables"][0])
            assert_true('type' in response.context["tables"][0])

            hql = """
        CREATE TABLE test_show_tables_4 (a int) COMMENT 'Test for show_tables';
        CREATE TABLE test_show_tables_5 (a int) COMMENT 'Test for show_tables';
      """
            resp = _make_query(self.client, hql, database=self.db_name)
            resp = wait_for_query_to_finish(self.client, resp, max=30.0)

            # Table should have been created
            response = self.client.get(
                "/metastore/tables/%s?filter=show_tables" % self.db_name)
            assert_equal(200, response.status_code)
            assert_equal(len(response.context['tables']), 5)
            assert_equal(response.context['has_metadata'], False)
            assert_true('name' in response.context["tables"][0])
            assert_false('comment' in response.context["tables"][0],
                         response.context["tables"])
            assert_false('type' in response.context["tables"][0])
        finally:
            for reset in resets:
                reset()
Example #46
  def test_job_single_logs(self):
    if not is_live_cluster():
      raise SkipTest

    response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s/single_logs?format=json' % (TestJobBrowserWithHadoop.hadoop_job_id))
    json_resp = json.loads(response.content)

    assert_true('logs' in json_resp)
    assert_true('Log Type: stdout' in json_resp['logs'][1])
    assert_true('Log Type: stderr' in json_resp['logs'][2])
    assert_true('Log Type: syslog' in json_resp['logs'][3])

    # Verify that syslog contains log information for a completed oozie job
    match = re.search(r"^Log Type: syslog(.+)Log Length: (?P<log_length>\d+)(.+)$", json_resp['logs'][3], re.DOTALL)
    assert_true(match and match.group('log_length'), 'Failed to parse log length from syslog')
    log_length = int(match.group('log_length'))
    assert_true(log_length > 0, 'Log Length is 0, expected content in syslog.')
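A quick standalone check of the syslog regex used above, against a synthetic log string (the log content is made up for illustration):

  import re

  sample = "Log Type: syslog\nLog Length: 4096\n<syslog content>"
  match = re.search(r"^Log Type: syslog(.+)Log Length: (?P<log_length>\d+)(.+)$", sample, re.DOTALL)
  assert match and int(match.group('log_length')) == 4096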
Example #47
  def test_drop_multi_tables(self):
    if is_live_cluster():
      raise SkipTest('HUE-2902: Test is not re-entrant')

    hql = """
      CREATE TABLE test_drop_1 (a int);
      CREATE TABLE test_drop_2 (a int);
      CREATE TABLE test_drop_3 (a int);
    """
    resp = _make_query(self.client, hql, database=self.db_name)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Drop them
    resp = self.client.get('/metastore/tables/drop/%s' % self.db_name, follow=True)
    assert_true('want to delete' in resp.content, resp.content)
    resp = self.client.post('/metastore/tables/drop/%s' % self.db_name, {u'table_selection': [u'test_drop_1', u'test_drop_2', u'test_drop_3']})
    assert_equal(resp.status_code, 302)
Example #48
  def test_drop_multi_tables(self):
    if is_live_cluster():
      raise SkipTest('HUE-2902: Test is not re-entrant')

    hql = """
      CREATE TABLE test_drop_1 (a int);
      CREATE TABLE test_drop_2 (a int);
      CREATE TABLE test_drop_3 (a int);
    """
    resp = _make_query(self.client, hql, database=self.db_name)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Drop them
    resp = self.client.get('/metastore/tables/drop/%s' % self.db_name, follow=True)
    assert_true('want to delete' in resp.content, resp.content)
    resp = self.client.post('/metastore/tables/drop/%s' % self.db_name, {u'table_selection': [u'test_drop_1', u'test_drop_2', u'test_drop_3']})
    assert_equal(resp.status_code, 302)
Example #49
  def test_end_to_end(self):
    if not is_live_cluster(): # Skipping as it requires morphline libs to be set up
      raise SkipTest()

    cluster = shared_cluster()
    fs = cluster.fs
    make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)
    user = User.objects.get(username="******")
    collection_name = "test_collection"
    indexer = MorphlineIndexer("test", fs=fs, jt=cluster.jt, solr_client=self.solr_client)
    input_loc = "/tmp/test.csv"

    # upload the test file to hdfs
    fs.create(input_loc, data=TestIndexer.simpleCSVString, overwrite=True)

    # open a filestream for the file on hdfs
    stream = fs.open(input_loc)

    # guess the format of the file
    file_type_format = indexer.guess_format({'file': {"stream": stream, "name": "test.csv"}})

    field_types = indexer.guess_field_types({"file":{"stream": stream, "name": "test.csv"}, "format": file_type_format})

    format_ = field_types.copy()
    format_['format'] = file_type_format

    # find a field name available to use for the record's uuid
    unique_field = indexer.get_unique_field(format_)
    is_unique_generated = indexer.is_unique_generated(format_)

    # generate morphline
    morphline = indexer.generate_morphline_config(collection_name, format_, unique_field)

    schema_fields = indexer.get_kept_field_list(format_['columns'])
    if is_unique_generated:
      schema_fields += [{"name": unique_field, "type": "string"}]


    # create the collection from the specified fields
    collection_manager = CollectionManagerController("test")
    if collection_manager.collection_exists(collection_name):
      collection_manager.delete_collection(collection_name, None)
    collection_manager.create_collection(collection_name, schema_fields, unique_key_field=unique_field)

    # index the file
    indexer.run_morphline(MockedRequest(user=user, fs=cluster.fs, jt=cluster.jt), collection_name, morphline, input_loc)
Example #50
    def test_basic_flow(self):
        if is_live_cluster():
            raise SkipTest('HUE-2902: Test is not re-entrant')

        # Default database should exist
        response = self.client.get("/metastore/databases")
        assert_true(self.db_name in response.context["databases"])

        # Table should have been created
        response = self.client.get("/metastore/tables/")
        assert_equal(200, response.status_code)

        # Switch databases
        response = self.client.get("/metastore/tables/%s" % self.db_name)
        assert_true('name' in response.context["tables"][0])
        assert_true("test" in response.context["table_names"])

        # Should default to "default" database
        response = self.client.get("/metastore/tables/not_there")
        assert_equal(200, response.status_code)

        # And have detail
        response = self.client.get("/metastore/table/%s/test" % self.db_name)
        assert_true("foo" in response.content)
        assert_true("SerDe Library" in response.content, response.content)

        # Remember the number of history items. Use a generic fragment 'test' to pass verification.
        history_cnt = verify_history(self.client, fragment='test')

        # Show table data.
        response = self.client.get("/metastore/table/%s/test/read" %
                                   self.db_name,
                                   follow=True)
        response = self.client.get(reverse(
            "beeswax:api_watch_query_refresh_json",
            kwargs={'id': response.context['query'].id}),
                                   follow=True)
        response = wait_for_query_to_finish(self.client, response, max=30.0)
        # Note that it may not return all rows at once. But we expect at least 10.
        results = fetch_query_result_data(self.client, response)
        assert_true(len(results['results']) > 0)
        # This should NOT go into the query history.
        assert_equal(verify_history(self.client, fragment='test'), history_cnt,
                     'Implicit queries should not be saved in the history')
Example #51
  def teardown_class(cls):
    if is_live_cluster():
      # Delete test DB and tables
      client = make_logged_in_client()
      user = User.objects.get(username='******')
      query_server = get_query_server_config()
      db = dbms.get(user, query_server)

      for db_name in [cls.db_name, '%s_other' % cls.db_name]:
        tables = db.get_tables(database=db_name)
        for table in tables:
          make_query(client, 'DROP TABLE IF EXISTS `%(db)s`.`%(table)s`' % {'db': db_name, 'table': table}, wait=True)
        make_query(client, 'DROP VIEW IF EXISTS `%(db)s`.`myview`' % {'db': db_name}, wait=True)
        make_query(client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db_name}, wait=True)

      # Check the cleanup
      databases = db.get_databases()
      assert_false(cls.db_name in databases)
      assert_false('%(db)s_other' % {'db': cls.db_name} in databases)
Example #52
    def teardown_class(cls):
        if is_live_cluster():
            # Delete test DB and tables
            query_server = get_query_server_config()
            client = make_logged_in_client()
            user = User.objects.get(username='******')

            db = dbms.get(user, query_server)

            # Kill Spark context if running
            if is_hive_on_spark() and cluster.is_yarn():
                # TODO: We should clean up the running Hive on Spark job here
                pass

            for db_name in [cls.db_name, '%s_other' % cls.db_name]:
                databases = db.get_databases()

                if db_name in databases:
                    tables = db.get_tables(database=db_name)
                    for table in tables:
                        make_query(
                            client,
                            'DROP TABLE IF EXISTS `%(db)s`.`%(table)s`' % {
                                'db': db_name,
                                'table': table
                            },
                            wait=True)
                    make_query(client,
                               'DROP VIEW IF EXISTS `%(db)s`.`myview`' %
                               {'db': db_name},
                               wait=True)
                    make_query(client,
                               'DROP DATABASE IF EXISTS %(db)s' %
                               {'db': db_name},
                               wait=True)

                    # Check the cleanup
                    databases = db.get_databases()
                    assert_false(db_name in databases)

            global _INITIALIZED
            _INITIALIZED = False
Example #53
    def test_fetch_result_size_impala(self):
        if not is_live_cluster():
            raise SkipTest

        # Create session so that session object is saved to DB for server URL lookup
        session = self.api.create_session(lang='impala')

        try:
            # Assert that a query that runs a job will return rows
            statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
            doc = self.create_query_document(owner=self.user,
                                             query_type='impala',
                                             statement=statement)
            notebook = Notebook(document=doc)
            snippet = self.execute_and_wait(doc,
                                            snippet_idx=0,
                                            timeout=60.0,
                                            wait=2.0)

            self.client.post(
                reverse('notebook:fetch_result_data'), {
                    'notebook': notebook.get_json(),
                    'snippet': json.dumps(snippet),
                    'rows': 100,
                    'startOver': 'false'
                })

            response = self.client.post(reverse('notebook:fetch_result_size'),
                                        {
                                            'notebook': notebook.get_json(),
                                            'snippet': json.dumps(snippet)
                                        })

            data = json.loads(response.content)
            assert_equal(0, data['status'], data)
            assert_true('result' in data)
            assert_true('rows' in data['result'])
            assert_true('size' in data['result'])
            assert_equal(23, data['result']['rows'])
            assert_equal(None, data['result']['size'])
        finally:
            self.api.close_session(session)
Example #54
  def test_describe_partitions(self):
    if is_live_cluster():
      raise SkipTest('HUE-2902: Test is not re-entrant')

    response = self.client.get("/metastore/table/%s/test_partitions" % self.db_name)
    assert_true("Show Partitions (2)" in response.content, response.content)

    response = self.client.get("/metastore/table/%s/test_partitions/partitions" % self.db_name, follow=True)
    assert_true("baz_one" in response.content)
    assert_true("boom_two" in response.content)
    assert_true("baz_foo" in response.content)
    assert_true("boom_bar" in response.content)
    # Breadcrumbs
    assert_true(self.db_name in response.content)
    assert_true("test_partitions" in response.content)
    assert_true("partitions" in response.content)

    # Not partitioned
    response = self.client.get("/metastore/table/%s/test/partitions" % self.db_name, follow=True)
    assert_true("is not partitioned." in response.content)
Example #55
    def test_fetch_result_abbreviated(self):
        if not is_live_cluster():
            raise SkipTest

        # Create session so that session object is saved to DB for server URL lookup
        session = self.api.create_session(lang='impala')

        try:

            # Assert that abbreviated rows returned (e.g. - 1.00K) still returns actual rows
            statement = "SELECT * FROM web_logs;"
            doc = self.create_query_document(owner=self.user,
                                             query_type='impala',
                                             statement=statement)
            notebook = Notebook(document=doc)
            snippet = self.execute_and_wait(doc,
                                            snippet_idx=0,
                                            timeout=60.0,
                                            wait=5.0)

            self.client.post(
                reverse('notebook:fetch_result_data'), {
                    'notebook': notebook.get_json(),
                    'snippet': json.dumps(snippet),
                    'rows': 100,
                    'startOver': 'false'
                })

            response = self.client.post(reverse('notebook:fetch_result_size'),
                                        {
                                            'notebook': notebook.get_json(),
                                            'snippet': json.dumps(snippet)
                                        })

            data = json.loads(response.content)
            assert_equal(0, data['status'], data)
            assert_true('result' in data)
            assert_true('rows' in data['result'])
            assert_equal(1000, data['result']['rows'])
        finally:
            self.api.close_session(session)
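
The test above guards against a display-style abbreviation such as '1.00K' leaking into the API: the endpoint must report the exact integer 1000. For illustration, a tiny parser for such abbreviated counts (a hypothetical helper, not part of Hue, shown only to make the abbreviation format concrete):

def parse_abbreviated_count(text):
    # '1.00K' -> 1000, '2.5M' -> 2500000, '42' -> 42
    suffixes = {'K': 10 ** 3, 'M': 10 ** 6, 'B': 10 ** 9}
    text = text.strip()
    if text and text[-1].upper() in suffixes:
        return int(float(text[:-1]) * suffixes[text[-1].upper()])
    return int(float(text))

assert parse_abbreviated_count('1.00K') == 1000
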
Exemplo n.º 56
0
    def setup_class(cls):

        if not is_live_cluster() or not search_enabled:
            raise SkipTest

        cls.client = make_logged_in_client(username='******', is_superuser=False)
        cls.user = User.objects.get(username='******')
        add_to_group('test')
        grant_access("test", "test", "libsolr")
        grant_access("test", "test", "search")

        cls.user.is_superuser = True  # installing examples requires a superuser
        cls.user.save()

        resp = cls.client.post(reverse('search:install_examples'))
        content = json.loads(resp.content)

        cls.user.is_superuser = False  # drop the privilege again before the tests run
        cls.user.save()

        assert_equal(content.get('status'), 0)
Exemplo n.º 57
0
  def test_submit(self):
    if is_live_cluster():
      raise SkipTest('HUE-2909: Skipping because test is not reentrant')

    script = PigScript.objects.get(id=1100713)
    script_dict = script.dict

    post_data = {
      'id': script.id,
      'name': script_dict['name'],
      'script': script_dict['script'],
      'user': script.owner,
      'parameters': json.dumps(script_dict['parameters']),
      'resources': json.dumps(script_dict['resources']),
      'hadoopProperties': json.dumps(script_dict['hadoopProperties']),
      'submissionVariables': json.dumps([{"name": "output", "value": self.cluster.fs_prefix + '/test_pig_script_submit'}]),
    }

    response = self.c.post(reverse('pig:run'), data=post_data, follow=True)
    job_id = json.loads(response.content)['id']

    self.wait_until_completion(job_id)
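
test_submit relies on a wait_until_completion helper defined elsewhere in the test class. A minimal sketch of such a polling loop, assuming a JSON status endpoint (the '/pig/watch/%s' URL and the 'workflow'/'status' fields below are illustrative assumptions, not Hue's actual API):

import json
import time

def wait_until_completion(client, job_id, timeout=300.0, step=5.0):
    # Poll an assumed status endpoint until the job reaches a terminal state
    start = time.time()
    while time.time() - start < timeout:
        response = client.get('/pig/watch/%s' % job_id)  # hypothetical endpoint
        status = json.loads(response.content).get('workflow', {}).get('status')
        if status in ('SUCCEEDED', 'KILLED', 'FAILED'):
            return status
        time.sleep(step)
    raise Exception('Job %s did not finish within %s seconds' % (job_id, timeout))
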
Exemplo n.º 58
0
def get_shared_beeswax_server(db_name='default'):
    global _SHARED_HIVE_SERVER
    global _SHARED_HIVE_SERVER_CLOSER
    if _SHARED_HIVE_SERVER is None:

        cluster = pseudo_hdfs4.shared_cluster()

        if is_live_cluster():
            # Nothing to start against a live cluster, so the closer is a no-op
            def s():
                pass
        else:
            s = _start_mini_hs2(cluster)

        start = time.time()
        started = False
        sleep = 1

        make_logged_in_client()
        user = User.objects.get(username='******')
        query_server = get_query_server_config()
        db = dbms.get(user, query_server)

        # Poll for up to 30 seconds until HiveServer2 accepts a session
        while not started and time.time() - start <= 30:
            try:
                db.open_session(user)
                started = True
            except Exception as e:
                LOG.info('HiveServer2 could not be reached yet: %s' % e)
                time.sleep(sleep)

        if not started:
            raise Exception("Server took too long to come up.")

        _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s

    return _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER
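
A typical call site (a sketch, assuming get_shared_beeswax_server returns the cached (cluster, closer) pair as above):

cluster, shutdown = get_shared_beeswax_server()
try:
    pass  # run HiveServer2 queries against `cluster` here
finally:
    shutdown()  # no-op on a live cluster; stops the mini HS2 otherwise
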
Exemplo n.º 59
0
    def test_fetch_result_size_mr(self):
        if not is_live_cluster():  # the mini-cluster has no JobHistory Server (JHS)
            raise SkipTest

        # Assert that a query with no job will return no rows or size
        statement = "SELECT 'hello world';"

        settings = [{'key': 'hive.execution.engine', 'value': 'mr'}]
        doc = self.create_query_document(owner=self.user,
                                         statement=statement,
                                         settings=settings)
        notebook = Notebook(document=doc)
        snippet = self.execute_and_wait(doc, snippet_idx=0)

        response = self.client.post(reverse('notebook:fetch_result_size'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })

        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_true('result' in data)
        assert_true('rows' in data['result'])
        assert_true('size' in data['result'])
        assert_equal(None, data['result']['rows'])
        assert_equal(None, data['result']['size'])

        # Assert that a query with map & reduce task returns rows
        statement = "SELECT DISTINCT code FROM sample_07;"
        doc = self.create_query_document(owner=self.user,
                                         statement=statement,
                                         settings=settings)
        notebook = Notebook(document=doc)
        snippet = self.execute_and_wait(doc,
                                        snippet_idx=0,
                                        timeout=60.0,
                                        wait=2.0)

        response = self.client.post(reverse('notebook:fetch_result_size'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })

        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_true('result' in data)
        assert_true('rows' in data['result'])
        assert_true(data['result']['rows'] > 0)

        # Assert that a query with multiple jobs returns rows
        statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
        doc = self.create_query_document(owner=self.user,
                                         statement=statement,
                                         settings=settings)
        notebook = Notebook(document=doc)
        snippet = self.execute_and_wait(doc,
                                        snippet_idx=0,
                                        timeout=60.0,
                                        wait=2.0)

        response = self.client.post(reverse('notebook:fetch_result_size'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })

        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_true('result' in data)
        assert_true('rows' in data['result'])
        assert_true(data['result']['rows'] > 0)
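
The same fetch_result_size POST-and-parse pair appears three times in the test above; a sketch of a factored helper, built only from calls the test already makes (reverse and json come from this module's imports):

def fetch_result_size(client, notebook, snippet):
    # POST to the notebook API and decode the JSON payload, as the test does inline
    response = client.post(reverse('notebook:fetch_result_size'), {
        'notebook': notebook.get_json(),
        'snippet': json.dumps(snippet)
    })
    return json.loads(response.content)
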
Exemplo n.º 60
0
File: tests.py Project: ronwxy/hue
    def test_failed_jobs(self):
        """
    Test jobs with genuine failure, not just killed
    """

        if is_live_cluster():
            raise SkipTest('HUE-2902: Skipping because test is not reentrant')

        # Create design that will fail because the script file isn't there
        INPUT_DIR = TestJobBrowserWithHadoop.home_dir + '/input'
        OUTPUT_DIR = TestJobBrowserWithHadoop.home_dir + '/output'
        try:
            TestJobBrowserWithHadoop.cluster.fs.mkdir(
                TestJobBrowserWithHadoop.home_dir + "/jt-test_failed_jobs")
            TestJobBrowserWithHadoop.cluster.fs.mkdir(INPUT_DIR)
            TestJobBrowserWithHadoop.cluster.fs.rmtree(OUTPUT_DIR)
        except Exception:
            LOG.exception('failed to clean up state from previous test runs')

        job_name = '%s_%s' % (TestJobBrowserWithHadoop.username,
                              'test_failed_jobs-1')
        response = TestJobBrowserWithHadoop.client.post(
            reverse('jobsub.views.new_design', kwargs={'node_type': 'mapreduce'}),
            {
                'name': [job_name],
                'description': ['description test_failed_jobs-1'],
                'args': '',
                'jar_path': '/user/hue/oozie/workspaces/lib/hadoop-examples.jar',
                'prepares': '[]',
                'archives': '[]',
                'files': '[]',
                'job_properties': [
                    '[{"name":"mapred.input.dir","value":"%s"},'
                    '{"name":"mapred.output.dir","value":"%s"},'
                    '{"name":"mapred.mapper.class","value":"org.apache.hadoop.mapred.lib.dne"},'
                    '{"name":"mapred.combiner.class","value":"org.apache.hadoop.mapred.lib.dne"},'
                    '{"name":"mapred.reducer.class","value":"org.apache.hadoop.mapred.lib.dne"}]'
                    % (INPUT_DIR, OUTPUT_DIR)
                ],
            },
            HTTP_X_REQUESTED_WITH='XMLHttpRequest',
            follow=True)

        # Submit the job
        design_dict = json.loads(response.content)
        design_id = int(design_dict['id'])
        response = TestJobBrowserWithHadoop.client.post(
            reverse('oozie:submit_workflow', args=[design_id]),
            data={
                u'form-MAX_NUM_FORMS': [u''],
                u'form-INITIAL_FORMS': [u'1'],
                u'form-0-name': [u'REDUCER_SLEEP_TIME'],
                u'form-0-value': [u'1'],
                u'form-TOTAL_FORMS': [u'1']
            },
            follow=True)
        oozie_jobid = response.context['oozie_workflow'].id
        job = OozieServerProvider.wait_until_completion(oozie_jobid)
        hadoop_job_id = get_hadoop_job_id(TestJobBrowserWithHadoop.oozie,
                                          oozie_jobid, 1)
        hadoop_job_id_short = views.get_shorter_id(hadoop_job_id)

        # Select only killed jobs (should be absent)
        # Taking advantage of the fact new jobs are at the top of the list!
        response = TestJobBrowserWithHadoop.client.get(
            '/jobbrowser/jobs/?format=json&state=killed')
        assert_false(hadoop_job_id_short in response.content)

        # Select only failed jobs (should be present)
        # Map job should succeed. Reduce job should fail.
        response = TestJobBrowserWithHadoop.client.get(
            '/jobbrowser/jobs/?format=json&state=failed')
        assert_true(hadoop_job_id_short in response.content)

        raise SkipTest  # The remaining checks are not compatible with MR2, so they never run

        # The single job view should have the failed task table
        response = TestJobBrowserWithHadoop.client.get('/jobbrowser/jobs/%s' %
                                                       (hadoop_job_id, ))
        html = response.content.lower()
        assert_true('failed task' in html, html)

        # The map task should say success (empty input)
        # Derive the map task id from the local job id (cf. the reduce task below)
        map_task_id = hadoop_job_id.replace('job', 'task') + '_m_000000'
        response = TestJobBrowserWithHadoop.client.get(
            '/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, map_task_id))
        assert_true('succeed' in response.content)
        assert_true('failed' not in response.content)

        # The reduce task should say failed
        reduce_task_id = hadoop_job_id.replace('job', 'task') + '_r_000000'
        response = TestJobBrowserWithHadoop.client.get(
            '/jobbrowser/jobs/%s/tasks/%s' % (hadoop_job_id, reduce_task_id))
        assert_true('succeed' not in response.content)
        assert_true('failed' in response.content)

        # Selecting by failed state should include the failed map
        response = TestJobBrowserWithHadoop.client.get(
            '/jobbrowser/jobs/%s/tasks?taskstate=failed' % (hadoop_job_id, ))
        assert_true('r_000000' in response.content)
        assert_true('m_000000' not in response.content)
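
The task ids above are derived mechanically from the Hadoop job id, as the replace() calls show. A small sketch of that MR1-style mapping (a hypothetical helper mirroring the test's string manipulation):

def mr_task_id(hadoop_job_id, task_type, index=0):
    # 'job_201301010000_0001' -> 'task_201301010000_0001_m_000000'
    assert task_type in ('m', 'r')
    return hadoop_job_id.replace('job', 'task') + '_%s_%06d' % (task_type, index)

assert mr_task_id('job_201301010000_0001', 'r') == 'task_201301010000_0001_r_000000'
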