Example #1
    def setUp(self):
        self.client = make_logged_in_client(username="******",
                                            groupname="default",
                                            recreate=True,
                                            is_superuser=False)
        self.client_not_me = make_logged_in_client(username="******",
                                                   groupname="default",
                                                   recreate=True,
                                                   is_superuser=False)

        self.user = User.objects.get(username="******")
        self.user_not_me = User.objects.get(username="******")

        # Beware: Monkey patch HS2API Mock API
        if not hasattr(
                notebook.connectors.hiveserver2,
                'original_HS2Api'):  # Could not monkey patch base.get_api
            notebook.connectors.hiveserver2.original_HS2Api = notebook.connectors.hiveserver2.HS2Api
        notebook.connectors.hiveserver2.HS2Api = MockedApi

        originalCluster.get_hdfs()
        self.original_fs = originalCluster.FS_CACHE["default"]
        originalCluster.FS_CACHE["default"] = MockFs()

        grant_access("test", "default", "notebook")
        grant_access("test", "default", "beeswax")
        grant_access("not_perm_user", "default", "notebook")
        grant_access("not_perm_user", "default", "beeswax")
        add_permission('test',
                       'has_adls',
                       permname='adls_access',
                       appname='filebrowser')
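
A setUp that swaps module globals should restore them afterwards. A minimal tearDown sketch, assuming only the names patched above (original_HS2Api, FS_CACHE, self.original_fs); it mirrors setUp in reverse:

    def tearDown(self):
        # Restore the real HS2Api that setUp swapped out for MockedApi
        notebook.connectors.hiveserver2.HS2Api = notebook.connectors.hiveserver2.original_HS2Api
        # Put the original filesystem back into the cluster cache
        originalCluster.FS_CACHE["default"] = self.original_fs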
Example #2
def test_update_properties():
    finish = []
    finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    try:
        properties = {
            'user.name': 'hue',
            'test.1': 'http://localhost/test?test1=test&test2=test'
        }

        final_properties = properties.copy()
        submission = Submission(None, properties=properties, oozie_id='test')
        assert_equal(properties, submission.properties)
        submission._update_properties('jtaddress', 'deployment-directory')
        assert_equal(final_properties, submission.properties)

        cluster.clear_caches()
        fs = cluster.get_hdfs()
        jt = cluster.get_next_ha_mrcluster()[1]
        final_properties = properties.copy()
        final_properties.update({
            'jobTracker': 'jtaddress',
            'nameNode': fs.fs_defaultfs
        })
        submission = Submission(None,
                                properties=properties,
                                oozie_id='test',
                                fs=fs,
                                jt=jt)
        assert_equal(properties, submission.properties)
        submission._update_properties('jtaddress', 'deployment-directory')
        assert_equal(final_properties, submission.properties)

        finish.append(
            HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
        finish.append(
            MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
        cluster.clear_caches()
        fs = cluster.get_hdfs()
        jt = cluster.get_next_ha_mrcluster()[1]
        final_properties = properties.copy()
        final_properties.update({
            'jobTracker': 'jobtracker',
            'nameNode': 'namenode'
        })
        submission = Submission(None,
                                properties=properties,
                                oozie_id='test',
                                fs=fs,
                                jt=jt)
        assert_equal(properties, submission.properties)
        submission._update_properties('jtaddress', 'deployment-directory')
        assert_equal(final_properties, submission.properties)
    finally:
        cluster.clear_caches()
        for reset in finish:
            reset()
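
The assertions above pin down what _update_properties must do: with no fs/jt the properties pass through unchanged, and with them jobTracker and nameNode are filled in, preferring a configured LOGICAL_NAME over the raw address. A hypothetical simplification of the method (the real implementation lives in Hue's Submission class; the logical_name attributes are assumptions):

    def _update_properties(self, jobtracker_addr, deployment_dir=None):
        if self.fs and self.jt:
            self.properties.update({
                'jobTracker': self.jt.logical_name or jobtracker_addr,
                'nameNode': self.fs.logical_name or self.fs.fs_defaultfs,
            })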
Example #3
  def test_update_properties(self):
    finish = []
    finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    try:
      properties = {
        'user.name': 'hue',
        'test.1': 'http://localhost/test?test1=test&test2=test',
        'nameNode': 'hdfs://curacao:8020',
        'jobTracker': 'jtaddress',
        'security_enabled': False
      }

      final_properties = properties.copy()
      submission = Submission(None, properties=properties, oozie_id='test', fs=MockFs())
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

      cluster.clear_caches()
      fs = cluster.get_hdfs()
      jt = cluster.get_next_ha_mrcluster()[1]
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jtaddress',
        'nameNode': fs.fs_defaultfs
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

      finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
      finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
      cluster.clear_caches()
      fs = cluster.get_hdfs()
      jt = cluster.get_next_ha_mrcluster()[1]
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jobtracker',
        'nameNode': 'namenode'
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)
    finally:
      cluster.clear_caches()
      for reset in finish:
        reset()
Example #4
  def handle_noargs(self, **options):
    fs = cluster.get_hdfs()
    create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
    remote_dir = REMOTE_SAMPLE_DIR.get()

    # Copy examples binaries
    for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
      local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
      remote_data_dir = fs.join(remote_dir, name)
      LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
                  'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
      fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

    # Copy sample data
    local_dir = paths.get_thirdparty_root("sample_data")
    remote_data_dir = fs.join(remote_dir, 'data')
    LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

    # Load jobs
    sample_user = install_sample_user()
    management.call_command('loaddata', 'initial_pig_examples.json', verbosity=2)
    Document.objects.sync()

    if USE_NEW_EDITOR.get():
      # Get or create sample user directories
      home_dir = Directory.objects.get_home_directory(sample_user)
      examples_dir, created = Directory.objects.get_or_create(
        parent_directory=home_dir,
        owner=sample_user,
        name=Document2.EXAMPLES_DIR)

      try:
        # Don't overwrite
        doc = Document.objects.get(object_id=1100713)
        doc2 = Document2.objects.get(owner=sample_user, name=doc.name, type='link-pigscript')
        # If document exists but has been trashed, recover from Trash
        if doc2.parent_directory != examples_dir:
          doc2.parent_directory = examples_dir
          doc2.save()
      except Document.DoesNotExist:
        LOG.warn('Sample pig script document not found.')
      except Document2.DoesNotExist:
        if doc.content_object:
          data = doc.content_object.dict
          data.update({'content_type': doc.content_type.model, 'object_id': doc.object_id})
          data = json.dumps(data)

          doc2 = Document2.objects.create(
            owner=sample_user,
            parent_directory=examples_dir,
            name=doc.name,
            type='link-pigscript',
            description=doc.description,
            data=data)
          LOG.info('Successfully installed sample link to pig script: %s' % (doc2.name,))

      # Share with default group
      examples_dir.share(sample_user, Document2Permission.READ_PERM, groups=[get_default_user_group()])
Example #5
  def handle_noargs(self, **options):
    self.user = install_sample_user()
    self.fs = cluster.get_hdfs()

    LOG.info(_("Creating sample directory '%s' in HDFS") % REMOTE_SAMPLE_DIR.get())
    create_directories(self.fs, [REMOTE_SAMPLE_DIR.get()])
    remote_dir = REMOTE_SAMPLE_DIR.get()

    # Copy examples binaries
    for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
      local_dir = self.fs.join(LOCAL_SAMPLE_DIR.get(), name)
      remote_data_dir = self.fs.join(remote_dir, name)
      LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
                  'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
      self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal, local_dir, remote_data_dir)

    # Copy sample data
    local_dir = LOCAL_SAMPLE_DATA_DIR.get()
    remote_data_dir = self.fs.join(remote_dir, 'data')
    LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal, local_dir, remote_data_dir)

    # Load jobs
    LOG.info(_("Installing examples..."))

    if ENABLE_V2.get():
      management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)

    self.install_examples()

    Document.objects.sync()
Example #6
  def handle_noargs(self, **options):
    fs = cluster.get_hdfs()
    remote_dir = create_directories(fs)

    # Copy examples binaries
    for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
      local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
      remote_data_dir = fs.join(remote_dir, name)
      LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
                  'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
      fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

    # Copy sample data
    local_dir = LOCAL_SAMPLE_DATA_DIR.get()
    remote_data_dir = fs.join(remote_dir, 'data')
    LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

    # Load jobs
    USERNAME = '******'
    try:
      sample_user = User.objects.get(username=USERNAME)
    except User.DoesNotExist:
      sample_user = User.objects.create(username=USERNAME, password='******', is_active=False, is_superuser=False, id=1100713, pk=1100713)
    management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)
Example #7
    def handle_noargs(self, **options):
        fs = cluster.get_hdfs()
        remote_dir = create_directories(fs)

        # Copy examples binaries
        for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
            local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
            remote_data_dir = fs.join(remote_dir, name)
            LOG.info(
                _('Copying examples %(local_dir)s to %(remote_data_dir)s\n') %
                {
                    'local_dir': local_dir,
                    'remote_data_dir': remote_data_dir
                })
            fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir,
                          remote_data_dir)

        # Copy sample data
        local_dir = LOCAL_SAMPLE_DATA_DIR.get()
        remote_data_dir = fs.join(remote_dir, 'data')
        LOG.info(
            _('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir,
                'remote_data_dir': remote_data_dir
            })
        fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir,
                      remote_data_dir)

        # Load jobs
        sample, created = User.objects.get_or_create(username='******')
        management.call_command('loaddata',
                                'initial_oozie_examples.json',
                                verbosity=2)
        from oozie.models import Job
        Job.objects.filter(owner__id=1100713).update(owner=sample)  # 11OOZIE
Example #8
def config_validator(user):
    # dbms is dependent on beeswax.conf (this file)
    # import in method to avoid circular dependency
    from beeswax.server import dbms

    res = []
    try:
        if 'test' not in sys.argv:  # Avoid tests hanging
            server = dbms.get(user)
            server.get_databases()
    except:
        res.append(
            (NICE_NAME,
             _("The application won't work without a running HiveServer2.")))

    try:
        from hadoop import cluster
        warehouse = beeswax.hive_site.get_metastore_warehouse_dir()
        fs = cluster.get_hdfs()
        fs.stats(warehouse)
    except Exception:
        return [(NICE_NAME,
                 _('Failed to access Hive warehouse: %s') % warehouse)]

    return res
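
config_validator returns a list of (app, message) pairs and an empty list when both checks pass, so a caller only needs to iterate. A minimal usage sketch (the user variable is assumed to be a Django User):

    for name, message in config_validator(user):
        LOG.warn('%s: %s' % (name, message))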
Example #9
    def handle_noargs(self, **options):
        fs = cluster.get_hdfs()
        create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
        remote_dir = REMOTE_SAMPLE_DIR.get()

        # Copy examples binaries
        for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
            local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
            remote_data_dir = fs.join(remote_dir, name)
            LOG.info(
                _('Copying examples %(local_dir)s to %(remote_data_dir)s\n') %
                {
                    'local_dir': local_dir,
                    'remote_data_dir': remote_data_dir
                })
            fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir,
                          remote_data_dir)

        # Copy sample data
        local_dir = paths.get_thirdparty_root("sample_data")
        remote_data_dir = fs.join(remote_dir, 'data')
        LOG.info(
            _('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir,
                'remote_data_dir': remote_data_dir
            })
        fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir,
                      remote_data_dir)

        # Load jobs
        install_sample_user()
        management.call_command('loaddata',
                                'initial_pig_examples.json',
                                verbosity=2)
        Document.objects.sync()
Example #10
  def handle_noargs(self, **options):
    fs = cluster.get_hdfs()    
    create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
    remote_dir = REMOTE_SAMPLE_DIR.get()

    # Copy examples binaries
    for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
      local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
      remote_data_dir = fs.join(remote_dir, name)
      LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
                  'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
      fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

    # Copy sample data
    local_dir = LOCAL_SAMPLE_DATA_DIR.get()
    remote_data_dir = fs.join(remote_dir, 'data')
    LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

    # Load jobs
    USERNAME = '******'
    try:
      sample_user = User.objects.get(username=USERNAME)
    except User.DoesNotExist:
      sample_user = User.objects.create(username=USERNAME, password='******', is_active=False, is_superuser=False, id=1100713, pk=1100713)
    management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)
Example #11
def config_validator(user):
  # dbms is dependent on beeswax.conf (this file)
  # import in method to avoid circular dependency
  from beeswax.server import dbms

  res = []
  try:
    if 'test' not in sys.argv: # Avoid tests hanging
      server = dbms.get(user)
      server.get_databases()
  except:
    msg = "The application won't work without a running HiveServer2."
    LOG.exception(msg)

    res.append((NICE_NAME, _(msg)))

  try:
    from hadoop import cluster
    warehouse = beeswax.hive_site.get_metastore_warehouse_dir()
    fs = cluster.get_hdfs()
    fs.stats(warehouse)
  except Exception:
    msg = 'Failed to access Hive warehouse: %s'
    LOG.exception(msg % warehouse)

    return [(NICE_NAME, _(msg) % warehouse)]

  return res
Example #12
    def process_view(self, request, view_func, view_args, view_kwargs):
        """
    Sets request.fs and request.jt on every request to point to the
    configured filesystem.
    """
        request.fs_ref = request.REQUEST.get('fs',
                                             view_kwargs.get('fs', 'default'))
        if "fs" in view_kwargs:
            del view_kwargs["fs"]

        try:
            request.fs = cluster.get_hdfs(request.fs_ref)
        except KeyError:
            raise KeyError(
                _('Cannot find HDFS called "%(fs_ref)s".') %
                {'fs_ref': request.fs_ref})

        if request.user.is_authenticated():
            if request.fs is not None:
                request.fs.setuser(request.user.username)

            request.jt = cluster.get_default_mrcluster()
            if request.jt is not None:
                request.jt.setuser(request.user.username)
        else:
            request.jt = None
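
Middleware like this is wired up through Django settings rather than called directly. A hypothetical settings entry (the dotted path is illustrative; in Hue this kind of middleware lives in desktop.middleware):

    MIDDLEWARE_CLASSES = (
        'desktop.middleware.ClusterMiddleware',
        # ... the rest of the stack
    )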
Example #13
    def process_view(self, request, view_func, view_args, view_kwargs):
        """
    Sets request.fs and request.jt on every request to point to the
    configured filesystem.
    """
        request.fs_ref = request.REQUEST.get('fs',
                                             view_kwargs.get('fs', 'default'))
        if "fs" in view_kwargs:
            del view_kwargs["fs"]

        try:
            request.fs = cluster.get_hdfs(request.fs_ref)
        except KeyError:
            raise KeyError(
                _('Cannot find HDFS called "%(fs_ref)s".') %
                {'fs_ref': request.fs_ref})

        if request.user.is_authenticated():
            if request.fs is not None:
                request.fs.setuser(request.user.username)

            request.jt = cluster.get_default_mrcluster(
            )  # Deprecated, only there for MR1
            if request.jt is not None:
                request.jt.setuser(request.user.username)
        else:
            request.jt = None
Example #14
def get_children_data(ensemble, namespace, read_only=True):
  hdfs = cluster.get_hdfs()
  if hdfs is None:
    raise PopupException(_('No [hdfs] configured in hue.ini.'))

  if hdfs.security_enabled:
    sasl_server_principal = PRINCIPAL_NAME.get()
  else:
    sasl_server_principal = None

  zk = KazooClient(hosts=ensemble, read_only=read_only, sasl_server_principal=sasl_server_principal)

  zk.start()

  children_data = []

  children = zk.get_children(namespace)

  for node in children:
    data, stat = zk.get("%s/%s" % (namespace, node))
    children_data.append(data)

  zk.stop()

  return children_data
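
A usage sketch for the helper above; the ensemble string and namespace are illustrative:

    # Print the payload stored under each child znode of /hue
    for data in get_children_data('localhost:2181', '/hue', read_only=True):
        print(data)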
Example #15
    def test_filebrowser(self):
        sys.stdout.write("Checking HDFS access\n")
        fs = cluster.get_hdfs()
        try:
            _do_newfile_save(fs, "/tmp/smoke_fb.test", "Test", "utf-8")
            fs.remove("/tmp/smoke_fb.test")
        except Exception, ex:
            sys.stderr.write("[Hdfs/WebHdfs] Exception: %s \n" % ex)
Example #16
  def handle_noargs(self, **options):
    remote_fs = cluster.get_hdfs()
    if hasattr(remote_fs, "setuser"):
      remote_fs.setuser("hue", ["supergroup"])
    logging.info("Using remote fs: %s" % str(remote_fs))

    # Copy over examples/ and script_templates/ directories
    for dirname in ("examples", "script_templates"):
      local_dir = os.path.join(jobsub.conf.LOCAL_DATA_DIR.get(), dirname)
      remote_dir = posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), dirname)
      copy_dir(local_dir, remote_fs, remote_dir)

    # Copy over sample data
    copy_dir(jobsub.conf.SAMPLE_DATA_DIR.get(),
      remote_fs,
      posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), "sample_data"))

    # Also copy over Hadoop examples and streaming jars
    local_src = hadoop.conf.HADOOP_EXAMPLES_JAR.get()
    if local_src is None:
      raise Exception('Failed to locate the Hadoop example jar')
    remote_dst = posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), "examples", "hadoop-examples.jar")
    copy_file(local_src, remote_fs, remote_dst)

    # Write out the models too
    fixture_path = os.path.join(os.path.dirname(__file__), "..", "..", "fixtures", "example_data.xml")
    examples = django.core.serializers.deserialize("xml", open(fixture_path))
    sample_user = None
    sample_job_designs = []
    for example in examples:
      if isinstance(example.object, User):
        sample_user = example
      elif isinstance(example.object, jobsub.models.JobDesign):
        sample_job_designs.append(example)
      else:
        raise Exception("Unexpected fixture type.")
    if sample_user is None:
      raise Exception("Expected sample user fixture.")
    # Create the sample user if it doesn't exist
    try:
      sample_user.object = User.objects.get(username=sample_user.object.username)
    except User.DoesNotExist:
      sample_user.object.pk = None
      sample_user.object.id = None
      sample_user.save()
    for j in sample_job_designs:
      j.object.id = None
      j.object.pk = None
      j.object.owner_id = sample_user.object.id
      j.save()

    # Upon success, write to the database
    try:
      entry = jobsub.models.CheckForSetup.objects.get(id=1)
    except jobsub.models.CheckForSetup.DoesNotExist:
      entry = jobsub.models.CheckForSetup(id=1)
    entry.setup_run = True
    entry.save()
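
Clearing pk and id before save() is the stock Django idiom for forcing an INSERT of a deserialized fixture object rather than updating an existing row, which is what lets this command re-run without clobbering existing data. The idiom in isolation (model and key are hypothetical):

    design = JobDesign.objects.get(pk=42)
    design.pk = None
    design.id = None
    design.save()  # persisted as a new row with a fresh auto id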
Example #17
  def handle_noargs(self, **options):
    remote_fs = cluster.get_hdfs()
    if hasattr(remote_fs, "setuser"):
      remote_fs.setuser(remote_fs.superuser)
    LOG.info("Using remote fs: %s" % str(remote_fs))

    # Copy over examples/ and script_templates/ directories
    for dirname in ("examples", "script_templates"):
      local_dir = os.path.join(jobsub.conf.LOCAL_DATA_DIR.get(), dirname)
      remote_dir = posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), dirname)
      copy_dir(local_dir, remote_fs, remote_dir)

    # Copy over sample data
    copy_dir(jobsub.conf.SAMPLE_DATA_DIR.get(),
      remote_fs,
      posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), "sample_data"))

    # Also copy over Hadoop examples and streaming jars
    local_src = hadoop.conf.HADOOP_EXAMPLES_JAR.get()
    if local_src is None:
      raise Exception('Failed to locate the Hadoop example jar')
    remote_dst = posixpath.join(jobsub.conf.REMOTE_DATA_DIR.get(), "examples", "hadoop-examples.jar")
    copy_file(local_src, remote_fs, remote_dst)

    # Write out the models too
    fixture_path = os.path.join(os.path.dirname(__file__), "..", "..", "fixtures", "example_data.xml")
    examples = django.core.serializers.deserialize("xml", open(fixture_path))
    sample_user = None
    sample_job_designs = []
    for example in examples:
      if isinstance(example.object, User):
        sample_user = example
      elif isinstance(example.object, jobsub.models.JobDesign):
        sample_job_designs.append(example)
      else:
        raise Exception("Unexpected fixture type.")
    if sample_user is None:
      raise Exception("Expected sample user fixture.")
    # Create the sample user if it doesn't exist
    try:
      sample_user.object = User.objects.get(username=sample_user.object.username)
    except User.DoesNotExist:
      sample_user.object.pk = None
      sample_user.object.id = None
      sample_user.save()
    for j in sample_job_designs:
      j.object.id = None
      j.object.pk = None
      j.object.owner_id = sample_user.object.id
      j.save()

    # Upon success, write to the database
    try:
      entry = jobsub.models.CheckForSetup.objects.get(id=1)
    except jobsub.models.CheckForSetup.DoesNotExist:
      entry = jobsub.models.CheckForSetup(id=1)
    entry.setup_run = True
    entry.save()
Example #18
  def setUp(self):
    self.client = make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)
    self.client_not_me = make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)

    self.user = User.objects.get(username="******")
    self.user_not_me = User.objects.get(username="******")

    # Beware: Monkey patch HS2API Mock API
    if not hasattr(notebook.connectors.hiveserver2, 'original_HS2Api'): # Could not monkey patch base.get_api
      notebook.connectors.hiveserver2.original_HS2Api = notebook.connectors.hiveserver2.HS2Api
    notebook.connectors.hiveserver2.HS2Api = MockedApi

    originalCluster.get_hdfs()
    self.original_fs = originalCluster.FS_CACHE["default"]
    originalCluster.FS_CACHE["default"] = MockFs()

    grant_access("test", "default", "notebook")
    grant_access("not_perm_user", "default", "notebook")
Example #19
    def handle(self, *args, **options):
        fs = cluster.get_hdfs()
        create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
        remote_dir = REMOTE_SAMPLE_DIR.get()
        sample_user = install_sample_user()

        # Copy examples binaries
        for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
            local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
            remote_data_dir = fs.join(remote_dir, name)
            LOG.info(
                _('Copying examples %(local_dir)s to %(remote_data_dir)s\n') %
                {
                    'local_dir': local_dir,
                    'remote_data_dir': remote_data_dir
                })
            fs.do_as_user(sample_user.username, fs.copyFromLocal, local_dir,
                          remote_data_dir)

        # Copy sample data
        local_dir = paths.get_thirdparty_root("sample_data")
        remote_data_dir = fs.join(remote_dir, 'data')
        LOG.info(
            _('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir,
                'remote_data_dir': remote_data_dir
            })
        fs.do_as_user(sample_user.username, fs.copyFromLocal, local_dir,
                      remote_data_dir)

        # Initialize doc2, whether editor script or link
        doc2 = None

        # Install editor pig script without doc1 link
        LOG.info("Using Hue 4, will install pig editor sample.")
        doc2 = self.install_pig_script(sample_user)

        if USE_NEW_EDITOR.get():
            # Get or create sample user directories
            LOG.info("Creating sample user directories.")

            home_dir = Directory.objects.get_home_directory(sample_user)
            examples_dir, created = Directory.objects.get_or_create(
                parent_directory=home_dir,
                owner=sample_user,
                name=Document2.EXAMPLES_DIR)

            # If document exists but has been trashed, recover from Trash
            if doc2 and doc2.parent_directory != examples_dir:
                doc2.parent_directory = examples_dir
                doc2.save()

            # Share with default group
            examples_dir.share(sample_user,
                               Document2Permission.READ_PERM,
                               groups=[get_default_user_group()])
Example #20
    def __init__(self):
        self.fs = cluster.get_hdfs("default")
        # Assumes /tmp exists and is 1777

        self.fs_prefix = get_fs_prefix(self.fs)
        LOG.info("Using %s as FS root" % self.fs_prefix)

        # Might need more
        self.fs.do_as_user("test", self.fs.create_home_dir, "/user/test")
        self.fs.do_as_user("hue", self.fs.create_home_dir, "/user/hue")
Example #21
    def test_end_to_end(self):
        if not is_live_cluster():
            raise SkipTest()

        fs = cluster.get_hdfs()
        collection_name = "test_collection"
        indexer = Indexer("test", fs)
        input_loc = "/tmp/test.csv"

        # upload the test file to hdfs
        fs.create(input_loc, data=IndexerTest.simpleCSVString, overwrite=True)

        # open a filestream for the file on hdfs
        stream = fs.open(input_loc)

        # guess the format of the file
        file_type_format = indexer.guess_format(
            {'file': {
                "stream": stream,
                "name": "test.csv"
            }})

        field_types = indexer.guess_field_types({
            "file": {
                "stream": stream,
                "name": "test.csv"
            },
            "format": file_type_format
        })

        format_ = field_types.copy()
        format_['format'] = file_type_format

        # find a field name available to use for the record's uuid
        unique_field = indexer.get_unique_field(format_)
        is_unique_generated = indexer.is_unique_generated(format_)

        # generate morphline
        morphline = indexer.generate_morphline_config(collection_name, format_,
                                                      unique_field)

        schema_fields = indexer.get_kept_field_list(format_['columns'])
        if is_unique_generated:
            schema_fields += [{"name": unique_field, "type": "string"}]

        # create the collection from the specified fields
        collection_manager = CollectionManagerController("test")
        if collection_manager.collection_exists(collection_name):
            collection_manager.delete_collection(collection_name, None)
        collection_manager.create_collection(collection_name,
                                             schema_fields,
                                             unique_key_field=unique_field)

        # index the file
        indexer.run_morphline(collection_name, morphline, input_loc)
Example #22
  def _upload_to_hdfs(self, django_user, local_filepath, hdfs_root_destination, filename=None):
    fs = cluster.get_hdfs()

    if filename is None:
      filename = self.name
    hdfs_destination = '%s/%s' % (hdfs_root_destination, filename)

    LOG.info('Uploading local data %s to HDFS path "%s"' % (self.name, hdfs_destination))
    fs.do_as_user(django_user, fs.copyFromLocal, local_filepath, hdfs_destination)

    return hdfs_destination
Example #23
  def __init__(self):
    self.fs = cluster.get_hdfs('default')
    # Assumes /tmp exists and is 1777
    self.jt = None # Deprecated

    self.fs_prefix = get_fs_prefix(self.fs)
    LOG.info('Using %s as FS root' % self.fs_prefix)

    # Might need more
    self.fs.do_as_user('test', self.fs.create_home_dir, '/user/test')
    self.fs.do_as_user('hue', self.fs.create_home_dir, '/user/hue')
Example #24
    def __init__(self):
        self.fs = cluster.get_hdfs('default')
        # Assumes /tmp exists and is 1777
        self.jt = None  # Deprecated

        self.fs_prefix = get_fs_prefix(self.fs)
        LOG.info('Using %s as FS root' % self.fs_prefix)

        # Might need more
        self.fs.do_as_user('test', self.fs.create_home_dir, '/user/test')
        self.fs.do_as_user('hue', self.fs.create_home_dir, '/user/hue')
Example #25
    def load(self, django_user):
        """
    Upload data to HDFS home of user then load (aka move) it into the Hive table (in the Hive metastore in HDFS).
    """
        LOAD_HQL = \
          """
      LOAD DATA INPATH
      '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
      """

        fs = cluster.get_hdfs()

        if self.app_name == 'impala':
            # Because Impala does not have impersonation on by default, we use a public destination for the upload.
            from impala.conf import IMPERSONATION_ENABLED
            if not IMPERSONATION_ENABLED.get():
                tmp_public = '/tmp/public_hue_examples'
                fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
                hdfs_root_destination = tmp_public
        else:
            hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)

        hdfs_destination = os.path.join(hdfs_root_destination, self.name)

        LOG.info('Uploading local data %s to HDFS table "%s"' %
                 (self.name, hdfs_destination))
        fs.do_as_user(django_user, fs.copyFromLocal, self._contents_file,
                      hdfs_destination)

        LOG.info('Loading data into table "%s"' % (self.name, ))
        hql = LOAD_HQL % {'tablename': self.name, 'filename': hdfs_destination}
        query = hql_query(hql)

        try:
            results = dbms.get(django_user,
                               self.query_server).execute_and_wait(query)
            if not results:
                msg = _(
                    'Error loading table %(table)s: Operation timeout.') % {
                        'table': self.name
                    }
                LOG.error(msg)
                raise InstallException(msg)
        except QueryServerException, ex:
            msg = _('Error loading table %(table)s: %(error)s.') % {
                'table': self.name,
                'error': ex
            }
            LOG.error(msg)
            raise InstallException(msg)
Example #26
  def get_configuration_statements(self):
    configuration = []

    for f in self.file_resources:
      if not urlparse.urlsplit(f['path']).scheme:
        scheme = get_hdfs().fs_defaultfs
      else:
        scheme = ''
      configuration.append(render_to_string("hql_resource.mako", dict(type=f['type'], path=f['path'], scheme=scheme)))

    for f in self.functions:
      configuration.append(render_to_string("hql_function.mako", f))

    return configuration
Example #27
  def handle_noargs(self, **options):
    self.user = install_sample_user()
    self.fs = cluster.get_hdfs()

    LOG.info(_("Creating sample directory '%s' in HDFS") % REMOTE_SAMPLE_DIR.get())
    create_directories(self.fs, [REMOTE_SAMPLE_DIR.get()])
    remote_dir = REMOTE_SAMPLE_DIR.get()

    # Copy examples binaries
    for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
      local_dir = self.fs.join(LOCAL_SAMPLE_DIR.get(), name)
      remote_data_dir = self.fs.join(remote_dir, name)
      LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
                  'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
      self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal, local_dir, remote_data_dir)

    # Copy sample data
    local_dir = LOCAL_SAMPLE_DATA_DIR.get()
    remote_data_dir = self.fs.join(remote_dir, 'data')
    LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal, local_dir, remote_data_dir)

    # Load jobs
    LOG.info(_("Installing examples..."))

    if ENABLE_V2.get():
      management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)

    # Get or create sample user directories
    home_dir = Directory.objects.get_home_directory(self.user)
    examples_dir, created = Directory.objects.get_or_create(
      parent_directory=home_dir,
      owner=self.user,
      name=Document2.EXAMPLES_DIR
    )

    # Share oozie examples with default group
    oozie_examples = Document2.objects.filter(
      type__in=['oozie-workflow2', 'oozie-coordinator2', 'oozie-bundle2'],
      owner=self.user,
      parent_directory=None
    )
    oozie_examples.update(parent_directory=examples_dir)
    examples_dir.share(self.user, Document2Permission.READ_PERM, groups=[get_default_user_group()])

    self.install_examples()

    Document.objects.sync()
Example #28
def install_sample_user():
  """
  Setup the de-activated sample user with a certain id. Do not create a user profile.
  """

  try:
    user = auth_models.User.objects.get(username=SAMPLE_USERNAME)
  except auth_models.User.DoesNotExist:
    user = auth_models.User.objects.create(username=SAMPLE_USERNAME, password='******', is_active=False, is_superuser=False, id=1100713, pk=1100713)
    LOG.info('Installed a user called "%s"' % (SAMPLE_USERNAME,))

  fs = cluster.get_hdfs()
  fs.do_as_user(SAMPLE_USERNAME, fs.create_home_dir)

  return user
Example #29
  def get_configuration_statements(self):
    configuration = []

    for f in self.file_resources:
      if not urlparse.urlsplit(f['path']).scheme:
        scheme = get_hdfs().fs_defaultfs
      else:
        scheme = ''
      configuration.append('ADD %(type)s %(scheme)s%(path)s' % {'type': f['type'], 'path': f['path'], 'scheme': scheme})

    for f in self.functions:
      configuration.append("CREATE TEMPORARY FUNCTION %(name)s AS '%(class_name)s'" %
                           {'name': f['name'], 'class_name': f['class_name']})

    return configuration
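
Given the branch above, a path without a scheme is prefixed with the cluster's default filesystem while a fully-qualified path keeps its own. Illustrative inputs and outputs (the defaultfs value is made up):

    # file_resources = [{'type': 'JAR',  'path': '/user/hue/udf.jar'},
    #                   {'type': 'FILE', 'path': 's3a://bucket/lookup.txt'}]
    # with get_hdfs().fs_defaultfs == 'hdfs://namenode:8020' this produces:
    #   ADD JAR hdfs://namenode:8020/user/hue/udf.jar
    #   ADD FILE s3a://bucket/lookup.txt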
Example #30
  def install(self, django_user):
    if has_concurrency_support() and not self.is_transactional:
      LOG.info('Skipping table %s as non transactional' % self.name)
      return
    if not (has_concurrency_support() and self.is_transactional) and not cluster.get_hdfs():
      raise PopupException('Requiring a File System to load its data')

    self.create(django_user)

    if self.partition_files:
      for partition_spec, filepath in list(self.partition_files.items()):
        self.load_partition(django_user, partition_spec, filepath, columns=self.columns)
    else:
      self.load(django_user)

    return True
Example #31
  def get_configuration_statements(self):
    configuration = []

    for f in self.file_resources:
      if not urlparse.urlsplit(f['path']).scheme:
        scheme = get_hdfs().fs_defaultfs
      else:
        scheme = ''
      configuration.append('ADD %(type)s %(scheme)s%(path)s' %
                           {'type': f['type'].upper(), 'path': f['path'], 'scheme': scheme})

    for f in self.functions:
      configuration.append("CREATE TEMPORARY FUNCTION %(name)s AS '%(class_name)s'" %
                           {'name': f['name'], 'class_name': f['class_name']})

    return configuration
Example #32
  def handle_noargs(self, **options):
    self.user = install_sample_user()
    self.fs = cluster.get_hdfs()
    self.searcher = controller.CollectionManagerController(self.user)

    LOG.info(_("Installing twitter collection"))
    path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_twitter_demo/index_data.csv'))
    self._setup_collection_from_csv({
      'name': 'twitter_demo',
      'fields': self._parse_fields(path),
      'uniqueKeyField': 'id',
      'df': 'text'
    }, path)
    LOG.info(_("Twitter collection successfully installed"))

    LOG.info(_("Installing yelp collection"))
    path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_yelp_demo/index_data.csv'))
    self._setup_collection_from_csv({
      'name': 'yelp_demo',
      'fields': self._parse_fields(path),
      'uniqueKeyField': 'id',
      'df': 'text'
    }, path)
    LOG.info(_("Yelp collection successfully installed"))

    LOG.info(_("Installing jobs collection"))
    path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_jobs_demo/index_data.csv'))
    self._setup_collection_from_csv({
      'name': 'jobs_demo',
      'fields': self._parse_fields(path),
      'uniqueKeyField': 'id',
      'df': 'description'
    }, path)
    LOG.info(_("Jobs collection successfully installed"))

    LOG.info(_("Installing logs collection"))
    path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../../../../apps/search/examples/collections/solr_configs_log_analytics_demo/index_data.csv'))
    self._setup_collection_from_csv({
      'name': 'log_analytics_demo',
      'fields': self._parse_fields(path, fieldtypes={
        'region_code': 'string',
        'referer': 'string'
      }),
      'uniqueKeyField': 'id',
      'df': 'record'
    }, path)
    LOG.info(_("Logs collection successfully installed"))
Example #33
    def get_configuration_statements(self):
        configuration = []

        for f in self.file_resources:
            if not urlparse.urlsplit(f['path']).scheme:
                scheme = get_hdfs().fs_defaultfs
            else:
                scheme = ''
            configuration.append(
                render_to_string(
                    "hql_resource.mako",
                    dict(type=f['type'], path=f['path'], scheme=scheme)))

        for f in self.functions:
            configuration.append(render_to_string("hql_function.mako", f))

        return configuration
Example #34
  def __init__(self, hosts=None, read_only=True):
    self.hosts = hosts if hosts else ENSEMBLE.get()
    self.read_only = read_only

    hdfs = cluster.get_hdfs()

    if hdfs is None:
      raise ZookeeperConfigurationException('No [hdfs] configured in hue.ini.')

    if hdfs.security_enabled:
      self.sasl_server_principal = PRINCIPAL_NAME.get()
    else:
      self.sasl_server_principal = None

    self.zk = KazooClient(hosts=self.hosts,
                          read_only=self.read_only,
                          sasl_server_principal=self.sasl_server_principal)
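
A usage sketch, assuming the __init__ above belongs to a ZooKeeper client wrapper (the ZookeeperClient name is a guess) and that callers drive the wrapped KazooClient directly:

    client = ZookeeperClient(hosts='localhost:2181', read_only=True)
    client.zk.start()
    try:
        print(client.zk.get_children('/'))
    finally:
        client.zk.stop()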
Example #35
  def handle(self, *args, **options):
    fs = cluster.get_hdfs()
    create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
    remote_dir = REMOTE_SAMPLE_DIR.get()
    sample_user = install_sample_user()

    # Copy examples binaries
    for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
      local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
      remote_data_dir = fs.join(remote_dir, name)
      LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
                  'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
      fs.do_as_user(sample_user.username, fs.copyFromLocal, local_dir, remote_data_dir)

    # Copy sample data
    local_dir = paths.get_thirdparty_root("sample_data")
    remote_data_dir = fs.join(remote_dir, 'data')
    LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(sample_user.username, fs.copyFromLocal, local_dir, remote_data_dir)

    # Initialize doc2, whether editor script or link
    doc2 = None

    # Install editor pig script without doc1 link
    LOG.info("Using Hue 4, will install pig editor sample.")
    doc2 = self.install_pig_script(sample_user)

    if USE_NEW_EDITOR.get():
      # Get or create sample user directories
      LOG.info("Creating sample user directories.")

      home_dir = Directory.objects.get_home_directory(sample_user)
      examples_dir, created = Directory.objects.get_or_create(
        parent_directory=home_dir,
        owner=sample_user,
        name=Document2.EXAMPLES_DIR)

      # If document exists but has been trashed, recover from Trash
      if doc2 and doc2.parent_directory != examples_dir:
        doc2.parent_directory = examples_dir
        doc2.save()

      # Share with default group
      examples_dir.share(sample_user, Document2Permission.READ_PERM, groups=[get_default_user_group()])
Example #36
    def handle(self, *args, **options):
        fs = cluster.get_hdfs()
        fs.setuser(fs.DEFAULT_USER)
        if not fs.exists(UDF_PATH):
            fs.mkdir(UDF_PATH, 0777)

        for f in args:
            file_name = os.path.split(f)[-1]
            path = fs.join(UDF_PATH, file_name)
            fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, f, path)
            UDF.objects.create(url=path, file_name=file_name, owner=User.objects.get(id=1))
        if not args:
            for f in fs.listdir(UDF_PATH):
                try:
                    UDF.objects.get(file_name=f)
                except UDF.DoesNotExist:
                    path = fs.join(UDF_PATH, f)
                    UDF.objects.create(url=path, file_name=f, owner=User.objects.get(id=1))
Example #37
def test_non_default_cluster():
    NON_DEFAULT_NAME = 'non_default'
    old_caches = clear_sys_caches()
    reset = (
        conf.HDFS_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
        conf.MR_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
    )
    try:
        # This is indeed the only hdfs/mr cluster
        assert_equal(1, len(cluster.get_all_hdfs()))
        assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))

        cli = make_logged_in_client()
        # That we can get to a view without errors means that the middlewares work
        cli.get('/about')
    finally:
        for old_conf in reset:
            old_conf()
        restore_sys_caches(old_caches)
Example #38
  def _get_hdfs_root_destination(self, django_user, subdir=None):
    fs = cluster.get_hdfs()

    if self.app_name == 'impala':
      # Because Impala does not have impersonation on by default, we use a public destination for the upload.
      from impala.conf import IMPERSONATION_ENABLED
      if not IMPERSONATION_ENABLED.get():
        tmp_public = '/tmp/public_hue_examples'
        if subdir:
          tmp_public += '/%s' % subdir
        fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
        hdfs_root_destination = tmp_public
    else:
      hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)
      if subdir:
        hdfs_root_destination += '/%s' % subdir
        fs.do_as_user(django_user, fs.mkdir, hdfs_root_destination, '0777')

    return hdfs_root_destination
Example #39
def test_non_default_cluster():
  NON_DEFAULT_NAME = 'non_default'
  cluster.clear_caches()
  reset = (
    conf.HDFS_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }),
    conf.MR_CLUSTERS.set_for_testing({ NON_DEFAULT_NAME: { } }),
  )
  try:
    # This is indeed the only hdfs/mr cluster
    assert_equal(1, len(cluster.get_all_hdfs()))
    assert_equal(1, len(cluster.all_mrclusters()))
    assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))
    assert_true(cluster.get_mrcluster(NON_DEFAULT_NAME))

    cli = make_logged_in_client()
    # That we can get to a view without errors means that the middlewares work
    cli.get('/about')
  finally:
    for old_conf in reset:
      old_conf()
Example #40
File: models.py Project: ymc/hue
def install_sample_user():
    """
  Setup the de-activated sample user with a certain id. Do not create a user profile.
  """

    try:
        user = auth_models.User.objects.get(username=SAMPLE_USERNAME)
    except auth_models.User.DoesNotExist:
        user = auth_models.User.objects.create(username=SAMPLE_USERNAME,
                                               password='******',
                                               is_active=False,
                                               is_superuser=False,
                                               id=1100713,
                                               pk=1100713)
        LOG.info('Installed a user called "%s"' % (SAMPLE_USERNAME, ))

    fs = cluster.get_hdfs()
    fs.do_as_user(SAMPLE_USERNAME, fs.create_home_dir)

    return user
Example #41
  def load(self, django_user):
    """
    Upload data to HDFS home of user then load (aka move) it into the Hive table (in the Hive metastore in HDFS).
    """
    LOAD_HQL = \
      """
      LOAD DATA INPATH
      '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
      """

    fs = cluster.get_hdfs()

    if self.app_name == 'impala':
      # Because Impala does not have impersonation on by default, we use a public destination for the upload.
      from impala.conf import IMPERSONATION_ENABLED
      if not IMPERSONATION_ENABLED.get():
        tmp_public = '/tmp/public_hue_examples'
        fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
        hdfs_root_destination = tmp_public
    else:
      hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)

    hdfs_destination = os.path.join(hdfs_root_destination, self.name)

    LOG.info('Uploading local data %s to HDFS table "%s"' % (self.name, hdfs_destination))
    fs.do_as_user(django_user, fs.copyFromLocal, self._contents_file, hdfs_destination)

    LOG.info('Loading data into table "%s"' % (self.name,))
    hql = LOAD_HQL % {'tablename': self.name, 'filename': hdfs_destination}
    query = hql_query(hql)

    try:
      results = dbms.get(django_user, self.query_server).execute_and_wait(query)
      if not results:
        msg = _('Error loading table %(table)s: Operation timeout.') % {'table': self.name}
        LOG.error(msg)
        raise InstallException(msg)
    except QueryServerException, ex:
      msg = _('Error loading table %(table)s: %(error)s.') % {'table': self.name, 'error': ex}
      LOG.error(msg)
      raise InstallException(msg)
Example #42
  def handle_noargs(self, **options):
    fs = cluster.get_hdfs()
    remote_dir = create_data_dir(fs)

    # Copy examples binaries
    for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
      local_dir = posixpath.join(LOCAL_SAMPLE_DIR.get(), name)
      remote_data_dir = posixpath.join(remote_dir, name)
      LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
                  'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
      copy_dir(fs, local_dir, remote_data_dir)

    # Copy sample data
    local_dir = LOCAL_SAMPLE_DATA_DIR.get()
    remote_data_dir = posixpath.join(remote_dir, 'data')
    LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    copy_dir(fs, local_dir, remote_data_dir)

    # Load jobs
    management.call_command('loaddata', 'apps/oozie/src/oozie/fixtures/initial_data.json', verbosity=2)
Example #43
    def handle(self, *args, **options):
        fs = cluster.get_hdfs()
        fs.setuser(fs.DEFAULT_USER)
        if not fs.exists(UDF_PATH):
            fs.mkdir(UDF_PATH, 0777)

        for f in args:
            file_name = os.path.split(f)[-1]
            path = fs.join(UDF_PATH, file_name)
            fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, f, path)
            UDF.objects.create(url=path,
                               file_name=file_name,
                               owner=User.objects.get(id=1))
        if not args:
            for f in fs.listdir(UDF_PATH):
                try:
                    UDF.objects.get(file_name=f)
                except UDF.DoesNotExist:
                    path = fs.join(UDF_PATH, f)
                    UDF.objects.create(url=path,
                                       file_name=f,
                                       owner=User.objects.get(id=1))
Example #44
  def _get_hdfs_root_destination(self, django_user, subdir=None):
    fs = cluster.get_hdfs()
    hdfs_root_destination = None
    can_impersonate_hdfs = False

    if self.app_name == 'impala':
      # Impala can support impersonation, so use home instead of a public destination for the upload
      from impala.conf import IMPERSONATION_ENABLED
      can_impersonate_hdfs = IMPERSONATION_ENABLED.get()

    if can_impersonate_hdfs:
      hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)
      if subdir:
        hdfs_root_destination += '/%s' % subdir
        fs.do_as_user(django_user, fs.mkdir, hdfs_root_destination, '0777')
    else:
      tmp_public = '/tmp/public_hue_examples'
      if subdir:
        tmp_public += '/%s' % subdir
      fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
      hdfs_root_destination = tmp_public

    return hdfs_root_destination
Example #45
  def handle_noargs(self, **options):
    fs = cluster.get_hdfs()
    create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
    remote_dir = REMOTE_SAMPLE_DIR.get()

    # Copy examples binaries
    for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
      local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
      remote_data_dir = fs.join(remote_dir, name)
      LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
                  'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
      fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

    # Copy sample data
    local_dir = paths.get_thirdparty_root("sample_data")
    remote_data_dir = fs.join(remote_dir, 'data')
    LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

    # Load jobs
    install_sample_user()
    management.call_command('loaddata', 'initial_pig_examples.json', verbosity=2)
    Document.objects.sync()
Example #46
  def handle_noargs(self, **options):
    fs = cluster.get_hdfs()
    sample_user = CreateSandboxUserCommand().handle_noargs()
    fs.setuser(sample_user)
    create_directories(fs, [REMOTE_SAMPLE_DIR.get()])
    remote_dir = REMOTE_SAMPLE_DIR.get()
    # Copy examples binaries
    for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
      local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
      remote_data_dir = fs.join(remote_dir, name)
      LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
                  'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
      fs.copyFromLocal(local_dir, remote_data_dir)

    # Copy sample data
    local_dir = LOCAL_SAMPLE_DATA_DIR.get()
    remote_data_dir = fs.join(remote_dir, 'data')
    LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.copyFromLocal(local_dir, remote_data_dir)

    # Load jobs

    management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)
Example #47
  def test_end_to_end(self):
    fs = cluster.get_hdfs()
    collection_name = "test_collection"
    indexer = Indexer("test", fs)
    input_loc = "/tmp/test.csv"

    # upload the test file to hdfs
    fs.create(input_loc, data=IndexerTest.simpleCSVString, overwrite=True)

    # open a filestream for the file on hdfs
    stream = fs.open(input_loc)

    # guess the format of the file
    file_type_format = indexer.guess_format({'file': {"stream": stream, "name": "test.csv"}})

    field_types = indexer.guess_field_types({"file":{"stream": stream, "name": "test.csv"}, "format": file_type_format})

    format_ = field_types.copy()
    format_['format'] = file_type_format

    # find a field name available to use for the record's uuid
    unique_field = indexer.get_uuid_name(format_)

    # generate morphline
    morphline = indexer.generate_morphline_config(collection_name, format_, unique_field)

    schema_fields = [{"name": unique_field, "type": "string"}] + indexer.get_kept_field_list(format_['columns'])

    # create the collection from the specified fields
    collection_manager = CollectionManagerController("test")
    if collection_manager.collection_exists(collection_name):
      collection_manager.delete_collection(collection_name, None)
    collection_manager.create_collection(collection_name, schema_fields, unique_key_field=unique_field)

    # index the file
    indexer.run_morphline(collection_name, morphline, input_loc)
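
Condensed for readability, the same pipeline as one function; this is a sketch using only the Indexer and CollectionManagerController calls visible in the test above, not an official helper:

def index_csv_sketch(fs, indexer, manager, collection_name, hdfs_path, file_name):
  stream = fs.open(hdfs_path)

  # 1. Guess the file format, then the field types, from the open stream
  file_format = indexer.guess_format({'file': {'stream': stream, 'name': file_name}})
  field_types = indexer.guess_field_types({'file': {'stream': stream, 'name': file_name}, 'format': file_format})

  format_ = field_types.copy()
  format_['format'] = file_format

  # 2. Pick a free field name for the record uuid and generate the morphline
  unique_field = indexer.get_uuid_name(format_)
  morphline = indexer.generate_morphline_config(collection_name, format_, unique_field)

  # 3. (Re)create the collection, then run the morphline to index the file
  schema_fields = [{'name': unique_field, 'type': 'string'}] + indexer.get_kept_field_list(format_['columns'])
  if manager.collection_exists(collection_name):
    manager.delete_collection(collection_name, None)
  manager.create_collection(collection_name, schema_fields, unique_key_field=unique_field)
  indexer.run_morphline(collection_name, morphline, hdfs_path)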
Example #50
  def handle_noargs(self, **options):
    fs = cluster.get_hdfs()
    remote_dir = create_directories(fs)

    # Copy examples binaries
    for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
      local_dir = fs.join(LOCAL_SAMPLE_DIR.get(), name)
      remote_data_dir = fs.join(remote_dir, name)
      LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
                  'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
      fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

    # Copy sample data
    local_dir = LOCAL_SAMPLE_DATA_DIR.get()
    remote_data_dir = fs.join(remote_dir, 'data')
    LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    fs.do_as_user(fs.DEFAULT_USER, fs.copyFromLocal, local_dir, remote_data_dir)

    # Load jobs
    sample, created = User.objects.get_or_create(username='******')
    management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)
    from oozie.models import Job
    Job.objects.filter(owner__id=1100713).update(owner=sample)  # 1100713 is the sample user id ("11OOZIE" mnemonic)
Example #51
  def handle_noargs(self, **options):
    self.user = install_sample_user()
    self.fs = cluster.get_hdfs()

    LOG.info(_("Creating sample directory '%s' in HDFS") % REMOTE_SAMPLE_DIR.get())
    create_directories(self.fs, [REMOTE_SAMPLE_DIR.get()])
    remote_dir = REMOTE_SAMPLE_DIR.get()

    # Copy examples binaries
    for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
      local_dir = self.fs.join(LOCAL_SAMPLE_DIR.get(), name)
      remote_data_dir = self.fs.join(remote_dir, name)
      LOG.info(_('Copying examples %(local_dir)s to %(remote_data_dir)s\n') % {
                  'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
      self.fs.do_as_user(self.user.username, self.fs.copyFromLocal, local_dir, remote_data_dir)

    # Copy sample data
    local_dir = LOCAL_SAMPLE_DATA_DIR.get()
    remote_data_dir = self.fs.join(remote_dir, 'data')
    LOG.info(_('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir, 'remote_data_dir': remote_data_dir})
    self.fs.do_as_user(self.user.username, self.fs.copyFromLocal, local_dir, remote_data_dir)

    # Get or create sample user directories
    home_dir = Directory.objects.get_home_directory(self.user)
    examples_dir, created = Directory.objects.get_or_create(
      parent_directory=home_dir,
      owner=self.user,
      name=Document2.EXAMPLES_DIR
    )

    # Load jobs
    LOG.info(_("Installing examples..."))

    if ENABLE_V2.get():
      management.call_command('loaddata', 'initial_oozie_examples.json', verbosity=2)

    if IS_HUE_4.get():
      # Install editor oozie examples without doc1 link
      LOG.info("Using Hue 4, will install oozie editor samples.")

      example_jobs = []
      example_jobs.append(self._install_mapreduce_example())
      example_jobs.append(self._install_java_example())
      example_jobs.append(self._install_spark_example())
      example_jobs.append(self._install_pyspark_example())

      # If documents exist but have been trashed, recover from Trash
      for doc in example_jobs:
        if doc is not None and doc.parent_directory != examples_dir:
          doc.parent_directory = examples_dir
          doc.save()

    elif USE_NEW_EDITOR.get():
      # Install as link-workflow doc2 to old Job Designs
      docs = Document.objects.get_docs(self.user, Workflow).filter(owner=self.user)
      for doc in docs:
        if doc.content_object:
          data = doc.content_object.data_dict
          data.update({'content_type': doc.content_type.model, 'object_id': doc.object_id})
          data = json.dumps(data)

          # Don't overwrite
          doc2, created = Document2.objects.get_or_create(
            owner=self.user,
            parent_directory=examples_dir,
            name=doc.name,
            type='link-workflow',
            description=doc.description,
            data=data
          )

          LOG.info('Successfully installed sample link to jobsub: %s' % (doc2.name,))

    # Share oozie examples with default group
    oozie_examples = Document2.objects.filter(
      type__in=['oozie-workflow2', 'oozie-coordinator2', 'oozie-bundle2'],
      owner=self.user,
      parent_directory=None
    )
    oozie_examples.update(parent_directory=examples_dir)
    examples_dir.share(self.user, Document2Permission.READ_PERM, groups=[get_default_user_group()])

    if not IS_HUE_4.get():
      self.install_examples()
      Document.objects.sync()
Example #52
    def test_update_properties(self):
        finish = []
        finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
        finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
        finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
        finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
        try:
            properties = {
                'user.name': 'hue',
                'test.1': 'http://localhost/test?test1=test&test2=test',
                'nameNode': 'hdfs://curacao:8020',
                'jobTracker': 'jtaddress',
                'security_enabled': False
            }

            final_properties = properties.copy()
            submission = Submission(None,
                                    properties=properties,
                                    oozie_id='test',
                                    fs=MockFs())
            assert_equal(properties, submission.properties)
            submission._update_properties('jtaddress', 'deployment-directory')
            assert_equal(final_properties, submission.properties)

            clear_sys_caches()
            fs = cluster.get_hdfs()
            final_properties = properties.copy()
            final_properties.update({
                'jobTracker': 'jtaddress',
                'nameNode': fs.fs_defaultfs
            })
            submission = Submission(None,
                                    properties=properties,
                                    oozie_id='test',
                                    fs=fs,
                                    jt=None)
            assert_equal(properties, submission.properties)
            submission._update_properties('jtaddress', 'deployment-directory')
            assert_equal(final_properties, submission.properties)

            finish.append(
                HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing(
                    'namenode'))
            finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing(
                'jobtracker'))
            clear_sys_caches()
            fs = cluster.get_hdfs()
            final_properties = properties.copy()
            final_properties.update({
                'jobTracker': 'jobtracker',
                'nameNode': 'namenode'
            })
            submission = Submission(None,
                                    properties=properties,
                                    oozie_id='test',
                                    fs=fs,
                                    jt=None)
            assert_equal(properties, submission.properties)
        finally:
            clear_sys_caches()
            for reset in finish:
                reset()
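
Both versions of test_update_properties rely on the same teardown idiom: set_for_testing() swaps in a value and hands back a closure that restores the previous one, and the finally block replays those closures. A runnable toy model of that contract (the Property class here is a stand-in, not Hue's actual config implementation):

class Property(object):
  """Toy stand-in for config properties like MR_CLUSTERS['default'].SUBMIT_TO."""
  def __init__(self, value):
    self.value = value

  def get(self):
    return self.value

  def set_for_testing(self, value):
    old, self.value = self.value, value
    def reset():
      self.value = old
    return reset

SUBMIT_TO = Property(False)

finish = []
finish.append(SUBMIT_TO.set_for_testing(True))
try:
  assert SUBMIT_TO.get() is True   # the test body sees the override
finally:
  for reset in finish:
    reset()                        # restored even if the test fails
assert SUBMIT_TO.get() is False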
Example #53
def install_sample_user():
    """
    Set up the de-activated sample user with a certain id. Do not create a user profile.
    """
    # Moved to avoid circular import with is_admin
    from desktop.models import SAMPLE_USER_ID, SAMPLE_USER_INSTALL
    user = None

    try:
        if User.objects.filter(id=SAMPLE_USER_ID).exists():
            user = User.objects.get(id=SAMPLE_USER_ID)
            LOG.info('Sample user found with username "%s" and User ID: %s' %
                     (user.username, user.id))
        elif User.objects.filter(username=SAMPLE_USER_INSTALL).exists():
            user = User.objects.get(username=SAMPLE_USER_INSTALL)
            LOG.info('Sample user found: %s' % user.username)
        else:
            user, created = User.objects.get_or_create(
                username=SAMPLE_USER_INSTALL,
                password='******',
                is_active=False,
                is_superuser=False,
                id=SAMPLE_USER_ID,
                pk=SAMPLE_USER_ID)

            if created:
                LOG.info('Installed a user called "%s"' % SAMPLE_USER_INSTALL)

        if user.username != SAMPLE_USER_INSTALL:
            LOG.warn(
                'Sample user does not have username "%s", will attempt to modify the username.'
                % SAMPLE_USER_INSTALL)
            with transaction.atomic():
                user = User.objects.get(id=SAMPLE_USER_ID)
                user.username = SAMPLE_USER_INSTALL
                user.save()
    except Exception as ex:
        LOG.exception('Failed to get or create sample user')

    # If sample user doesn't belong to default group, add to default group
    default_group = get_default_user_group()
    if user is not None and default_group is not None and default_group not in user.groups.all():
        user.groups.add(default_group)
        user.save()

    fs = cluster.get_hdfs()
    # If home directory doesn't exist for sample user, create it
    try:
        if not fs.do_as_user(SAMPLE_USER_INSTALL, fs.get_home_dir):
            fs.do_as_user(SAMPLE_USER_INSTALL, fs.create_home_dir)
            LOG.info('Created home directory for user: %s' %
                     SAMPLE_USER_INSTALL)
        else:
            LOG.info('Home directory already exists for user: %s' %
                     SAMPLE_USER_INSTALL)
    except Exception as ex:
        LOG.exception('Failed to create home directory for user %s: %s' %
                      (SAMPLE_USER_INSTALL, str(ex)))

    return user
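
The fs.do_as_user(username, fn, *args) idiom that recurs throughout these examples runs a single filesystem call while impersonating the given user. A runnable toy model of that contract (the setuser/restore mechanics are an assumption inferred from how the snippets call it, not the real HDFS client internals):

class SketchFs(object):
  def __init__(self):
    self.user = 'hue'

  def setuser(self, user):
    self.user = user

  def do_as_user(self, username, fn, *args, **kwargs):
    previous = self.user
    self.setuser(username)
    try:
      return fn(*args, **kwargs)   # the call observes the impersonated user
    finally:
      self.setuser(previous)       # identity restored even on error

  def get_home_dir(self):
    return '/user/%s' % self.user

fs = SketchFs()
print(fs.do_as_user('sample', fs.get_home_dir))  # /user/sample
print(fs.user)                                   # hue (restored)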
Example #54
    def handle_noargs(self, **options):
        self.user = install_sample_user()
        self.fs = cluster.get_hdfs()

        LOG.info(
            _("Creating sample directory '%s' in HDFS") %
            REMOTE_SAMPLE_DIR.get())
        create_directories(self.fs, [REMOTE_SAMPLE_DIR.get()])
        remote_dir = REMOTE_SAMPLE_DIR.get()

        # Copy examples binaries
        for name in os.listdir(LOCAL_SAMPLE_DIR.get()):
            local_dir = self.fs.join(LOCAL_SAMPLE_DIR.get(), name)
            remote_data_dir = self.fs.join(remote_dir, name)
            LOG.info(
                _('Copying examples %(local_dir)s to %(remote_data_dir)s\n') %
                {
                    'local_dir': local_dir,
                    'remote_data_dir': remote_data_dir
                })
            self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal,
                               local_dir, remote_data_dir)

        # Copy sample data
        local_dir = LOCAL_SAMPLE_DATA_DIR.get()
        remote_data_dir = self.fs.join(remote_dir, 'data')
        LOG.info(
            _('Copying data %(local_dir)s to %(remote_data_dir)s\n') % {
                'local_dir': local_dir,
                'remote_data_dir': remote_data_dir
            })
        self.fs.do_as_user(self.fs.DEFAULT_USER, self.fs.copyFromLocal,
                           local_dir, remote_data_dir)

        # Load jobs
        LOG.info(_("Installing examples..."))

        if ENABLE_V2.get():
            management.call_command('loaddata',
                                    'initial_oozie_examples.json',
                                    verbosity=2)

        # Get or create sample user directories
        home_dir = Directory.objects.get_home_directory(self.user)
        examples_dir, created = Directory.objects.get_or_create(
            parent_directory=home_dir,
            owner=self.user,
            name=Document2.EXAMPLES_DIR)

        # Share oozie examples with default group
        oozie_examples = Document2.objects.filter(
            type__in=['oozie-workflow2', 'oozie-coordinator2', 'oozie-bundle2'],
            owner=self.user,
            parent_directory=None)
        oozie_examples.update(parent_directory=examples_dir)
        examples_dir.share(self.user,
                           Document2Permission.READ_PERM,
                           groups=[get_default_user_group()])

        self.install_examples()

        Document.objects.sync()
Example #55
File: models.py Project: QLGu/hue
    LOG.info("HuePermissions: %d added, %d updated, %d up to date, %d stale" %
           (len(added),
            updated,
            uptodate,
            available - len(added) - updated - uptodate))

models.signals.post_syncdb.connect(update_app_permissions)
models.signals.post_syncdb.connect(get_default_user_group)


def install_sample_user():
  """
  Set up the de-activated sample user with a certain id. Do not create a user profile.
  """

  try:
    user = auth_models.User.objects.get(username=SAMPLE_USERNAME)
  except auth_models.User.DoesNotExist:
    try:
      user = auth_models.User.objects.create(username=SAMPLE_USERNAME, password='******', is_active=False, is_superuser=False, id=1100713, pk=1100713)
      LOG.info('Installed a user called "%s"' % (SAMPLE_USERNAME,))
    except Exception, e:
      LOG.info('Sample user race condition: %s' % e)
      user = auth_models.User.objects.get(username=SAMPLE_USERNAME)
      LOG.info('Sample user race condition, got: %s' % user)

  fs = cluster.get_hdfs()
  fs.do_as_user(SAMPLE_USERNAME, fs.create_home_dir)

  return user
Example #56
def install_sample_user(django_user=None):
    """
    Set up the de-activated sample user with a certain id. Do not create a user profile.
    """
    from desktop.models import SAMPLE_USER_ID, get_sample_user_install
    from hadoop import cluster

    user = None
    django_username = get_sample_user_install(django_user)

    if ENABLE_ORGANIZATIONS.get():
        lookup = {'email': django_username}
        django_username_short = django_user.username_short
    else:
        lookup = {'username': django_username}
        django_username_short = django_username

    try:
        if User.objects.filter(
                id=SAMPLE_USER_ID).exists() and not ENABLE_ORGANIZATIONS.get():
            user = User.objects.get(id=SAMPLE_USER_ID)
            LOG.info('Sample user found with username "%s" and User ID: %s' %
                     (user.username, user.id))
        elif User.objects.filter(**lookup).exists():
            user = User.objects.get(**lookup)
            LOG.info('Sample user found: %s' % lookup)
        else:
            user_attributes = lookup.copy()
            if ENABLE_ORGANIZATIONS.get():
                user_attributes['organization'] = get_organization(
                    email=django_username)
            else:
                user_attributes['id'] = SAMPLE_USER_ID

            user_attributes.update({
                'password': '******',
                'is_active': False,
                'is_superuser': False,
            })
            user, created = User.objects.get_or_create(**user_attributes)

            if created:
                LOG.info('Installed a user "%s"' % lookup)

        if user.username != django_username and not ENABLE_ORGANIZATIONS.get():
            LOG.warn(
                'Sample user does not have username "%s", will attempt to modify the username.'
                % django_username)
            with transaction.atomic():
                user = User.objects.get(id=SAMPLE_USER_ID)
                user.username = django_username
                user.save()
    except Exception:
        LOG.exception('Failed to get or create sample user')

    # If sample user doesn't belong to default group, add to default group
    default_group = get_default_user_group(user=user)
    if user is not None and default_group is not None and default_group not in user.groups.all():
        user.groups.add(default_group)
        user.save()

    # If home directory doesn't exist for sample user, create it
    fs = cluster.get_hdfs()
    try:
        if not fs:
            LOG.info(
                'No fs configured, skipping home directory creation for user: %s'
                % django_username_short)
        elif not fs.do_as_user(django_username_short, fs.get_home_dir):
            fs.do_as_user(django_username_short, fs.create_home_dir)
            LOG.info('Created home directory for user: %s' %
                     django_username_short)
        else:
            LOG.info('Home directory already exists for user: %s' %
                     django_username_short)
    except Exception as ex:
        LOG.exception('Failed to create home directory for user %s: %s' %
                      (django_username, str(ex)))

    return user
Example #57
    res = []
    try:
        try:
            if 'test' not in sys.argv:  # Avoid tests hanging
                server = dbms.get(user)
                server.get_databases()
        except StructuredThriftTransportException, e:
            if 'Error validating the login' in str(e):
                msg = 'Failed to authenticate to HiveServer2, check authentication configurations.'
                LOG.exception(msg)
                res.append((NICE_NAME, _(msg)))
            else:
                raise  # re-raise, preserving the original traceback
    except Exception, e:
        msg = "The application won't work without a running HiveServer2."
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))

    from hadoop import cluster
    # Resolve the warehouse path before the try so the except handler can reference it
    warehouse = beeswax.hive_site.get_metastore_warehouse_dir()
    try:
        fs = cluster.get_hdfs()
        fs.stats(warehouse)
    except Exception:
        msg = 'Failed to access Hive warehouse: %s'
        LOG.exception(msg % warehouse)

        return [(NICE_NAME, _(msg) % warehouse)]

    return res
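
The fragment above implies the config-check contract used by these validators: return a list of (app nice name, error message) pairs, empty when everything is healthy. A minimal runnable sketch of that shape (NICE_NAME and the probe are placeholders, not Hue's actual checks):

NICE_NAME = 'Hive'

def config_validator_sketch(checks):
  """Run each named probe; collect (NICE_NAME, message) pairs for failures."""
  res = []
  for name, probe in checks:
    try:
      probe()
    except Exception as e:
      res.append((NICE_NAME, '%s failed: %s' % (name, e)))
  return res

def failing_probe():
  raise RuntimeError('connection refused')

print(config_validator_sketch([('HiveServer2 probe', failing_probe)]))
# [('Hive', 'HiveServer2 probe failed: connection refused')]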