Example #1
  def listdir(self, path, glob=None):
    """
    listdir(path, glob=None) -> [ entry names ]

    Get directory entry names without stats.
    """
    dirents = self.listdir_stats(path, glob)
    return [Hdfs.basename(x.path) for x in dirents]
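
A minimal usage sketch (assumed, not part of the source): fs stands for any client object exposing this listdir()/listdir_stats() pair, and the path and glob pattern are illustrative.

# Hypothetical usage; fs is an assumed HDFS-client instance.
names = fs.listdir('/user/hue/data')               # every entry name in the directory
logs = fs.listdir('/user/hue/data', glob='*.log')  # only names matching the glob
for name in logs:
    print(name)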
Example #2
    # Excerpt of a method on Hue's Envelope indexer class; it relies on
    # module-level imports (os, make_notebook, Hdfs, CONFIG_JARS_LIBS_PATH,
    # DISABLE_HUE_3, and the gettext alias _) not shown in this snippet.
    def run(self,
            request,
            collection_name,
            envelope,
            input_path,
            start_time=None,
            lib_path=None):
        # Upload the Envelope config into a fresh HDFS workspace directory.
        workspace_path = self._upload_workspace(envelope)
        if lib_path is None:
            # Fall back to the configured default location of the Envelope jar.
            lib_path = CONFIG_JARS_LIBS_PATH.get()

        # Build a notebook-backed task that will carry the indexing job.
        task = make_notebook(
            name=_('Indexing into %s') % collection_name,
            editor_type='notebook',
            #on_success_url=reverse('search:browse', kwargs={'name': collection_name}),
            #pub_sub_url='assist.collections.refresh',
            is_task=True,
            is_notebook=True,
            last_executed=start_time)

        # The trailing "or True" makes this branch unconditional: the shell-based
        # pipeline below always runs, and the Spark snippet in the else branch is
        # effectively dead code.
        if not DISABLE_HUE_3.config.default_value or True:  # CDH5
            # Wrapper script that sets up the classpath and submits the Envelope
            # jar with its generated config. Note the first SPARK_DIST_CLASSPATH
            # export is immediately overwritten by the second.
            shell_command_name = "pipeline.sh"
            shell_command = """#!/bin/bash

export SPARK_DIST_CLASSPATH=`hadoop classpath`
export SPARK_DIST_CLASSPATH=/etc/hive/conf:`hadoop classpath`
export JAVA_HOME=/usr/java/jdk1.8.0_162

SPARK_KAFKA_VERSION=0.10 spark2-submit envelope.jar envelope.conf"""
            # Write the wrapper script into the HDFS workspace as the job user.
            hdfs_shell_cmd_path = os.path.join(workspace_path,
                                               shell_command_name)
            self.fs.do_as_user(self.username,
                               self.fs.create,
                               hdfs_shell_cmd_path,
                               data=shell_command)
            # Attach the script and its dependencies (config and jar) to the task.
            task.add_shell_snippet(shell_command=shell_command_name,
                                   files=[{
                                       u'value':
                                       u'%s/envelope.conf' % workspace_path
                                   }, {
                                       u'value': hdfs_shell_cmd_path
                                   }, {
                                       u'value': lib_path
                                   }])
        else:
            # Legacy path: submit Envelope directly as a Spark snippet.
            task.add_spark_snippet(
                clazz='com.cloudera.labs.envelope.EnvelopeMain',
                jars=Hdfs.basename(lib_path),
                arguments=[u'envelope.conf'],
                files=[
                    {
                        u'path': u'%s/envelope.conf' % workspace_path,
                        u'type': u'file'
                    },
                    {
                        u'path': lib_path,
                        u'type': u'file'
                    },
                ])

        # Submit the task for asynchronous (batch) execution.
        return task.execute(request, batch=True)
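
A hedged invocation sketch, assuming this method lives on Hue's EnvelopeIndexer class; the constructor arguments, paths, and collection name below are illustrative assumptions, not taken from the source.

import time

# Hypothetical call site; the EnvelopeIndexer constructor shown is an assumption.
indexer = EnvelopeIndexer(username=request.user.username, fs=request.fs)
handle = indexer.run(
    request,
    collection_name='web_logs',
    envelope=envelope_config,                # generated Envelope config text
    input_path='/user/hue/data/input',       # illustrative HDFS input path
    start_time=time.time(),
    lib_path='/user/hue/libs/envelope.jar',  # overrides CONFIG_JARS_LIBS_PATH
)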